From f6c059f962f86aa50a8217048a320f40d2ce3ec9 Mon Sep 17 00:00:00 2001 From: Balakrishnan Unnithan Date: Tue, 4 Feb 2025 21:59:13 +0530 Subject: [PATCH 01/62] tlb refactor. tlb variables are moved into a struct for clean and readable code. Signed-off-by: Balakrishnan Unnithan --- src/x86/api.h | 63 +++++++--------------- src/x86/cache/descriptor.c | 106 ++++++++++++++++--------------------- src/x86/cache/init.c | 38 ++----------- src/x86/init.c | 22 +------- 4 files changed, 74 insertions(+), 155 deletions(-) diff --git a/src/x86/api.h b/src/x86/api.h index 1331ed04..3d276347 100644 --- a/src/x86/api.h +++ b/src/x86/api.h @@ -33,6 +33,22 @@ struct cpuinfo_x86_caches { uint32_t prefetch_size; }; +struct cpuinfo_x86_tlbs { + struct cpuinfo_tlb itlb_4KB; + struct cpuinfo_tlb itlb_2MB; + struct cpuinfo_tlb itlb_4MB; + struct cpuinfo_tlb dtlb0_4KB; + struct cpuinfo_tlb dtlb0_2MB; + struct cpuinfo_tlb dtlb0_4MB; + struct cpuinfo_tlb dtlb_4KB; + struct cpuinfo_tlb dtlb_2MB; + struct cpuinfo_tlb dtlb_4MB; + struct cpuinfo_tlb dtlb_1GB; + struct cpuinfo_tlb stlb2_4KB; + struct cpuinfo_tlb stlb2_2MB; + struct cpuinfo_tlb stlb2_1GB; +}; + struct cpuinfo_x86_model_info { uint32_t model; uint32_t family; @@ -61,21 +77,7 @@ struct cpuinfo_x86_processor { int linux_id; #endif struct cpuinfo_x86_caches cache; - struct { - struct cpuinfo_tlb itlb_4KB; - struct cpuinfo_tlb itlb_2MB; - struct cpuinfo_tlb itlb_4MB; - struct cpuinfo_tlb dtlb0_4KB; - struct cpuinfo_tlb dtlb0_2MB; - struct cpuinfo_tlb dtlb0_4MB; - struct cpuinfo_tlb dtlb_4KB; - struct cpuinfo_tlb dtlb_2MB; - struct cpuinfo_tlb dtlb_4MB; - struct cpuinfo_tlb dtlb_1GB; - struct cpuinfo_tlb stlb2_4KB; - struct cpuinfo_tlb stlb2_2MB; - struct cpuinfo_tlb stlb2_1GB; - } tlb; + struct cpuinfo_x86_tlbs tlb; struct cpuinfo_x86_topology topology; char brand_string[CPUINFO_PACKAGE_NAME_MAX]; }; @@ -109,40 +111,15 @@ CPUINFO_INTERNAL void cpuinfo_x86_detect_cache( enum cpuinfo_vendor vendor, const struct cpuinfo_x86_model_info* 
model_info, struct cpuinfo_x86_caches* cache, - struct cpuinfo_tlb* itlb_4KB, - struct cpuinfo_tlb* itlb_2MB, - struct cpuinfo_tlb* itlb_4MB, - struct cpuinfo_tlb* dtlb0_4KB, - struct cpuinfo_tlb* dtlb0_2MB, - struct cpuinfo_tlb* dtlb0_4MB, - struct cpuinfo_tlb* dtlb_4KB, - struct cpuinfo_tlb* dtlb_2MB, - struct cpuinfo_tlb* dtlb_4MB, - struct cpuinfo_tlb* dtlb_1GB, - struct cpuinfo_tlb* stlb2_4KB, - struct cpuinfo_tlb* stlb2_2MB, - struct cpuinfo_tlb* stlb2_1GB, - uint32_t* log2_package_cores_max); + struct cpuinfo_x86_tlbs* tlb, + struct cpuinfo_x86_topology* topology); CPUINFO_INTERNAL void cpuinfo_x86_decode_cache_descriptor( uint8_t descriptor, enum cpuinfo_vendor vendor, const struct cpuinfo_x86_model_info* model_info, struct cpuinfo_x86_caches* cache, - struct cpuinfo_tlb* itlb_4KB, - struct cpuinfo_tlb* itlb_2MB, - struct cpuinfo_tlb* itlb_4MB, - struct cpuinfo_tlb* dtlb0_4KB, - struct cpuinfo_tlb* dtlb0_2MB, - struct cpuinfo_tlb* dtlb0_4MB, - struct cpuinfo_tlb* dtlb_4KB, - struct cpuinfo_tlb* dtlb_2MB, - struct cpuinfo_tlb* dtlb_4MB, - struct cpuinfo_tlb* dtlb_1GB, - struct cpuinfo_tlb* stlb2_4KB, - struct cpuinfo_tlb* stlb2_2MB, - struct cpuinfo_tlb* stlb2_1GB, - uint32_t* prefetch_size); + struct cpuinfo_x86_tlbs* tlb); CPUINFO_INTERNAL bool cpuinfo_x86_decode_deterministic_cache_parameters( struct cpuid_regs regs, diff --git a/src/x86/cache/descriptor.c b/src/x86/cache/descriptor.c index 93d855ae..404c0235 100644 --- a/src/x86/cache/descriptor.c +++ b/src/x86/cache/descriptor.c @@ -8,20 +8,8 @@ void cpuinfo_x86_decode_cache_descriptor( enum cpuinfo_vendor vendor, const struct cpuinfo_x86_model_info* model_info, struct cpuinfo_x86_caches* cache, - struct cpuinfo_tlb* itlb_4KB, - struct cpuinfo_tlb* itlb_2MB, - struct cpuinfo_tlb* itlb_4MB, - struct cpuinfo_tlb* dtlb0_4KB, - struct cpuinfo_tlb* dtlb0_2MB, - struct cpuinfo_tlb* dtlb0_4MB, - struct cpuinfo_tlb* dtlb_4KB, - struct cpuinfo_tlb* dtlb_2MB, - struct cpuinfo_tlb* dtlb_4MB, - struct cpuinfo_tlb* 
dtlb_1GB, - struct cpuinfo_tlb* stlb2_4KB, - struct cpuinfo_tlb* stlb2_2MB, - struct cpuinfo_tlb* stlb2_1GB, - uint32_t* prefetch_size) { + struct cpuinfo_x86_tlbs* tlb) +{ /* * Descriptors are parsed according to: * - Application Note 485: Intel Processor Indentification and CPUID @@ -41,7 +29,7 @@ void cpuinfo_x86_decode_cache_descriptor( * "Instruction TLB: 4-KB Pages, 4-way set * associative, 32 entries" */ - *itlb_4KB = (struct cpuinfo_tlb){ + tlb->itlb_4KB = (struct cpuinfo_tlb) { .entries = 32, .associativity = 4, .pages = CPUINFO_PAGE_SIZE_4KB, @@ -55,7 +43,7 @@ void cpuinfo_x86_decode_cache_descriptor( * "Instruction TLB: 4-MB Pages, fully associative, * 2 entries" */ - *itlb_4MB = (struct cpuinfo_tlb){ + tlb->itlb_4MB = (struct cpuinfo_tlb) { .entries = 2, .associativity = 2, .pages = CPUINFO_PAGE_SIZE_4MB, @@ -68,7 +56,7 @@ void cpuinfo_x86_decode_cache_descriptor( * 64 entries" Application Note 485: "Data TLB: 4-KB * Pages, 4-way set associative, 64 entries" */ - *dtlb_4KB = (struct cpuinfo_tlb){ + tlb->dtlb_4KB = (struct cpuinfo_tlb) { .entries = 64, .associativity = 4, .pages = CPUINFO_PAGE_SIZE_4KB, @@ -81,7 +69,7 @@ void cpuinfo_x86_decode_cache_descriptor( * 8 entries" Application Note 485: "Data TLB: 4-MB * Pages, 4-way set associative, 8 entries" */ - *dtlb_4MB = (struct cpuinfo_tlb){ + tlb->dtlb_4MB = (struct cpuinfo_tlb) { .entries = 8, .associativity = 4, .pages = CPUINFO_PAGE_SIZE_4MB, @@ -94,7 +82,7 @@ void cpuinfo_x86_decode_cache_descriptor( * 32 entries" Application Note 485: "Data TLB: 4-MB * Pages, 4-way set associative, 32 entries" */ - *dtlb_4MB = (struct cpuinfo_tlb){ + tlb->dtlb_4MB = (struct cpuinfo_tlb) { .entries = 32, .associativity = 4, .pages = CPUINFO_PAGE_SIZE_4MB, @@ -172,7 +160,7 @@ void cpuinfo_x86_decode_cache_descriptor( * "Instruction TLB: 4-MB pages, 4-way set * associative, 4 entries" */ - *itlb_4MB = (struct cpuinfo_tlb){ + tlb->itlb_4MB = (struct cpuinfo_tlb) { .entries = 4, .associativity = 4, .pages = 
CPUINFO_PAGE_SIZE_4MB, @@ -710,7 +698,7 @@ void cpuinfo_x86_decode_cache_descriptor( * Application Note 485: * "Instruction TLB: 4-KB pages, 32 entries" */ - *itlb_4KB = (struct cpuinfo_tlb){ + tlb->itlb_4KB = (struct cpuinfo_tlb) { .entries = 32, /* Assume full associativity from nearby * entries: manual lacks detail @@ -727,7 +715,7 @@ void cpuinfo_x86_decode_cache_descriptor( * TLB: 4-KB, 2-MB or 4-MB pages, fully associative, 64 * entries" */ - *itlb_4KB = *itlb_2MB = *itlb_4MB = (struct cpuinfo_tlb){ + tlb->itlb_4KB = tlb->itlb_2MB = tlb->itlb_4MB = (struct cpuinfo_tlb) { .entries = 64, .associativity = 64, .pages = CPUINFO_PAGE_SIZE_4KB | CPUINFO_PAGE_SIZE_2MB | CPUINFO_PAGE_SIZE_4MB, @@ -741,7 +729,7 @@ void cpuinfo_x86_decode_cache_descriptor( * "Instruction TLB: 4-KB, 2-MB or 4-MB pages, fully * associative, 128 entries" */ - *itlb_4KB = *itlb_2MB = *itlb_4MB = (struct cpuinfo_tlb){ + tlb->itlb_4KB = tlb->itlb_2MB = tlb->itlb_4MB = (struct cpuinfo_tlb) { .entries = 128, .associativity = 128, .pages = CPUINFO_PAGE_SIZE_4KB | CPUINFO_PAGE_SIZE_2MB | CPUINFO_PAGE_SIZE_4MB, @@ -755,7 +743,7 @@ void cpuinfo_x86_decode_cache_descriptor( * "Instruction TLB: 4-KB, 2-MB or 4-MB pages, fully * associative, 256 entries" */ - *itlb_4KB = *itlb_2MB = *itlb_4MB = (struct cpuinfo_tlb){ + tlb->itlb_4KB = tlb->itlb_2MB = tlb->itlb_4MB = (struct cpuinfo_tlb) { .entries = 256, .associativity = 256, .pages = CPUINFO_PAGE_SIZE_4KB | CPUINFO_PAGE_SIZE_2MB | CPUINFO_PAGE_SIZE_4MB, @@ -769,7 +757,7 @@ void cpuinfo_x86_decode_cache_descriptor( * "Instruction TLB: 2-MB or 4-MB pages, fully * associative, 7 entries" */ - *itlb_2MB = *itlb_4MB = (struct cpuinfo_tlb){ + tlb->itlb_2MB = tlb->itlb_4MB = (struct cpuinfo_tlb) { .entries = 7, .associativity = 7, .pages = CPUINFO_PAGE_SIZE_2MB | CPUINFO_PAGE_SIZE_4MB, @@ -782,7 +770,7 @@ void cpuinfo_x86_decode_cache_descriptor( * 16 entries" Application Note 485: "L1 Data TLB: 4-MB * pages, 4-way set associative, 16 entries" */ - 
*dtlb0_4MB = (struct cpuinfo_tlb){ + tlb->dtlb0_4MB = (struct cpuinfo_tlb) { .entries = 16, .associativity = 4, .pages = CPUINFO_PAGE_SIZE_4MB, @@ -795,7 +783,7 @@ void cpuinfo_x86_decode_cache_descriptor( * entries" Application Note 485: "L1 Data TLB: 4-KB * pages, 4-way set associative, 16 entries" */ - *dtlb0_4KB = (struct cpuinfo_tlb){ + tlb->dtlb0_4KB = (struct cpuinfo_tlb) { .entries = 16, .associativity = 4, .pages = CPUINFO_PAGE_SIZE_4KB, @@ -808,7 +796,7 @@ void cpuinfo_x86_decode_cache_descriptor( * entries" Application Note 485: "Data TLB0: 4-KB * pages, fully associative, 16 entries" */ - *dtlb0_4KB = (struct cpuinfo_tlb){ + tlb->dtlb0_4KB = (struct cpuinfo_tlb) { .entries = 16, .associativity = 16, .pages = CPUINFO_PAGE_SIZE_4KB, @@ -822,7 +810,7 @@ void cpuinfo_x86_decode_cache_descriptor( * TLB0: 2-MB or 4-MB pages, 4-way associative, 32 * entries" */ - *dtlb0_2MB = *dtlb0_4MB = (struct cpuinfo_tlb){ + tlb->dtlb0_2MB = tlb->dtlb0_4MB = (struct cpuinfo_tlb) { .entries = 32, .associativity = 4, .pages = CPUINFO_PAGE_SIZE_2MB | CPUINFO_PAGE_SIZE_4MB, @@ -836,7 +824,7 @@ void cpuinfo_x86_decode_cache_descriptor( * "Data TLB: 4-KB or 4-MB pages, fully associative, * 64 entries" */ - *dtlb_4KB = *dtlb_4MB = (struct cpuinfo_tlb){ + tlb->dtlb_4KB = tlb->dtlb_4MB = (struct cpuinfo_tlb) { .entries = 64, .associativity = 64, .pages = CPUINFO_PAGE_SIZE_4KB | CPUINFO_PAGE_SIZE_4MB, @@ -849,7 +837,7 @@ void cpuinfo_x86_decode_cache_descriptor( * entries" Application Note 485: "Data TLB: 4-KB or * 4-MB pages, fully associative, 128 entries" */ - *dtlb_4KB = *dtlb_4MB = (struct cpuinfo_tlb){ + tlb->dtlb_4KB = tlb->dtlb_4MB = (struct cpuinfo_tlb) { .entries = 128, .associativity = 128, .pages = CPUINFO_PAGE_SIZE_4KB | CPUINFO_PAGE_SIZE_4MB, @@ -862,7 +850,7 @@ void cpuinfo_x86_decode_cache_descriptor( * entries" Application Note 485: "Data TLB: 4-KB or * 4-MB pages, fully associative, 256 entries" */ - *dtlb_4KB = *dtlb_4MB = (struct cpuinfo_tlb){ + tlb->dtlb_4KB = 
tlb->dtlb_4MB = (struct cpuinfo_tlb) { .entries = 256, .associativity = 256, .pages = CPUINFO_PAGE_SIZE_4KB | CPUINFO_PAGE_SIZE_4MB, @@ -888,7 +876,7 @@ void cpuinfo_x86_decode_cache_descriptor( * "Instruction TLB: 4 KByte pages, fully * associative, 48 entries" */ - *itlb_4KB = (struct cpuinfo_tlb){ + tlb->itlb_4KB = (struct cpuinfo_tlb) { .entries = 48, .associativity = 48, .pages = CPUINFO_PAGE_SIZE_4KB, @@ -901,12 +889,12 @@ void cpuinfo_x86_decode_cache_descriptor( * associative, 32 entries and a separate array with 1 * GByte pages, 4-way set associative, 4 entries" */ - *dtlb_2MB = *dtlb_4MB = (struct cpuinfo_tlb){ + tlb->dtlb_2MB = tlb->dtlb_4MB = (struct cpuinfo_tlb) { .entries = 32, .associativity = 4, .pages = CPUINFO_PAGE_SIZE_2MB | CPUINFO_PAGE_SIZE_4MB, }; - *dtlb_1GB = (struct cpuinfo_tlb){ + tlb->dtlb_1GB = (struct cpuinfo_tlb) { .entries = 4, .associativity = 4, .pages = CPUINFO_PAGE_SIZE_1GB, @@ -919,7 +907,7 @@ void cpuinfo_x86_decode_cache_descriptor( * 512 entries" * */ - *dtlb_4KB = (struct cpuinfo_tlb){ + tlb->dtlb_4KB = (struct cpuinfo_tlb) { .entries = 512, .associativity = 4, .pages = CPUINFO_PAGE_SIZE_4KB, @@ -977,7 +965,7 @@ void cpuinfo_x86_decode_cache_descriptor( /* uTLB is, an fact, a normal 1-level DTLB on Silvermont * & Knoghts Landing */ - *dtlb_4KB = (struct cpuinfo_tlb){ + tlb->dtlb_4KB = (struct cpuinfo_tlb) { .entries = 64, .associativity = 8, .pages = CPUINFO_PAGE_SIZE_4KB, @@ -989,7 +977,7 @@ void cpuinfo_x86_decode_cache_descriptor( * "DTLB: 4 KByte pages, 8-way set associative, 256 * entries" */ - *dtlb_4KB = (struct cpuinfo_tlb){ + tlb->dtlb_4KB = (struct cpuinfo_tlb) { .entries = 256, .associativity = 8, .pages = CPUINFO_PAGE_SIZE_4KB, @@ -1001,7 +989,7 @@ void cpuinfo_x86_decode_cache_descriptor( * "DTLB: 2M/4M pages, 8-way set associative, 128 * entries" */ - *dtlb_2MB = *dtlb_4MB = (struct cpuinfo_tlb){ + tlb->dtlb_2MB = tlb->dtlb_4MB = (struct cpuinfo_tlb) { .entries = 128, .associativity = 8, .pages = 
CPUINFO_PAGE_SIZE_2MB | CPUINFO_PAGE_SIZE_4MB, @@ -1013,7 +1001,7 @@ void cpuinfo_x86_decode_cache_descriptor( * "DTLB: 1 GByte pages, fully associative, 16 * entries" */ - *dtlb_1GB = (struct cpuinfo_tlb){ + tlb->dtlb_1GB = (struct cpuinfo_tlb) { .entries = 16, .associativity = 16, .pages = CPUINFO_PAGE_SIZE_1GB, @@ -1033,7 +1021,7 @@ void cpuinfo_x86_decode_cache_descriptor( #if CPUINFO_ARCH_X86 case cpuinfo_vendor_cyrix: case cpuinfo_vendor_nsc: - *dtlb_4KB = *itlb_4KB = (struct cpuinfo_tlb){ + tlb->dtlb_4KB = tlb->itlb_4KB = (struct cpuinfo_tlb) { .entries = 32, .associativity = 4, .pages = CPUINFO_PAGE_SIZE_4KB, @@ -1085,7 +1073,7 @@ void cpuinfo_x86_decode_cache_descriptor( * 8 entries" Application Note 485: "Instruction TLB: * 2M/4M pages, fully associative, 8 entries" */ - *itlb_2MB = *itlb_4MB = (struct cpuinfo_tlb){ + tlb->itlb_2MB = tlb->itlb_4MB = (struct cpuinfo_tlb) { .entries = 8, .associativity = 8, .pages = CPUINFO_PAGE_SIZE_2MB | CPUINFO_PAGE_SIZE_4MB, @@ -1352,7 +1340,7 @@ void cpuinfo_x86_decode_cache_descriptor( * Intel ISA Reference: * "DTLB: 4k pages, fully associative, 32 entries" */ - *dtlb_4KB = (struct cpuinfo_tlb){ + tlb->dtlb_4KB = (struct cpuinfo_tlb) { .entries = 32, .associativity = 32, .pages = CPUINFO_PAGE_SIZE_4KB, @@ -1366,7 +1354,7 @@ void cpuinfo_x86_decode_cache_descriptor( * "Instruction TLB: 4-KB Pages, 4-way set associative, * 128 entries" */ - *itlb_4KB = (struct cpuinfo_tlb){ + tlb->itlb_4KB = (struct cpuinfo_tlb) { .entries = 128, .associativity = 4, .pages = CPUINFO_PAGE_SIZE_4KB, @@ -1380,12 +1368,12 @@ void cpuinfo_x86_decode_cache_descriptor( * "Instruction TLB: 2-MB pages, 4-way, 8 entries or 4M * pages, 4-way, 4 entries" */ - *itlb_2MB = (struct cpuinfo_tlb){ + tlb->itlb_2MB = (struct cpuinfo_tlb) { .entries = 8, .associativity = 4, .pages = CPUINFO_PAGE_SIZE_2MB | CPUINFO_PAGE_SIZE_4MB, }; - *itlb_4MB = (struct cpuinfo_tlb){ + tlb->itlb_4MB = (struct cpuinfo_tlb) { .entries = 4, .associativity = 4, .pages = 
CPUINFO_PAGE_SIZE_2MB | CPUINFO_PAGE_SIZE_4MB, @@ -1399,7 +1387,7 @@ void cpuinfo_x86_decode_cache_descriptor( * "Instruction TLB: 4-KB pages, 4-way set * associative, 64 entries" */ - *itlb_4KB = (struct cpuinfo_tlb){ + tlb->itlb_4KB = (struct cpuinfo_tlb) { .entries = 64, .associativity = 4, .pages = CPUINFO_PAGE_SIZE_4KB, @@ -1412,7 +1400,7 @@ void cpuinfo_x86_decode_cache_descriptor( * 128 entries" Application Note 485: "Data TLB: 4-KB * Pages, 4-way set associative, 128 entries" */ - *dtlb_4KB = (struct cpuinfo_tlb){ + tlb->dtlb_4KB = (struct cpuinfo_tlb) { .entries = 128, .associativity = 4, .pages = CPUINFO_PAGE_SIZE_4KB, @@ -1425,7 +1413,7 @@ void cpuinfo_x86_decode_cache_descriptor( * entries" Application Note 485: "Data TLB: 4-KB Pages, * 4-way set associative, 256 entries" */ - *dtlb_4KB = (struct cpuinfo_tlb){ + tlb->dtlb_4KB = (struct cpuinfo_tlb) { .entries = 256, .associativity = 4, .pages = CPUINFO_PAGE_SIZE_4KB, @@ -1437,7 +1425,7 @@ void cpuinfo_x86_decode_cache_descriptor( * "Instruction TLB: 4KByte pages, 8-way set * associative, 64 entries" */ - *itlb_4KB = (struct cpuinfo_tlb){ + tlb->itlb_4KB = (struct cpuinfo_tlb) { .entries = 64, .associativity = 8, .pages = CPUINFO_PAGE_SIZE_4KB, @@ -1449,7 +1437,7 @@ void cpuinfo_x86_decode_cache_descriptor( * "Instruction TLB: 4KByte pages, 8-way set * associative, 128 entries" */ - *itlb_4KB = (struct cpuinfo_tlb){ + tlb->itlb_4KB = (struct cpuinfo_tlb) { .entries = 128, .associativity = 8, .pages = CPUINFO_PAGE_SIZE_4KB, @@ -1462,7 +1450,7 @@ void cpuinfo_x86_decode_cache_descriptor( * entries" Application Note 485: "Data TLB: 4-KB Pages, * 4-way set associative, 64 entries" */ - *itlb_4KB = (struct cpuinfo_tlb){ + tlb->itlb_4KB = (struct cpuinfo_tlb) { .entries = 64, .associativity = 4, .pages = CPUINFO_PAGE_SIZE_4KB, @@ -1476,7 +1464,7 @@ void cpuinfo_x86_decode_cache_descriptor( * TLB: 4-KB or 4-MB Pages, 4-way set associative, 8 * entries" */ - *itlb_4KB = *itlb_4MB = (struct cpuinfo_tlb){ + 
tlb->itlb_4KB = tlb->itlb_4MB = (struct cpuinfo_tlb) { .entries = 8, .associativity = 4, .pages = CPUINFO_PAGE_SIZE_4KB | CPUINFO_PAGE_SIZE_4MB, @@ -1488,7 +1476,7 @@ void cpuinfo_x86_decode_cache_descriptor( * "Shared 2nd-Level TLB: 4 KByte/2MByte pages, * 8-way associative, 1024 entries" */ - *stlb2_4KB = *stlb2_2MB = (struct cpuinfo_tlb){ + tlb->stlb2_4KB = tlb->stlb2_2MB = (struct cpuinfo_tlb) { .entries = 1024, .associativity = 8, .pages = CPUINFO_PAGE_SIZE_4KB | CPUINFO_PAGE_SIZE_2MB, @@ -1500,7 +1488,7 @@ void cpuinfo_x86_decode_cache_descriptor( * "DTLB: 4 KByte/2 MByte pages, 4-way associative, * 16 entries" */ - *dtlb_4KB = *dtlb_2MB = (struct cpuinfo_tlb){ + tlb->dtlb_4KB = tlb->dtlb_2MB = (struct cpuinfo_tlb) { .entries = 16, .associativity = 4, .pages = CPUINFO_PAGE_SIZE_4KB | CPUINFO_PAGE_SIZE_2MB, @@ -1513,12 +1501,12 @@ void cpuinfo_x86_decode_cache_descriptor( * 6-way associative, 1536 entries. Also 1GBbyte pages, * 4-way, 16 entries." */ - *stlb2_4KB = *stlb2_2MB = (struct cpuinfo_tlb){ + tlb->stlb2_4KB = tlb->stlb2_2MB = (struct cpuinfo_tlb) { .entries = 1536, .associativity = 6, .pages = CPUINFO_PAGE_SIZE_4KB | CPUINFO_PAGE_SIZE_2MB, }; - *stlb2_1GB = (struct cpuinfo_tlb){ + tlb->stlb2_1GB = (struct cpuinfo_tlb) { .entries = 16, .associativity = 4, .pages = CPUINFO_PAGE_SIZE_1GB, @@ -1530,7 +1518,7 @@ void cpuinfo_x86_decode_cache_descriptor( * "DTLB: 2M/4M Byte pages, 4-way associative, 32 * entries" */ - *dtlb_2MB = *dtlb_4MB = (struct cpuinfo_tlb){ + tlb->dtlb_2MB = tlb->dtlb_4MB = (struct cpuinfo_tlb) { .entries = 32, .associativity = 4, .pages = CPUINFO_PAGE_SIZE_2MB | CPUINFO_PAGE_SIZE_4MB, @@ -1544,7 +1532,7 @@ void cpuinfo_x86_decode_cache_descriptor( * "Shared 2nd-level TLB: 4 KB pages, 4-way set * associative, 512 entries" */ - *stlb2_4KB = (struct cpuinfo_tlb){ + tlb->stlb2_4KB = (struct cpuinfo_tlb) { .entries = 512, .associativity = 4, .pages = CPUINFO_PAGE_SIZE_4KB, diff --git a/src/x86/cache/init.c b/src/x86/cache/init.c index 
34af90e7..04d95491 100644 --- a/src/x86/cache/init.c +++ b/src/x86/cache/init.c @@ -25,20 +25,9 @@ void cpuinfo_x86_detect_cache( enum cpuinfo_vendor vendor, const struct cpuinfo_x86_model_info* model_info, struct cpuinfo_x86_caches* cache, - struct cpuinfo_tlb* itlb_4KB, - struct cpuinfo_tlb* itlb_2MB, - struct cpuinfo_tlb* itlb_4MB, - struct cpuinfo_tlb* dtlb0_4KB, - struct cpuinfo_tlb* dtlb0_2MB, - struct cpuinfo_tlb* dtlb0_4MB, - struct cpuinfo_tlb* dtlb_4KB, - struct cpuinfo_tlb* dtlb_2MB, - struct cpuinfo_tlb* dtlb_4MB, - struct cpuinfo_tlb* dtlb_1GB, - struct cpuinfo_tlb* stlb2_4KB, - struct cpuinfo_tlb* stlb2_2MB, - struct cpuinfo_tlb* stlb2_1GB, - uint32_t* log2_package_cores_max) { + struct cpuinfo_x86_tlbs* tlb, + struct cpuinfo_x86_topology* topology) +{ if (max_base_index >= 2) { union cpuinfo_x86_cache_descriptors descriptors; descriptors.regs = cpuid(2); @@ -49,24 +38,7 @@ void cpuinfo_x86_detect_cache( const uint8_t descriptor = descriptors.as_bytes[i]; if (descriptor != 0) { cpuinfo_x86_decode_cache_descriptor( - descriptor, - vendor, - model_info, - cache, - itlb_4KB, - itlb_2MB, - itlb_4MB, - dtlb0_4KB, - dtlb0_2MB, - dtlb0_4MB, - dtlb_4KB, - dtlb_2MB, - dtlb_4MB, - dtlb_1GB, - stlb2_4KB, - stlb2_2MB, - stlb2_1GB, - &cache->prefetch_size); + descriptor, vendor, model_info, cache, tlb); } } if (--iterations != 0) { @@ -83,7 +55,7 @@ void cpuinfo_x86_detect_cache( leaf4 = cpuidex(4, input_ecx++); } while (cpuinfo_x86_decode_deterministic_cache_parameters(leaf4, cache, &package_cores_max)); if (package_cores_max != 0) { - *log2_package_cores_max = bit_length(package_cores_max); + topology->core_bits_length = bit_length(package_cores_max); } } } diff --git a/src/x86/init.c b/src/x86/init.c index adc5d361..c87c27a1 100644 --- a/src/x86/init.c +++ b/src/x86/init.c @@ -41,26 +41,8 @@ void cpuinfo_x86_init_processor(struct cpuinfo_x86_processor* processor) { const bool amd_topology_extensions = !!(leaf0x80000001.ecx & UINT32_C(0x00400000)); 
cpuinfo_x86_detect_cache( - max_base_index, - max_extended_index, - amd_topology_extensions, - vendor, - &model_info, - &processor->cache, - &processor->tlb.itlb_4KB, - &processor->tlb.itlb_2MB, - &processor->tlb.itlb_4MB, - &processor->tlb.dtlb0_4KB, - &processor->tlb.dtlb0_2MB, - &processor->tlb.dtlb0_4MB, - &processor->tlb.dtlb_4KB, - &processor->tlb.dtlb_2MB, - &processor->tlb.dtlb_4MB, - &processor->tlb.dtlb_1GB, - &processor->tlb.stlb2_4KB, - &processor->tlb.stlb2_2MB, - &processor->tlb.stlb2_1GB, - &processor->topology.core_bits_length); + max_base_index, max_extended_index, amd_topology_extensions, vendor, + &model_info, &processor->cache, &processor->tlb, &processor->topology); cpuinfo_x86_detect_topology(max_base_index, max_extended_index, leaf1, &processor->topology); From 41d511ff9e742f98b1991c76a2ac0bcdf1ccd031 Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Tue, 24 Sep 2024 12:07:50 +0200 Subject: [PATCH 02/62] Add support for `sme2` detection on `aarch64` --- include/cpuinfo.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/cpuinfo.h b/include/cpuinfo.h index 8bb1db4e..d6647167 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -1671,6 +1671,7 @@ struct cpuinfo_arm_isa { bool sve2; bool i8mm; bool sme; + bool sme2; uint32_t svelen; #endif bool rdm; @@ -2061,6 +2062,14 @@ static inline bool cpuinfo_has_arm_sme(void) { #endif } +static inline bool cpuinfo_has_arm_sme2(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme2; +#else + return false; +#endif +} + #if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 /* This structure is not a part of stable API. Use cpuinfo_has_riscv_* functions * instead. */ From fd626d658e8546fa636868c51a0f24157368ecbe Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Tue, 24 Sep 2024 14:43:25 +0200 Subject: [PATCH 03/62] Add support for `sme2`. 
--- src/arm/linux/aarch64-isa.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/arm/linux/aarch64-isa.c b/src/arm/linux/aarch64-isa.c index 3352db29..c035aff5 100644 --- a/src/arm/linux/aarch64-isa.c +++ b/src/arm/linux/aarch64-isa.c @@ -7,7 +7,7 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( uint32_t features, - uint32_t features2, + uint64_t features2, uint32_t midr, const struct cpuinfo_arm_chipset chipset[restrict static 1], struct cpuinfo_arm_isa isa[restrict static 1]) { @@ -147,6 +147,9 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME) { isa->sme = true; } + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME2) { + isa->sme2 = true; + } // SVEBF16 is set iff SVE and BF16 are both supported, but the SVEBF16 // feature flag was added in Linux kernel before the BF16 feature flag, // so we check for either. From 29ad3be2a7d53a55ddfbc239f7cb6b91dd451030 Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Tue, 24 Sep 2024 14:44:23 +0200 Subject: [PATCH 04/62] Update api.h --- src/arm/linux/api.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/arm/linux/api.h b/src/arm/linux/api.h index d60f2a25..9c001ced 100644 --- a/src/arm/linux/api.h +++ b/src/arm/linux/api.h @@ -138,6 +138,7 @@ struct cpuinfo_arm_linux_proc_cpuinfo_cache { #define CPUINFO_ARM_LINUX_FEATURE2_RNG UINT32_C(0x00010000) #define CPUINFO_ARM_LINUX_FEATURE2_BTI UINT32_C(0x00020000) #define CPUINFO_ARM_LINUX_FEATURE2_SME UINT32_C(0x00800000) +#define CPUINFO_ARM_LINUX_FEATURE2_SME2 UINT64_C(0x0000002000000000) #endif #define CPUINFO_ARM_LINUX_VALID_ARCHITECTURE UINT32_C(0x00010000) @@ -316,7 +317,7 @@ CPUINFO_INTERNAL void cpuinfo_arm_linux_hwcap_from_getauxval( CPUINFO_INTERNAL void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( uint32_t features, - uint32_t features2, + uint64_t features2, uint32_t midr, const struct cpuinfo_arm_chipset chipset[restrict static 1], struct cpuinfo_arm_isa 
isa[restrict static 1]); From 973bff1b20ce13893c3d6e0a3d589c925150c576 Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Tue, 24 Sep 2024 14:46:24 +0200 Subject: [PATCH 05/62] Update hwcap.c --- src/arm/linux/hwcap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/arm/linux/hwcap.c b/src/arm/linux/hwcap.c index e836548d..55241020 100644 --- a/src/arm/linux/hwcap.c +++ b/src/arm/linux/hwcap.c @@ -31,8 +31,8 @@ void cpuinfo_set_hwcap(uint32_t hwcap) { mock_hwcap = hwcap; } -static uint32_t mock_hwcap2 = 0; -void cpuinfo_set_hwcap2(uint32_t hwcap2) { +static uint64_t mock_hwcap2 = 0; +void cpuinfo_set_hwcap2(uint64_t hwcap2) { mock_hwcap2 = hwcap2; } #endif @@ -40,7 +40,7 @@ void cpuinfo_set_hwcap2(uint32_t hwcap2) { #if CPUINFO_ARCH_ARM typedef unsigned long (*getauxval_function_t)(unsigned long); -bool cpuinfo_arm_linux_hwcap_from_getauxval(uint32_t hwcap[restrict static 1], uint32_t hwcap2[restrict static 1]) { +bool cpuinfo_arm_linux_hwcap_from_getauxval(uint32_t hwcap[restrict static 1], uint64_t hwcap2[restrict static 1]) { #if CPUINFO_MOCK *hwcap = mock_hwcap; *hwcap2 = mock_hwcap2; From 3906efd10f29bf37e4a73bbca719641b8110a162 Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Tue, 24 Sep 2024 14:51:46 +0200 Subject: [PATCH 06/62] Update init.c --- src/arm/linux/init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/arm/linux/init.c b/src/arm/linux/init.c index 6e2024d1..c57cba55 100644 --- a/src/arm/linux/init.c +++ b/src/arm/linux/init.c @@ -246,8 +246,9 @@ void cpuinfo_arm_linux_init(void) { cpuinfo_arm_linux_decode_chipset(proc_cpuinfo_hardware, proc_cpuinfo_revision, valid_processors, 0); #endif + uint32_t isa_features = 0; + uint64_t isa_features2 = 0; #if CPUINFO_ARCH_ARM - uint32_t isa_features = 0, isa_features2 = 0; #ifdef __ANDROID__ /* * On Android before API 20, libc.so does not provide getauxval From 69561b001a437f0f6af68cebdd6f4994e8f656ee Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Tue, 
24 Sep 2024 14:53:50 +0200 Subject: [PATCH 07/62] Update init.c --- src/arm/linux/init.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/arm/linux/init.c b/src/arm/linux/init.c index c57cba55..1eab69d5 100644 --- a/src/arm/linux/init.c +++ b/src/arm/linux/init.c @@ -246,9 +246,9 @@ void cpuinfo_arm_linux_init(void) { cpuinfo_arm_linux_decode_chipset(proc_cpuinfo_hardware, proc_cpuinfo_revision, valid_processors, 0); #endif +#if CPUINFO_ARCH_ARM uint32_t isa_features = 0; uint64_t isa_features2 = 0; -#if CPUINFO_ARCH_ARM #ifdef __ANDROID__ /* * On Android before API 20, libc.so does not provide getauxval @@ -300,7 +300,8 @@ void cpuinfo_arm_linux_init(void) { &chipset, &cpuinfo_isa); #elif CPUINFO_ARCH_ARM64 - uint32_t isa_features = 0, isa_features2 = 0; + uint32_t isa_features = 0; + uint64_t isa_features2 = 0; /* getauxval is always available on ARM64 Android */ cpuinfo_arm_linux_hwcap_from_getauxval(&isa_features, &isa_features2); cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( From 3513f11df1c1264e80137f451383c04b78a425fc Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Tue, 24 Sep 2024 14:55:20 +0200 Subject: [PATCH 08/62] Update hwcap.c --- src/arm/linux/hwcap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/arm/linux/hwcap.c b/src/arm/linux/hwcap.c index 55241020..5b537280 100644 --- a/src/arm/linux/hwcap.c +++ b/src/arm/linux/hwcap.c @@ -89,7 +89,7 @@ bool cpuinfo_arm_linux_hwcap_from_procfs(uint32_t hwcap[restrict static 1], uint *hwcap2 = mock_hwcap2; return true; #else - uint32_t hwcaps[2] = {0, 0}; + uint64_t hwcaps[2] = {0, 0}; bool result = false; int file = -1; @@ -113,7 +113,7 @@ bool cpuinfo_arm_linux_hwcap_from_procfs(uint32_t hwcap[restrict static 1], uint hwcaps[0] = (uint32_t)elf_auxv.a_un.a_val; break; case AT_HWCAP2: - hwcaps[1] = (uint32_t)elf_auxv.a_un.a_val; + hwcaps[1] = (uint64_t)elf_auxv.a_un.a_val; break; } } else { @@ -141,13 +141,13 @@ bool 
cpuinfo_arm_linux_hwcap_from_procfs(uint32_t hwcap[restrict static 1], uint } #endif /* __ANDROID__ */ #elif CPUINFO_ARCH_ARM64 -void cpuinfo_arm_linux_hwcap_from_getauxval(uint32_t hwcap[restrict static 1], uint32_t hwcap2[restrict static 1]) { +void cpuinfo_arm_linux_hwcap_from_getauxval(uint32_t hwcap[restrict static 1], uint64_t hwcap2[restrict static 1]) { #if CPUINFO_MOCK *hwcap = mock_hwcap; *hwcap2 = mock_hwcap2; #else *hwcap = (uint32_t)getauxval(AT_HWCAP); - *hwcap2 = (uint32_t)getauxval(AT_HWCAP2); + *hwcap2 = (uint64_t)getauxval(AT_HWCAP2); return; #endif } From 14a46393a3ca27bbbe560c8cc86e031347d8f088 Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Tue, 24 Sep 2024 15:00:26 +0200 Subject: [PATCH 09/62] Update api.h --- src/arm/linux/api.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/arm/linux/api.h b/src/arm/linux/api.h index 9c001ced..c83497c3 100644 --- a/src/arm/linux/api.h +++ b/src/arm/linux/api.h @@ -174,7 +174,7 @@ struct cpuinfo_arm_linux_processor { struct cpuinfo_arm_linux_proc_cpuinfo_cache proc_cpuinfo_cache; #endif uint32_t features; - uint32_t features2; + uint64_t features2; /** * Main ID Register value. 
*/ @@ -297,14 +297,14 @@ CPUINFO_INTERNAL bool cpuinfo_arm_linux_parse_proc_cpuinfo( #if CPUINFO_ARCH_ARM CPUINFO_INTERNAL bool cpuinfo_arm_linux_hwcap_from_getauxval( uint32_t hwcap[restrict static 1], - uint32_t hwcap2[restrict static 1]); + uint64_t hwcap2[restrict static 1]); CPUINFO_INTERNAL bool cpuinfo_arm_linux_hwcap_from_procfs( uint32_t hwcap[restrict static 1], - uint32_t hwcap2[restrict static 1]); + uint64_t hwcap2[restrict static 1]); CPUINFO_INTERNAL void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( uint32_t features, - uint32_t features2, + uint64_t features2, uint32_t midr, uint32_t architecture_version, uint32_t architecture_flags, @@ -313,7 +313,7 @@ CPUINFO_INTERNAL void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( #elif CPUINFO_ARCH_ARM64 CPUINFO_INTERNAL void cpuinfo_arm_linux_hwcap_from_getauxval( uint32_t hwcap[restrict static 1], - uint32_t hwcap2[restrict static 1]); + uint64_t hwcap2[restrict static 1]); CPUINFO_INTERNAL void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( uint32_t features, From af3ddb43c1f2b940fc1ed373fb48005da6df321d Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Tue, 24 Sep 2024 16:20:28 +0200 Subject: [PATCH 10/62] Tabs instead of spaces --- include/cpuinfo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/cpuinfo.h b/include/cpuinfo.h index d6647167..5031e2d9 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -1671,7 +1671,7 @@ struct cpuinfo_arm_isa { bool sve2; bool i8mm; bool sme; - bool sme2; + bool sme2; uint32_t svelen; #endif bool rdm; From ed1a59199a2ce13053a94dc500838c4ecbdb9e20 Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Tue, 24 Sep 2024 16:24:19 +0200 Subject: [PATCH 11/62] Update api.h Add feature bits from [here](https://github.com/torvalds/linux/blob/abf2050f51fdca0fd146388f83cddd95a57a008d/arch/arm64/include/uapi/asm/hwcap.h#L99-L104). 
--- src/arm/linux/api.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/arm/linux/api.h b/src/arm/linux/api.h index c83497c3..14fed7ce 100644 --- a/src/arm/linux/api.h +++ b/src/arm/linux/api.h @@ -139,6 +139,11 @@ struct cpuinfo_arm_linux_proc_cpuinfo_cache { #define CPUINFO_ARM_LINUX_FEATURE2_BTI UINT32_C(0x00020000) #define CPUINFO_ARM_LINUX_FEATURE2_SME UINT32_C(0x00800000) #define CPUINFO_ARM_LINUX_FEATURE2_SME2 UINT64_C(0x0000002000000000) +#define CPUINFO_ARM_LINUX_FEATURE2_SME2P1 UINT64_C(0x0000004000000000) +#define CPUINFO_ARM_LINUX_FEATURE2_SME_I16I32 UINT64_C(0x0000008000000000) +#define CPUINFO_ARM_LINUX_FEATURE2_SME_BI32I32 UINT64_C(0x0000010000000000) +#define CPUINFO_ARM_LINUX_FEATURE2_SME_B16B16 UINT64_C(0x0000020000000000) +#define CPUINFO_ARM_LINUX_FEATURE2_SME_F16F16 UINT64_C(0x0000040000000000) #endif #define CPUINFO_ARM_LINUX_VALID_ARCHITECTURE UINT32_C(0x00010000) From 2179b069575475f6e417ee9ae56d5050d9abae1e Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Tue, 24 Sep 2024 16:29:40 +0200 Subject: [PATCH 12/62] Update hwcap.c --- src/arm/linux/hwcap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arm/linux/hwcap.c b/src/arm/linux/hwcap.c index 5b537280..7f7b4dfd 100644 --- a/src/arm/linux/hwcap.c +++ b/src/arm/linux/hwcap.c @@ -83,7 +83,7 @@ bool cpuinfo_arm_linux_hwcap_from_getauxval(uint32_t hwcap[restrict static 1], u } #ifdef __ANDROID__ -bool cpuinfo_arm_linux_hwcap_from_procfs(uint32_t hwcap[restrict static 1], uint32_t hwcap2[restrict static 1]) { +bool cpuinfo_arm_linux_hwcap_from_procfs(uint32_t hwcap[restrict static 1], uint64_t hwcap2[restrict static 1]) { #if CPUINFO_MOCK *hwcap = mock_hwcap; *hwcap2 = mock_hwcap2; From 6c0517e2b99a21222788e5d6ce05b998596ea87f Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Tue, 24 Sep 2024 16:32:44 +0200 Subject: [PATCH 13/62] Update aarch32-isa.c --- src/arm/linux/aarch32-isa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/arm/linux/aarch32-isa.c b/src/arm/linux/aarch32-isa.c index bd5020c7..cd22d1bd 100644 --- a/src/arm/linux/aarch32-isa.c +++ b/src/arm/linux/aarch32-isa.c @@ -24,7 +24,7 @@ void cpuinfo_set_wcid(uint32_t wcid) { void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( uint32_t features, - uint32_t features2, + uint64_t features2, uint32_t midr, uint32_t architecture_version, uint32_t architecture_flags, From 9d99b4af18ee17cb389e22c116f771405fd22fa2 Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Tue, 24 Sep 2024 18:56:11 +0200 Subject: [PATCH 14/62] Update cpuinfo-mock.h --- include/cpuinfo-mock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/cpuinfo-mock.h b/include/cpuinfo-mock.h index 5e129aa6..7bb6d1ee 100644 --- a/include/cpuinfo-mock.h +++ b/include/cpuinfo-mock.h @@ -60,7 +60,7 @@ ssize_t CPUINFO_ABI cpuinfo_mock_read(int fd, void* buffer, size_t capacity); void CPUINFO_ABI cpuinfo_set_hwcap(uint32_t hwcap); #endif #if CPUINFO_ARCH_ARM -void CPUINFO_ABI cpuinfo_set_hwcap2(uint32_t hwcap2); +void CPUINFO_ABI cpuinfo_set_hwcap2(uint64_t hwcap2); #endif #endif From e78569e0a2d2e3f6d7e36d86bf8ee6e41aa2df2e Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Wed, 25 Sep 2024 12:35:03 +0200 Subject: [PATCH 15/62] Update cpuinfo.h --- include/cpuinfo.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/include/cpuinfo.h b/include/cpuinfo.h index 5031e2d9..387611cc 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -1672,6 +1672,11 @@ struct cpuinfo_arm_isa { bool i8mm; bool sme; bool sme2; + bool sme2p1; + bool sme_i16i32; + bool sme_bi32i32; + bool sme_b16b16; + bool sme_f16f16; uint32_t svelen; #endif bool rdm; @@ -2070,6 +2075,46 @@ static inline bool cpuinfo_has_arm_sme2(void) { #endif } +static inline bool cpuinfo_has_arm_sme2p1(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme2p1; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sme_i16i32(void) { 
+#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme_i16i32; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sme_bi32i32(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme_bi32i32; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sme_b16b16(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme_b16b16; +#else + return false; +#endif +} + +static inline bool cpuinfo_has_arm_sme_f16f16(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.sme_f16f16; +#else + return false; +#endif +} + #if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64 /* This structure is not a part of stable API. Use cpuinfo_has_riscv_* functions * instead. */ From 7da22d2fd6c9f93961f5f66106a6fd9793e7ec25 Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Wed, 25 Sep 2024 12:42:21 +0200 Subject: [PATCH 16/62] Update cpuinfo.h From 5ed696a2894464b37e31d94899b57e1de57f7fc6 Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Wed, 25 Sep 2024 12:45:07 +0200 Subject: [PATCH 17/62] Update aarch64-isa.c --- src/arm/linux/aarch64-isa.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/arm/linux/aarch64-isa.c b/src/arm/linux/aarch64-isa.c index c035aff5..bc2186f6 100644 --- a/src/arm/linux/aarch64-isa.c +++ b/src/arm/linux/aarch64-isa.c @@ -150,6 +150,21 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME2) { isa->sme2 = true; } + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME2P1) { + isa->sme2p1 = true; + } + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME_I16I32) { + isa->sme_i16i32 = true; + } + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME_BI32I32) { + isa->sme_bi32i32 = true; + } + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME_B16B16) { + isa->sme_b16b16 = true; + } + if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME_F16F16) { + isa->sme_f16f16 = true; + } // SVEBF16 is set iff SVE and BF16 are both supported, but the SVEBF16 // feature flag was added in Linux kernel before the BF16 
feature flag, // so we check for either. From cefeefcd173e7a9099845ee2afcf1bc0d0239477 Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Tue, 22 Oct 2024 12:07:53 +0200 Subject: [PATCH 18/62] Add `sme`/`sme2` detection to `mach/init.c`. I forgot to add this for `mach` when I did it for `linux` a while back (#262). I could not find `hw.optional.arm.FEAT_SME_*` features for all the `sme`-related bits in `cpuinfo_arm_isa`, but I've added the ones I could find. --- src/arm/mach/init.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/arm/mach/init.c b/src/arm/mach/init.c index 9d83c05a..0bb31b89 100644 --- a/src/arm/mach/init.c +++ b/src/arm/mach/init.c @@ -399,6 +399,31 @@ void cpuinfo_arm_mach_init(void) { cpuinfo_isa.i8mm = true; } + const uint32_t has_feat_sme = get_sys_info_by_name("hw.optional.arm.FEAT_SME"); + if (has_feat_sme != 0) { + cpuinfo_isa.sme = true; + } + + const uint32_t has_feat_sme2 = get_sys_info_by_name("hw.optional.arm.FEAT_SME2"); + if (has_feat_sme2 != 0) { + cpuinfo_isa.sme2 = true; + } + + const uint32_t has_feat_sme2p1 = get_sys_info_by_name("hw.optional.arm.FEAT_SME2p1"); + if (has_feat_sme2p1 != 0) { + cpuinfo_isa.sme2p1 = true; + } + + const uint32_t has_feat_sme_b16b16 = get_sys_info_by_name("hw.optional.arm.FEAT_SME_B16B16"); + if (has_feat_sme_b16b16 != 0) { + cpuinfo_isa.sme_b16b16 = true; + } + + const uint32_t has_feat_sme_f16f16 = get_sys_info_by_name("hw.optional.arm.FEAT_SME_F16F16"); + if (has_feat_sme_f16f16 != 0) { + cpuinfo_isa.sme_f16f16 = true; + } + uint32_t num_clusters = 1; for (uint32_t i = 0; i < mach_topology.cores; i++) { cores[i] = (struct cpuinfo_core){ From 94c276e1692a02e71a77a594bfa4105b4bff7be1 Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Tue, 22 Oct 2024 12:18:51 +0200 Subject: [PATCH 19/62] Fix formatting. 
--- src/arm/mach/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arm/mach/init.c b/src/arm/mach/init.c index 0bb31b89..c3fe7605 100644 --- a/src/arm/mach/init.c +++ b/src/arm/mach/init.c @@ -423,7 +423,7 @@ void cpuinfo_arm_mach_init(void) { if (has_feat_sme_f16f16 != 0) { cpuinfo_isa.sme_f16f16 = true; } - + uint32_t num_clusters = 1; for (uint32_t i = 0; i < mach_topology.cores; i++) { cores[i] = (struct cpuinfo_core){ From f2c1c9fb9d88abc5e41332328e0700f3be3c7a06 Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Tue, 22 Oct 2024 12:29:59 +0200 Subject: [PATCH 20/62] Remove unrecognized features. --- src/arm/mach/init.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/arm/mach/init.c b/src/arm/mach/init.c index c3fe7605..3fb62414 100644 --- a/src/arm/mach/init.c +++ b/src/arm/mach/init.c @@ -409,21 +409,6 @@ void cpuinfo_arm_mach_init(void) { cpuinfo_isa.sme2 = true; } - const uint32_t has_feat_sme2p1 = get_sys_info_by_name("hw.optional.arm.FEAT_SME2p1"); - if (has_feat_sme2p1 != 0) { - cpuinfo_isa.sme2p1 = true; - } - - const uint32_t has_feat_sme_b16b16 = get_sys_info_by_name("hw.optional.arm.FEAT_SME_B16B16"); - if (has_feat_sme_b16b16 != 0) { - cpuinfo_isa.sme_b16b16 = true; - } - - const uint32_t has_feat_sme_f16f16 = get_sys_info_by_name("hw.optional.arm.FEAT_SME_F16F16"); - if (has_feat_sme_f16f16 != 0) { - cpuinfo_isa.sme_f16f16 = true; - } - uint32_t num_clusters = 1; for (uint32_t i = 0; i < mach_topology.cores; i++) { cores[i] = (struct cpuinfo_core){ From f3ecbbc05d962fa7dad02fecc20d254de1779a95 Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Fri, 25 Oct 2024 23:13:26 -0700 Subject: [PATCH 21/62] Add device info for Astro 55R --- test/build.prop/astro_55r.log | 31 +++++++++++++++++++++++++ test/cpuinfo/astro-55r.log | 43 +++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 test/build.prop/astro_55r.log create mode 100644 test/cpuinfo/astro-55r.log diff --git 
a/test/build.prop/astro_55r.log b/test/build.prop/astro_55r.log new file mode 100644 index 00000000..358a780b --- /dev/null +++ b/test/build.prop/astro_55r.log @@ -0,0 +1,31 @@ +# begin build properties +# autogenerated by buildinfo.sh +ro.build.id=RP1A.201005.001 +ro.build.display.id=Maxwest_Astro_55R_V07_01052022 +ro.build.version.incremental=1634868762 +ro.build.version.sdk=30 +ro.build.version.codename=REL +ro.build.version.release=11 +ro.build.date=Wed Jan 5 10:40:24 CST 2022 +ro.build.date.utc=1641350424 +ro.build.type=user +ro.build.user=lxc +ro.build.host=Astro__55R +ro.build.tags=release-keys +ro.product.model=Astro 55R +ro.product.brand=Maxwest +ro.product.name=Astro_55R +ro.product.device=Astro_55R +ro.product.board=Maxwest +ro.product.cpu.abi=armeabi-v7a +ro.product.manufacturer=Maxwest +ro.product.locale.language= +ro.product.locale.region= +ro.wifi.channels= +ro.board.platform=ums312 +# ro.build.product is obsolete; use ro.product.device +ro.build.product=Astro__55R +# Do not try to parse ro.build.description or .fingerprint +ro.build.description=ums312_2h10_1239SWQ_T5513A_A1_MV1616-user 11 RP1A.201005.001 1634868762 release-keys +ro.build.fingerprint=Maxwest/Astro_55R/Astro_55R:11/RP1A.201005.001/1634868762:user/release-keys +# end build properties diff --git a/test/cpuinfo/astro-55r.log b/test/cpuinfo/astro-55r.log new file mode 100644 index 00000000..a6f0ed66 --- /dev/null +++ b/test/cpuinfo/astro-55r.log @@ -0,0 +1,43 @@ +processor : 0 +model name : ARMv8 Processor +BogoMIPS : 41.60 +Features : half thumb fastmult vfp edsp neon vfpv3 tls vfpv4 idiva idivt vfpd32 lpae evtstrm aes pmull sha1 sha2 crc32 +CPU implementer : 0x41 +CPU architecture: 8 +CPU variant : 0x1 +CPU part : 0xd05 +CPU revision : 0 + +processor : 1 +model name : ARMv8 Processor +BogoMIPS : 41.60 +Features : half thumb fastmult vfp edsp neon vfpv3 tls vfpv4 idiva idivt vfpd32 lpae evtstrm aes pmull sha1 sha2 crc32 +CPU implementer : 0x41 +CPU architecture: 8 +CPU variant : 0x1 +CPU 
part : 0xd05 +CPU revision : 0 + +processor : 2 +model name : ARMv8 Processor +BogoMIPS : 41.60 +Features : half thumb fastmult vfp edsp neon vfpv3 tls vfpv4 idiva idivt vfpd32 lpae evtstrm aes pmull sha1 sha2 crc32 +CPU implementer : 0x41 +CPU architecture: 8 +CPU variant : 0x1 +CPU part : 0xd05 +CPU revision : 0 + +processor : 3 +model name : ARMv8 Processor +BogoMIPS : 62.71 +Features : half thumb fastmult vfp edsp neon vfpv3 tls vfpv4 idiva idivt vfpd32 lpae evtstrm aes pmull sha1 sha2 crc32 +CPU implementer : 0x41 +CPU architecture: 8 +CPU variant : 0x3 +CPU part : 0xd0a +CPU revision : 1 + +Hardware : Unisoc UMS312 +Revision : 0000 +Serial : c9fb2689fd35cfe34d8d2523c9d3c439e8250fb06aaaafb732fe57478326d6cd From 74ead1484c818103d74c1e84d91d01be2c4fcb40 Mon Sep 17 00:00:00 2001 From: Gregory James Comer Date: Sat, 26 Oct 2024 21:24:01 -0700 Subject: [PATCH 22/62] Recognize Unisoc UMS, disable neon dot for UMS312 --- src/arm/api.h | 1 + src/arm/linux/aarch32-isa.c | 2 + src/arm/linux/chipset.c | 76 ++++++++++++++++++++++++++++++ test/name/proc-cpuinfo-hardware.cc | 5 ++ 4 files changed, 84 insertions(+) diff --git a/src/arm/api.h b/src/arm/api.h index 9cfedf60..ac735e3e 100644 --- a/src/arm/api.h +++ b/src/arm/api.h @@ -64,6 +64,7 @@ enum cpuinfo_arm_chipset_series { cpuinfo_arm_chipset_series_telechips_tcc, cpuinfo_arm_chipset_series_texas_instruments_omap, cpuinfo_arm_chipset_series_unisoc_t, + cpuinfo_arm_chipset_series_unisoc_ums, cpuinfo_arm_chipset_series_wondermedia_wm, cpuinfo_arm_chipset_series_max, }; diff --git a/src/arm/linux/aarch32-isa.c b/src/arm/linux/aarch32-isa.c index cd22d1bd..29663a70 100644 --- a/src/arm/linux/aarch32-isa.c +++ b/src/arm/linux/aarch32-isa.c @@ -147,6 +147,8 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( "VDOT instructions disabled: cause occasional SIGILL on Spreadtrum SC9863A"); } else if (chipset->series == cpuinfo_arm_chipset_series_unisoc_t && chipset->model == 310) { cpuinfo_log_warning("VDOT instructions 
disabled: cause occasional SIGILL on Unisoc T310"); + } else if (chipset->series == cpuinfo_arm_chipset_series_unisoc_ums && chipset->model == 312) { + cpuinfo_log_warning("VDOT instructions disabled: cause occasional SIGILL on Unisoc UMS312"); } else { switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) { case UINT32_C(0x4100D0B0): /* Cortex-A76 */ diff --git a/src/arm/linux/chipset.c b/src/arm/linux/chipset.c index 1f93351d..c4977c38 100644 --- a/src/arm/linux/chipset.c +++ b/src/arm/linux/chipset.c @@ -85,6 +85,7 @@ static enum cpuinfo_arm_chipset_vendor chipset_series_vendor[cpuinfo_arm_chipset [cpuinfo_arm_chipset_series_telechips_tcc] = cpuinfo_arm_chipset_vendor_telechips, [cpuinfo_arm_chipset_series_texas_instruments_omap] = cpuinfo_arm_chipset_vendor_texas_instruments, [cpuinfo_arm_chipset_series_unisoc_t] = cpuinfo_arm_chipset_vendor_unisoc, + [cpuinfo_arm_chipset_series_unisoc_ums] = cpuinfo_arm_chipset_vendor_unisoc, [cpuinfo_arm_chipset_series_wondermedia_wm] = cpuinfo_arm_chipset_vendor_wondermedia, }; @@ -959,6 +960,70 @@ static bool match_t(const char* start, const char* end, struct cpuinfo_arm_chips return true; } +/** + * Tries to match, case-sensitively, /Unisoc UMS\d{3,4}/ signature for Unisoc UMS + * chipset. If match successful, extracts model information into \p chipset + * argument. + * + * @param start - start of the platform identifier (/proc/cpuinfo Hardware + * string, ro.product.board, ro.board.platform, or ro.chipname) to match. + * @param end - end of the platform identifier (/proc/cpuinfo Hardware string, + * ro.product.board, ro.board.platform, or ro.chipname) to match. + * @param[out] chipset - location where chipset information will be stored upon + * a successful match. + * + * @returns true if signature matched, false otherwise. 
+ */ +static bool match_ums(const char* start, const char* end, struct cpuinfo_arm_chipset chipset[restrict static 1]) { + /* Expect 13-14 symbols: "Unisoc UMS" (10 symbols) + 3-4-digit model number + */ + const size_t length = end - start; + switch (length) { + case 13: + case 14: + break; + default: + return false; + } + + /* Check that string starts with "Unisoc UMS". The first four characters + * are loaded as 32-bit little endian word */ + const uint32_t expected_unis = load_u32le(start); + if (expected_unis != UINT32_C(0x73696E55) /* "sinU" = reverse("Unis") */) { + return false; + } + + /* The next four characters are loaded as 32-bit little endian word */ + const uint32_t expected_oc_u = load_u32le(start + 4); + if (expected_oc_u != UINT32_C(0x5520636F) /* "U co" = reverse("oc U") */) { + return false; + } + + /* The next two characters are loaded as 16-bit little endian word */ + const uint16_t expected_ms = load_u16le(start + 8); + if (expected_ms != UINT16_C(0x534D) /* "SM" = reverse("MS") */) { + return false; + } + + /* Validate and parse 3-4 digit model number */ + uint32_t model = 0; + for (uint32_t i = 10; i < length; i++) { + const uint32_t digit = (uint32_t)(uint8_t)start[i] - '0'; + if (digit >= 10) { + /* Not really a digit */ + return false; + } + model = model * 10 + digit; + } + + *chipset = (struct cpuinfo_arm_chipset){ + .vendor = cpuinfo_arm_chipset_vendor_unisoc, + .series = cpuinfo_arm_chipset_series_unisoc_ums, + .model = model, + }; + return true; +} + /** * Tries to match /lc\d{4}[a-z]?$/ signature for Leadcore LC chipsets. * If match successful, extracts model information into \p chipset argument. 
@@ -2508,6 +2573,16 @@ struct cpuinfo_arm_chipset cpuinfo_arm_linux_decode_chipset_from_proc_cpuinfo_ha return chipset; } + /* Check Unisoc UMS signature */ + if (match_ums(hardware, hardware_end, &chipset)) { + cpuinfo_log_debug( + "matched Unisoc UMS signature in /proc/cpuinfo Hardware string \"%.*s\"", + (int)hardware_length, + hardware); + + return chipset; + } + #if CPUINFO_ARCH_ARM /* Check Marvell PXA signature */ if (match_pxa(hardware, hardware_end, &chipset)) { @@ -3726,6 +3801,7 @@ static const char* chipset_series_string[cpuinfo_arm_chipset_series_max] = { [cpuinfo_arm_chipset_series_telechips_tcc] = "TCC", [cpuinfo_arm_chipset_series_texas_instruments_omap] = "OMAP", [cpuinfo_arm_chipset_series_unisoc_t] = "T", + [cpuinfo_arm_chipset_series_unisoc_ums] = "UMS", [cpuinfo_arm_chipset_series_wondermedia_wm] = "WM", }; diff --git a/test/name/proc-cpuinfo-hardware.cc b/test/name/proc-cpuinfo-hardware.cc index 8a3418d8..dec14cae 100644 --- a/test/name/proc-cpuinfo-hardware.cc +++ b/test/name/proc-cpuinfo-hardware.cc @@ -458,6 +458,11 @@ TEST(PROC_CPUINFO_HARDWARE, telechips) { EXPECT_EQ("Telechips TCC893X", parse_proc_cpuinfo_hardware("tcc893x")); } +TEST(PROC_CPUINFO_HARDWARE, unisoc) { + EXPECT_EQ("Unisoc T301", parse_proc_cpuinfo_hardware("Unisoc T301", 4, 1800000)); + EXPECT_EQ("Unisoc UMS312", parse_proc_cpuinfo_hardware("Unisoc UMS312", 4, 1800000)); +} + #if CPUINFO_ARCH_ARM TEST(PROC_CPUINFO_HARDWARE, texas_instruments_omap) { EXPECT_EQ("Texas Instruments OMAP4430", parse_proc_cpuinfo_hardware("OMAP4430")); From f0ed5601cf9e255073cb99dc5a413e72a8c09c2a Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Wed, 13 Nov 2024 19:00:30 -0800 Subject: [PATCH 23/62] Detect AMD Zen 5 microarchitecture --- include/cpuinfo.h | 2 ++ src/x86/uarch.c | 2 ++ tools/cpu-info.c | 2 ++ 3 files changed, 6 insertions(+) diff --git a/include/cpuinfo.h b/include/cpuinfo.h index 387611cc..9ed5d924 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -419,6 +419,8 @@ 
enum cpuinfo_uarch { cpuinfo_uarch_zen3 = 0x0020010B, /** AMD Zen 4 microarchitecture. */ cpuinfo_uarch_zen4 = 0x0020010C, + /** AMD Zen 5 microarchitecture. */ + cpuinfo_uarch_zen5 = 0x0020010D, /** NSC Geode and AMD Geode GX and LX. */ cpuinfo_uarch_geode = 0x00200200, diff --git a/src/x86/uarch.c b/src/x86/uarch.c index b291ebcf..a21eabb0 100644 --- a/src/x86/uarch.c +++ b/src/x86/uarch.c @@ -387,6 +387,8 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch( return cpuinfo_uarch_zen4; } break; + case 0x1a: + return cpuinfo_uarch_zen5; } break; case cpuinfo_vendor_hygon: diff --git a/tools/cpu-info.c b/tools/cpu-info.c index b0fec240..b896b270 100644 --- a/tools/cpu-info.c +++ b/tools/cpu-info.c @@ -132,6 +132,8 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) { return "Zen 3"; case cpuinfo_uarch_zen4: return "Zen 4"; + case cpuinfo_uarch_zen5: + return "Zen 5"; case cpuinfo_uarch_geode: return "Geode"; case cpuinfo_uarch_bobcat: From d24ec8aaabe0e087a4fff171e44b54a604254efe Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Fri, 6 Dec 2024 14:39:52 +0100 Subject: [PATCH 24/62] Set the correct L2 size for Ampere Altra (`aarch64`). --- src/arm/cache.c | 4 +++- src/arm/midr.h | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/arm/cache.c b/src/arm/cache.c index dd199193..9a0343b3 100644 --- a/src/arm/cache.c +++ b/src/arm/cache.c @@ -1341,7 +1341,9 @@ void cpuinfo_arm_decode_cache( * information, please refer to the technical manuals * linked above */ - const uint32_t min_l2_size_KB = uarch == cpuinfo_uarch_neoverse_v2 ? 1024 : 256; + const uint32_t min_l2_size_KB = uarch == cpuinfo_uarch_neoverse_v2 || midr_is_ampere_altra(midr) + ? 
1024 + : 256; const uint32_t min_l3_size_KB = 0; *l1i = (struct cpuinfo_cache){ diff --git a/src/arm/midr.h b/src/arm/midr.h index 89ebbb58..5530d5a9 100644 --- a/src/arm/midr.h +++ b/src/arm/midr.h @@ -34,6 +34,7 @@ #define CPUINFO_ARM_MIDR_KRYO_SILVER_820 UINT32_C(0x510F2110) #define CPUINFO_ARM_MIDR_EXYNOS_M1_M2 UINT32_C(0x530F0010) #define CPUINFO_ARM_MIDR_DENVER2 UINT32_C(0x4E0F0030) +#define CPUINFO_ARM_MIDR_AMPERE_ALTRA UINT32_C(0x413fd0c1) inline static uint32_t midr_set_implementer(uint32_t midr, uint32_t implementer) { return (midr & ~CPUINFO_ARM_MIDR_IMPLEMENTER_MASK) | @@ -167,6 +168,11 @@ inline static bool midr_is_kryo_gold(uint32_t midr) { return (midr & uarch_mask) == (CPUINFO_ARM_MIDR_KRYO_GOLD & uarch_mask); } +inline static bool midr_is_ampere_altra(uint32_t midr) { + const uint32_t uarch_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK; + return (midr & uarch_mask) == (CPUINFO_ARM_MIDR_AMPERE_ALTRA & uarch_mask); +} + inline static uint32_t midr_score_core(uint32_t midr) { const uint32_t core_mask = CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK; switch (midr & core_mask) { From d4e4eb9c7e592bb73a631a448d66fb3d81da5ea8 Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Fri, 6 Dec 2024 15:47:59 +0100 Subject: [PATCH 25/62] Fix formatting --- src/arm/cache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/arm/cache.c b/src/arm/cache.c index 9a0343b3..6551d256 100644 --- a/src/arm/cache.c +++ b/src/arm/cache.c @@ -1341,7 +1341,8 @@ void cpuinfo_arm_decode_cache( * information, please refer to the technical manuals * linked above */ - const uint32_t min_l2_size_KB = uarch == cpuinfo_uarch_neoverse_v2 || midr_is_ampere_altra(midr) + const uint32_t min_l2_size_KB = + uarch == cpuinfo_uarch_neoverse_v2 || midr_is_ampere_altra(midr) ? 
1024 : 256; const uint32_t min_l3_size_KB = 0; From f608a19efb5540b65c7057d2e0a7af00fb3640a9 Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Fri, 6 Dec 2024 16:20:17 +0100 Subject: [PATCH 26/62] Fix formatting. --- src/arm/cache.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/arm/cache.c b/src/arm/cache.c index 6551d256..97740c43 100644 --- a/src/arm/cache.c +++ b/src/arm/cache.c @@ -1342,9 +1342,7 @@ void cpuinfo_arm_decode_cache( * linked above */ const uint32_t min_l2_size_KB = - uarch == cpuinfo_uarch_neoverse_v2 || midr_is_ampere_altra(midr) - ? 1024 - : 256; + (uarch == cpuinfo_uarch_neoverse_v2 || midr_is_ampere_altra(midr)) ? 1024 : 256; const uint32_t min_l3_size_KB = 0; *l1i = (struct cpuinfo_cache){ From 5831a6a2215593b262a2ef53bb02ac6c1c2af898 Mon Sep 17 00:00:00 2001 From: "richard.winterton" Date: Mon, 9 Dec 2024 18:01:11 -0700 Subject: [PATCH 27/62] Added changes to cpuinfo.h, isa.c and isa-info.c detect AVX10.1 ISA --- include/cpuinfo.h | 9 +++++++++ src/x86/isa.c | 5 +++++ tools/isa-info.c | 4 ++++ 3 files changed, 18 insertions(+) diff --git a/include/cpuinfo.h b/include/cpuinfo.h index 9ed5d924..6eb4b8c3 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -820,6 +820,7 @@ struct cpuinfo_x86_isa { bool avx512vp2intersect; bool avx512_4vnniw; bool avx512_4fmaps; + bool avx10_1; bool amx_bf16; bool amx_tile; bool amx_int8; @@ -1435,6 +1436,14 @@ static inline bool cpuinfo_has_x86_avx_ne_convert(void) { #endif } +static inline bool cpuinfo_has_x86_avx10_1(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx10_1; +#else + return false; +#endif +} + static inline bool cpuinfo_has_x86_hle(void) { #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 return cpuinfo_isa.hle; diff --git a/src/x86/isa.c b/src/x86/isa.c index bfd5e776..10da3dc3 100644 --- a/src/x86/isa.c +++ b/src/x86/isa.c @@ -429,6 +429,11 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( */ isa.avx512f = avx512_regs && 
!!(structured_feature_info0.ebx & UINT32_C(0x00010000)); + /* + * AVX 10.1 instructions: + */ + isa.avx10_1 = (structured_feature_info1.edx & UINT32_C(1 << 19)); + /* * AVX512PF instructions: * - Intel: ebx[bit 26] in structured feature info (ecx = 0). diff --git a/tools/isa-info.c b/tools/isa-info.c index 2c40a5ec..afb82a0e 100644 --- a/tools/isa-info.c +++ b/tools/isa-info.c @@ -70,6 +70,10 @@ int main(int argc, char** argv) { printf("\tAVX512VP2INTERSECT: %s\n", cpuinfo_has_x86_avx512vp2intersect() ? "yes" : "no"); printf("\tAVX512_4VNNIW: %s\n", cpuinfo_has_x86_avx512_4vnniw() ? "yes" : "no"); printf("\tAVX512_4FMAPS: %s\n", cpuinfo_has_x86_avx512_4fmaps() ? "yes" : "no"); + + //avx10.1 (in future will add other avx10 version) + printf("\tAVX10_1: %s\n", cpuinfo_has_x86_avx10_1() ? "yes" : "no"); + printf("\tAMX_BF16: %s\n", cpuinfo_has_x86_amx_bf16() ? "yes" : "no"); printf("\tAMX_TILE: %s\n", cpuinfo_has_x86_amx_tile() ? "yes" : "no"); printf("\tAMX_INT8: %s\n", cpuinfo_has_x86_amx_int8() ? "yes" : "no"); From c29571000560d259580be13de16da95feb112c3b Mon Sep 17 00:00:00 2001 From: "richard.winterton" Date: Wed, 11 Dec 2024 18:19:28 -0700 Subject: [PATCH 28/62] added check for avx512_regs and hex representation change per request --- src/x86/isa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/x86/isa.c b/src/x86/isa.c index 10da3dc3..0a10be74 100644 --- a/src/x86/isa.c +++ b/src/x86/isa.c @@ -432,7 +432,7 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( /* * AVX 10.1 instructions: */ - isa.avx10_1 = (structured_feature_info1.edx & UINT32_C(1 << 19)); + isa.avx10_1 = avx512_regs && !!(structured_feature_info1.edx & UINT32_C(0x00080000)); /* * AVX512PF instructions: From c413b7875936d96fbaf643745ecabe1ec74027b3 Mon Sep 17 00:00:00 2001 From: rrwinterton Date: Tue, 7 Jan 2025 16:06:55 -0700 Subject: [PATCH 29/62] change spaces to tab and fix int to bool for RISC. 
--- src/x86/isa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/x86/isa.c b/src/x86/isa.c index 0a10be74..ee65f83e 100644 --- a/src/x86/isa.c +++ b/src/x86/isa.c @@ -429,10 +429,10 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( */ isa.avx512f = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00010000)); - /* + /* * AVX 10.1 instructions: */ - isa.avx10_1 = avx512_regs && !!(structured_feature_info1.edx & UINT32_C(0x00080000)); + isa.avx10_1 = avx512_regs && !!(structured_feature_info1.edx & UINT32_C(0x00080000)); /* * AVX512PF instructions: From 510deeb10bc9184475ce79b3774db2603a4a2381 Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Wed, 8 Jan 2025 12:39:28 -0800 Subject: [PATCH 30/62] Update tools/isa-info.c --- tools/isa-info.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/isa-info.c b/tools/isa-info.c index afb82a0e..96bcdd7a 100644 --- a/tools/isa-info.c +++ b/tools/isa-info.c @@ -70,10 +70,7 @@ int main(int argc, char** argv) { printf("\tAVX512VP2INTERSECT: %s\n", cpuinfo_has_x86_avx512vp2intersect() ? "yes" : "no"); printf("\tAVX512_4VNNIW: %s\n", cpuinfo_has_x86_avx512_4vnniw() ? "yes" : "no"); printf("\tAVX512_4FMAPS: %s\n", cpuinfo_has_x86_avx512_4fmaps() ? "yes" : "no"); - - //avx10.1 (in future will add other avx10 version) printf("\tAVX10_1: %s\n", cpuinfo_has_x86_avx10_1() ? "yes" : "no"); - printf("\tAMX_BF16: %s\n", cpuinfo_has_x86_amx_bf16() ? "yes" : "no"); printf("\tAMX_TILE: %s\n", cpuinfo_has_x86_amx_tile() ? "yes" : "no"); printf("\tAMX_INT8: %s\n", cpuinfo_has_x86_amx_int8() ? 
"yes" : "no"); From 0fb2fd6acb69b2f3f96ba114cfbf693e935ee361 Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Wed, 8 Jan 2025 12:45:17 -0800 Subject: [PATCH 31/62] Update src/x86/isa.c --- src/x86/isa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/x86/isa.c b/src/x86/isa.c index ee65f83e..47a6afa3 100644 --- a/src/x86/isa.c +++ b/src/x86/isa.c @@ -433,7 +433,7 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( * AVX 10.1 instructions: */ isa.avx10_1 = avx512_regs && !!(structured_feature_info1.edx & UINT32_C(0x00080000)); - + /* * AVX512PF instructions: * - Intel: ebx[bit 26] in structured feature info (ecx = 0). From 551cd201d32608347edd0a4593b39d1bb9743912 Mon Sep 17 00:00:00 2001 From: Richard Winterton Date: Thu, 30 Jan 2025 22:58:07 -0700 Subject: [PATCH 32/62] Update cpuinfo to support AVX10.2 ISA detection --- include/cpuinfo.h | 9 +++++++++ src/x86/isa.c | 7 +++++++ tools/cpuid-dump.c | 9 +++++++++ tools/isa-info.c | 1 + 4 files changed, 26 insertions(+) diff --git a/include/cpuinfo.h b/include/cpuinfo.h index 6eb4b8c3..ea3c1d07 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -821,6 +821,7 @@ struct cpuinfo_x86_isa { bool avx512_4vnniw; bool avx512_4fmaps; bool avx10_1; + bool avx10_2; bool amx_bf16; bool amx_tile; bool amx_int8; @@ -1444,6 +1445,14 @@ static inline bool cpuinfo_has_x86_avx10_1(void) { #endif } +static inline bool cpuinfo_has_x86_avx10_2(void) { +#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 + return cpuinfo_isa.avx10_2; +#else + return false; +#endif +} + static inline bool cpuinfo_has_x86_hle(void) { #if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64 return cpuinfo_isa.hle; diff --git a/src/x86/isa.c b/src/x86/isa.c index 47a6afa3..c8b1c997 100644 --- a/src/x86/isa.c +++ b/src/x86/isa.c @@ -46,6 +46,8 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( (max_base_index >= 7) ? 
cpuidex(7, 0) : (struct cpuid_regs){0, 0, 0, 0}; const struct cpuid_regs structured_feature_info1 = (max_base_index >= 7) ? cpuidex(7, 1) : (struct cpuid_regs){0, 0, 0, 0}; + const struct cpuid_regs structured_feature_info2 = + (max_base_index >= 7) ? cpuidex(0x24, 0) : (struct cpuid_regs){0, 0, 0, 0}; const uint32_t processor_capacity_info_index = UINT32_C(0x80000008); const struct cpuid_regs processor_capacity_info = (max_extended_index >= processor_capacity_info_index) @@ -434,6 +436,11 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( */ isa.avx10_1 = avx512_regs && !!(structured_feature_info1.edx & UINT32_C(0x00080000)); + /* + * AVX 10.2 instructions: + */ + isa.avx10_2 = !!((structured_feature_info2.ebx & UINT32_C(0x0000007F)) >= 2) && isa.avx10_1; + /* * AVX512PF instructions: * - Intel: ebx[bit 26] in structured feature info (ecx = 0). diff --git a/tools/cpuid-dump.c b/tools/cpuid-dump.c index 87c403d5..1a65510f 100644 --- a/tools/cpuid-dump.c +++ b/tools/cpuid-dump.c @@ -123,6 +123,15 @@ int main(int argc, char** argv) { print_cpuidex(regs, eax, ecx); } break; + case UINT32_C(0x00000024): + for (uint32_t ecx = 0; ecx <= max_socid_index; ecx++) { + const struct cpuid_regs regs = cpuidex(eax, ecx); + if (ecx == 0) { + max_socid_index = regs.eax; + } + print_cpuidex(regs, eax, ecx); + } + break; default: print_cpuid(cpuidex(eax, 0), eax); break; diff --git a/tools/isa-info.c b/tools/isa-info.c index 96bcdd7a..21f94a5f 100644 --- a/tools/isa-info.c +++ b/tools/isa-info.c @@ -71,6 +71,7 @@ int main(int argc, char** argv) { printf("\tAVX512_4VNNIW: %s\n", cpuinfo_has_x86_avx512_4vnniw() ? "yes" : "no"); printf("\tAVX512_4FMAPS: %s\n", cpuinfo_has_x86_avx512_4fmaps() ? "yes" : "no"); printf("\tAVX10_1: %s\n", cpuinfo_has_x86_avx10_1() ? "yes" : "no"); + printf("\tAVX10_2: %s\n", cpuinfo_has_x86_avx10_2() ? "yes" : "no"); printf("\tAMX_BF16: %s\n", cpuinfo_has_x86_amx_bf16() ? "yes" : "no"); printf("\tAMX_TILE: %s\n", cpuinfo_has_x86_amx_tile() ? 
"yes" : "no"); printf("\tAMX_INT8: %s\n", cpuinfo_has_x86_amx_int8() ? "yes" : "no"); From 27bd612af1e841698a2cbd6a199ab8abf84af0bb Mon Sep 17 00:00:00 2001 From: Richard Winterton Date: Thu, 30 Jan 2025 23:17:31 -0700 Subject: [PATCH 33/62] Update isa.c This is the version for the AVX 10 in this case we are testing for AVX 10.2 but it covers the whole 8 bits. --- src/x86/isa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/x86/isa.c b/src/x86/isa.c index c8b1c997..36383f52 100644 --- a/src/x86/isa.c +++ b/src/x86/isa.c @@ -439,7 +439,7 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( /* * AVX 10.2 instructions: */ - isa.avx10_2 = !!((structured_feature_info2.ebx & UINT32_C(0x0000007F)) >= 2) && isa.avx10_1; + isa.avx10_2 = !!((structured_feature_info2.ebx & UINT32_C(0x000000FF)) >= 2) && isa.avx10_1; /* * AVX512PF instructions: From 3de9f8abd36f02cda08d8321811821c22d1e2c9d Mon Sep 17 00:00:00 2001 From: Richard Winterton Date: Thu, 30 Jan 2025 23:27:30 -0700 Subject: [PATCH 34/62] Update isa.c added comments to AVX 10 detection for both AVX10.1 and AVX10.2 --- src/x86/isa.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/x86/isa.c b/src/x86/isa.c index 36383f52..37c62172 100644 --- a/src/x86/isa.c +++ b/src/x86/isa.c @@ -432,12 +432,14 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( isa.avx512f = avx512_regs && !!(structured_feature_info0.ebx & UINT32_C(0x00010000)); /* - * AVX 10.1 instructions: + * AVX 10.1 instructions: avx 10 isa supported. + * - Intel: edx[bit 19] in structured feature info (ecx = 1). */ isa.avx10_1 = avx512_regs && !!(structured_feature_info1.edx & UINT32_C(0x00080000)); /* - * AVX 10.2 instructions: + * AVX 10.2 instructions: avx 10 version information. + * - Intel: ebx[bits 0-7] in structured features info (eax = 24 ecx = 0). 
*/ isa.avx10_2 = !!((structured_feature_info2.ebx & UINT32_C(0x000000FF)) >= 2) && isa.avx10_1; From 64f22b3f45aeba4477f24289abb78215520430f6 Mon Sep 17 00:00:00 2001 From: Richard Winterton Date: Fri, 31 Jan 2025 09:27:04 -0700 Subject: [PATCH 35/62] Update isa.c Remove the unneeded !! from the version number comparison (>= 2), per Frank's comment. --- src/x86/isa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/x86/isa.c b/src/x86/isa.c index 37c62172..d56982fe 100644 --- a/src/x86/isa.c +++ b/src/x86/isa.c @@ -441,7 +441,7 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( * AVX 10.2 instructions: avx 10 version information. * - Intel: ebx[bits 0-7] in structured features info (eax = 24 ecx = 0). */ - isa.avx10_2 = !!((structured_feature_info2.ebx & UINT32_C(0x000000FF)) >= 2) && isa.avx10_1; + isa.avx10_2 = ((structured_feature_info2.ebx & UINT32_C(0x000000FF)) >= 2) && isa.avx10_1; /* * AVX512PF instructions: From 293cfc75a9783d3b289a9535a59ee7035e1cadc5 Mon Sep 17 00:00:00 2001 From: Richard Winterton Date: Mon, 3 Feb 2025 15:04:58 -0700 Subject: [PATCH 36/62] Update isa.c Fix whitespace: replace the incorrect mix of tabs and spaces used for alignment. --- src/x86/isa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/x86/isa.c b/src/x86/isa.c index d56982fe..30be4538 100644 --- a/src/x86/isa.c +++ b/src/x86/isa.c @@ -46,7 +46,7 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( (max_base_index >= 7) ? cpuidex(7, 0) : (struct cpuid_regs){0, 0, 0, 0}; const struct cpuid_regs structured_feature_info1 = (max_base_index >= 7) ? cpuidex(7, 1) : (struct cpuid_regs){0, 0, 0, 0}; - const struct cpuid_regs structured_feature_info2 = + const struct cpuid_regs structured_feature_info2 = (max_base_index >= 7) ? 
cpuidex(0x24, 0) : (struct cpuid_regs){0, 0, 0, 0}; const uint32_t processor_capacity_info_index = UINT32_C(0x80000008); From 6e37a3ed143519c2e2943f65a18823946e59ca0a Mon Sep 17 00:00:00 2001 From: Richard Winterton Date: Mon, 3 Feb 2025 15:10:22 -0700 Subject: [PATCH 37/62] Update isa.c Convert spaces to tabs to fix alignment issues. --- src/x86/isa.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/x86/isa.c b/src/x86/isa.c index 30be4538..7f466b78 100644 --- a/src/x86/isa.c +++ b/src/x86/isa.c @@ -433,14 +433,14 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( /* * AVX 10.1 instructions: avx 10 isa supported. - * - Intel: edx[bit 19] in structured feature info (ecx = 1). + * - Intel: edx[bit 19] in structured feature info (ecx = 1). */ isa.avx10_1 = avx512_regs && !!(structured_feature_info1.edx & UINT32_C(0x00080000)); /* * AVX 10.2 instructions: avx 10 version information. - * - Intel: ebx[bits 0-7] in structured features info (eax = 24 ecx = 0). - */ + * - Intel: ebx[bits 0-7] in structured features info (eax = 24 ecx = 0). + */ isa.avx10_2 = ((structured_feature_info2.ebx & UINT32_C(0x000000FF)) >= 2) && isa.avx10_1; /* From c9d236e87298b3366d19601b18be1928949d4177 Mon Sep 17 00:00:00 2001 From: Richard Winterton Date: Mon, 3 Feb 2025 15:37:06 -0700 Subject: [PATCH 38/62] Update isa.c Replace the spaces with a tab at line 50 to fix the indentation issue. --- src/x86/isa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/x86/isa.c b/src/x86/isa.c index 7f466b78..377583b3 100644 --- a/src/x86/isa.c +++ b/src/x86/isa.c @@ -47,7 +47,7 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( const struct cpuid_regs structured_feature_info1 = (max_base_index >= 7) ? cpuidex(7, 1) : (struct cpuid_regs){0, 0, 0, 0}; const struct cpuid_regs structured_feature_info2 = - (max_base_index >= 7) ? cpuidex(0x24, 0) : (struct cpuid_regs){0, 0, 0, 0}; + (max_base_index >= 7) ? 
cpuidex(0x24, 0) : (struct cpuid_regs){0, 0, 0, 0}; const uint32_t processor_capacity_info_index = UINT32_C(0x80000008); const struct cpuid_regs processor_capacity_info = (max_extended_index >= processor_capacity_info_index) From cc3f418fa296ecfe7d7fbf3541e3f8bcaa717c5b Mon Sep 17 00:00:00 2001 From: Christopher Degawa Date: Tue, 11 Feb 2025 16:41:28 -0600 Subject: [PATCH 39/62] pkg-config: use PROJECT_NAME instead of CMAKE_PROJECT_NAME [`PROJECT_NAME`][1] refers to the most recent `project()` call, while [`CMAKE_PROJECT_NAME`][2] refers to the top-most `project()` call. In the cases where cpuinfo is installed as a standalone project, this is perfectly fine and works as intended where the installed pkg-config file contains libcpuinfo as the name. However, if cpuinfo is used as a subproject, such as when using FetchContent, the name of the calling project would be used instead, leading to something like libOuterProject rather than libcpuinfo. [1]: https://cmake.org/cmake/help/latest/variable/PROJECT_NAME.html#variable:PROJECT_NAME [2]: https://cmake.org/cmake/help/latest/variable/CMAKE_PROJECT_NAME.html#variable:CMAKE_PROJECT_NAME Signed-off-by: Christopher Degawa --- libcpuinfo.pc.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcpuinfo.pc.in b/libcpuinfo.pc.in index 3027da34..f9a9db62 100644 --- a/libcpuinfo.pc.in +++ b/libcpuinfo.pc.in @@ -3,7 +3,7 @@ exec_prefix=@CMAKE_INSTALL_PREFIX@ libdir=@libdir_for_pc_file@ includedir=@includedir_for_pc_file@ -Name: lib@CMAKE_PROJECT_NAME@ +Name: lib@PROJECT_NAME@ Description: Library to detect essential performance optimization information about host CPU. 
Version: URL: @PROJECT_HOMEPAGE_URL@ From e51ddbfb9c72ebdc74056310fe343e030a040221 Mon Sep 17 00:00:00 2001 From: Ozan Aydin <148207261+ozanMSFT@users.noreply.github.com> Date: Fri, 14 Mar 2025 13:57:31 +0100 Subject: [PATCH 40/62] - refactor for auto detection --- CMakeLists.txt | 3 +- include/cpuinfo.h | 2 + src/arm/api.h | 28 +-- src/arm/uarch.c | 7 +- src/arm/windows/init-by-logical-sys-info.c | 13 +- src/arm/windows/init.c | 194 +++++++++++---------- src/arm/windows/windows-arm-init.h | 18 -- 7 files changed, 135 insertions(+), 130 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bd9f77f8..aedc9831 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -187,7 +187,8 @@ IF(CPUINFO_SUPPORTED_PLATFORM) ELSEIF(CMAKE_SYSTEM_NAME MATCHES "^Windows" AND CPUINFO_TARGET_PROCESSOR MATCHES "^(ARM64|arm64)$") LIST(APPEND CPUINFO_SRCS src/arm/windows/init-by-logical-sys-info.c - src/arm/windows/init.c) + src/arm/windows/init.c + src/arm/uarch.c) ELSEIF(CPUINFO_TARGET_PROCESSOR MATCHES "^(armv[5-8].*|aarch64|arm64.*)$" OR IOS_ARCH MATCHES "^(armv7.*|arm64.*)$") LIST(APPEND CPUINFO_SRCS src/arm/uarch.c diff --git a/include/cpuinfo.h b/include/cpuinfo.h index ea3c1d07..5f93819e 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -522,6 +522,8 @@ enum cpuinfo_uarch { cpuinfo_uarch_falkor = 0x00400103, /** Qualcomm Saphira. */ cpuinfo_uarch_saphira = 0x00400104, + /** Qualcomm Oryon. */ + cpuinfo_uarch_oryon = 0x00400105, /** Nvidia Denver. 
*/ cpuinfo_uarch_denver = 0x00500100, diff --git a/src/arm/api.h b/src/arm/api.h index ac735e3e..b1be6392 100644 --- a/src/arm/api.h +++ b/src/arm/api.h @@ -1,5 +1,11 @@ #pragma once +#ifdef _MSC_VER + #define RESTRICT_STATIC /* nothing for MSVC */ +#else + #define RESTRICT_STATIC restrict static +#endif + #include #include @@ -82,11 +88,11 @@ struct cpuinfo_arm_chipset { #ifndef __cplusplus CPUINFO_INTERNAL void cpuinfo_arm_chipset_to_string( - const struct cpuinfo_arm_chipset chipset[restrict static 1], - char name[restrict static CPUINFO_ARM_CHIPSET_NAME_MAX]); + const struct cpuinfo_arm_chipset chipset[RESTRICT_STATIC 1], + char name[RESTRICT_STATIC CPUINFO_ARM_CHIPSET_NAME_MAX]); CPUINFO_INTERNAL void cpuinfo_arm_fixup_chipset( - struct cpuinfo_arm_chipset chipset[restrict static 1], + struct cpuinfo_arm_chipset chipset[RESTRICT_STATIC 1], uint32_t cores, uint32_t max_cpu_freq_max); @@ -95,23 +101,23 @@ CPUINFO_INTERNAL void cpuinfo_arm_decode_vendor_uarch( #if CPUINFO_ARCH_ARM bool has_vfpv4, #endif - enum cpuinfo_vendor vendor[restrict static 1], - enum cpuinfo_uarch uarch[restrict static 1]); + enum cpuinfo_vendor vendor[RESTRICT_STATIC 1], + enum cpuinfo_uarch uarch[RESTRICT_STATIC 1]); CPUINFO_INTERNAL void cpuinfo_arm_decode_cache( enum cpuinfo_uarch uarch, uint32_t cluster_cores, uint32_t midr, - const struct cpuinfo_arm_chipset chipset[restrict static 1], + const struct cpuinfo_arm_chipset chipset[RESTRICT_STATIC 1], uint32_t cluster_id, uint32_t arch_version, - struct cpuinfo_cache l1i[restrict static 1], - struct cpuinfo_cache l1d[restrict static 1], - struct cpuinfo_cache l2[restrict static 1], - struct cpuinfo_cache l3[restrict static 1]); + struct cpuinfo_cache l1i[RESTRICT_STATIC 1], + struct cpuinfo_cache l1d[RESTRICT_STATIC 1], + struct cpuinfo_cache l2[RESTRICT_STATIC 1], + struct cpuinfo_cache l3[RESTRICT_STATIC 1]); CPUINFO_INTERNAL uint32_t -cpuinfo_arm_compute_max_cache_size(const struct cpuinfo_processor processor[restrict static 1]); 
+cpuinfo_arm_compute_max_cache_size(const struct cpuinfo_processor processor[RESTRICT_STATIC 1]); #else /* defined(__cplusplus) */ CPUINFO_INTERNAL void cpuinfo_arm_decode_cache( enum cpuinfo_uarch uarch, diff --git a/src/arm/uarch.c b/src/arm/uarch.c index 68531e4d..9679f500 100644 --- a/src/arm/uarch.c +++ b/src/arm/uarch.c @@ -9,8 +9,8 @@ void cpuinfo_arm_decode_vendor_uarch( #if CPUINFO_ARCH_ARM bool has_vfpv4, #endif /* CPUINFO_ARCH_ARM */ - enum cpuinfo_vendor vendor[restrict static 1], - enum cpuinfo_uarch uarch[restrict static 1]) { + enum cpuinfo_vendor vendor[RESTRICT_STATIC 1], + enum cpuinfo_uarch uarch[RESTRICT_STATIC 1]) { switch (midr_get_implementer(midr)) { case 'A': *vendor = cpuinfo_vendor_arm; @@ -332,6 +332,9 @@ void cpuinfo_arm_decode_vendor_uarch( *uarch = cpuinfo_uarch_cortex_a55; break; #if CPUINFO_ARCH_ARM64 + case 0x001: + *uarch = cpuinfo_uarch_oryon; + break; case 0xC00: *uarch = cpuinfo_uarch_falkor; break; diff --git a/src/arm/windows/init-by-logical-sys-info.c b/src/arm/windows/init-by-logical-sys-info.c index 6ce6efe8..ecb238e1 100644 --- a/src/arm/windows/init-by-logical-sys-info.c +++ b/src/arm/windows/init-by-logical-sys-info.c @@ -750,11 +750,14 @@ void store_core_info_per_processor( if (cores) { processors[processor_global_index].core = cores + core_id; cores[core_id].core_id = core_id; - get_core_uarch_for_efficiency( - chip_info->chip_name, - core_info->Processor.EfficiencyClass, - &(cores[core_id].uarch), - &(cores[core_id].frequency)); + + if (chip_info->uarchs == NULL) { + cpuinfo_log_error("uarch is NULL for core %d", core_id); + return; + } + + cores[core_id].uarch = chip_info->uarchs[0].uarch; + cores[core_id].frequency = chip_info->uarchs[0].frequency; /* We don't have cluster information, so we handle it as * fixed 1 to (cluster / cores). 
diff --git a/src/arm/windows/init.c b/src/arm/windows/init.c index de2f6ccd..84098433 100644 --- a/src/arm/windows/init.c +++ b/src/arm/windows/init.c @@ -7,6 +7,9 @@ #include #include +#include +#include + #include "windows-arm-init.h" struct cpuinfo_arm_isa cpuinfo_isa; @@ -14,62 +17,7 @@ struct cpuinfo_arm_isa cpuinfo_isa; static void set_cpuinfo_isa_fields(void); static struct woa_chip_info* get_system_info_from_registry(void); -static struct woa_chip_info woa_chip_unknown = { - L"Unknown", - woa_chip_name_unknown, - {{cpuinfo_vendor_unknown, cpuinfo_uarch_unknown, 0}}}; - -/* Please add new SoC/chip info here! */ -static struct woa_chip_info woa_chips[woa_chip_name_last] = { - /* Microsoft SQ1 Kryo 495 4 + 4 cores (3 GHz + 1.80 GHz) */ - [woa_chip_name_microsoft_sq_1] = - {L"Microsoft SQ1", - woa_chip_name_microsoft_sq_1, - {{ - cpuinfo_vendor_arm, - cpuinfo_uarch_cortex_a55, - 1800000000, - }, - { - cpuinfo_vendor_arm, - cpuinfo_uarch_cortex_a76, - 3000000000, - }}}, - /* Microsoft SQ2 Kryo 495 4 + 4 cores (3.15 GHz + 2.42 GHz) */ - [woa_chip_name_microsoft_sq_2] = - {L"Microsoft SQ2", - woa_chip_name_microsoft_sq_2, - {{ - cpuinfo_vendor_arm, - cpuinfo_uarch_cortex_a55, - 2420000000, - }, - {cpuinfo_vendor_arm, cpuinfo_uarch_cortex_a76, 3150000000}}}, - /* Snapdragon (TM) 8cx Gen 3 @ 3.0 GHz */ - [woa_chip_name_microsoft_sq_3] = - {L"Snapdragon (TM) 8cx Gen 3", - woa_chip_name_microsoft_sq_3, - {{ - cpuinfo_vendor_arm, - cpuinfo_uarch_cortex_a78, - 2420000000, - }, - {cpuinfo_vendor_arm, cpuinfo_uarch_cortex_x1, 3000000000}}}, - /* Microsoft Windows Dev Kit 2023 */ - [woa_chip_name_microsoft_sq_3_devkit] = - {L"Snapdragon Compute Platform", - woa_chip_name_microsoft_sq_3_devkit, - {{ - cpuinfo_vendor_arm, - cpuinfo_uarch_cortex_a78, - 2420000000, - }, - {cpuinfo_vendor_arm, cpuinfo_uarch_cortex_x1, 3000000000}}}, - /* Ampere Altra */ - [woa_chip_name_ampere_altra] = { - L"Ampere(R) Altra(R) Processor", - woa_chip_name_ampere_altra, - {{cpuinfo_vendor_arm, 
cpuinfo_uarch_neoverse_n1, 3000000000}}}}; +static struct woa_chip_info woa_chip_unknown = {L"Unknown", {{cpuinfo_vendor_unknown, cpuinfo_uarch_unknown, 0}}}; BOOL CALLBACK cpuinfo_arm_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { struct woa_chip_info* chip_info = NULL; @@ -87,23 +35,6 @@ BOOL CALLBACK cpuinfo_arm_windows_init(PINIT_ONCE init_once, PVOID parameter, PV return true; } -bool get_core_uarch_for_efficiency( - enum woa_chip_name chip, - BYTE EfficiencyClass, - enum cpuinfo_uarch* uarch, - uint64_t* frequency) { - /* For currently supported WoA chips, the Efficiency class selects - * the pre-defined little and big core. - * Any further supported SoC's logic should be implemented here. - */ - if (uarch && frequency && chip < woa_chip_name_last && EfficiencyClass < MAX_WOA_VALID_EFFICIENCY_CLASSES) { - *uarch = woa_chips[chip].uarchs[EfficiencyClass].uarch; - *frequency = woa_chips[chip].uarchs[EfficiencyClass].frequency; - return true; - } - return false; -} - /* Static helper functions */ static wchar_t* read_registry(LPCWSTR subkey, LPCWSTR value) { @@ -149,40 +80,117 @@ static wchar_t* read_registry(LPCWSTR subkey, LPCWSTR value) { return text_buffer; } +static uint64_t read_registry_qword(LPCWSTR subkey, LPCWSTR value) { + DWORD key_type = 0; + DWORD data_size = sizeof(uint64_t); + const DWORD flags = RRF_RT_REG_QWORD; /* Only read QWORD (REG_QWORD) values */ + uint64_t qword_value = 0; + LSTATUS result = RegGetValueW(HKEY_LOCAL_MACHINE, subkey, value, flags, &key_type, &qword_value, &data_size); + if (result != ERROR_SUCCESS || data_size != sizeof(uint64_t)) { + cpuinfo_log_error("Registry QWORD read error"); + return 0; + } + return qword_value; +} + +static uint64_t read_registry_dword(LPCWSTR subkey, LPCWSTR value) { + DWORD key_type = 0; + DWORD data_size = sizeof(DWORD); + DWORD dword_value = 0; + LSTATUS result = RegGetValueW( + HKEY_LOCAL_MACHINE, + subkey, + value, + RRF_RT_REG_DWORD, + &key_type, + &dword_value, + 
&data_size); + if (result != ERROR_SUCCESS || data_size != sizeof(DWORD)) { + cpuinfo_log_error("Registry DWORD read error"); + return 0; + } + return (uint64_t)dword_value; +} + +static wchar_t* wcsndup(const wchar_t* src, size_t n) { + size_t len = wcsnlen(src, n); + wchar_t* dup = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, (len + 1) * sizeof(wchar_t)); + if (dup) { + wcsncpy_s(dup, len + 1, src, len); + dup[len] = L'\0'; + } + return dup; +} + +static struct core_info_by_chip_name get_core_info_from_midr(uint32_t midr, uint64_t frequency) { + struct core_info_by_chip_name info; + enum cpuinfo_vendor vendor; + enum cpuinfo_uarch uarch; + +#if CPUINFO_ARCH_ARM + bool has_vfpv4 = false; + cpuinfo_arm_decode_vendor_uarch(midr, has_vfpv4, &vendor, &uarch); +#else + cpuinfo_arm_decode_vendor_uarch(midr, &vendor, &uarch); +#endif + + info.vendor = vendor; + info.uarch = uarch; + info.frequency = frequency; + return info; +} + static struct woa_chip_info* get_system_info_from_registry(void) { wchar_t* text_buffer = NULL; LPCWSTR cpu0_subkey = L"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"; LPCWSTR chip_name_value = L"ProcessorNameString"; + LPCWSTR chip_midr_value = L"CP 4000"; + LPCWSTR chip_mhz_value = L"~MHz"; struct woa_chip_info* chip_info = NULL; - HANDLE heap = GetProcessHeap(); - /* Read processor model name from registry and find in the hard-coded * list. 
*/ text_buffer = read_registry(cpu0_subkey, chip_name_value); if (text_buffer == NULL) { - cpuinfo_log_error("Registry read error"); + cpuinfo_log_error("Registry read error for processor name"); return NULL; } - for (uint32_t i = 0; i < (uint32_t)woa_chip_name_last; i++) { - size_t compare_length = wcsnlen(woa_chips[i].chip_name_string, CPUINFO_PACKAGE_NAME_MAX); - int compare_result = wcsncmp(text_buffer, woa_chips[i].chip_name_string, compare_length); - if (compare_result == 0) { - chip_info = woa_chips + i; - break; - } + + /* + * https://developer.arm.com/documentation/100442/0100/register-descriptions/aarch32-system-registers/midr--main-id-register + * Regedit for MIDR : HKEY_LOCAL_MACHINE\HARDWARE\DESCRIPTION\System\CentralProcessor\0\CP 4000 + */ + uint64_t midr_qword = (uint32_t)read_registry_qword(cpu0_subkey, chip_midr_value); + if (midr_qword == 0) { + cpuinfo_log_error("Registry read error for MIDR value"); + return NULL; } - if (chip_info == NULL) { - /* No match was found, so print a warning and assign the unknown - * case. */ - cpuinfo_log_error( - "Unknown chip model name '%ls'.\nPlease add new Windows on Arm SoC/chip support to arm/windows/init.c!", - text_buffer); - } else { - cpuinfo_log_debug("detected chip model name: %s", chip_info->chip_name_string); + // MIDR is only 32 bits, so we need to cast it to uint32_t + uint32_t midr_value = (uint32_t)midr_qword; + + /* Read the frequency from the registry + * The value is in MHz, so we need to convert it to Hz */ + uint64_t frequency_mhz = read_registry_dword(cpu0_subkey, chip_mhz_value); + if (frequency_mhz == 0) { + cpuinfo_log_error("Registry read error for frequency value"); + return NULL; } + // Convert MHz to Hz + uint64_t frequency_hz = frequency_mhz * 1000000; + + // Allocate chip_info before using it. 
+ chip_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct woa_chip_info)); + if (chip_info == NULL) { + cpuinfo_log_error("Heap allocation error for chip_info"); + return NULL; + } + + // set chip_info fields + chip_info->chip_name_string = wcsndup(text_buffer, CPUINFO_PACKAGE_NAME_MAX - 1); + chip_info->uarchs[0] = get_core_info_from_midr(midr_value, frequency_hz); + + cpuinfo_log_debug("detected chip model name: %ls", chip_info->chip_name_string); - HeapFree(heap, 0, text_buffer); return chip_info; } @@ -216,4 +224,4 @@ static void set_cpuinfo_isa_fields(void) { cpuinfo_isa.pmull = crypto; cpuinfo_isa.crc32 = IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0; -} +} \ No newline at end of file diff --git a/src/arm/windows/windows-arm-init.h b/src/arm/windows/windows-arm-init.h index dc6e184c..52dd1807 100644 --- a/src/arm/windows/windows-arm-init.h +++ b/src/arm/windows/windows-arm-init.h @@ -3,17 +3,6 @@ /* Efficiency class = 0 means little core, while 1 means big core for now. */ #define MAX_WOA_VALID_EFFICIENCY_CLASSES 2 -/* List of known and supported Windows on Arm SoCs/chips. 
*/ -enum woa_chip_name { - woa_chip_name_microsoft_sq_1 = 0, - woa_chip_name_microsoft_sq_2 = 1, - woa_chip_name_microsoft_sq_3 = 2, - woa_chip_name_microsoft_sq_3_devkit = 3, - woa_chip_name_ampere_altra = 4, - woa_chip_name_unknown = 5, - woa_chip_name_last = woa_chip_name_unknown -}; - /* Topology information hard-coded by SoC/chip name */ struct core_info_by_chip_name { enum cpuinfo_vendor vendor; @@ -26,14 +15,7 @@ struct core_info_by_chip_name { */ struct woa_chip_info { wchar_t* chip_name_string; - enum woa_chip_name chip_name; struct core_info_by_chip_name uarchs[MAX_WOA_VALID_EFFICIENCY_CLASSES]; }; -bool get_core_uarch_for_efficiency( - enum woa_chip_name chip, - BYTE EfficiencyClass, - enum cpuinfo_uarch* uarch, - uint64_t* frequency); - bool cpu_info_init_by_logical_sys_info(const struct woa_chip_info* chip_info, enum cpuinfo_vendor vendor); From 1fe503468fb9568084242fdab3d43c29c48706f9 Mon Sep 17 00:00:00 2001 From: Ozan Aydin <148207261+ozanMSFT@users.noreply.github.com> Date: Fri, 14 Mar 2025 19:39:28 +0100 Subject: [PATCH 41/62] - oryon string --- tools/cpu-info.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/cpu-info.c b/tools/cpu-info.c index b896b270..ca3ebfad 100644 --- a/tools/cpu-info.c +++ b/tools/cpu-info.c @@ -222,6 +222,8 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) { return "Falkor"; case cpuinfo_uarch_saphira: return "Saphira"; + case cpuinfo_uarch_oryon: + return "Oryon"; case cpuinfo_uarch_denver: return "Denver"; case cpuinfo_uarch_denver2: From 88989a2926327db63f5fdcd2f0f053252bd470e2 Mon Sep 17 00:00:00 2001 From: Ozan Aydin <148207261+ozanMSFT@users.noreply.github.com> Date: Mon, 17 Mar 2025 09:27:41 +0100 Subject: [PATCH 42/62] - clang format --- src/arm/api.h | 183 ++++++++++----------- src/arm/windows/init.c | 359 +++++++++++++++++++++-------------------- 2 files changed, 267 insertions(+), 275 deletions(-) diff --git a/src/arm/api.h b/src/arm/api.h index b1be6392..dc3858d0 100644 --- 
a/src/arm/api.h +++ b/src/arm/api.h @@ -1,9 +1,9 @@ #pragma once #ifdef _MSC_VER - #define RESTRICT_STATIC /* nothing for MSVC */ +#define RESTRICT_STATIC /* nothing for MSVC */ #else - #define RESTRICT_STATIC restrict static +#define RESTRICT_STATIC restrict static #endif #include @@ -13,121 +13,112 @@ #include enum cpuinfo_arm_chipset_vendor { - cpuinfo_arm_chipset_vendor_unknown = 0, - cpuinfo_arm_chipset_vendor_qualcomm, - cpuinfo_arm_chipset_vendor_mediatek, - cpuinfo_arm_chipset_vendor_samsung, - cpuinfo_arm_chipset_vendor_hisilicon, - cpuinfo_arm_chipset_vendor_actions, - cpuinfo_arm_chipset_vendor_allwinner, - cpuinfo_arm_chipset_vendor_amlogic, - cpuinfo_arm_chipset_vendor_broadcom, - cpuinfo_arm_chipset_vendor_lg, - cpuinfo_arm_chipset_vendor_leadcore, - cpuinfo_arm_chipset_vendor_marvell, - cpuinfo_arm_chipset_vendor_mstar, - cpuinfo_arm_chipset_vendor_novathor, - cpuinfo_arm_chipset_vendor_nvidia, - cpuinfo_arm_chipset_vendor_pinecone, - cpuinfo_arm_chipset_vendor_renesas, - cpuinfo_arm_chipset_vendor_rockchip, - cpuinfo_arm_chipset_vendor_spreadtrum, - cpuinfo_arm_chipset_vendor_telechips, - cpuinfo_arm_chipset_vendor_texas_instruments, - cpuinfo_arm_chipset_vendor_unisoc, - cpuinfo_arm_chipset_vendor_wondermedia, - cpuinfo_arm_chipset_vendor_max, + cpuinfo_arm_chipset_vendor_unknown = 0, + cpuinfo_arm_chipset_vendor_qualcomm, + cpuinfo_arm_chipset_vendor_mediatek, + cpuinfo_arm_chipset_vendor_samsung, + cpuinfo_arm_chipset_vendor_hisilicon, + cpuinfo_arm_chipset_vendor_actions, + cpuinfo_arm_chipset_vendor_allwinner, + cpuinfo_arm_chipset_vendor_amlogic, + cpuinfo_arm_chipset_vendor_broadcom, + cpuinfo_arm_chipset_vendor_lg, + cpuinfo_arm_chipset_vendor_leadcore, + cpuinfo_arm_chipset_vendor_marvell, + cpuinfo_arm_chipset_vendor_mstar, + cpuinfo_arm_chipset_vendor_novathor, + cpuinfo_arm_chipset_vendor_nvidia, + cpuinfo_arm_chipset_vendor_pinecone, + cpuinfo_arm_chipset_vendor_renesas, + cpuinfo_arm_chipset_vendor_rockchip, + 
cpuinfo_arm_chipset_vendor_spreadtrum, + cpuinfo_arm_chipset_vendor_telechips, + cpuinfo_arm_chipset_vendor_texas_instruments, + cpuinfo_arm_chipset_vendor_unisoc, + cpuinfo_arm_chipset_vendor_wondermedia, + cpuinfo_arm_chipset_vendor_max, }; enum cpuinfo_arm_chipset_series { - cpuinfo_arm_chipset_series_unknown = 0, - cpuinfo_arm_chipset_series_qualcomm_qsd, - cpuinfo_arm_chipset_series_qualcomm_msm, - cpuinfo_arm_chipset_series_qualcomm_apq, - cpuinfo_arm_chipset_series_qualcomm_snapdragon, - cpuinfo_arm_chipset_series_mediatek_mt, - cpuinfo_arm_chipset_series_samsung_exynos, - cpuinfo_arm_chipset_series_hisilicon_k3v, - cpuinfo_arm_chipset_series_hisilicon_hi, - cpuinfo_arm_chipset_series_hisilicon_kirin, - cpuinfo_arm_chipset_series_actions_atm, - cpuinfo_arm_chipset_series_allwinner_a, - cpuinfo_arm_chipset_series_amlogic_aml, - cpuinfo_arm_chipset_series_amlogic_s, - cpuinfo_arm_chipset_series_broadcom_bcm, - cpuinfo_arm_chipset_series_lg_nuclun, - cpuinfo_arm_chipset_series_leadcore_lc, - cpuinfo_arm_chipset_series_marvell_pxa, - cpuinfo_arm_chipset_series_mstar_6a, - cpuinfo_arm_chipset_series_novathor_u, - cpuinfo_arm_chipset_series_nvidia_tegra_t, - cpuinfo_arm_chipset_series_nvidia_tegra_ap, - cpuinfo_arm_chipset_series_nvidia_tegra_sl, - cpuinfo_arm_chipset_series_pinecone_surge_s, - cpuinfo_arm_chipset_series_renesas_mp, - cpuinfo_arm_chipset_series_rockchip_rk, - cpuinfo_arm_chipset_series_spreadtrum_sc, - cpuinfo_arm_chipset_series_telechips_tcc, - cpuinfo_arm_chipset_series_texas_instruments_omap, - cpuinfo_arm_chipset_series_unisoc_t, - cpuinfo_arm_chipset_series_unisoc_ums, - cpuinfo_arm_chipset_series_wondermedia_wm, - cpuinfo_arm_chipset_series_max, + cpuinfo_arm_chipset_series_unknown = 0, + cpuinfo_arm_chipset_series_qualcomm_qsd, + cpuinfo_arm_chipset_series_qualcomm_msm, + cpuinfo_arm_chipset_series_qualcomm_apq, + cpuinfo_arm_chipset_series_qualcomm_snapdragon, + cpuinfo_arm_chipset_series_mediatek_mt, + 
cpuinfo_arm_chipset_series_samsung_exynos, + cpuinfo_arm_chipset_series_hisilicon_k3v, + cpuinfo_arm_chipset_series_hisilicon_hi, + cpuinfo_arm_chipset_series_hisilicon_kirin, + cpuinfo_arm_chipset_series_actions_atm, + cpuinfo_arm_chipset_series_allwinner_a, + cpuinfo_arm_chipset_series_amlogic_aml, + cpuinfo_arm_chipset_series_amlogic_s, + cpuinfo_arm_chipset_series_broadcom_bcm, + cpuinfo_arm_chipset_series_lg_nuclun, + cpuinfo_arm_chipset_series_leadcore_lc, + cpuinfo_arm_chipset_series_marvell_pxa, + cpuinfo_arm_chipset_series_mstar_6a, + cpuinfo_arm_chipset_series_novathor_u, + cpuinfo_arm_chipset_series_nvidia_tegra_t, + cpuinfo_arm_chipset_series_nvidia_tegra_ap, + cpuinfo_arm_chipset_series_nvidia_tegra_sl, + cpuinfo_arm_chipset_series_pinecone_surge_s, + cpuinfo_arm_chipset_series_renesas_mp, + cpuinfo_arm_chipset_series_rockchip_rk, + cpuinfo_arm_chipset_series_spreadtrum_sc, + cpuinfo_arm_chipset_series_telechips_tcc, + cpuinfo_arm_chipset_series_texas_instruments_omap, + cpuinfo_arm_chipset_series_unisoc_t, + cpuinfo_arm_chipset_series_unisoc_ums, + cpuinfo_arm_chipset_series_wondermedia_wm, + cpuinfo_arm_chipset_series_max, }; #define CPUINFO_ARM_CHIPSET_SUFFIX_MAX 8 struct cpuinfo_arm_chipset { - enum cpuinfo_arm_chipset_vendor vendor; - enum cpuinfo_arm_chipset_series series; - uint32_t model; - char suffix[CPUINFO_ARM_CHIPSET_SUFFIX_MAX]; + enum cpuinfo_arm_chipset_vendor vendor; + enum cpuinfo_arm_chipset_series series; + uint32_t model; + char suffix[CPUINFO_ARM_CHIPSET_SUFFIX_MAX]; }; #define CPUINFO_ARM_CHIPSET_NAME_MAX CPUINFO_PACKAGE_NAME_MAX #ifndef __cplusplus CPUINFO_INTERNAL void cpuinfo_arm_chipset_to_string( - const struct cpuinfo_arm_chipset chipset[RESTRICT_STATIC 1], - char name[RESTRICT_STATIC CPUINFO_ARM_CHIPSET_NAME_MAX]); + const struct cpuinfo_arm_chipset chipset[RESTRICT_STATIC 1], + char name[RESTRICT_STATIC CPUINFO_ARM_CHIPSET_NAME_MAX]); -CPUINFO_INTERNAL void cpuinfo_arm_fixup_chipset( - struct cpuinfo_arm_chipset 
chipset[RESTRICT_STATIC 1], - uint32_t cores, - uint32_t max_cpu_freq_max); +CPUINFO_INTERNAL void +cpuinfo_arm_fixup_chipset(struct cpuinfo_arm_chipset chipset[RESTRICT_STATIC 1], + uint32_t cores, uint32_t max_cpu_freq_max); -CPUINFO_INTERNAL void cpuinfo_arm_decode_vendor_uarch( - uint32_t midr, +CPUINFO_INTERNAL void +cpuinfo_arm_decode_vendor_uarch(uint32_t midr, #if CPUINFO_ARCH_ARM - bool has_vfpv4, + bool has_vfpv4, #endif - enum cpuinfo_vendor vendor[RESTRICT_STATIC 1], - enum cpuinfo_uarch uarch[RESTRICT_STATIC 1]); + enum cpuinfo_vendor vendor[RESTRICT_STATIC 1], + enum cpuinfo_uarch uarch[RESTRICT_STATIC 1]); CPUINFO_INTERNAL void cpuinfo_arm_decode_cache( - enum cpuinfo_uarch uarch, - uint32_t cluster_cores, - uint32_t midr, - const struct cpuinfo_arm_chipset chipset[RESTRICT_STATIC 1], - uint32_t cluster_id, - uint32_t arch_version, - struct cpuinfo_cache l1i[RESTRICT_STATIC 1], - struct cpuinfo_cache l1d[RESTRICT_STATIC 1], - struct cpuinfo_cache l2[RESTRICT_STATIC 1], - struct cpuinfo_cache l3[RESTRICT_STATIC 1]); + enum cpuinfo_uarch uarch, uint32_t cluster_cores, uint32_t midr, + const struct cpuinfo_arm_chipset chipset[RESTRICT_STATIC 1], + uint32_t cluster_id, uint32_t arch_version, + struct cpuinfo_cache l1i[RESTRICT_STATIC 1], + struct cpuinfo_cache l1d[RESTRICT_STATIC 1], + struct cpuinfo_cache l2[RESTRICT_STATIC 1], + struct cpuinfo_cache l3[RESTRICT_STATIC 1]); -CPUINFO_INTERNAL uint32_t -cpuinfo_arm_compute_max_cache_size(const struct cpuinfo_processor processor[RESTRICT_STATIC 1]); +CPUINFO_INTERNAL uint32_t cpuinfo_arm_compute_max_cache_size( + const struct cpuinfo_processor processor[RESTRICT_STATIC 1]); #else /* defined(__cplusplus) */ CPUINFO_INTERNAL void cpuinfo_arm_decode_cache( - enum cpuinfo_uarch uarch, - uint32_t cluster_cores, - uint32_t midr, - const struct cpuinfo_arm_chipset chipset[1], - uint32_t cluster_id, - uint32_t arch_version, - struct cpuinfo_cache l1i[1], - struct cpuinfo_cache l1d[1], - struct cpuinfo_cache l2[1], 
- struct cpuinfo_cache l3[1]); + enum cpuinfo_uarch uarch, uint32_t cluster_cores, uint32_t midr, + const struct cpuinfo_arm_chipset chipset[1], uint32_t cluster_id, + uint32_t arch_version, struct cpuinfo_cache l1i[1], + struct cpuinfo_cache l1d[1], struct cpuinfo_cache l2[1], + struct cpuinfo_cache l3[1]); #endif diff --git a/src/arm/windows/init.c b/src/arm/windows/init.c index 84098433..2e4ec2e9 100644 --- a/src/arm/windows/init.c +++ b/src/arm/windows/init.c @@ -15,213 +15,214 @@ struct cpuinfo_arm_isa cpuinfo_isa; static void set_cpuinfo_isa_fields(void); -static struct woa_chip_info* get_system_info_from_registry(void); +static struct woa_chip_info *get_system_info_from_registry(void); -static struct woa_chip_info woa_chip_unknown = {L"Unknown", {{cpuinfo_vendor_unknown, cpuinfo_uarch_unknown, 0}}}; +static struct woa_chip_info woa_chip_unknown = { + L"Unknown", {{cpuinfo_vendor_unknown, cpuinfo_uarch_unknown, 0}}}; -BOOL CALLBACK cpuinfo_arm_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { - struct woa_chip_info* chip_info = NULL; - enum cpuinfo_vendor vendor = cpuinfo_vendor_unknown; +BOOL CALLBACK cpuinfo_arm_windows_init(PINIT_ONCE init_once, PVOID parameter, + PVOID *context) { + struct woa_chip_info *chip_info = NULL; + enum cpuinfo_vendor vendor = cpuinfo_vendor_unknown; - set_cpuinfo_isa_fields(); + set_cpuinfo_isa_fields(); - chip_info = get_system_info_from_registry(); - if (chip_info == NULL) { - chip_info = &woa_chip_unknown; - } + chip_info = get_system_info_from_registry(); + if (chip_info == NULL) { + chip_info = &woa_chip_unknown; + } - cpuinfo_is_initialized = cpu_info_init_by_logical_sys_info(chip_info, chip_info->uarchs[0].vendor); + cpuinfo_is_initialized = + cpu_info_init_by_logical_sys_info(chip_info, chip_info->uarchs[0].vendor); - return true; + return true; } /* Static helper functions */ -static wchar_t* read_registry(LPCWSTR subkey, LPCWSTR value) { - DWORD key_type = 0; - DWORD data_size = 0; - const DWORD 
flags = RRF_RT_REG_SZ; /* Only read strings (REG_SZ) */ - wchar_t* text_buffer = NULL; - LSTATUS result = 0; - HANDLE heap = GetProcessHeap(); - - result = RegGetValueW( - HKEY_LOCAL_MACHINE, - subkey, - value, - flags, - &key_type, - NULL, /* Request buffer size */ - &data_size); - if (result != 0 || data_size == 0) { - cpuinfo_log_error("Registry entry size read error"); - return NULL; - } - - text_buffer = HeapAlloc(heap, HEAP_ZERO_MEMORY, data_size); - if (text_buffer == NULL) { - cpuinfo_log_error("Registry textbuffer allocation error"); - return NULL; - } - - result = RegGetValueW( - HKEY_LOCAL_MACHINE, - subkey, - value, - flags, - NULL, - text_buffer, /* Write string in this destination buffer */ - &data_size); - if (result != 0) { - cpuinfo_log_error("Registry read error"); - HeapFree(heap, 0, text_buffer); - return NULL; - } - return text_buffer; +static wchar_t *read_registry(LPCWSTR subkey, LPCWSTR value) { + DWORD key_type = 0; + DWORD data_size = 0; + const DWORD flags = RRF_RT_REG_SZ; /* Only read strings (REG_SZ) */ + wchar_t *text_buffer = NULL; + LSTATUS result = 0; + HANDLE heap = GetProcessHeap(); + + result = RegGetValueW(HKEY_LOCAL_MACHINE, subkey, value, flags, &key_type, + NULL, /* Request buffer size */ + &data_size); + if (result != 0 || data_size == 0) { + cpuinfo_log_error("Registry entry size read error"); + return NULL; + } + + text_buffer = HeapAlloc(heap, HEAP_ZERO_MEMORY, data_size); + if (text_buffer == NULL) { + cpuinfo_log_error("Registry textbuffer allocation error"); + return NULL; + } + + result = + RegGetValueW(HKEY_LOCAL_MACHINE, subkey, value, flags, NULL, + text_buffer, /* Write string in this destination buffer */ + &data_size); + if (result != 0) { + cpuinfo_log_error("Registry read error"); + HeapFree(heap, 0, text_buffer); + return NULL; + } + return text_buffer; } static uint64_t read_registry_qword(LPCWSTR subkey, LPCWSTR value) { - DWORD key_type = 0; - DWORD data_size = sizeof(uint64_t); - const DWORD flags = 
RRF_RT_REG_QWORD; /* Only read QWORD (REG_QWORD) values */ - uint64_t qword_value = 0; - LSTATUS result = RegGetValueW(HKEY_LOCAL_MACHINE, subkey, value, flags, &key_type, &qword_value, &data_size); - if (result != ERROR_SUCCESS || data_size != sizeof(uint64_t)) { - cpuinfo_log_error("Registry QWORD read error"); - return 0; - } - return qword_value; + DWORD key_type = 0; + DWORD data_size = sizeof(uint64_t); + const DWORD flags = RRF_RT_REG_QWORD; /* Only read QWORD (REG_QWORD) values */ + uint64_t qword_value = 0; + LSTATUS result = RegGetValueW(HKEY_LOCAL_MACHINE, subkey, value, flags, + &key_type, &qword_value, &data_size); + if (result != ERROR_SUCCESS || data_size != sizeof(uint64_t)) { + cpuinfo_log_error("Registry QWORD read error"); + return 0; + } + return qword_value; } static uint64_t read_registry_dword(LPCWSTR subkey, LPCWSTR value) { - DWORD key_type = 0; - DWORD data_size = sizeof(DWORD); - DWORD dword_value = 0; - LSTATUS result = RegGetValueW( - HKEY_LOCAL_MACHINE, - subkey, - value, - RRF_RT_REG_DWORD, - &key_type, - &dword_value, - &data_size); - if (result != ERROR_SUCCESS || data_size != sizeof(DWORD)) { - cpuinfo_log_error("Registry DWORD read error"); - return 0; - } - return (uint64_t)dword_value; + DWORD key_type = 0; + DWORD data_size = sizeof(DWORD); + DWORD dword_value = 0; + LSTATUS result = + RegGetValueW(HKEY_LOCAL_MACHINE, subkey, value, RRF_RT_REG_DWORD, + &key_type, &dword_value, &data_size); + if (result != ERROR_SUCCESS || data_size != sizeof(DWORD)) { + cpuinfo_log_error("Registry DWORD read error"); + return 0; + } + return (uint64_t)dword_value; } -static wchar_t* wcsndup(const wchar_t* src, size_t n) { - size_t len = wcsnlen(src, n); - wchar_t* dup = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, (len + 1) * sizeof(wchar_t)); - if (dup) { - wcsncpy_s(dup, len + 1, src, len); - dup[len] = L'\0'; - } - return dup; +static wchar_t *wcsndup(const wchar_t *src, size_t n) { + size_t len = wcsnlen(src, n); + wchar_t *dup = 
HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, + (len + 1) * sizeof(wchar_t)); + if (dup) { + wcsncpy_s(dup, len + 1, src, len); + dup[len] = L'\0'; + } + return dup; } -static struct core_info_by_chip_name get_core_info_from_midr(uint32_t midr, uint64_t frequency) { - struct core_info_by_chip_name info; - enum cpuinfo_vendor vendor; - enum cpuinfo_uarch uarch; +static struct core_info_by_chip_name +get_core_info_from_midr(uint32_t midr, uint64_t frequency) { + struct core_info_by_chip_name info; + enum cpuinfo_vendor vendor; + enum cpuinfo_uarch uarch; #if CPUINFO_ARCH_ARM - bool has_vfpv4 = false; - cpuinfo_arm_decode_vendor_uarch(midr, has_vfpv4, &vendor, &uarch); + bool has_vfpv4 = false; + cpuinfo_arm_decode_vendor_uarch(midr, has_vfpv4, &vendor, &uarch); #else - cpuinfo_arm_decode_vendor_uarch(midr, &vendor, &uarch); + cpuinfo_arm_decode_vendor_uarch(midr, &vendor, &uarch); #endif - info.vendor = vendor; - info.uarch = uarch; - info.frequency = frequency; - return info; + info.vendor = vendor; + info.uarch = uarch; + info.frequency = frequency; + return info; } -static struct woa_chip_info* get_system_info_from_registry(void) { - wchar_t* text_buffer = NULL; - LPCWSTR cpu0_subkey = L"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"; - LPCWSTR chip_name_value = L"ProcessorNameString"; - LPCWSTR chip_midr_value = L"CP 4000"; - LPCWSTR chip_mhz_value = L"~MHz"; - struct woa_chip_info* chip_info = NULL; - - /* Read processor model name from registry and find in the hard-coded - * list. 
*/ - text_buffer = read_registry(cpu0_subkey, chip_name_value); - if (text_buffer == NULL) { - cpuinfo_log_error("Registry read error for processor name"); - return NULL; - } - - /* - * https://developer.arm.com/documentation/100442/0100/register-descriptions/aarch32-system-registers/midr--main-id-register - * Regedit for MIDR : HKEY_LOCAL_MACHINE\HARDWARE\DESCRIPTION\System\CentralProcessor\0\CP 4000 - */ - uint64_t midr_qword = (uint32_t)read_registry_qword(cpu0_subkey, chip_midr_value); - if (midr_qword == 0) { - cpuinfo_log_error("Registry read error for MIDR value"); - return NULL; - } - // MIDR is only 32 bits, so we need to cast it to uint32_t - uint32_t midr_value = (uint32_t)midr_qword; - - /* Read the frequency from the registry - * The value is in MHz, so we need to convert it to Hz */ - uint64_t frequency_mhz = read_registry_dword(cpu0_subkey, chip_mhz_value); - if (frequency_mhz == 0) { - cpuinfo_log_error("Registry read error for frequency value"); - return NULL; - } - // Convert MHz to Hz - uint64_t frequency_hz = frequency_mhz * 1000000; - - // Allocate chip_info before using it. 
- chip_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct woa_chip_info)); - if (chip_info == NULL) { - cpuinfo_log_error("Heap allocation error for chip_info"); - return NULL; - } - - // set chip_info fields - chip_info->chip_name_string = wcsndup(text_buffer, CPUINFO_PACKAGE_NAME_MAX - 1); - chip_info->uarchs[0] = get_core_info_from_midr(midr_value, frequency_hz); - - cpuinfo_log_debug("detected chip model name: %ls", chip_info->chip_name_string); - - return chip_info; +static struct woa_chip_info *get_system_info_from_registry(void) { + wchar_t *text_buffer = NULL; + LPCWSTR cpu0_subkey = L"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"; + LPCWSTR chip_name_value = L"ProcessorNameString"; + LPCWSTR chip_midr_value = L"CP 4000"; + LPCWSTR chip_mhz_value = L"~MHz"; + struct woa_chip_info *chip_info = NULL; + + /* Read processor model name from registry and find in the hard-coded + * list. */ + text_buffer = read_registry(cpu0_subkey, chip_name_value); + if (text_buffer == NULL) { + cpuinfo_log_error("Registry read error for processor name"); + return NULL; + } + + /* + * https://developer.arm.com/documentation/100442/0100/register-descriptions/aarch32-system-registers/midr--main-id-register + * Regedit for MIDR : + *HKEY_LOCAL_MACHINE\HARDWARE\DESCRIPTION\System\CentralProcessor\0\CP 4000 + */ + uint64_t midr_qword = + (uint32_t)read_registry_qword(cpu0_subkey, chip_midr_value); + if (midr_qword == 0) { + cpuinfo_log_error("Registry read error for MIDR value"); + return NULL; + } + // MIDR is only 32 bits, so we need to cast it to uint32_t + uint32_t midr_value = (uint32_t)midr_qword; + + /* Read the frequency from the registry + * The value is in MHz, so we need to convert it to Hz */ + uint64_t frequency_mhz = read_registry_dword(cpu0_subkey, chip_mhz_value); + if (frequency_mhz == 0) { + cpuinfo_log_error("Registry read error for frequency value"); + return NULL; + } + // Convert MHz to Hz + uint64_t frequency_hz = frequency_mhz * 1000000; 
+ + // Allocate chip_info before using it. + chip_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, + sizeof(struct woa_chip_info)); + if (chip_info == NULL) { + cpuinfo_log_error("Heap allocation error for chip_info"); + return NULL; + } + + // set chip_info fields + chip_info->chip_name_string = + wcsndup(text_buffer, CPUINFO_PACKAGE_NAME_MAX - 1); + chip_info->uarchs[0] = get_core_info_from_midr(midr_value, frequency_hz); + + cpuinfo_log_debug("detected chip model name: %ls", + chip_info->chip_name_string); + + return chip_info; } static void set_cpuinfo_isa_fields(void) { - cpuinfo_isa.atomics = IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE) != 0; - - const bool dotprod = IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != 0; - cpuinfo_isa.dot = dotprod; - - SYSTEM_INFO system_info; - GetSystemInfo(&system_info); - switch (system_info.wProcessorLevel) { - case 0x803: // Kryo 385 Silver (Snapdragon 850) - cpuinfo_isa.fp16arith = dotprod; - cpuinfo_isa.rdm = dotprod; - break; - default: - // Assume that Dot Product support implies FP16 - // arithmetics and RDM support. ARM manuals don't - // guarantee that, but it holds in practice. - cpuinfo_isa.fp16arith = dotprod; - cpuinfo_isa.rdm = dotprod; - break; - } - - /* Windows API reports all or nothing for cryptographic instructions. 
*/ - const bool crypto = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0; - cpuinfo_isa.aes = crypto; - cpuinfo_isa.sha1 = crypto; - cpuinfo_isa.sha2 = crypto; - cpuinfo_isa.pmull = crypto; - - cpuinfo_isa.crc32 = IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0; + cpuinfo_isa.atomics = + IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE) != 0; + + const bool dotprod = + IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != 0; + cpuinfo_isa.dot = dotprod; + + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + switch (system_info.wProcessorLevel) { + case 0x803: // Kryo 385 Silver (Snapdragon 850) + cpuinfo_isa.fp16arith = dotprod; + cpuinfo_isa.rdm = dotprod; + break; + default: + // Assume that Dot Product support implies FP16 + // arithmetics and RDM support. ARM manuals don't + // guarantee that, but it holds in practice. + cpuinfo_isa.fp16arith = dotprod; + cpuinfo_isa.rdm = dotprod; + break; + } + + /* Windows API reports all or nothing for cryptographic instructions. 
*/ + const bool crypto = + IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0; + cpuinfo_isa.aes = crypto; + cpuinfo_isa.sha1 = crypto; + cpuinfo_isa.sha2 = crypto; + cpuinfo_isa.pmull = crypto; + + cpuinfo_isa.crc32 = + IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0; } \ No newline at end of file From 96cb9405ef97bef01e5e1e401a7c61648abef87d Mon Sep 17 00:00:00 2001 From: Ozan Aydin <148207261+ozanMSFT@users.noreply.github.com> Date: Thu, 20 Mar 2025 10:17:31 +0100 Subject: [PATCH 43/62] - clang reformat --- src/arm/api.h | 179 ++++++----- src/arm/windows/init-by-logical-sys-info.c | 4 +- src/arm/windows/init.c | 354 ++++++++++----------- src/x86/isa.c | 2 +- 4 files changed, 271 insertions(+), 268 deletions(-) diff --git a/src/arm/api.h b/src/arm/api.h index dc3858d0..32a271c4 100644 --- a/src/arm/api.h +++ b/src/arm/api.h @@ -13,112 +13,121 @@ #include enum cpuinfo_arm_chipset_vendor { - cpuinfo_arm_chipset_vendor_unknown = 0, - cpuinfo_arm_chipset_vendor_qualcomm, - cpuinfo_arm_chipset_vendor_mediatek, - cpuinfo_arm_chipset_vendor_samsung, - cpuinfo_arm_chipset_vendor_hisilicon, - cpuinfo_arm_chipset_vendor_actions, - cpuinfo_arm_chipset_vendor_allwinner, - cpuinfo_arm_chipset_vendor_amlogic, - cpuinfo_arm_chipset_vendor_broadcom, - cpuinfo_arm_chipset_vendor_lg, - cpuinfo_arm_chipset_vendor_leadcore, - cpuinfo_arm_chipset_vendor_marvell, - cpuinfo_arm_chipset_vendor_mstar, - cpuinfo_arm_chipset_vendor_novathor, - cpuinfo_arm_chipset_vendor_nvidia, - cpuinfo_arm_chipset_vendor_pinecone, - cpuinfo_arm_chipset_vendor_renesas, - cpuinfo_arm_chipset_vendor_rockchip, - cpuinfo_arm_chipset_vendor_spreadtrum, - cpuinfo_arm_chipset_vendor_telechips, - cpuinfo_arm_chipset_vendor_texas_instruments, - cpuinfo_arm_chipset_vendor_unisoc, - cpuinfo_arm_chipset_vendor_wondermedia, - cpuinfo_arm_chipset_vendor_max, + cpuinfo_arm_chipset_vendor_unknown = 0, + cpuinfo_arm_chipset_vendor_qualcomm, + cpuinfo_arm_chipset_vendor_mediatek, 
+ cpuinfo_arm_chipset_vendor_samsung, + cpuinfo_arm_chipset_vendor_hisilicon, + cpuinfo_arm_chipset_vendor_actions, + cpuinfo_arm_chipset_vendor_allwinner, + cpuinfo_arm_chipset_vendor_amlogic, + cpuinfo_arm_chipset_vendor_broadcom, + cpuinfo_arm_chipset_vendor_lg, + cpuinfo_arm_chipset_vendor_leadcore, + cpuinfo_arm_chipset_vendor_marvell, + cpuinfo_arm_chipset_vendor_mstar, + cpuinfo_arm_chipset_vendor_novathor, + cpuinfo_arm_chipset_vendor_nvidia, + cpuinfo_arm_chipset_vendor_pinecone, + cpuinfo_arm_chipset_vendor_renesas, + cpuinfo_arm_chipset_vendor_rockchip, + cpuinfo_arm_chipset_vendor_spreadtrum, + cpuinfo_arm_chipset_vendor_telechips, + cpuinfo_arm_chipset_vendor_texas_instruments, + cpuinfo_arm_chipset_vendor_unisoc, + cpuinfo_arm_chipset_vendor_wondermedia, + cpuinfo_arm_chipset_vendor_max, }; enum cpuinfo_arm_chipset_series { - cpuinfo_arm_chipset_series_unknown = 0, - cpuinfo_arm_chipset_series_qualcomm_qsd, - cpuinfo_arm_chipset_series_qualcomm_msm, - cpuinfo_arm_chipset_series_qualcomm_apq, - cpuinfo_arm_chipset_series_qualcomm_snapdragon, - cpuinfo_arm_chipset_series_mediatek_mt, - cpuinfo_arm_chipset_series_samsung_exynos, - cpuinfo_arm_chipset_series_hisilicon_k3v, - cpuinfo_arm_chipset_series_hisilicon_hi, - cpuinfo_arm_chipset_series_hisilicon_kirin, - cpuinfo_arm_chipset_series_actions_atm, - cpuinfo_arm_chipset_series_allwinner_a, - cpuinfo_arm_chipset_series_amlogic_aml, - cpuinfo_arm_chipset_series_amlogic_s, - cpuinfo_arm_chipset_series_broadcom_bcm, - cpuinfo_arm_chipset_series_lg_nuclun, - cpuinfo_arm_chipset_series_leadcore_lc, - cpuinfo_arm_chipset_series_marvell_pxa, - cpuinfo_arm_chipset_series_mstar_6a, - cpuinfo_arm_chipset_series_novathor_u, - cpuinfo_arm_chipset_series_nvidia_tegra_t, - cpuinfo_arm_chipset_series_nvidia_tegra_ap, - cpuinfo_arm_chipset_series_nvidia_tegra_sl, - cpuinfo_arm_chipset_series_pinecone_surge_s, - cpuinfo_arm_chipset_series_renesas_mp, - cpuinfo_arm_chipset_series_rockchip_rk, - 
cpuinfo_arm_chipset_series_spreadtrum_sc, - cpuinfo_arm_chipset_series_telechips_tcc, - cpuinfo_arm_chipset_series_texas_instruments_omap, - cpuinfo_arm_chipset_series_unisoc_t, - cpuinfo_arm_chipset_series_unisoc_ums, - cpuinfo_arm_chipset_series_wondermedia_wm, - cpuinfo_arm_chipset_series_max, + cpuinfo_arm_chipset_series_unknown = 0, + cpuinfo_arm_chipset_series_qualcomm_qsd, + cpuinfo_arm_chipset_series_qualcomm_msm, + cpuinfo_arm_chipset_series_qualcomm_apq, + cpuinfo_arm_chipset_series_qualcomm_snapdragon, + cpuinfo_arm_chipset_series_mediatek_mt, + cpuinfo_arm_chipset_series_samsung_exynos, + cpuinfo_arm_chipset_series_hisilicon_k3v, + cpuinfo_arm_chipset_series_hisilicon_hi, + cpuinfo_arm_chipset_series_hisilicon_kirin, + cpuinfo_arm_chipset_series_actions_atm, + cpuinfo_arm_chipset_series_allwinner_a, + cpuinfo_arm_chipset_series_amlogic_aml, + cpuinfo_arm_chipset_series_amlogic_s, + cpuinfo_arm_chipset_series_broadcom_bcm, + cpuinfo_arm_chipset_series_lg_nuclun, + cpuinfo_arm_chipset_series_leadcore_lc, + cpuinfo_arm_chipset_series_marvell_pxa, + cpuinfo_arm_chipset_series_mstar_6a, + cpuinfo_arm_chipset_series_novathor_u, + cpuinfo_arm_chipset_series_nvidia_tegra_t, + cpuinfo_arm_chipset_series_nvidia_tegra_ap, + cpuinfo_arm_chipset_series_nvidia_tegra_sl, + cpuinfo_arm_chipset_series_pinecone_surge_s, + cpuinfo_arm_chipset_series_renesas_mp, + cpuinfo_arm_chipset_series_rockchip_rk, + cpuinfo_arm_chipset_series_spreadtrum_sc, + cpuinfo_arm_chipset_series_telechips_tcc, + cpuinfo_arm_chipset_series_texas_instruments_omap, + cpuinfo_arm_chipset_series_unisoc_t, + cpuinfo_arm_chipset_series_unisoc_ums, + cpuinfo_arm_chipset_series_wondermedia_wm, + cpuinfo_arm_chipset_series_max, }; #define CPUINFO_ARM_CHIPSET_SUFFIX_MAX 8 struct cpuinfo_arm_chipset { - enum cpuinfo_arm_chipset_vendor vendor; - enum cpuinfo_arm_chipset_series series; - uint32_t model; - char suffix[CPUINFO_ARM_CHIPSET_SUFFIX_MAX]; + enum cpuinfo_arm_chipset_vendor vendor; + enum 
cpuinfo_arm_chipset_series series; + uint32_t model; + char suffix[CPUINFO_ARM_CHIPSET_SUFFIX_MAX]; }; #define CPUINFO_ARM_CHIPSET_NAME_MAX CPUINFO_PACKAGE_NAME_MAX #ifndef __cplusplus CPUINFO_INTERNAL void cpuinfo_arm_chipset_to_string( - const struct cpuinfo_arm_chipset chipset[RESTRICT_STATIC 1], - char name[RESTRICT_STATIC CPUINFO_ARM_CHIPSET_NAME_MAX]); + const struct cpuinfo_arm_chipset chipset[RESTRICT_STATIC 1], + char name[RESTRICT_STATIC CPUINFO_ARM_CHIPSET_NAME_MAX]); -CPUINFO_INTERNAL void -cpuinfo_arm_fixup_chipset(struct cpuinfo_arm_chipset chipset[RESTRICT_STATIC 1], - uint32_t cores, uint32_t max_cpu_freq_max); +CPUINFO_INTERNAL void cpuinfo_arm_fixup_chipset( + struct cpuinfo_arm_chipset chipset[RESTRICT_STATIC 1], + uint32_t cores, + uint32_t max_cpu_freq_max); -CPUINFO_INTERNAL void -cpuinfo_arm_decode_vendor_uarch(uint32_t midr, +CPUINFO_INTERNAL void cpuinfo_arm_decode_vendor_uarch( + uint32_t midr, #if CPUINFO_ARCH_ARM - bool has_vfpv4, + bool has_vfpv4, #endif - enum cpuinfo_vendor vendor[RESTRICT_STATIC 1], - enum cpuinfo_uarch uarch[RESTRICT_STATIC 1]); + enum cpuinfo_vendor vendor[RESTRICT_STATIC 1], + enum cpuinfo_uarch uarch[RESTRICT_STATIC 1]); CPUINFO_INTERNAL void cpuinfo_arm_decode_cache( - enum cpuinfo_uarch uarch, uint32_t cluster_cores, uint32_t midr, - const struct cpuinfo_arm_chipset chipset[RESTRICT_STATIC 1], - uint32_t cluster_id, uint32_t arch_version, - struct cpuinfo_cache l1i[RESTRICT_STATIC 1], - struct cpuinfo_cache l1d[RESTRICT_STATIC 1], - struct cpuinfo_cache l2[RESTRICT_STATIC 1], - struct cpuinfo_cache l3[RESTRICT_STATIC 1]); + enum cpuinfo_uarch uarch, + uint32_t cluster_cores, + uint32_t midr, + const struct cpuinfo_arm_chipset chipset[RESTRICT_STATIC 1], + uint32_t cluster_id, + uint32_t arch_version, + struct cpuinfo_cache l1i[RESTRICT_STATIC 1], + struct cpuinfo_cache l1d[RESTRICT_STATIC 1], + struct cpuinfo_cache l2[RESTRICT_STATIC 1], + struct cpuinfo_cache l3[RESTRICT_STATIC 1]); -CPUINFO_INTERNAL uint32_t 
cpuinfo_arm_compute_max_cache_size( - const struct cpuinfo_processor processor[RESTRICT_STATIC 1]); +CPUINFO_INTERNAL uint32_t +cpuinfo_arm_compute_max_cache_size(const struct cpuinfo_processor processor[RESTRICT_STATIC 1]); #else /* defined(__cplusplus) */ CPUINFO_INTERNAL void cpuinfo_arm_decode_cache( - enum cpuinfo_uarch uarch, uint32_t cluster_cores, uint32_t midr, - const struct cpuinfo_arm_chipset chipset[1], uint32_t cluster_id, - uint32_t arch_version, struct cpuinfo_cache l1i[1], - struct cpuinfo_cache l1d[1], struct cpuinfo_cache l2[1], - struct cpuinfo_cache l3[1]); + enum cpuinfo_uarch uarch, + uint32_t cluster_cores, + uint32_t midr, + const struct cpuinfo_arm_chipset chipset[1], + uint32_t cluster_id, + uint32_t arch_version, + struct cpuinfo_cache l1i[1], + struct cpuinfo_cache l1d[1], + struct cpuinfo_cache l2[1], + struct cpuinfo_cache l3[1]); #endif diff --git a/src/arm/windows/init-by-logical-sys-info.c b/src/arm/windows/init-by-logical-sys-info.c index ecb238e1..ef6b144c 100644 --- a/src/arm/windows/init-by-logical-sys-info.c +++ b/src/arm/windows/init-by-logical-sys-info.c @@ -750,12 +750,12 @@ void store_core_info_per_processor( if (cores) { processors[processor_global_index].core = cores + core_id; cores[core_id].core_id = core_id; - + if (chip_info->uarchs == NULL) { cpuinfo_log_error("uarch is NULL for core %d", core_id); return; } - + cores[core_id].uarch = chip_info->uarchs[0].uarch; cores[core_id].frequency = chip_info->uarchs[0].frequency; diff --git a/src/arm/windows/init.c b/src/arm/windows/init.c index 2e4ec2e9..79e947a6 100644 --- a/src/arm/windows/init.c +++ b/src/arm/windows/init.c @@ -15,214 +15,208 @@ struct cpuinfo_arm_isa cpuinfo_isa; static void set_cpuinfo_isa_fields(void); -static struct woa_chip_info *get_system_info_from_registry(void); +static struct woa_chip_info* get_system_info_from_registry(void); -static struct woa_chip_info woa_chip_unknown = { - L"Unknown", {{cpuinfo_vendor_unknown, cpuinfo_uarch_unknown, 0}}}; 
+static struct woa_chip_info woa_chip_unknown = {L"Unknown", {{cpuinfo_vendor_unknown, cpuinfo_uarch_unknown, 0}}}; -BOOL CALLBACK cpuinfo_arm_windows_init(PINIT_ONCE init_once, PVOID parameter, - PVOID *context) { - struct woa_chip_info *chip_info = NULL; - enum cpuinfo_vendor vendor = cpuinfo_vendor_unknown; +BOOL CALLBACK cpuinfo_arm_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context) { + struct woa_chip_info* chip_info = NULL; + enum cpuinfo_vendor vendor = cpuinfo_vendor_unknown; - set_cpuinfo_isa_fields(); + set_cpuinfo_isa_fields(); - chip_info = get_system_info_from_registry(); - if (chip_info == NULL) { - chip_info = &woa_chip_unknown; - } + chip_info = get_system_info_from_registry(); + if (chip_info == NULL) { + chip_info = &woa_chip_unknown; + } - cpuinfo_is_initialized = - cpu_info_init_by_logical_sys_info(chip_info, chip_info->uarchs[0].vendor); + cpuinfo_is_initialized = cpu_info_init_by_logical_sys_info(chip_info, chip_info->uarchs[0].vendor); - return true; + return true; } /* Static helper functions */ -static wchar_t *read_registry(LPCWSTR subkey, LPCWSTR value) { - DWORD key_type = 0; - DWORD data_size = 0; - const DWORD flags = RRF_RT_REG_SZ; /* Only read strings (REG_SZ) */ - wchar_t *text_buffer = NULL; - LSTATUS result = 0; - HANDLE heap = GetProcessHeap(); - - result = RegGetValueW(HKEY_LOCAL_MACHINE, subkey, value, flags, &key_type, - NULL, /* Request buffer size */ - &data_size); - if (result != 0 || data_size == 0) { - cpuinfo_log_error("Registry entry size read error"); - return NULL; - } - - text_buffer = HeapAlloc(heap, HEAP_ZERO_MEMORY, data_size); - if (text_buffer == NULL) { - cpuinfo_log_error("Registry textbuffer allocation error"); - return NULL; - } - - result = - RegGetValueW(HKEY_LOCAL_MACHINE, subkey, value, flags, NULL, - text_buffer, /* Write string in this destination buffer */ - &data_size); - if (result != 0) { - cpuinfo_log_error("Registry read error"); - HeapFree(heap, 0, text_buffer); - return NULL; - 
} - return text_buffer; +static wchar_t* read_registry(LPCWSTR subkey, LPCWSTR value) { + DWORD key_type = 0; + DWORD data_size = 0; + const DWORD flags = RRF_RT_REG_SZ; /* Only read strings (REG_SZ) */ + wchar_t* text_buffer = NULL; + LSTATUS result = 0; + HANDLE heap = GetProcessHeap(); + + result = RegGetValueW( + HKEY_LOCAL_MACHINE, + subkey, + value, + flags, + &key_type, + NULL, /* Request buffer size */ + &data_size); + if (result != 0 || data_size == 0) { + cpuinfo_log_error("Registry entry size read error"); + return NULL; + } + + text_buffer = HeapAlloc(heap, HEAP_ZERO_MEMORY, data_size); + if (text_buffer == NULL) { + cpuinfo_log_error("Registry textbuffer allocation error"); + return NULL; + } + + result = RegGetValueW( + HKEY_LOCAL_MACHINE, + subkey, + value, + flags, + NULL, + text_buffer, /* Write string in this destination buffer */ + &data_size); + if (result != 0) { + cpuinfo_log_error("Registry read error"); + HeapFree(heap, 0, text_buffer); + return NULL; + } + return text_buffer; } static uint64_t read_registry_qword(LPCWSTR subkey, LPCWSTR value) { - DWORD key_type = 0; - DWORD data_size = sizeof(uint64_t); - const DWORD flags = RRF_RT_REG_QWORD; /* Only read QWORD (REG_QWORD) values */ - uint64_t qword_value = 0; - LSTATUS result = RegGetValueW(HKEY_LOCAL_MACHINE, subkey, value, flags, - &key_type, &qword_value, &data_size); - if (result != ERROR_SUCCESS || data_size != sizeof(uint64_t)) { - cpuinfo_log_error("Registry QWORD read error"); - return 0; - } - return qword_value; + DWORD key_type = 0; + DWORD data_size = sizeof(uint64_t); + const DWORD flags = RRF_RT_REG_QWORD; /* Only read QWORD (REG_QWORD) values */ + uint64_t qword_value = 0; + LSTATUS result = RegGetValueW(HKEY_LOCAL_MACHINE, subkey, value, flags, &key_type, &qword_value, &data_size); + if (result != ERROR_SUCCESS || data_size != sizeof(uint64_t)) { + cpuinfo_log_error("Registry QWORD read error"); + return 0; + } + return qword_value; } static uint64_t 
read_registry_dword(LPCWSTR subkey, LPCWSTR value) { - DWORD key_type = 0; - DWORD data_size = sizeof(DWORD); - DWORD dword_value = 0; - LSTATUS result = - RegGetValueW(HKEY_LOCAL_MACHINE, subkey, value, RRF_RT_REG_DWORD, - &key_type, &dword_value, &data_size); - if (result != ERROR_SUCCESS || data_size != sizeof(DWORD)) { - cpuinfo_log_error("Registry DWORD read error"); - return 0; - } - return (uint64_t)dword_value; + DWORD key_type = 0; + DWORD data_size = sizeof(DWORD); + DWORD dword_value = 0; + LSTATUS result = + RegGetValueW(HKEY_LOCAL_MACHINE, subkey, value, RRF_RT_REG_DWORD, &key_type, &dword_value, &data_size); + if (result != ERROR_SUCCESS || data_size != sizeof(DWORD)) { + cpuinfo_log_error("Registry DWORD read error"); + return 0; + } + return (uint64_t)dword_value; } -static wchar_t *wcsndup(const wchar_t *src, size_t n) { - size_t len = wcsnlen(src, n); - wchar_t *dup = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, - (len + 1) * sizeof(wchar_t)); - if (dup) { - wcsncpy_s(dup, len + 1, src, len); - dup[len] = L'\0'; - } - return dup; +static wchar_t* wcsndup(const wchar_t* src, size_t n) { + size_t len = wcsnlen(src, n); + wchar_t* dup = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, (len + 1) * sizeof(wchar_t)); + if (dup) { + wcsncpy_s(dup, len + 1, src, len); + dup[len] = L'\0'; + } + return dup; } -static struct core_info_by_chip_name -get_core_info_from_midr(uint32_t midr, uint64_t frequency) { - struct core_info_by_chip_name info; - enum cpuinfo_vendor vendor; - enum cpuinfo_uarch uarch; +static struct core_info_by_chip_name get_core_info_from_midr(uint32_t midr, uint64_t frequency) { + struct core_info_by_chip_name info; + enum cpuinfo_vendor vendor; + enum cpuinfo_uarch uarch; #if CPUINFO_ARCH_ARM - bool has_vfpv4 = false; - cpuinfo_arm_decode_vendor_uarch(midr, has_vfpv4, &vendor, &uarch); + bool has_vfpv4 = false; + cpuinfo_arm_decode_vendor_uarch(midr, has_vfpv4, &vendor, &uarch); #else - cpuinfo_arm_decode_vendor_uarch(midr, &vendor, 
&uarch); + cpuinfo_arm_decode_vendor_uarch(midr, &vendor, &uarch); #endif - info.vendor = vendor; - info.uarch = uarch; - info.frequency = frequency; - return info; + info.vendor = vendor; + info.uarch = uarch; + info.frequency = frequency; + return info; } -static struct woa_chip_info *get_system_info_from_registry(void) { - wchar_t *text_buffer = NULL; - LPCWSTR cpu0_subkey = L"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"; - LPCWSTR chip_name_value = L"ProcessorNameString"; - LPCWSTR chip_midr_value = L"CP 4000"; - LPCWSTR chip_mhz_value = L"~MHz"; - struct woa_chip_info *chip_info = NULL; - - /* Read processor model name from registry and find in the hard-coded - * list. */ - text_buffer = read_registry(cpu0_subkey, chip_name_value); - if (text_buffer == NULL) { - cpuinfo_log_error("Registry read error for processor name"); - return NULL; - } - - /* - * https://developer.arm.com/documentation/100442/0100/register-descriptions/aarch32-system-registers/midr--main-id-register - * Regedit for MIDR : - *HKEY_LOCAL_MACHINE\HARDWARE\DESCRIPTION\System\CentralProcessor\0\CP 4000 - */ - uint64_t midr_qword = - (uint32_t)read_registry_qword(cpu0_subkey, chip_midr_value); - if (midr_qword == 0) { - cpuinfo_log_error("Registry read error for MIDR value"); - return NULL; - } - // MIDR is only 32 bits, so we need to cast it to uint32_t - uint32_t midr_value = (uint32_t)midr_qword; - - /* Read the frequency from the registry - * The value is in MHz, so we need to convert it to Hz */ - uint64_t frequency_mhz = read_registry_dword(cpu0_subkey, chip_mhz_value); - if (frequency_mhz == 0) { - cpuinfo_log_error("Registry read error for frequency value"); - return NULL; - } - // Convert MHz to Hz - uint64_t frequency_hz = frequency_mhz * 1000000; - - // Allocate chip_info before using it. 
- chip_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, - sizeof(struct woa_chip_info)); - if (chip_info == NULL) { - cpuinfo_log_error("Heap allocation error for chip_info"); - return NULL; - } - - // set chip_info fields - chip_info->chip_name_string = - wcsndup(text_buffer, CPUINFO_PACKAGE_NAME_MAX - 1); - chip_info->uarchs[0] = get_core_info_from_midr(midr_value, frequency_hz); - - cpuinfo_log_debug("detected chip model name: %ls", - chip_info->chip_name_string); - - return chip_info; +static struct woa_chip_info* get_system_info_from_registry(void) { + wchar_t* text_buffer = NULL; + LPCWSTR cpu0_subkey = L"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"; + LPCWSTR chip_name_value = L"ProcessorNameString"; + LPCWSTR chip_midr_value = L"CP 4000"; + LPCWSTR chip_mhz_value = L"~MHz"; + struct woa_chip_info* chip_info = NULL; + + /* Read processor model name from registry and find in the hard-coded + * list. */ + text_buffer = read_registry(cpu0_subkey, chip_name_value); + if (text_buffer == NULL) { + cpuinfo_log_error("Registry read error for processor name"); + return NULL; + } + + /* + * https://developer.arm.com/documentation/100442/0100/register-descriptions/aarch32-system-registers/midr--main-id-register + * Regedit for MIDR : + *HKEY_LOCAL_MACHINE\HARDWARE\DESCRIPTION\System\CentralProcessor\0\CP 4000 + */ + uint64_t midr_qword = (uint32_t)read_registry_qword(cpu0_subkey, chip_midr_value); + if (midr_qword == 0) { + cpuinfo_log_error("Registry read error for MIDR value"); + return NULL; + } + // MIDR is only 32 bits, so we need to cast it to uint32_t + uint32_t midr_value = (uint32_t)midr_qword; + + /* Read the frequency from the registry + * The value is in MHz, so we need to convert it to Hz */ + uint64_t frequency_mhz = read_registry_dword(cpu0_subkey, chip_mhz_value); + if (frequency_mhz == 0) { + cpuinfo_log_error("Registry read error for frequency value"); + return NULL; + } + // Convert MHz to Hz + uint64_t frequency_hz = frequency_mhz * 
1000000; + + // Allocate chip_info before using it. + chip_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct woa_chip_info)); + if (chip_info == NULL) { + cpuinfo_log_error("Heap allocation error for chip_info"); + return NULL; + } + + // set chip_info fields + chip_info->chip_name_string = wcsndup(text_buffer, CPUINFO_PACKAGE_NAME_MAX - 1); + chip_info->uarchs[0] = get_core_info_from_midr(midr_value, frequency_hz); + + cpuinfo_log_debug("detected chip model name: %ls", chip_info->chip_name_string); + + return chip_info; } static void set_cpuinfo_isa_fields(void) { - cpuinfo_isa.atomics = - IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE) != 0; - - const bool dotprod = - IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != 0; - cpuinfo_isa.dot = dotprod; - - SYSTEM_INFO system_info; - GetSystemInfo(&system_info); - switch (system_info.wProcessorLevel) { - case 0x803: // Kryo 385 Silver (Snapdragon 850) - cpuinfo_isa.fp16arith = dotprod; - cpuinfo_isa.rdm = dotprod; - break; - default: - // Assume that Dot Product support implies FP16 - // arithmetics and RDM support. ARM manuals don't - // guarantee that, but it holds in practice. - cpuinfo_isa.fp16arith = dotprod; - cpuinfo_isa.rdm = dotprod; - break; - } - - /* Windows API reports all or nothing for cryptographic instructions. 
*/ - const bool crypto = - IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0; - cpuinfo_isa.aes = crypto; - cpuinfo_isa.sha1 = crypto; - cpuinfo_isa.sha2 = crypto; - cpuinfo_isa.pmull = crypto; - - cpuinfo_isa.crc32 = - IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0; + cpuinfo_isa.atomics = IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE) != 0; + + const bool dotprod = IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != 0; + cpuinfo_isa.dot = dotprod; + + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + switch (system_info.wProcessorLevel) { + case 0x803: // Kryo 385 Silver (Snapdragon 850) + cpuinfo_isa.fp16arith = dotprod; + cpuinfo_isa.rdm = dotprod; + break; + default: + // Assume that Dot Product support implies FP16 + // arithmetics and RDM support. ARM manuals don't + // guarantee that, but it holds in practice. + cpuinfo_isa.fp16arith = dotprod; + cpuinfo_isa.rdm = dotprod; + break; + } + + /* Windows API reports all or nothing for cryptographic instructions. */ + const bool crypto = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0; + cpuinfo_isa.aes = crypto; + cpuinfo_isa.sha1 = crypto; + cpuinfo_isa.sha2 = crypto; + cpuinfo_isa.pmull = crypto; + + cpuinfo_isa.crc32 = IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0; } \ No newline at end of file diff --git a/src/x86/isa.c b/src/x86/isa.c index 377583b3..222bd231 100644 --- a/src/x86/isa.c +++ b/src/x86/isa.c @@ -440,7 +440,7 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa( /* * AVX 10.2 instructions: avx 10 version information. * - Intel: ebx[bits 0-7] in structured features info (eax = 24 ecx = 0). 
- */ + */ isa.avx10_2 = ((structured_feature_info2.ebx & UINT32_C(0x000000FF)) >= 2) && isa.avx10_1; /* From 74ce7a0e27853ee2819dd9fcbefcdccb868d8437 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Wed, 29 Jan 2025 17:49:54 -0500 Subject: [PATCH 44/62] Fix invalid function pointer cast in cpuinfo.c While casting function pointers is allowed in C, the function must ultimately be called through a pointer with the same type signature as the function itself. Any type signature mismatch, even one as small as decaying T* to void*, is undefined behavior. UBSan flags this with -fsanitize=function. The easiest way I found to repro this was: CC=clang-18 CXX=clang++-18 \ CFLAGS="-fsanitize=function -fno-sanitize-recover=function" \ CXXFLAGS="-fsanitize=function -fno-sanitize-recover=function" \ cmake -GNinja -B build -DCPUINFO_BUILD_BENCHMARKS=OFF ninja -C build ./build/cpu-info That gives the following error: [...]/src/linux/multiline.c:85:11: runtime error: call to function parse_line through pointer to incorrect function type 'bool (*)(const char *, const char *, void *, unsigned long)' cpuinfo.c: note: parse_line defined here SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior [...]/src/linux/multiline.c:85:11 The fix is fairly straightforward: keep the function at the type signature the caller expects, and cast the void* inside the function instead. --- src/x86/linux/cpuinfo.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/x86/linux/cpuinfo.c b/src/x86/linux/cpuinfo.c index 7df72aba..8f038b07 100644 --- a/src/x86/linux/cpuinfo.c +++ b/src/x86/linux/cpuinfo.c @@ -83,8 +83,9 @@ struct proc_cpuinfo_parser_state { static bool parse_line( const char* line_start, const char* line_end, - struct proc_cpuinfo_parser_state state[restrict static 1], + void* context, uint64_t line_number) { + struct proc_cpuinfo_parser_state* restrict state = context; /* Empty line. Skip. 
*/ if (line_start == line_end) { return true; @@ -215,5 +216,5 @@ bool cpuinfo_x86_linux_parse_proc_cpuinfo( .processors = processors, }; return cpuinfo_linux_parse_multiline_file( - "/proc/cpuinfo", BUFFER_SIZE, (cpuinfo_line_callback)parse_line, &state); + "/proc/cpuinfo", BUFFER_SIZE, parse_line, &state); } From c2bbb022590fa958bfef8d11878b3cfca0cedbf1 Mon Sep 17 00:00:00 2001 From: Daniel Lenski Date: Tue, 21 May 2024 21:14:36 -0700 Subject: [PATCH 45/62] For Apple silicon, use machdep.cpu.brand_string in preference to decoding hw.machine This functionality was implemented in #65 ("Updated package.name to also query machdep.cpu.brand_string if decode of hw.machine fails"), but then it was omitted from the subsequent #100, probably inadvertently. Adding that functionality back here, so that the package/device name can be shown correctly on recent devices and macOS/iOS versions. I have reversed the order so that `machdep.cpu.brand_string` is checked before attempting to decode `hw.machine`, since the former appears to be more future-proof. Before this change, on a recent MacBook Pro: $ cpu-info ... Debug (cpuinfo): hw.machine: arm64 Warning in cpuinfo: parsing "hw.machine" failed: Undefined error: 0 ... Packages: 0: After this change: $ cpu-info ... Debug (cpuinfo): machdep.cpu.brand_string: Apple M2 Pro ... 
Packages: 0: Apple M2 Pro --- src/arm/mach/init.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/src/arm/mach/init.c b/src/arm/mach/init.c index 3fb62414..c4e6521b 100644 --- a/src/arm/mach/init.c +++ b/src/arm/mach/init.c @@ -101,17 +101,34 @@ static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t core_index, return cpuinfo_uarch_unknown; } -static void decode_package_name(char* package_name) { +static int read_package_name_from_brand_string(char* package_name) { + size_t size; + if (sysctlbyname("machdep.cpu.brand_string", NULL, &size, NULL, 0) != 0) { + sysctlfail: + cpuinfo_log_warning("sysctlbyname(\"machdep.cpu.brand_string\") failed: %s", strerror(errno)); + return false; + } + + char* brand_string = alloca(size); + if (sysctlbyname("machdep.cpu.brand_string", brand_string, &size, NULL, 0) != 0) + goto sysctlfail; + cpuinfo_log_debug("machdep.cpu.brand_string: %s", brand_string); + + strlcpy(package_name, brand_string, CPUINFO_PACKAGE_NAME_MAX); + return true; +} + +static int decode_package_name_from_hw_machine(char* package_name) { size_t size; if (sysctlbyname("hw.machine", NULL, &size, NULL, 0) != 0) { cpuinfo_log_warning("sysctlbyname(\"hw.machine\") failed: %s", strerror(errno)); - return; + return false; } char* machine_name = alloca(size); if (sysctlbyname("hw.machine", machine_name, &size, NULL, 0) != 0) { cpuinfo_log_warning("sysctlbyname(\"hw.machine\") failed: %s", strerror(errno)); - return; + return false; } cpuinfo_log_debug("hw.machine: %s", machine_name); @@ -119,7 +136,7 @@ static void decode_package_name(char* package_name) { uint32_t major = 0, minor = 0; if (sscanf(machine_name, "%9[^,0123456789]%" SCNu32 ",%" SCNu32, name, &major, &minor) != 3) { cpuinfo_log_warning("parsing \"hw.machine\" failed: %s", strerror(errno)); - return; + return false; } uint32_t chip_model = 0; @@ -224,7 +241,9 @@ static void decode_package_name(char* package_name) { } if (chip_model != 0) { 
snprintf(package_name, CPUINFO_PACKAGE_NAME_MAX, "Apple A%" PRIu32 "%c", chip_model, suffix); + return true; } + return false; } void cpuinfo_arm_mach_init(void) { @@ -275,7 +294,8 @@ void cpuinfo_arm_mach_init(void) { .core_start = i * cores_per_package, .core_count = cores_per_package, }; - decode_package_name(packages[i].name); + if (!read_package_name_from_brand_string(packages[i].name)) + decode_package_name_from_hw_machine(packages[i].name); } const uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily"); From 76d3cc89c90dbf4199ee1fcbbb5e4c5e97158c88 Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Thu, 27 Mar 2025 02:38:47 -0700 Subject: [PATCH 46/62] isa-tool display SME and SME2 for arm --- tools/isa-info.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/isa-info.c b/tools/isa-info.c index 21f94a5f..023b6989 100644 --- a/tools/isa-info.c +++ b/tools/isa-info.c @@ -173,6 +173,8 @@ int main(int argc, char** argv) { printf("SIMD extensions:\n"); printf("\tARM SVE: %s\n", cpuinfo_has_arm_sve() ? "yes" : "no"); printf("\tARM SVE 2: %s\n", cpuinfo_has_arm_sve2() ? "yes" : "no"); + printf("\tARM SME: %s\n", cpuinfo_has_arm_sme() ? "yes" : "no"); + printf("\tARM SME 2: %s\n", cpuinfo_has_arm_sme2() ? 
"yes" : "no"); printf("ARM SVE Capabilities:\n"); printf("\tSVE max length: %d\n", cpuinfo_get_max_arm_sve_length()); From 45e895ee62372eb2a41646dbbbf9c75f90ac6487 Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Thu, 27 Mar 2025 17:35:14 -0700 Subject: [PATCH 47/62] Add SME vector length detect - cpuinfo_get_max_arm_sme_length() returns svl vector length in bits - Display length of SME vectors in isa-tool - Upgrade cmake-linux-riscv64 ubuntu-22.04 runners to ubuntu-24.04 SME may be enabled on cpus that do not have SVE --- .github/workflows/build.yml | 4 ++-- include/cpuinfo.h | 10 ++++++++++ src/arm/linux/aarch64-isa.c | 18 ++++++++++++++++++ tools/isa-info.c | 1 + 4 files changed, 31 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 622650af..17483b15 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -94,14 +94,14 @@ jobs: env: ANDROID_NDK: ${{ steps.setup-ndk.outputs.ndk-path }} cmake-linux-qemu: - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 timeout-minutes: 40 strategy: matrix: build_props: - [ "cmake-linux-riscv64", - "riscv64/ubuntu:22.04" + "riscv64/ubuntu:24.04" ] name: ${{ matrix.build_props[0] }} diff --git a/include/cpuinfo.h b/include/cpuinfo.h index 5f93819e..f1d35d4d 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -1700,6 +1700,7 @@ struct cpuinfo_arm_isa { bool sme_b16b16; bool sme_f16f16; uint32_t svelen; + uint32_t smelen; #endif bool rdm; bool fp16arith; @@ -2081,6 +2082,15 @@ static inline uint32_t cpuinfo_get_max_arm_sve_length(void) { #endif } +// Function to get the max SME vector length on ARM CPU's which support SME. 
+static inline uint32_t cpuinfo_get_max_arm_sme_length(void) { +#if CPUINFO_ARCH_ARM64 + return cpuinfo_isa.smelen * 8; // bytes * 8 = bit length(vector length) +#else + return 0; +#endif +} + static inline bool cpuinfo_has_arm_sme(void) { #if CPUINFO_ARCH_ARM64 return cpuinfo_isa.sme; diff --git a/src/arm/linux/aarch64-isa.c b/src/arm/linux/aarch64-isa.c index bc2186f6..7a9cebbb 100644 --- a/src/arm/linux/aarch64-isa.c +++ b/src/arm/linux/aarch64-isa.c @@ -191,4 +191,22 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( // Mask out the SVE vector length bits isa->svelen = ret & PR_SVE_VL_LEN_MASK; } + +#ifndef PR_SME_GET_VL +#define PR_SME_GET_VL 64 +#endif + +#ifndef PR_SME_VL_LEN_MASK +#define PR_SME_VL_LEN_MASK 0xffff +#endif + + ret = prctl(PR_SME_GET_VL); + if (ret < 0) { + cpuinfo_log_warning("No SME support on this machine"); + isa->smelen = 0; // Assume no SME support if the call fails + } else { + // Mask out the SME vector length bits + isa->smelen = ret & PR_SME_VL_LEN_MASK; + } } + diff --git a/tools/isa-info.c b/tools/isa-info.c index 023b6989..740be648 100644 --- a/tools/isa-info.c +++ b/tools/isa-info.c @@ -178,6 +178,7 @@ int main(int argc, char** argv) { printf("ARM SVE Capabilities:\n"); printf("\tSVE max length: %d\n", cpuinfo_get_max_arm_sve_length()); + printf("\tSME max length: %d\n", cpuinfo_get_max_arm_sme_length()); printf("Cryptography extensions:\n"); printf("\tAES: %s\n", cpuinfo_has_arm_aes() ? 
"yes" : "no"); From 2cdcd5ba9ee53eb9439657e5a0d86e31f6934610 Mon Sep 17 00:00:00 2001 From: Christopher Degawa Date: Wed, 12 Feb 2025 11:54:08 -0600 Subject: [PATCH 48/62] cmake: rename duplicate names Xcode on macOS cannot handle multiple .c files having the same name as it produces an object file like init-.o, but tries to link to init.o https://gitlab.kitware.com/cmake/cmake/-/issues/20501 Signed-off-by: Christopher Degawa --- CMakeLists.txt | 8 ++++---- src/{init.c => maininit.c} | 0 src/x86/cache/{init.c => cacheinit.c} | 0 src/x86/{init.c => x86init.c} | 0 src/x86/{topology.c => x86topology.c} | 0 5 files changed, 4 insertions(+), 4 deletions(-) rename src/{init.c => maininit.c} (100%) rename src/x86/cache/{init.c => cacheinit.c} (100%) rename src/x86/{init.c => x86init.c} (100%) rename src/x86/{topology.c => x86topology.c} (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index aedc9831..be12f858 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -158,19 +158,19 @@ IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_BENCHMARKS) ENDIF() # ---[ cpuinfo library -SET(CPUINFO_SRCS src/api.c src/cache.c src/init.c src/log.c) +SET(CPUINFO_SRCS src/api.c src/cache.c src/maininit.c src/log.c) IF(CPUINFO_SUPPORTED_PLATFORM) IF(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND (CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$")) LIST(APPEND CPUINFO_SRCS - src/x86/init.c + src/x86/x86init.c src/x86/info.c src/x86/vendor.c src/x86/uarch.c src/x86/name.c - src/x86/topology.c + src/x86/x86topology.c src/x86/isa.c - src/x86/cache/init.c + src/x86/cache/cacheinit.c src/x86/cache/descriptor.c src/x86/cache/deterministic.c) IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") diff --git a/src/init.c b/src/maininit.c similarity index 100% rename from src/init.c rename to src/maininit.c diff --git a/src/x86/cache/init.c b/src/x86/cache/cacheinit.c similarity index 100% rename from src/x86/cache/init.c 
rename to src/x86/cache/cacheinit.c diff --git a/src/x86/init.c b/src/x86/x86init.c similarity index 100% rename from src/x86/init.c rename to src/x86/x86init.c diff --git a/src/x86/topology.c b/src/x86/x86topology.c similarity index 100% rename from src/x86/topology.c rename to src/x86/x86topology.c From af054ddc381e676377e619d7abba4d5fd4cee206 Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Thu, 22 May 2025 08:27:53 -0700 Subject: [PATCH 49/62] Fix clang format (#294) Regressions introduced by https://github.com/pytorch/cpuinfo/pull/275 and https://github.com/pytorch/cpuinfo/pull/287 --- src/arm/linux/aarch64-isa.c | 1 - src/x86/linux/cpuinfo.c | 9 ++------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/arm/linux/aarch64-isa.c b/src/arm/linux/aarch64-isa.c index 7a9cebbb..8e83d512 100644 --- a/src/arm/linux/aarch64-isa.c +++ b/src/arm/linux/aarch64-isa.c @@ -209,4 +209,3 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo( isa->smelen = ret & PR_SME_VL_LEN_MASK; } } - diff --git a/src/x86/linux/cpuinfo.c b/src/x86/linux/cpuinfo.c index 8f038b07..f09d1c9f 100644 --- a/src/x86/linux/cpuinfo.c +++ b/src/x86/linux/cpuinfo.c @@ -80,11 +80,7 @@ struct proc_cpuinfo_parser_state { * Decode a single line of /proc/cpuinfo information. * Lines have format [ ]*:[ ] */ -static bool parse_line( - const char* line_start, - const char* line_end, - void* context, - uint64_t line_number) { +static bool parse_line(const char* line_start, const char* line_end, void* context, uint64_t line_number) { struct proc_cpuinfo_parser_state* restrict state = context; /* Empty line. Skip. 
*/ if (line_start == line_end) { @@ -215,6 +211,5 @@ bool cpuinfo_x86_linux_parse_proc_cpuinfo( .max_processors_count = max_processors_count, .processors = processors, }; - return cpuinfo_linux_parse_multiline_file( - "/proc/cpuinfo", BUFFER_SIZE, parse_line, &state); + return cpuinfo_linux_parse_multiline_file("/proc/cpuinfo", BUFFER_SIZE, parse_line, &state); } From 0d4a77c0084b413c725893e3252b47543420e723 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Thu, 22 May 2025 14:08:46 -0700 Subject: [PATCH 50/62] [CI] Update checkout action to v4 --- .github/workflows/build.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 17483b15..0373cc99 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -16,7 +16,7 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 40 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Update apt run: sudo apt update - name: Install ninja @@ -28,7 +28,7 @@ jobs: runs-on: macos-latest timeout-minutes: 40 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Install ninja run: brew install ninja - name: Configure and build @@ -38,7 +38,7 @@ jobs: runs-on: windows-latest timeout-minutes: 40 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Install ninja run: choco install ninja - name: Configure and build @@ -49,7 +49,7 @@ jobs: runs-on: windows-latest timeout-minutes: 40 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: 'x64-uwp(Debug)' uses: lukka/run-cmake@v10 with: @@ -77,7 +77,7 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 40 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Update apt run: sudo apt update - name: Install ninja @@ -106,7 +106,7 @@ jobs: name: ${{ matrix.build_props[0] }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Setup QEMU uses: 
docker/setup-qemu-action@v3.0.0 - name: Build cpuinfo in ${{ matrix.build_props[1] }} From 02519ac451138f1bdb0ec99788d3d7212792aca9 Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Thu, 22 May 2025 14:47:34 -0700 Subject: [PATCH 51/62] Revert "cmake: rename duplicate names" (#296) Reverts pytorch/cpuinfo#284 as it broke Bazel builds --- CMakeLists.txt | 8 ++++---- src/{maininit.c => init.c} | 0 src/x86/cache/{cacheinit.c => init.c} | 0 src/x86/{x86init.c => init.c} | 0 src/x86/{x86topology.c => topology.c} | 0 5 files changed, 4 insertions(+), 4 deletions(-) rename src/{maininit.c => init.c} (100%) rename src/x86/cache/{cacheinit.c => init.c} (100%) rename src/x86/{x86init.c => init.c} (100%) rename src/x86/{x86topology.c => topology.c} (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index be12f858..aedc9831 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -158,19 +158,19 @@ IF(CPUINFO_SUPPORTED_PLATFORM AND CPUINFO_BUILD_BENCHMARKS) ENDIF() # ---[ cpuinfo library -SET(CPUINFO_SRCS src/api.c src/cache.c src/maininit.c src/log.c) +SET(CPUINFO_SRCS src/api.c src/cache.c src/init.c src/log.c) IF(CPUINFO_SUPPORTED_PLATFORM) IF(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND (CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?)$" OR IOS_ARCH MATCHES "^(i386|x86_64)$")) LIST(APPEND CPUINFO_SRCS - src/x86/x86init.c + src/x86/init.c src/x86/info.c src/x86/vendor.c src/x86/uarch.c src/x86/name.c - src/x86/x86topology.c + src/x86/topology.c src/x86/isa.c - src/x86/cache/cacheinit.c + src/x86/cache/init.c src/x86/cache/descriptor.c src/x86/cache/deterministic.c) IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" OR CMAKE_SYSTEM_NAME STREQUAL "Android") diff --git a/src/maininit.c b/src/init.c similarity index 100% rename from src/maininit.c rename to src/init.c diff --git a/src/x86/cache/cacheinit.c b/src/x86/cache/init.c similarity index 100% rename from src/x86/cache/cacheinit.c rename to src/x86/cache/init.c diff --git 
a/src/x86/x86init.c b/src/x86/init.c similarity index 100% rename from src/x86/x86init.c rename to src/x86/init.c diff --git a/src/x86/x86topology.c b/src/x86/topology.c similarity index 100% rename from src/x86/x86topology.c rename to src/x86/topology.c From 8fb65ea436c5605cf976e2752227e1ea8a840051 Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Thu, 22 May 2025 16:06:17 -0700 Subject: [PATCH 52/62] [CI] Fix riscv64-in-qemu build (#295) Tried a few things, but looks like all one needs to do is to add `--platform linux/riscv64` flag to `docker run` command --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0373cc99..13942231 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -111,7 +111,7 @@ jobs: uses: docker/setup-qemu-action@v3.0.0 - name: Build cpuinfo in ${{ matrix.build_props[1] }} run: | - docker run -i -v $(pwd):/cpuinfo ${{ matrix.build_props[1] }} /bin/bash -c " + docker run --platform linux/riscv64 -i -v $(pwd):/cpuinfo ${{ matrix.build_props[1] }} /bin/bash -c " apt update && apt install -y cmake git gcc g++ && cd /cpuinfo && From 0cc4241067e7726a9b4a70528e65da5633e1533e Mon Sep 17 00:00:00 2001 From: enh-google Date: Thu, 22 May 2025 20:45:30 -0400 Subject: [PATCH 53/62] riscv-hw.c: match kernel type in syscall(). (#292) syscall() doesn't care about these types at all, and the kernel uses cpu_set_t, so we're better off just removing the cast entirely. --- src/riscv/linux/riscv-hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/riscv/linux/riscv-hw.c b/src/riscv/linux/riscv-hw.c index d737acad..63a92e24 100644 --- a/src/riscv/linux/riscv-hw.c +++ b/src/riscv/linux/riscv-hw.c @@ -112,7 +112,7 @@ void cpuinfo_riscv_linux_decode_vendor_uarch_from_hwprobe( * * for more details. 
*/ - int ret = syscall(NR_riscv_hwprobe, pairs, pairs_count, cpu_set_size, (unsigned long*)cpu_set, 0 /* flags */); + int ret = syscall(NR_riscv_hwprobe, pairs, pairs_count, cpu_set_size, cpu_set, 0 /* flags */); #else int ret = __riscv_hwprobe(pairs, pairs_count, cpu_set_size, (unsigned long*)cpu_set, 0 /* flags */); #endif From 2776fccc8639c4037c4cb53f4081fdcf069dc0b0 Mon Sep 17 00:00:00 2001 From: Kimish Patel Date: Thu, 29 May 2025 13:26:30 -0700 Subject: [PATCH 54/62] update apple soc info (#290) * [WIP] update apple soc info Summary: Added support for A16, A17, A18, A18 pro. Reg values are found from ncnn and needs validation. Additional source Constants are taken from https://github.com/apple-oss-distributions/xnu/blob/e3723e1f17661b24996789d8afc084c0c3303b26/osfmk/mach/machine.h#L449 Test Plan: Reviewers: Subscribers: Tasks: Tags: [ghstack-poisoned] * Update on "[WIP] update apple soc info" Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags: [ghstack-poisoned] * Update on "[WIP] update apple soc info" Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags: [ghstack-poisoned] * Update on "[WIP] update apple soc info" Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags: [ghstack-poisoned] * Update on "update apple soc info" Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags: [ghstack-poisoned] --- include/cpuinfo.h | 16 +++++++++++++ src/arm/mach/init.c | 56 +++++++++++++++++++++++++++++++++++++++++++++ tools/cpu-info.c | 16 +++++++++++++ 3 files changed, 88 insertions(+) diff --git a/include/cpuinfo.h b/include/cpuinfo.h index f1d35d4d..f0afcc0f 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -581,6 +581,22 @@ enum cpuinfo_uarch { cpuinfo_uarch_avalanche = 0x0070010D, /** Apple A15 / M2 processor (little cores). */ cpuinfo_uarch_blizzard = 0x0070010E, + /** Apple A16 processor (big cores). */ + cpuinfo_uarch_everest = 0x00700200, + /** Apple A16 processor (little cores). 
*/ + cpuinfo_uarch_sawtooth = 0x00700201, + /** Apple A17 processor (big cores). */ + cpuinfo_uarch_coll_everest = 0x00700202, + /** Apple A17 processor (little cores). */ + cpuinfo_uarch_coll_sawtooth = 0x00700203, + /** Apple A18 processor (big cores). */ + cpuinfo_uarch_tupai_everest = 0x00700204, + /** Apple A18 processor (little cores). */ + cpuinfo_uarch_tupai_sawtooth = 0x00700205, + /** Apple A18 pro processor (big cores). */ + cpuinfo_uarch_tahiti_everest = 0x00700206, + /** Apple A18 pro processor (little cores). */ + cpuinfo_uarch_tahiti_sawtooth = 0x00700207, /** Cavium ThunderX. */ cpuinfo_uarch_thunderx = 0x00800100, diff --git a/src/arm/mach/init.c b/src/arm/mach/init.c index c4e6521b..2980f23a 100644 --- a/src/arm/mach/init.c +++ b/src/arm/mach/init.c @@ -27,6 +27,45 @@ #ifndef CPUFAMILY_ARM_AVALANCHE_BLIZZARD #define CPUFAMILY_ARM_AVALANCHE_BLIZZARD 0xDA33D83D #endif +// Following are copied over from ncnn/src/cpu.cpp +// A16 +#ifndef CPUFAMILY_ARM_EVEREST_SAWTOOTH +#define CPUFAMILY_ARM_EVEREST_SAWTOOTH 0x8765edea +#endif +// A17 +#ifndef CPUFAMILY_ARM_COLL +#define CPUFAMILY_ARM_COLL 0x2876f5b5 +#endif +// A18 +#ifndef CPUFAMILY_ARM_TUPAI +#define CPUFAMILY_ARM_TUPAI 0x204526d0 +#endif +// A18 Pro +#ifndef CPUFAMILY_ARM_TAHITI +#define CPUFAMILY_ARM_TAHITI 0x75d4acb9 +#endif +// For M3/M4 we need to populate more information about +// efficiency and perf cores. 
+// M3 +#ifndef CPUFAMILY_ARM_IBIZA +#define CPUFAMILY_ARM_IBIZA 0xfa33415e +#endif +// M3 Pro +#ifndef CPUFAMILY_ARM_LOBOS +#define CPUFAMILY_ARM_LOBOS 0x5f4dea93 +#endif +// M3 Max +#ifndef CPUFAMILY_ARM_PALMA +#define CPUFAMILY_ARM_PALMA 0x72015832 +#endif +// M4 +#ifndef CPUFAMILY_ARM_DONAN +#define CPUFAMILY_ARM_DONAN 0x6f5129ac +#endif +// M4 Pro / M4 Max +#ifndef CPUFAMILY_ARM_BRAVA +#define CPUFAMILY_ARM_BRAVA 0x17d5b93a +#endif struct cpuinfo_arm_isa cpuinfo_isa = { .aes = true, @@ -93,6 +132,23 @@ static enum cpuinfo_uarch decode_uarch(uint32_t cpu_family, uint32_t core_index, case CPUFAMILY_ARM_AVALANCHE_BLIZZARD: /* Hexa-core: 2x Avalanche + 4x Blizzard */ return core_index + 4 < core_count ? cpuinfo_uarch_avalanche : cpuinfo_uarch_blizzard; + case CPUFAMILY_ARM_EVEREST_SAWTOOTH: + /* Hexa-core: 2x Avalanche + 4x Blizzard */ + return core_index + 4 < core_count ? cpuinfo_uarch_everest : cpuinfo_uarch_sawtooth; + return core_index + 4 < core_count ? cpuinfo_uarch_avalanche : cpuinfo_uarch_blizzard; + case CPUFAMILY_ARM_COLL: + /* Hexa-core: 2x Avalanche + 4x Blizzard */ + return core_index + 4 < core_count ? cpuinfo_uarch_coll_everest : cpuinfo_uarch_coll_sawtooth; + + case CPUFAMILY_ARM_TUPAI: + /* Hexa-core: 2x Avalanche + 4x Blizzard */ + return core_index + 4 < core_count ? cpuinfo_uarch_tupai_everest : cpuinfo_uarch_tupai_sawtooth; + + case CPUFAMILY_ARM_TAHITI: + /* Hexa-core: 2x Avalanche + 4x Blizzard */ + return core_index + 4 < core_count ? 
cpuinfo_uarch_tahiti_everest + : cpuinfo_uarch_tahiti_sawtooth; + default: /* Use hw.cpusubtype for detection */ break; diff --git a/tools/cpu-info.c b/tools/cpu-info.c index ca3ebfad..5f8a1158 100644 --- a/tools/cpu-info.c +++ b/tools/cpu-info.c @@ -270,6 +270,22 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) { return "Avalanche"; case cpuinfo_uarch_blizzard: return "Blizzard"; + case cpuinfo_uarch_everest: + return "Everest"; + case cpuinfo_uarch_sawtooth: + return "Sawtooth"; + case cpuinfo_uarch_coll_everest: + return "Coll_Everest"; + case cpuinfo_uarch_coll_sawtooth: + return "Coll_Sawtooth"; + case cpuinfo_uarch_tupai_everest: + return "Tupai_Everest"; + case cpuinfo_uarch_tupai_sawtooth: + return "Tupai_Sawtooth"; + case cpuinfo_uarch_tahiti_everest: + return "Tahiti_Everest"; + case cpuinfo_uarch_tahiti_sawtooth: + return "Tahiti_Sawtooth"; case cpuinfo_uarch_thunderx: return "ThunderX"; case cpuinfo_uarch_thunderx2: From 5a1b0499d3a04c65eb49131f88e38ac92e2682fe Mon Sep 17 00:00:00 2001 From: fbarchard Date: Mon, 2 Jun 2025 09:54:50 -0700 Subject: [PATCH 55/62] Add Intel Crestmont uarch (#299) Crestmont is the uarch used in Sierra Forest Tested: make cpu-info sde -srf -- ./cpu-info --- include/cpuinfo.h | 2 ++ src/x86/uarch.c | 2 ++ tools/cpu-info.c | 2 ++ 3 files changed, 6 insertions(+) diff --git a/include/cpuinfo.h b/include/cpuinfo.h index f0afcc0f..a26272fc 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -371,6 +371,8 @@ enum cpuinfo_uarch { cpuinfo_uarch_goldmont = 0x00100404, /** Intel Goldmont Plus microarchitecture (Gemini Lake). */ cpuinfo_uarch_goldmont_plus = 0x00100405, + /** Intel Crestmont microarchitecture (Sierra Forest). */ + cpuinfo_uarch_crestmont = 0x00100407, /** Intel Knights Ferry HPC boards. 
*/ cpuinfo_uarch_knights_ferry = 0x00100500, diff --git a/src/x86/uarch.c b/src/x86/uarch.c index a21eabb0..93e49e56 100644 --- a/src/x86/uarch.c +++ b/src/x86/uarch.c @@ -185,6 +185,8 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch( case 0x5A: // Moorefield case 0x5D: // SoFIA return cpuinfo_uarch_silvermont; + case 0xAF: // Sierra Forest + return cpuinfo_uarch_crestmont; case 0x4C: // Braswell, Cherry // Trail case 0x75: // Spreadtrum diff --git a/tools/cpu-info.c b/tools/cpu-info.c index 5f8a1158..85033918 100644 --- a/tools/cpu-info.c +++ b/tools/cpu-info.c @@ -90,6 +90,8 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) { return "Saltwell"; case cpuinfo_uarch_silvermont: return "Silvermont"; + case cpuinfo_uarch_crestmont: + return "Crestmont"; case cpuinfo_uarch_airmont: return "Airmont"; case cpuinfo_uarch_goldmont: From 9dd88ad8815bf0ddd7dbc7e011cb154965f720fe Mon Sep 17 00:00:00 2001 From: Gregory Comer Date: Tue, 3 Jun 2025 09:50:51 -0700 Subject: [PATCH 56/62] Disable neon dot on unknown chipsets on aarch32 (#300) --- src/arm/linux/aarch32-isa.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/arm/linux/aarch32-isa.c b/src/arm/linux/aarch32-isa.c index 29663a70..d87a964a 100644 --- a/src/arm/linux/aarch32-isa.c +++ b/src/arm/linux/aarch32-isa.c @@ -149,6 +149,8 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo( cpuinfo_log_warning("VDOT instructions disabled: cause occasional SIGILL on Unisoc T310"); } else if (chipset->series == cpuinfo_arm_chipset_series_unisoc_ums && chipset->model == 312) { cpuinfo_log_warning("VDOT instructions disabled: cause occasional SIGILL on Unisoc UMS312"); + } else if (chipset->vendor == cpuinfo_arm_chipset_vendor_unknown) { + cpuinfo_log_warning("VDOT instructions disabled: unknown chipset"); } else { switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) { case UINT32_C(0x4100D0B0): /* Cortex-A76 */ From bed006e7264dbd7a59db8907342555deea2fc906 Mon Sep 17 00:00:00 2001 
From: fbarchard Date: Mon, 9 Jun 2025 14:42:28 -0700 Subject: [PATCH 57/62] Add Intel Darkmont uarch (#298) * Add Intel Darkmont uarch - Darkmont is the uarch used in Clearwater Forest * Add Intel Darkmont uarch - Darkmont is the uarch used in Clearwater Forest * Add Intel Darkmont uarch - Darkmont is the uarch used in Clearwater Forest --- include/cpuinfo.h | 2 ++ src/x86/uarch.c | 2 ++ tools/cpu-info.c | 2 ++ 3 files changed, 6 insertions(+) diff --git a/include/cpuinfo.h b/include/cpuinfo.h index a26272fc..0598a6f0 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -384,6 +384,8 @@ enum cpuinfo_uarch { cpuinfo_uarch_knights_hill = 0x00100503, /** Intel Knights Mill Xeon Phi. */ cpuinfo_uarch_knights_mill = 0x00100504, + /** Intel Darkmont microarchitecture (e-core used in Clearwater Forest). */ + cpuinfo_uarch_darkmont = 0x00100505, /** Intel/Marvell XScale series. */ cpuinfo_uarch_xscale = 0x00100600, diff --git a/src/x86/uarch.c b/src/x86/uarch.c index 93e49e56..c4471b6a 100644 --- a/src/x86/uarch.c +++ b/src/x86/uarch.c @@ -203,6 +203,8 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch( return cpuinfo_uarch_knights_landing; case 0x85: return cpuinfo_uarch_knights_mill; + case 0xDD: // Clearwater Forest + return cpuinfo_uarch_darkmont; } break; case 0x0F: diff --git a/tools/cpu-info.c b/tools/cpu-info.c index 85033918..70f78f88 100644 --- a/tools/cpu-info.c +++ b/tools/cpu-info.c @@ -98,6 +98,8 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) { return "Goldmont"; case cpuinfo_uarch_goldmont_plus: return "Goldmont Plus"; + case cpuinfo_uarch_darkmont: + return "Darkmont"; case cpuinfo_uarch_knights_ferry: return "Knights Ferry"; case cpuinfo_uarch_knights_corner: From 85c33463801fd6447c4daeda90657bdda8f60716 Mon Sep 17 00:00:00 2001 From: Richard Winterton Date: Tue, 10 Jun 2025 08:00:39 -0600 Subject: [PATCH 58/62] Added Willow Cove (#301) * Added Willow Cove * Update uarch.c removed incomplete modification in naming * Update cpu-info.c 
moved willow_cove up two lines to be next to sunny_cove * Update cpuinfo.h made the enum consistent with uarch naming following sunny_cove * Update cpuinfo.h updated comments to refer to intel microarchitecture per request to keep consistent. --- include/cpuinfo.h | 2 ++ src/x86/uarch.c | 3 +++ tools/cpu-info.c | 2 ++ 3 files changed, 7 insertions(+) diff --git a/include/cpuinfo.h b/include/cpuinfo.h index 0598a6f0..0d3f6a05 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -353,6 +353,8 @@ enum cpuinfo_uarch { cpuinfo_uarch_palm_cove = 0x0010020B, /** Intel Sunny Cove microarchitecture (10 nm, Ice Lake). */ cpuinfo_uarch_sunny_cove = 0x0010020C, + /** Intel Willow Cove microarchitecture (10 nm, Tiger Lake). */ + cpuinfo_uarch_willow_cove = 0x0010020D, /** Pentium 4 with Willamette, Northwood, or Foster cores. */ cpuinfo_uarch_willamette = 0x00100300, diff --git a/src/x86/uarch.c b/src/x86/uarch.c index c4471b6a..146f2dd5 100644 --- a/src/x86/uarch.c +++ b/src/x86/uarch.c @@ -168,6 +168,9 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch( case 0x7E: // Ice Lake-U return cpuinfo_uarch_sunny_cove; + case 0x8C: // Tiger U + case 0x8D: // Tiger H + return cpuinfo_uarch_willow_cove; /* Low-power cores */ case 0x1C: // Diamondville, // Silverthorne, diff --git a/tools/cpu-info.c b/tools/cpu-info.c index 70f78f88..bbfc1040 100644 --- a/tools/cpu-info.c +++ b/tools/cpu-info.c @@ -80,6 +80,8 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) { return "Palm Cove"; case cpuinfo_uarch_sunny_cove: return "Sunny Cove"; + case cpuinfo_uarch_willow_cove: + return "Willow Cove"; case cpuinfo_uarch_willamette: return "Willamette"; case cpuinfo_uarch_prescott: From c5be6dcb9c800a8cddbdfdfc27d122ffa73760db Mon Sep 17 00:00:00 2001 From: fbarchard Date: Wed, 25 Jun 2025 21:30:09 -0700 Subject: [PATCH 59/62] Add Intel Gracemont uarch (#303) --- include/cpuinfo.h | 2 ++ src/x86/uarch.c | 2 ++ tools/cpu-info.c | 2 ++ 3 files changed, 6 insertions(+) diff --git 
a/include/cpuinfo.h b/include/cpuinfo.h index 0d3f6a05..891416ab 100644 --- a/include/cpuinfo.h +++ b/include/cpuinfo.h @@ -373,6 +373,8 @@ enum cpuinfo_uarch { cpuinfo_uarch_goldmont = 0x00100404, /** Intel Goldmont Plus microarchitecture (Gemini Lake). */ cpuinfo_uarch_goldmont_plus = 0x00100405, + /** Intel Gracemont microarchitecture (Twin Lake). */ + cpuinfo_uarch_gracemont = 0x00100406, /** Intel Crestmont microarchitecture (Sierra Forest). */ cpuinfo_uarch_crestmont = 0x00100407, diff --git a/src/x86/uarch.c b/src/x86/uarch.c index 146f2dd5..2f4292bb 100644 --- a/src/x86/uarch.c +++ b/src/x86/uarch.c @@ -188,6 +188,8 @@ enum cpuinfo_uarch cpuinfo_x86_decode_uarch( case 0x5A: // Moorefield case 0x5D: // SoFIA return cpuinfo_uarch_silvermont; + case 0xBE: // Twin Lake + return cpuinfo_uarch_gracemont; case 0xAF: // Sierra Forest return cpuinfo_uarch_crestmont; case 0x4C: // Braswell, Cherry diff --git a/tools/cpu-info.c b/tools/cpu-info.c index bbfc1040..d1ba2d48 100644 --- a/tools/cpu-info.c +++ b/tools/cpu-info.c @@ -92,6 +92,8 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) { return "Saltwell"; case cpuinfo_uarch_silvermont: return "Silvermont"; + case cpuinfo_uarch_gracemont: + return "Gracemont"; case cpuinfo_uarch_crestmont: return "Crestmont"; case cpuinfo_uarch_airmont: From 02da1224b2ffd2603d523c3b4663df4c7a995dcc Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Wed, 25 Sep 2024 12:42:21 +0200 Subject: [PATCH 60/62] Update cpuinfo.h From 726f9f9d1108bc469791888f64928a01e9950c16 Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Tue, 27 Aug 2024 12:38:43 +0200 Subject: [PATCH 61/62] Add detection for `sme` on `aarch64`. From bfde4595b95c33d36cb54418b5279bd04b8038e3 Mon Sep 17 00:00:00 2001 From: Pedro Gonnet Date: Wed, 25 Sep 2024 12:42:21 +0200 Subject: [PATCH 62/62] Update cpuinfo.h