From 175026f597aa2c352d27758907371a805764cc00 Mon Sep 17 00:00:00 2001 From: Ryan Mansfield Date: Fri, 16 Jan 2026 11:49:41 -0500 Subject: [PATCH] Add -d archs data source for Mach-O universal binaries Implements the 'archs' data source to break down universal binaries by architecture. This allows users to: - View file size breakdown by architecture: bloaty -d archs - Filter to specific architecture: bloaty -d archs,segments --source-filter=arm64 - Hierarchical breakdown: bloaty -d archs,segments --- src/bloaty.cc | 1 + src/bloaty.h | 1 + src/elf.cc | 2 + src/macho.cc | 84 +++++++++++++ tests/macho/archs.test | 263 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 351 insertions(+) create mode 100644 tests/macho/archs.test diff --git a/src/bloaty.cc b/src/bloaty.cc index f489c60..631b96b 100644 --- a/src/bloaty.cc +++ b/src/bloaty.cc @@ -85,6 +85,7 @@ struct DataSourceDefinition { constexpr DataSourceDefinition data_sources[] = { {DataSource::kArchiveMembers, "armembers", "the .o files in a .a file"}, + {DataSource::kArchs, "archs", "architecture slices in universal binaries"}, {DataSource::kCompileUnits, "compileunits", "source file for the .o file (translation unit). requires debug info."}, {DataSource::kInputFiles, "inputfiles", diff --git a/src/bloaty.h b/src/bloaty.h index 85515fc..c9e2d1f 100644 --- a/src/bloaty.h +++ b/src/bloaty.h @@ -58,6 +58,7 @@ enum class DataSource { kRawRanges, kSections, kSegments, + kArchs, // We always set this to one of the concrete symbol types below before // setting it on a sink. diff --git a/src/elf.cc b/src/elf.cc index ac0f610..de2df46 100644 --- a/src/elf.cc +++ b/src/elf.cc @@ -1401,6 +1401,8 @@ class ElfObjectFile : public ObjectFile { DoReadELFSections(sink, kReportByEscapedSectionName); break; } + case DataSource::kArchs: + THROW("ELF files do not support 'archs' data source"); default: THROW("unknown data source"); } diff --git a/src/macho.cc b/src/macho.cc index cd826fe..a2b8564 100644 --- a/src/macho.cc +++ b/src/macho.cc @@ -21,6 +21,7 @@ #include #include "absl/strings/str_join.h" +#include "absl/strings/str_format.h" #include "absl/strings/substitute.h" #include "third_party/darwin_xnu_macho/mach-o/loader.h" #include "third_party/darwin_xnu_macho/mach-o/fat.h" @@ -69,6 +70,57 @@ void MaybeAddOverhead(RangeSink* sink, const char* label, string_view data) { } } +// ARM64E capability field constants +static constexpr uint32_t ARM64E_SUBTYPE_MASK = 0x00FFFFFF; // Low 24 bits: subtype proper + +static bool IsArm64eSubtype(uint32_t cpusubtype) { + uint32_t subtype_proper = cpusubtype & ARM64E_SUBTYPE_MASK; + return subtype_proper == CPU_SUBTYPE_ARM64E; +} + +std::string CpuTypeToString(uint32_t cputype, uint32_t cpusubtype) { + switch (cputype) { + case CPU_TYPE_X86_64: + switch (cpusubtype) { + case CPU_SUBTYPE_X86_64_H: + return "x86_64h"; + default: + return "x86_64"; + } + case CPU_TYPE_ARM64: + if (IsArm64eSubtype(cpusubtype)) { + return "arm64e"; + } + switch (cpusubtype) { + case CPU_SUBTYPE_ARM64_V8: + return "arm64v8"; + default: + return "arm64"; + } + case CPU_TYPE_X86: + return "i386"; + case CPU_TYPE_ARM: + switch (cpusubtype) { + case CPU_SUBTYPE_ARM_V6: + return "armv6"; + case CPU_SUBTYPE_ARM_V7: + return "armv7"; + case CPU_SUBTYPE_ARM_V7F: + return "armv7f"; + case CPU_SUBTYPE_ARM_V7S: + return "armv7s"; + case CPU_SUBTYPE_ARM_V7K: + return "armv7k"; + case CPU_SUBTYPE_ARM_V8: + return "armv8"; + default: + return "arm"; + } + default: + return absl::StrFormat("cpu_%d", cputype); + } +} + struct LoadCommand { bool is64bit; uint32_t cmd; @@ -619,6 +671,10 @@ class MachOObjectFile : public ObjectFile { ReadDWARFInlines(dwarf, sink, true); break; } + case DataSource::kArchs: { + ProcessArchitectures(sink); + break; + } case DataSource::kArchiveMembers: default: THROW("Mach-O doesn't support this data source"); @@ -627,6 +683,34 @@ class MachOObjectFile : public ObjectFile { } } + void ProcessArchitectures(RangeSink* sink) const { + uint32_t magic = ReadMagic(file_data().data()); + + if (magic == FAT_CIGAM) { + string_view header_data = file_data().data(); + auto header = GetStructPointerAndAdvance(&header_data); + uint32_t nfat_arch = ByteSwap(header->nfat_arch); + + for (uint32_t i = 0; i < nfat_arch; i++) { + auto arch = GetStructPointerAndAdvance(&header_data); + uint32_t cputype = ByteSwap(arch->cputype); + uint32_t cpusubtype = ByteSwap(arch->cpusubtype); + uint32_t offset = ByteSwap(arch->offset); + uint32_t size = ByteSwap(arch->size); + + std::string arch_name = CpuTypeToString(cputype, cpusubtype); + string_view slice_data = StrictSubstr(file_data().data(), offset, size); + + sink->AddFileRange("archs", arch_name, slice_data); + } + } else { + auto header = GetStructPointer(file_data().data()); + std::string arch_name = CpuTypeToString(header->cputype, header->cpusubtype); + + sink->AddFileRange("archs", arch_name, file_data().data()); + } + } + bool GetDisassemblyInfo(std::string_view /*symbol*/, DataSource /*symbol_source*/, DisassemblyInfo* /*info*/) const override { diff --git a/tests/macho/archs.test b/tests/macho/archs.test new file mode 100644 index 0000000..a8374b4 --- /dev/null +++ b/tests/macho/archs.test @@ -0,0 +1,263 @@ +# Test -d archs data source for mach-o universal binaries +# +# Tests that the 'archs' data source correctly reports architecture slices +# in universal binaries and single-architecture binaries. + +## Test 1: Universal binary with two architectures (x86_64 and arm64) +# RUN: %yaml2obj --docnum=1 %s -o %t.universal +# RUN: %bloaty %t.universal -d archs --domain=file | %FileCheck --check-prefix=UNIVERSAL %s + +# UNIVERSAL: FILE SIZE +# UNIVERSAL-DAG: x86_64 +# UNIVERSAL-DAG: arm64 +# UNIVERSAL-DAG: [Unmapped] + +## Test 2: Filter to x86_64 architecture only +# RUN: %bloaty %t.universal -d archs,segments --source-filter=x86_64 --domain=file | %FileCheck --check-prefix=FILTER-X86 %s + +# FILTER-X86: FILE SIZE +# FILTER-X86: x86_64 +# FILTER-X86: __TEXT +# FILTER-X86: __LINKEDIT +# FILTER-X86-NOT: arm64 + +## Test 3: Filter to arm64 architecture only +# RUN: %bloaty %t.universal -d archs,segments --source-filter=arm64 --domain=file | %FileCheck --check-prefix=FILTER-ARM %s + +# FILTER-ARM: FILE SIZE +# FILTER-ARM: arm64 +# FILTER-ARM: __TEXT +# FILTER-ARM: __LINKEDIT +# FILTER-ARM-NOT: x86_64 + +## Test 4: Single architecture binary +# RUN: %yaml2obj --docnum=2 %s -o %t.single +# RUN: %bloaty %t.single -d archs --domain=file | %FileCheck --check-prefix=SINGLE %s + +# SINGLE: FILE SIZE +# SINGLE: x86_64 +# SINGLE-NOT: arm64 + +## Universal binary with x86_64 and arm64 slices +--- !fat-mach-o +FatHeader: + magic: 0xCAFEBABE + nfat_arch: 2 +FatArchs: + - cputype: 0x1000007 + cpusubtype: 0x3 + offset: 0x1000 + size: 4176 + align: 12 + - cputype: 0x100000C + cpusubtype: 0x0 + offset: 0x2050 + size: 8280 + align: 12 +Slices: + - !mach-o + FileHeader: + magic: 0xFEEDFACF + cputype: 0x1000007 + cpusubtype: 0x3 + filetype: 0x2 + ncmds: 3 + sizeofcmds: 328 + flags: 0x200085 + reserved: 0x0 + LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __PAGEZERO + vmaddr: 0 + vmsize: 4294967296 + fileoff: 0 + filesize: 0 + maxprot: 0 + initprot: 0 + nsects: 0 + flags: 0 + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __TEXT + vmaddr: 4294967296 + vmsize: 4096 + fileoff: 0 + filesize: 4096 + maxprot: 5 + initprot: 5 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x100000F80 + size: 8 + offset: 0xF80 + align: 4 + reloff: 0x0 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 554889E531C05DC3 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4294971392 + vmsize: 4096 + fileoff: 4096 + filesize: 80 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + LinkEditData: + NameList: + - n_strx: 1 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294971264 + StringTable: + - ' ' + - _main + - !mach-o + FileHeader: + magic: 0xFEEDFACF + cputype: 0x100000C + cpusubtype: 0x0 + filetype: 0x2 + ncmds: 3 + sizeofcmds: 328 + flags: 0x200085 + reserved: 0x0 + LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __PAGEZERO + vmaddr: 0 + vmsize: 4294967296 + fileoff: 0 + filesize: 0 + maxprot: 0 + initprot: 0 + nsects: 0 + flags: 0 + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __TEXT + vmaddr: 4294967296 + vmsize: 8192 + fileoff: 0 + filesize: 8192 + maxprot: 5 + initprot: 5 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x100001F80 + size: 8 + offset: 0x1F80 + align: 2 + reloff: 0x0 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 00008052C0035FD6 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4294975488 + vmsize: 4096 + fileoff: 8192 + filesize: 88 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + LinkEditData: + NameList: + - n_strx: 1 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294975360 + StringTable: + - ' ' + - _main + +## Single x86_64 Mach-O executable +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x1000007 + cpusubtype: 0x3 + filetype: 0x2 + ncmds: 3 + sizeofcmds: 328 + flags: 0x200085 + reserved: 0x0 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __PAGEZERO + vmaddr: 0 + vmsize: 4294967296 + fileoff: 0 + filesize: 0 + maxprot: 0 + initprot: 0 + nsects: 0 + flags: 0 + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __TEXT + vmaddr: 4294967296 + vmsize: 4096 + fileoff: 0 + filesize: 4096 + maxprot: 5 + initprot: 5 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x100000F80 + size: 8 + offset: 0xF80 + align: 4 + reloff: 0x0 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 554889E531C05DC3 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4294971392 + vmsize: 4096 + fileoff: 4096 + filesize: 80 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 +LinkEditData: + NameList: + - n_strx: 1 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294971264 + StringTable: + - ' ' + - _main