diff --git a/include/orc/ar.hpp b/include/orc/ar.hpp index 2108586..4040c13 100644 --- a/include/orc/ar.hpp +++ b/include/orc/ar.hpp @@ -18,6 +18,6 @@ void read_ar(object_ancestry&& ancestry, freader& s, std::istream::pos_type end_pos, file_details details, - macho_params params); + reader_params params); //-------------------------------------------------------------------------------------------------- diff --git a/include/orc/coff.hpp b/include/orc/coff.hpp new file mode 100644 index 0000000..a8e22de --- /dev/null +++ b/include/orc/coff.hpp @@ -0,0 +1,27 @@ +// Copyright 2021 Adobe +// All Rights Reserved. +// +// NOTICE: Adobe permits you to use, modify, and distribute this file in accordance with the terms +// of the Adobe license agreement accompanying it. + +#pragma once + +// stdc++ +#include + +// application +#include "orc/parse_file.hpp" + +//-------------------------------------------------------------------------------------------------- + +void read_coff(object_ancestry&& ancestry, + freader& s, + std::istream::pos_type end_pos, + file_details details, + reader_params params); + +//-------------------------------------------------------------------------------------------------- + +struct dwarf dwarf_from_coff(std::uint32_t ofd_index, reader_params params); + +//-------------------------------------------------------------------------------------------------- diff --git a/include/orc/fat.hpp b/include/orc/fat.hpp index 6df85fd..096858c 100644 --- a/include/orc/fat.hpp +++ b/include/orc/fat.hpp @@ -18,6 +18,6 @@ void read_fat(object_ancestry&& ancestry, freader& s, std::istream::pos_type end_pos, file_details details, - macho_params params); + reader_params params); //-------------------------------------------------------------------------------------------------- diff --git a/include/orc/macho.hpp b/include/orc/macho.hpp index b56715a..037fb7d 100644 --- a/include/orc/macho.hpp +++ b/include/orc/macho.hpp @@ -18,11 +18,11 @@ void 
read_macho(object_ancestry&& ancestry, freader s, std::istream::pos_type end_pos, file_details details, - macho_params params); + reader_params params); //-------------------------------------------------------------------------------------------------- -struct dwarf dwarf_from_macho(std::uint32_t ofd_index, macho_params params); +struct dwarf dwarf_from_macho(std::uint32_t ofd_index, reader_params params); //-------------------------------------------------------------------------------------------------- diff --git a/include/orc/parse_file.hpp b/include/orc/parse_file.hpp index 7301281..e5aa0bf 100644 --- a/include/orc/parse_file.hpp +++ b/include/orc/parse_file.hpp @@ -44,6 +44,11 @@ struct freader { return _p - _f; } + std::size_t leftovers() const { + ADOBE_INVARIANT(*this); + return _l - _p; + } + void seekg(std::istream::off_type offset) { _p = _f + offset; ADOBE_INVARIANT(*this); @@ -70,13 +75,24 @@ struct freader { ADOBE_INVARIANT(*this); } - void read(char* p, std::size_t n) { + // Read exactly `n` bytes into `p` (requires `n <= leftovers()`). + // Assumes the bytes are plain old data. + void read(void* p, std::size_t n) { + ADOBE_INVARIANT(leftovers() >= n); std::memcpy(p, _p, n); _p += n; ADOBE_INVARIANT(*this); } + // Read exactly `sizeof(T)` bytes into `x` + // Assumes the value is plain old data.
+ template + void read(T& x) { + read(&x, sizeof(T)); + } + char get() { + ADOBE_INVARIANT(leftovers() > 0); char result = *_p++; ADOBE_INVARIANT(*this); return result; @@ -153,6 +169,7 @@ struct file_details { macho, ar, fat, + coff, }; std::size_t _offset{0}; format _format{format::unknown}; @@ -181,7 +198,7 @@ void endian_swap(T& c) { template T read_pod(freader& s) { T x; - s.read(reinterpret_cast(&x), sizeof(T)); + s.read(x); return x; } @@ -193,7 +210,7 @@ inline bool read_pod(freader& s) { template T read_pod(freader& s, bool byteswap) { T x; - s.read(reinterpret_cast(&x), sizeof(T)); + s.read(&x, sizeof(T)); if (byteswap) { endian_swap(x); } @@ -227,18 +244,18 @@ constexpr std::decay_t copy(T&& value) noexcept(noexcept(std::decay_t{ //-------------------------------------------------------------------------------------------------- -enum class macho_reader_mode { +enum class reader_mode { invalid, register_dies, derive_dylibs, odrv_reporting, }; -struct macho_params { +struct reader_params { using register_dependencies_callback = std::function&&)>; - macho_reader_mode _mode{macho_reader_mode::invalid}; + reader_mode _mode{reader_mode::invalid}; std::filesystem::path _executable_path; // only required if mode == derive_dylibs register_dependencies_callback _register_dependencies; // only required if mode == derive_dylibs }; @@ -247,6 +264,6 @@ void parse_file(std::string_view object_name, const object_ancestry& ancestry, freader& s, std::istream::pos_type end_pos, - macho_params params); + reader_params params); //-------------------------------------------------------------------------------------------------- diff --git a/src/ar.cpp b/src/ar.cpp index c1edc83..00b2bf1 100644 --- a/src/ar.cpp +++ b/src/ar.cpp @@ -34,7 +34,7 @@ void read_ar(object_ancestry&& ancestry, freader& s, std::istream::pos_type end_pos, file_details details, - macho_params params) { + reader_params params) { std::string magic = read_fixed_string<8>(s); assert(magic == "!\n"); diff 
--git a/src/coff.cpp b/src/coff.cpp new file mode 100644 index 0000000..b3e250d --- /dev/null +++ b/src/coff.cpp @@ -0,0 +1,172 @@ +// Copyright 2021 Adobe +// All Rights Reserved. +// +// NOTICE: Adobe permits you to use, modify, and distribute this file in accordance with the terms +// of the Adobe license agreement accompanying it. + +// identity +#include "orc/coff.hpp" + +// stdc++ +#include + +// adobe contract checks +#include "adobe/contract_checks.hpp" + +// application +#include "orc/dwarf.hpp" +#include "orc/object_file_registry.hpp" +#include "orc/settings.hpp" // for globals + +//-------------------------------------------------------------------------------------------------- + +namespace { + +//-------------------------------------------------------------------------------------------------- +// +// Relevant documentation: +// - Portable Executable (PE) format: +// https://learn.microsoft.com/en-us/windows/win32/debug/pe-format +// - image_file_header: +// https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_file_header +// - image_section_header: +// https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_section_header +// + +struct image_file_header { + std::int16_t machine{0}; + std::int16_t section_count{0}; + std::int32_t datetimestamp{0}; + std::int32_t symbol_table_pointer{0}; + std::int32_t symbol_count{0}; + std::int16_t optional_header_size{0}; + std::int16_t characteristics{0}; +}; + +static_assert(sizeof(image_file_header) == 20); + +struct image_section_header { + std::int8_t name[8]{0}; + union { + std::int32_t physical_address{0}; + std::int32_t virtual_size; + } misc; + std::int32_t virtual_address{0}; + std::int32_t raw_data_size{0}; + std::int32_t raw_data_pointer{0}; + std::int32_t relocations_pointer{0}; + std::int32_t line_numbers_pointer{0}; + std::int16_t relocations_count{0}; + std::int16_t line_numbers_count{0}; + std::int32_t characteristics{0}; +}; + 
+static_assert(sizeof(image_section_header) == 40); + +struct section { + image_section_header header; + std::string actual_name; +}; + +//-------------------------------------------------------------------------------------------------- +#if 0 +/// Similar to strlen, except with an upper limit as to the size of the string. +/// APPARENTLY this is already available in some POSIX extensions \ macOS. +/// Keeping this around just in case. +std::size_t strnlen(const char* s, std::size_t n) { + std::size_t result{0}; + for (; *s; ++s) { + if (++result == n) { + break; + } + } + return result; +} +#endif +//-------------------------------------------------------------------------------------------------- + +} // namespace + +//-------------------------------------------------------------------------------------------------- + +void read_coff(object_ancestry&& ancestry, + freader& s, + std::istream::pos_type end_pos, + file_details details, + reader_params params) { + std::uint32_t ofd_index = + static_cast(object_file_register(std::move(ancestry), copy(details))); + + dwarf_from_coff(ofd_index, std::move(params)).process_all_dies(); +} + +//-------------------------------------------------------------------------------------------------- + +dwarf dwarf_from_coff(std::uint32_t ofd_index, reader_params params) { + const auto& entry = object_file_fetch(ofd_index); + freader s(entry._ancestry.begin()->allocate_path()); + dwarf dwarf(ofd_index, copy(s), copy(entry._details)); + + s.seekg(entry._details._offset); + + // If you hit this, you're running ORC in a mode not supported by COFF. 
+ ADOBE_INVARIANT(params._mode == reader_mode::register_dies || + params._mode == reader_mode::odrv_reporting); + + // The general format of COFF is: + // header + // section headers + // section data + // symbols + // strings + // In our case, we're just looking for the DWARF data, which is housed + // in one of the "raw data" blocks in COFF sections whose names are + // the DWARF segments we are interested in (debug_info, debug_abbrev, + // etc.) So we don't need to read anything beyond the section headers. + + const auto header = read_pod(s); + + // According to the PE format docs there should be no optional header for object files. + ADOBE_INVARIANT(header.optional_header_size == 0); + + // Grab the string table offset and size, which we'll need when deriving + // the name of some of the sections we read below. + const auto string_table_offset = header.symbol_table_pointer + header.symbol_count * 18; + const auto string_table_size = + temp_seek(s, string_table_offset, [&] { return read_pod(s); }); + + // Read the section headers. As we go, derive the actual section header + // name, which may be in the string table. If the name is a DWARF segment, + // add it to the DWARF processor. + std::vector
sections(header.section_count); + for (auto& section : sections) { + s.read(section.header); + const char* name = reinterpret_cast<const char*>(&section.header.name[0]); + if (*name != '/') { + // strnlen is the same as strlen but with a string length upper limit. + // Apparently it's available via POSIX extension? Who knew. Not this guy. + std::size_t len = strnlen(name, 8); + section.actual_name = std::string(name, len); + } else { + const std::string digits(name + 1, strnlen(name + 1, 7)); + int section_name_offset = std::atoi(digits.c_str()); + ADOBE_INVARIANT(section_name_offset < string_table_size); + section.actual_name = temp_seek(s, string_table_offset + section_name_offset, + [&] { return s.read_c_string_view(); }); + } + + if (section.actual_name.starts_with(".debug")) { + // std::cout << section.actual_name << '\n'; + dwarf.register_section(section.actual_name, section.header.raw_data_pointer, + section.header.raw_data_size); + } + } + + if (params._mode == reader_mode::register_dies) { + ++globals::instance()._object_file_count; + } + + return dwarf; +} + +//-------------------------------------------------------------------------------------------------- diff --git a/src/dwarf.cpp b/src/dwarf.cpp index 3cd11e9..fd1f138 100644 --- a/src/dwarf.cpp +++ b/src/dwarf.cpp @@ -655,7 +655,9 @@ struct dwarf::implementation { void register_section(const std::string& name, std::size_t offset, std::size_t size); - bool register_sections_done(); + // Called in between on-disk DWARF section discovery and die processing + // to set up the state machine to process dies. + void finalize_section_registration(); void report_die_processing_failure(std::size_t die_absolute_offset, std::string&& error); void process_all_dies(); @@ -729,6 +731,7 @@ struct dwarf::implementation { std::size_t _cu_die_offset{ 0}; // offset of the `compile_unit` die.
Relative to start of `debug_info` pool_string _cu_compilation_directory; + std::optional _cu_str_offsets_base; std::uint32_t _ofd_index{0}; // index to the obj_registry in macho.cpp section _debug_abbrev; section _debug_info; @@ -798,17 +801,17 @@ void dwarf::implementation::register_section(const std::string& name, // Instead, the section registration must be complete and cannot be revisited. ADOBE_PRECONDITION(!_ready); - if (name == "__debug_str") { + if (name == "__debug_str" || name == ".debug_str") { _debug_str = section{offset, size}; - } else if (name == "__debug_info") { + } else if (name == "__debug_info" || name == ".debug_info") { _debug_info = section{offset, size}; - } else if (name == "__debug_abbrev") { + } else if (name == "__debug_abbrev" || name == ".debug_abbrev") { _debug_abbrev = section{offset, size}; - } else if (name == "__debug_line") { + } else if (name == "__debug_line" || name == ".debug_line") { _debug_line = section{offset, size}; - } else if (name == "__debug_line_str__DWARF") { + } else if (name == "__debug_line_str__DWARF" || name == ".debug_line_str") { _debug_line_str = section{offset, size}; - } else if (name == "__debug_str_offs__DWARF") { + } else if (name == "__debug_str_offs__DWARF" || name == ".debug_str_offsets") { _debug_str_offsets = section{offset, size}; } else { // save for debugging. 
@@ -936,53 +939,30 @@ pool_string dwarf::implementation::read_debug_line_str(std::size_t offset) { //-------------------------------------------------------------------------------------------------- // SPECREF: DWARF5 page 26 (8) line 28 -- v4 -> v5 changes -pool_string dwarf::implementation::read_debug_str_offs(std::size_t entry) { - if (const auto found = _debug_str_offs_cache.find(entry); +pool_string dwarf::implementation::read_debug_str_offs(std::size_t index) { + if (const auto found = _debug_str_offs_cache.find(index); found != _debug_str_offs_cache.end()) { return found->second; } - // SPECREF: DWARF5 page 259 (241) line 6 -- - // Apparently `DW_AT_str_offsets_base` points to the first entry in this table, but I am not - // sure where that attribute lives. So we'll take the time to derive that offset every time. - // If that becomes too expensive we can revisit hunting down `DW_AT_str_offsets_base` and - // caching it. - - // This section contains a header, then a series of offsets stored as 4- or 8-byte - // values, then a series of strings. So we have to jump twice: first to get - // the offset, then to get the string. The 0th string immediately follows the last - // entry offset. - - const std::size_t entry_offset = temp_seek(_s, _debug_str_offsets._offset, [&] { - const std::size_t startoff = _s.tellg(); - // SPECREF: DWARF5 page 258 (240) line 9 -- string offsets table details - const std::uint64_t length = read_initial_length(); - const std::uint16_t version = read16(); - ADOBE_INVARIANT(version == 5); - const std::uint16_t padding = read16(); - ADOBE_INVARIANT(padding == 0); - const std::size_t endoff = _s.tellg(); - const std::size_t header_size = endoff - startoff; - - // length does not include itself. So the on-disk size taken - // up by the entry offsets is the length minus version and padding. - const std::size_t entry_offsets_size = length - 4; - - // At this point tellg() is at the 0th entry offset value. 
- // To get the entry offset value we are interested in, we - // temp seek to its location and read 4 bytes. (Note that - // all of this assumes 32-bit DWARF.) - std::size_t entry_offset = 4 * entry; - const std::uint32_t entry_offset_value = - temp_seek(_s, entry_offset, std::ios::cur, [&] { return read32(); }); - - // This result is relative to `_debug_str_offsets._offset`. - return header_size + entry_offsets_size + entry_offset_value; - }); + // It is possible for the compilation unit header itself to + // want to use debug_str_offsets _before_ + // `DW_AT_str_offsets_base` has been encountered. In + // such case we punt on the resolved value, and hope + // we don't actually need it while processing dies. + if (!_cu_str_offsets_base) { + static const auto no_resolution_k(empool("read_debug_str_offs_FIXME")); + return no_resolution_k; + } + + const auto base = _debug_str_offsets._offset + *_cu_str_offsets_base; + const auto offset = index * 4; // 8 on DWARF64? - return _debug_str_offs_cache[entry] = - temp_seek(_s, _debug_str_offsets._offset + entry_offset, - [&] { return empool(_s.read_c_string_view()); }); + const auto debug_str_offset = temp_seek(_s, base + offset, [&] { return read32(); }); + + // SPECREF: DWARF5 page 204 (186) line 23 -- these are offsets into + // the `.debug_str` section + return read_debug_str(debug_str_offset); } //-------------------------------------------------------------------------------------------------- @@ -1718,27 +1698,37 @@ attribute_value dwarf::implementation::process_form(const attribute& attr, case dw::form::strx: { // First seen in Xcode 16.1 w/ DWARF5. // SPECREF: DWARF5 page 236 (218) line 31 - result.string(read_debug_str_offs(read_uleb())); + // (cache the uint value for possible use later.) + result.uint(read_uleb()); + result.string(read_debug_str_offs(result.uint())); } break; case dw::form::strx1: { // First seen in Xcode 16.1 w/ DWARF5. 
// SPECREF: DWARF5 page 236 (218) line 31 - result.string(read_debug_str_offs(read8())); + // (cache the uint value for possible use later.) + result.uint(read8()); + result.string(read_debug_str_offs(result.uint())); } break; case dw::form::strx2: { // First seen in Xcode 16.1 w/ DWARF5. // SPECREF: DWARF5 page 236 (218) line 31 - result.string(read_debug_str_offs(read16())); + // (cache the uint value for possible use later.) + result.uint(read16()); + result.string(read_debug_str_offs(result.uint())); } break; case dw::form::strx3: { // First seen in Xcode 16.1 w/ DWARF5. // SPECREF: DWARF5 page 236 (218) line 31 - result.string(read_debug_str_offs(read24())); + // (cache the uint value for possible use later.) + result.uint(read24()); + result.string(read_debug_str_offs(result.uint())); } break; case dw::form::strx4: { // First seen in Xcode 16.1 w/ DWARF5. // SPECREF: DWARF5 page 236 (218) line 31 - result.string(read_debug_str_offs(read32())); + // (cache the uint value for possible use later.) + result.uint(read32()); + result.string(read_debug_str_offs(result.uint())); } break; default: { handle_passover(); @@ -1885,11 +1875,21 @@ die_pair dwarf::implementation::abbreviation_to_die(std::size_t die_address, pro //-------------------------------------------------------------------------------------------------- -bool dwarf::implementation::register_sections_done() { - ADOBE_PRECONDITION(!_ready); +void dwarf::implementation::finalize_section_registration() { + if (_ready) { + return; + } // Houston, we have a problem. - if (!(_debug_info.valid() && _debug_abbrev.valid() && _debug_line.valid())) return false; + if (!_debug_info.valid()) { + throw std::runtime_error("Unread section: debug_info"); + } + if (!_debug_abbrev.valid()) { + throw std::runtime_error("Unread section: debug_abbrev"); + } + if (!_debug_line.valid()) { + throw std::runtime_error("Unread section: debug_line"); + } // the declaration files are 1-indexed. 
The 0th index is reserved for the compilation unit / // partial unit name. We need to prime this here because in single process mode we don't get @@ -1907,8 +1907,6 @@ bool dwarf::implementation::register_sections_done() { read_abbreviations(); _ready = true; - - return true; } //-------------------------------------------------------------------------------------------------- @@ -2124,7 +2122,8 @@ void dwarf::implementation::report_die_processing_failure(std::size_t die_addres * @throws std::runtime_error If DIE processing fails and cannot be recovered */ void dwarf::implementation::process_all_dies() { - if (!_ready && !register_sections_done()) return; + finalize_section_registration(); + ADOBE_PRECONDITION(_ready); auto section_begin = _debug_info._offset; @@ -2297,6 +2296,18 @@ void dwarf::implementation::post_process_compilation_unit_die( const die& die, const attribute_sequence& attributes) { _cu_die_offset = die._offset; + // SPECREF DWARF5 84 (66) line 1 -- + // The compilation unit header may (should?) have `DW_AT_str_offsets_base`. + // This is used to figure out where strings are coming from out of the + // debug_str_offsets section. Save this for later. + // + // SPECREF DWARF5 237 (219) line 18 -- + // This value is apparently of type `stroffsetsptr` which is a + // 4- or 8-byte unsigned value. + if (attributes.has(dw::at::str_offsets_base)) { + _cu_str_offsets_base = attributes.uint(dw::at::str_offsets_base); + } + // Spec (section 3.1.1) says that compilation and partial units may specify which // __debug_line subsection they want to draw their decl_files list from. This also // means we need to clear our current decl_files list (from index 1 to the end) @@ -2313,16 +2324,23 @@ void dwarf::implementation::post_process_compilation_unit_die( } // Grab the comp_dir value here, and apply it to relative paths so we can - // display the full path whenever necessary. 
- if (attributes.has_string(dw::at::comp_dir)) { - _cu_compilation_directory = attributes.string(dw::at::comp_dir); + // display the full path whenever necessary. We don't read the string + // directly, as the resolution of the string may have happened before + // `DW_AT_str_offsets_base` was found, and thus would be an invalid + // value. For this value specifically, then, we re-grab the string based + // on the offset. + if (attributes.has_uint(dw::at::comp_dir)) { + _cu_compilation_directory = read_debug_str_offs(attributes.uint(dw::at::comp_dir)); } // REVISIT (fosterbrereton): If the name is a relative path, there may be a // DW_AT_comp_dir attribute that specifies the path it is relative from. // Is it worth making this path absolute? + // + // (This string suffers from the same `DW_AT_str_offsets_base` issue as + // comp_dir, hence the call to `read_debug_str_offs`.) - _decl_files[0] = attributes.string(dw::at::name); + _decl_files[0] = read_debug_str_offs(attributes.uint(dw::at::name)); } //-------------------------------------------------------------------------------------------------- @@ -2415,7 +2433,7 @@ die_pair dwarf::implementation::fetch_one_die(std::size_t die_offset, ZoneScoped; #endif // ORC_FEATURE(PROFILE_DIE_DETAILS) - if (!_ready && !register_sections_done()) throw std::runtime_error("dwarf setup failed"); + finalize_section_registration(); _cu_header_offset = cu_header_offset; diff --git a/src/fat.cpp b/src/fat.cpp index 5636f5a..cad400c 100644 --- a/src/fat.cpp +++ b/src/fat.cpp @@ -44,7 +44,7 @@ void read_fat(object_ancestry&& ancestry, freader& s, std::istream::pos_type end_pos, file_details details, - macho_params params) { + reader_params params) { auto header = read_pod(s); if (details._needs_byteswap) { endian_swap(header.magic); diff --git a/src/macho.cpp b/src/macho.cpp index ccb6e97..3a55434 100644 --- a/src/macho.cpp +++ b/src/macho.cpp @@ -46,10 +46,10 @@ struct macho_reader { macho_reader(std::uint32_t ofd_index, freader&& s, 
file_details&& details, - macho_params&& params) + reader_params&& params) : _ofd_index(ofd_index), _s(std::move(s)), _details(std::move(details)), _params(std::move(params)), _dwarf(ofd_index, copy(_s), copy(_details)) { - if (params._mode == macho_reader_mode::invalid) { + if (params._mode == reader_mode::invalid) { cerr_safe([&](auto& s) { s << "Invalid reader mode.\n"; }); std::terminate(); } @@ -59,10 +59,9 @@ struct macho_reader { struct dwarf& dwarf() & { return _dwarf; } struct dwarf&& dwarf() && { return std::move(_dwarf); } - bool register_dies_mode() const { return _params._mode == macho_reader_mode::register_dies; } - bool derive_dylibs_mode() const { return _params._mode == macho_reader_mode::derive_dylibs; } - // bool odrv_reporting_mode() const { return _params._mode == macho_reader_mode::odrv_reporting; - // } + bool register_dies_mode() const { return _params._mode == reader_mode::register_dies; } + bool derive_dylibs_mode() const { return _params._mode == reader_mode::derive_dylibs; } + // bool odrv_reporting_mode() const { return _params._mode == reader_mode::odrv_reporting; } void derive_dependencies(); @@ -79,7 +78,7 @@ struct macho_reader { const std::uint32_t _ofd_index{0}; freader _s; const file_details _details; - const macho_params _params; + const reader_params _params; std::vector _unresolved_dylibs; std::vector _rpaths; struct dwarf _dwarf; // must be last @@ -383,7 +382,7 @@ void read_macho(object_ancestry&& ancestry, freader s, std::istream::pos_type end_pos, file_details details, - macho_params params) { + reader_params params) { orc::do_work([_ancestry = std::move(ancestry), _s = std::move(s), _details = std::move(details), _params = std::move(params)]() mutable { ZoneScopedN("read_macho"); @@ -412,7 +411,7 @@ void read_macho(object_ancestry&& ancestry, //-------------------------------------------------------------------------------------------------- -dwarf dwarf_from_macho(std::uint32_t ofd_index, macho_params params) { +dwarf 
dwarf_from_macho(std::uint32_t ofd_index, reader_params params) { const auto& entry = object_file_fetch(ofd_index); freader s(entry._ancestry.begin()->allocate_path()); @@ -475,8 +474,8 @@ std::vector derive_immediate_dylibs( TracyLockable(std::mutex, dylib_result_mutex); std::vector result; freader input(input_path); - macho_params params; - params._mode = macho_reader_mode::derive_dylibs; + reader_params params; + params._mode = reader_mode::derive_dylibs; params._executable_path = executable_path; params._register_dependencies = [&](std::vector&& p) { ZoneScopedN("register_dependencies"); diff --git a/src/orc.cpp b/src/orc.cpp index e026aa0..e98886c 100644 --- a/src/orc.cpp +++ b/src/orc.cpp @@ -39,6 +39,7 @@ // application #include "orc/async.hpp" +#include "orc/coff.hpp" #include "orc/dwarf.hpp" #include "orc/features.hpp" #include "orc/macho.hpp" @@ -180,11 +181,32 @@ const char* problem_prefix() { return settings::instance()._graceful_exit ? "war //-------------------------------------------------------------------------------------------------- +dwarf dwarf_from_object_file(std::uint32_t ofd_index, reader_params params) { + const object_file_descriptor& descriptor = object_file_fetch(ofd_index); + + switch (descriptor._details._format) { + case file_details::format::macho: { + return dwarf_from_macho(ofd_index, std::move(params)); + } break; + case file_details::format::coff: { + return dwarf_from_coff(ofd_index, std::move(params)); + } break; + default: { + // If you get here, the object file format is either new and + // unaccounted for, or the format is a container type (ar, fat) + // and not a low-level variant where actual DWARF data is found. 
+ throw std::runtime_error("dwarf_from_object_file: unknown / bad object file"); + } + } +} + +//-------------------------------------------------------------------------------------------------- + attribute_sequence fetch_attributes_for_die(const die& d) { // Too verbose for larger projects, but keep around for debugging/smaller projects. // ZoneScoped; - auto dwarf = dwarf_from_macho(d._ofd_index, macho_params{macho_reader_mode::odrv_reporting}); + auto dwarf = dwarf_from_object_file(d._ofd_index, reader_params{reader_mode::odrv_reporting}); auto [die, attributes] = dwarf.fetch_one_die(d._offset, d._cu_header_offset, d._cu_die_offset); ADOBE_INVARIANT(die._tag == d._tag); @@ -459,7 +481,7 @@ void parse_dsym(const std::filesystem::path& dsym) { freader input(_input_path); parse_file(_input_path.string(), object_ancestry(), input, input.size(), - macho_params{macho_reader_mode::register_dies}); + reader_params{reader_mode::register_dies}); }); } } @@ -503,7 +525,7 @@ std::vector orc_process(std::vector&& file_l freader input(_input_path); parse_file(_input_path.string(), object_ancestry(), input, input.size(), - macho_params{macho_reader_mode::register_dies}); + reader_params{reader_mode::register_dies}); } }); } diff --git a/src/parse_file.cpp b/src/parse_file.cpp index 7010b20..351d94f 100644 --- a/src/parse_file.cpp +++ b/src/parse_file.cpp @@ -26,6 +26,7 @@ // application #include "orc/ar.hpp" +#include "orc/coff.hpp" #include "orc/fat.hpp" #include "orc/macho.hpp" #include "orc/orc.hpp" @@ -53,6 +54,15 @@ file_details detect_file(freader& s) { } else if (header == FAT_MAGIC || header == FAT_CIGAM || header == FAT_MAGIC_64 || header == FAT_CIGAM_64) { result._format = file_details::format::fat; + } else if ((header & 0xffff) == 0x8664) { + // In COFF, the first 16 bits are a machine type code, + // which we are treating here like a magic number. We + // only check for one (0x8664) which means x64. 
This + // will likely need updating as other machine types are + // observed. + // See https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types + result._format = file_details::format::coff; + result._is_64_bit = true; } result._is_64_bit = header == MH_MAGIC_64 || header == MH_CIGAM_64 || @@ -156,7 +166,7 @@ void parse_file(std::string_view object_name, const object_ancestry& ancestry, freader& s, std::istream::pos_type end_pos, - macho_params params) { + reader_params params) { auto detection = detect_file(s); // append this object name to the ancestry @@ -175,6 +185,9 @@ void parse_file(std::string_view object_name, case file_details::format::fat: return read_fat(std::move(new_ancestry), s, end_pos, std::move(detection), std::move(params)); + case file_details::format::coff: + return read_coff(std::move(new_ancestry), s, end_pos, std::move(detection), + std::move(params)); } }