From 7c2d90b43b634cc9fe94fe59f0317d606545aa04 Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Wed, 20 Mar 2024 11:08:24 +0100 Subject: [PATCH 01/19] Fix clippy and rustdoc lints (#152) --- examples/pdb2hpp.rs | 8 ++++---- src/common.rs | 4 ++-- src/modi/c13.rs | 7 +++---- src/modi/mod.rs | 2 +- src/msf/mod.rs | 2 +- src/msf/page_list.rs | 1 - src/omap.rs | 4 +--- src/tpi/data.rs | 2 +- src/tpi/mod.rs | 7 +++---- 9 files changed, 16 insertions(+), 21 deletions(-) diff --git a/examples/pdb2hpp.rs b/examples/pdb2hpp.rs index 0235d45..3e146bb 100644 --- a/examples/pdb2hpp.rs +++ b/examples/pdb2hpp.rs @@ -7,8 +7,8 @@ use pdb::FallibleIterator; type TypeSet = BTreeSet; -pub fn type_name<'p>( - type_finder: &pdb::TypeFinder<'p>, +pub fn type_name( + type_finder: &pdb::TypeFinder<'_>, type_index: pdb::TypeIndex, needed_types: &mut TypeSet, ) -> pdb::Result { @@ -359,8 +359,8 @@ impl<'p> Method<'p> { } } -fn argument_list<'p>( - type_finder: &pdb::TypeFinder<'p>, +fn argument_list( + type_finder: &pdb::TypeFinder<'_>, type_index: pdb::TypeIndex, needed_types: &mut TypeSet, ) -> pdb::Result> { diff --git a/src/common.rs b/src/common.rs index 893a857..f5953d7 100644 --- a/src/common.rs +++ b/src/common.rs @@ -14,7 +14,7 @@ use std::result; use std::slice; use scroll::ctx::TryFromCtx; -use scroll::{self, Endian, Pread, LE}; +use scroll::{Endian, Pread, LE}; use crate::tpi::constants; @@ -581,7 +581,7 @@ pub trait ItemIndex: /// [`ModuleInfo`](crate::ModuleInfo). Note that this comparison needs to be done /// case-insensitively as the name in the DBI stream and name table are known to not /// have matching cases. - /// 4. Resolve the [`Local`](crate::Local) index into a global one using + /// 4. Resolve the [`Local`] index into a global one using /// [`CrossModuleExports`](crate::CrossModuleExports). 
/// /// Cross module references are specially formatted indexes with the most significant bit set to diff --git a/src/modi/c13.rs b/src/modi/c13.rs index 645d0b0..223bb99 100644 --- a/src/modi/c13.rs +++ b/src/modi/c13.rs @@ -6,8 +6,8 @@ use scroll::{ctx::TryFromCtx, Endian, Pread}; use crate::common::*; use crate::modi::{ - constants, CrossModuleExport, CrossModuleRef, FileChecksum, FileIndex, FileInfo, LineInfo, - LineInfoKind, ModuleRef, + constants, CrossModuleExport, CrossModuleRef, FileChecksum, FileInfo, LineInfo, LineInfoKind, + ModuleRef, }; use crate::symbol::{BinaryAnnotation, BinaryAnnotationsIter, InlineSiteSymbol}; use crate::FallibleIterator; @@ -345,6 +345,7 @@ enum LineEntry { /// Declares a source line number. Number(LineNumberEntry), /// Declares a debugging marker. + #[allow(dead_code)] // reason = "the inner `LineMarkerEntry` is not (yet) accessed" Marker(LineMarkerEntry), } @@ -1418,8 +1419,6 @@ impl<'a> LineProgram<'a> { mod tests { use super::*; - use std::mem; - use crate::symbol::BinaryAnnotations; #[test] diff --git a/src/modi/mod.rs b/src/modi/mod.rs index cdb2372..c0e2dc7 100644 --- a/src/modi/mod.rs +++ b/src/modi/mod.rs @@ -49,7 +49,7 @@ impl<'s> ModuleInfo<'s> { } fn lines_data(&self, size: usize) -> &[u8] { - let start = self.symbols_size as usize; + let start = self.symbols_size; &self.stream[start..start + size] } diff --git a/src/msf/mod.rs b/src/msf/mod.rs index 92b6db3..338fa32 100644 --- a/src/msf/mod.rs +++ b/src/msf/mod.rs @@ -299,7 +299,7 @@ mod big { let _ = stream_table.take((stream_count - stream_number - 1) as usize * 4)?; // skip the preceding streams' page numbers - let _ = stream_table.take((page_numbers_to_skip as usize) * 4)?; + let _ = stream_table.take(page_numbers_to_skip * 4)?; // we're now at the list of pages for our stream // accumulate them into a PageList diff --git a/src/msf/page_list.rs b/src/msf/page_list.rs index 88570b6..e5eace9 100644 --- a/src/msf/page_list.rs +++ b/src/msf/page_list.rs @@ -99,7 
+99,6 @@ impl PageList { #[cfg(test)] mod tests { use crate::msf::page_list::*; - use crate::source::SourceSlice; #[test] fn test_push() { diff --git a/src/omap.rs b/src/omap.rs index e0c1d57..442dfa8 100644 --- a/src/omap.rs +++ b/src/omap.rs @@ -77,7 +77,7 @@ impl fmt::Debug for OMAPRecord { impl PartialOrd for OMAPRecord { #[inline] fn partial_cmp(&self, other: &Self) -> Option { - self.source_address().partial_cmp(&other.source_address()) + Some(self.cmp(other)) } } @@ -577,8 +577,6 @@ impl PdbInternalSectionOffset { mod tests { use super::*; - use std::mem; - #[test] fn test_omap_record() { assert_eq!(mem::size_of::(), 8); diff --git a/src/tpi/data.rs b/src/tpi/data.rs index ed7d987..29eb878 100644 --- a/src/tpi/data.rs +++ b/src/tpi/data.rs @@ -166,7 +166,7 @@ pub(crate) fn parse_type_data<'t>(buf: &mut ParseBuffer<'t>) -> Result Date: Tue, 21 Jun 2022 15:39:50 -0500 Subject: [PATCH 02/19] Implement AliasType --- src/tpi/data.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/tpi/data.rs b/src/tpi/data.rs index 29eb878..aaf8aab 100644 --- a/src/tpi/data.rs +++ b/src/tpi/data.rs @@ -33,6 +33,7 @@ pub enum TypeData<'t> { Enumerate(EnumerateType<'t>), Array(ArrayType), Union(UnionType<'t>), + Alias(AliasType<'t>), Bitfield(BitfieldType), FieldList(FieldList<'t>), ArgumentList(ArgumentList), @@ -336,6 +337,12 @@ pub(crate) fn parse_type_data<'t>(buf: &mut ParseBuffer<'t>) -> Result Ok(TypeData::Alias(AliasType { + underlying_type: buf.parse()?, + name: parse_string(leaf, &mut buf)?, + })), + // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L2164-L2170 LF_BITFIELD => Ok(TypeData::Bitfield(BitfieldType { underlying_type: buf.parse()?, @@ -1023,6 +1030,13 @@ pub struct UnionType<'t> { pub unique_name: Option>, } +/// The information parsed from a type record with kind `LF_ALIAS` or `LF_ALIAS_ST`. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub struct AliasType<'t> { + pub underlying_type: TypeIndex, + pub name: RawString<'t>, +} + /// The information parsed from a type record with kind `LF_BITFIELD`. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct BitfieldType { From 241bcf7327c3efa1e10ff979279e8e72f817e5b2 Mon Sep 17 00:00:00 2001 From: TrinityDevelopers Date: Thu, 23 Jun 2022 09:25:03 -0500 Subject: [PATCH 03/19] Add Alias to TypeData::name + fix typo --- src/tpi/data.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/tpi/data.rs b/src/tpi/data.rs index aaf8aab..b4e7bbd 100644 --- a/src/tpi/data.rs +++ b/src/tpi/data.rs @@ -51,7 +51,8 @@ impl<'t> TypeData<'t> { | Self::Nested(NestedType { ref name, .. }) | Self::Enumeration(EnumerationType { ref name, .. }) | Self::Enumerate(EnumerateType { ref name, .. }) - | Self::Union(UnionType { ref name, .. }) => name, + | Self::Union(UnionType { ref name, .. }) + | Self::Alias(AliasType { ref name, .. }) => name, _ => return None, }; @@ -340,7 +341,7 @@ pub(crate) fn parse_type_data<'t>(buf: &mut ParseBuffer<'t>) -> Result Ok(TypeData::Alias(AliasType { underlying_type: buf.parse()?, - name: parse_string(leaf, &mut buf)?, + name: parse_string(leaf, buf)?, })), // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L2164-L2170 From cac2362bb3b2357ef5f6c3c295db8953e5a42f4b Mon Sep 17 00:00:00 2001 From: Andrew Miller Date: Tue, 21 Jun 2022 10:34:41 -0500 Subject: [PATCH 04/19] Fix integer typos in documentation --- src/tpi/primitive.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tpi/primitive.rs b/src/tpi/primitive.rs index da45e56..26a0f99 100644 --- a/src/tpi/primitive.rs +++ b/src/tpi/primitive.rs @@ -81,13 +81,13 @@ pub enum PrimitiveKind { /// Signed 32-bit integer Long, - /// Unsigned 32-bit inteer + /// Unsigned 32-bit integer ULong, /// Signed 32-bit integer I32, - /// Unsigned 32-bit inteer + 
/// Unsigned 32-bit integer U32, /// Signed 64-bit integer From f59c865517ccde47a242f1d2ec2bc4509ac200a3 Mon Sep 17 00:00:00 2001 From: TrinityDevelopers Date: Fri, 1 Jul 2022 12:05:24 -0500 Subject: [PATCH 05/19] Add remaining FieldAttribute values --- src/tpi/data.rs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/tpi/data.rs b/src/tpi/data.rs index b4e7bbd..8bb514a 100644 --- a/src/tpi/data.rs +++ b/src/tpi/data.rs @@ -634,6 +634,31 @@ impl FieldAttributes { pub fn is_intro_virtual(self) -> bool { matches!(self.method_properties(), 0x04 | 0x06) } + + #[inline] + pub fn is_pseudo(self) -> bool { + self.0 & 0x0020 != 0 + } + + #[inline] + pub fn noinherit(self) -> bool { + self.0 & 0x0040 != 0 + } + + #[inline] + pub fn noconstruct(self) -> bool { + self.0 & 0x0080 != 0 + } + + #[inline] + pub fn is_compgenx(self) -> bool { + self.0 & 0x0100 != 0 + } + + #[inline] + pub fn sealed(self) -> bool { + self.0 & 0x0200 != 0 + } // TODO } From 6cac77c23fc3cad8ab94642c632e45709f7fc452 Mon Sep 17 00:00:00 2001 From: Glenn Smith Date: Thu, 21 Jul 2022 17:48:34 -0400 Subject: [PATCH 06/19] Implement VirtualFunctionTable / VirtualTableShape --- src/tpi/data.rs | 77 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 73 insertions(+), 4 deletions(-) diff --git a/src/tpi/data.rs b/src/tpi/data.rs index 8bb514a..59d4684 100644 --- a/src/tpi/data.rs +++ b/src/tpi/data.rs @@ -25,6 +25,8 @@ pub enum TypeData<'t> { Nested(NestedType<'t>), BaseClass(BaseClassType), VirtualBaseClass(VirtualBaseClassType), + VirtualFunctionTable(VirtualFunctionTableType<'t>), + VirtualTableShape(VirtualTableShapeType), VirtualFunctionTablePointer(VirtualFunctionTablePointerType), Procedure(ProcedureType), Pointer(PointerType), @@ -353,14 +355,40 @@ pub(crate) fn parse_type_data<'t>(buf: &mut ParseBuffer<'t>) -> Result { - // TODO - Err(Error::UnimplementedTypeKind(leaf)) + let mut vtshape = VirtualTableShapeType { + descriptors: vec![], + }; + let count = 
buf.parse_u16()? as usize; + // These are packed 4-bit values + for _ in 0..((count + 1) / 2) { + let desc: u8 = buf.parse()?; + vtshape.descriptors.push(desc & 0xF); + if vtshape.descriptors.len() < count { + vtshape.descriptors.push(desc >> 4); + } + } + Ok(TypeData::VirtualTableShape(vtshape)) } // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L1825-L1837 LF_VFTABLE => { - // TODO - Err(Error::UnimplementedTypeKind(leaf)) + let mut vftable = VirtualFunctionTableType { + owner: buf.parse()?, + base: buf.parse()?, + object_offset: parse_unsigned(buf)? as u32, + names: vec![], + }; + + let names_length = parse_unsigned(buf)? as usize; + + let mut len = 0; + while len < names_length { + let s = buf.parse_cstring()?; + len += s.len() + 1; + vftable.names.push(s); + } + + Ok(TypeData::VirtualFunctionTable(vftable)) } // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L2521-L2528 @@ -859,6 +887,32 @@ impl PointerAttributes { } } +/* +typedef enum CV_VTS_desc_e { + CV_VTS_near = 0x00, + CV_VTS_far = 0x01, + CV_VTS_thin = 0x02, + CV_VTS_outer = 0x03, + CV_VTS_meta = 0x04, + CV_VTS_near32 = 0x05, + CV_VTS_far32 = 0x06, + CV_VTS_unused = 0x07 +} CV_VTS_desc_e; + */ +#[allow(unused)] +#[repr(u8)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum VirtualTableShapeDescriptor { + Near = 0x00, + Far = 0x01, + Thin = 0x02, + Outer = 0x03, + Meta = 0x04, + Near32 = 0x05, + Far32 = 0x06, + Unused = 0x07, +} + /// The information parsed from a type record with kind /// `LF_CLASS`, `LF_CLASS_ST`, `LF_STRUCTURE`, `LF_STRUCTURE_ST` or `LF_INTERFACE`. // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L1631 @@ -980,6 +1034,21 @@ pub struct VirtualFunctionTablePointerType { pub table: TypeIndex, } +/// The information parsed from a type record with kind `LF_VTSHAPE`. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VirtualTableShapeType { + pub descriptors: Vec, +} + +/// The information parsed from a type record with kind `LF_VFTABLE`. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VirtualFunctionTableType<'t> { + pub owner: TypeIndex, + pub base: TypeIndex, + pub object_offset: u32, + pub names: Vec>, +} + /// The information parsed from a type record with kind `LF_PROCEDURE`. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct ProcedureType { From 13930e364db4286df376adb10e53de06884f70c4 Mon Sep 17 00:00:00 2001 From: Justin Moore Date: Thu, 17 Nov 2022 11:14:38 -0600 Subject: [PATCH 07/19] Add `DebugInformation::is_stripped` to easily query to see if the PDB is stripped --- src/dbi.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/dbi.rs b/src/dbi.rs index 86379cb..2993cb6 100644 --- a/src/dbi.rs +++ b/src/dbi.rs @@ -88,6 +88,17 @@ impl<'s> DebugInformation<'s> { } } + /// Returns whether or not this PDB has been marked as stripped. Stripped PDBs do not contain + /// type information, line number information, or per-object CV symbols. + /// + /// This flag is set when a PDB is written with [/PDBSTRIPPED] by MSVC. + /// + /// [/PDBSTRIPPED]: https://learn.microsoft.com/en-us/cpp/build/reference/pdbstripped-strip-private-symbols?view=msvc-170 + pub fn is_stripped(&self) -> bool { + // flags.fStripped + (self.header.flags & 0x2) != 0 + } + /// Returns an iterator that can traverse the modules list in sequential order. 
pub fn modules(&self) -> Result> { let mut buf = self.stream.parse_buffer(); From 30b2b718719c1c9fbd2311d6c6747b91101b6f8d Mon Sep 17 00:00:00 2001 From: michal-kapala Date: Fri, 17 Feb 2023 00:38:51 +0100 Subject: [PATCH 08/19] Add support for managed procedures Added symbols: - S_TOKENREF - S_LMANPROC - S_GMANPROC --- examples/pdb_symbols.rs | 8 +++- src/common.rs | 8 ++++ src/symbol/mod.rs | 103 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 118 insertions(+), 1 deletion(-) diff --git a/examples/pdb_symbols.rs b/examples/pdb_symbols.rs index bf6d3eb..b132bbb 100644 --- a/examples/pdb_symbols.rs +++ b/examples/pdb_symbols.rs @@ -3,7 +3,7 @@ use std::env; use pdb2 as pdb; use getopts::Options; -use pdb::{FallibleIterator, PdbInternalSectionOffset}; +use pdb::{FallibleIterator, PdbInternalSectionOffset, RawString}; fn print_usage(program: &str, opts: Options) { let brief = format!("Usage: {} input.pdb", program); @@ -28,6 +28,12 @@ fn print_symbol(symbol: &pdb::Symbol<'_>) -> pdb::Result<()> { pdb::SymbolData::Procedure(data) => { print_row(data.offset, "function", data.name); } + pdb::SymbolData::ManagedProcedure(data) => { + match data.name { + None => print_row(data.offset, "function", RawString::from(&b""[..])), + Some(name) => print_row(data.offset, "function", name), + } + } _ => { // ignore everything else } diff --git a/src/common.rs b/src/common.rs index f5953d7..937b5af 100644 --- a/src/common.rs +++ b/src/common.rs @@ -605,6 +605,14 @@ impl_pread!(TypeIndex); impl ItemIndex for TypeIndex {} +/// COM+ metadata token for managed procedures (`CV_tkn_t`). +#[derive(Clone, Copy, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct COMToken(pub u32); + +impl_convert!(COMToken, u32); +impl_hex_fmt!(COMToken); +impl_pread!(COMToken); + /// Index of an [`Id`](crate::Id) in [`IdInformation`](crate::IdInformation) stream. 
/// /// If this index is a [cross module reference](ItemIndex::is_cross_module), it must be resolved diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index 98d6cf7..c0b1483 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -185,6 +185,8 @@ pub enum SymbolData<'t> { Public(PublicSymbol<'t>), /// A procedure, such as a function or method. Procedure(ProcedureSymbol<'t>), + /// A managed procedure, such as a function or method. + ManagedProcedure(ManagedProcedureSymbol<'t>), /// A thread local variable. ThreadStorage(ThreadStorageSymbol<'t>), /// Flags used to compile a module. @@ -197,6 +199,8 @@ pub enum SymbolData<'t> { DataReference(DataReferenceSymbol<'t>), /// Reference to an annotation. AnnotationReference(AnnotationReferenceSymbol<'t>), + /// Reference to a managed procedure. + TokenReference(TokenReferenceSymbol<'t>), /// Trampoline thunk. Trampoline(TrampolineSymbol), /// An exported symbol. @@ -236,12 +240,14 @@ impl<'t> SymbolData<'t> { Self::Data(data) => Some(data.name), Self::Public(data) => Some(data.name), Self::Procedure(data) => Some(data.name), + Self::ManagedProcedure(data) => data.name, Self::ThreadStorage(data) => Some(data.name), Self::CompileFlags(_) => None, Self::UsingNamespace(data) => Some(data.name), Self::ProcedureReference(data) => data.name, Self::DataReference(data) => data.name, Self::AnnotationReference(data) => Some(data.name), + Self::TokenReference(data) => Some(data.name), Self::Trampoline(_) => None, Self::Export(data) => Some(data.name), Self::Local(data) => Some(data.name), @@ -283,6 +289,7 @@ impl<'t> TryFromCtx<'t> for SymbolData<'t> { S_PUB32 | S_PUB32_ST => SymbolData::Public(buf.parse_with(kind)?), S_LPROC32 | S_LPROC32_ST | S_GPROC32 | S_GPROC32_ST | S_LPROC32_ID | S_GPROC32_ID | S_LPROC32_DPC | S_LPROC32_DPC_ID => SymbolData::Procedure(buf.parse_with(kind)?), + S_LMANPROC | S_GMANPROC => SymbolData::ManagedProcedure(buf.parse_with(kind)?), S_LTHREAD32 | S_LTHREAD32_ST | S_GTHREAD32 | S_GTHREAD32_ST => { 
SymbolData::ThreadStorage(buf.parse_with(kind)?) } @@ -296,6 +303,7 @@ impl<'t> TryFromCtx<'t> for SymbolData<'t> { S_TRAMPOLINE => Self::Trampoline(buf.parse_with(kind)?), S_DATAREF | S_DATAREF_ST => SymbolData::DataReference(buf.parse_with(kind)?), S_ANNOTATIONREF => SymbolData::AnnotationReference(buf.parse_with(kind)?), + S_TOKENREF => SymbolData::TokenReference(buf.parse_with(kind)?), S_EXPORT => SymbolData::Export(buf.parse_with(kind)?), S_LOCAL => SymbolData::Local(buf.parse_with(kind)?), S_BUILDINFO => SymbolData::BuildInfo(buf.parse_with(kind)?), @@ -575,6 +583,41 @@ impl<'t> TryFromCtx<'t, SymbolKind> for AnnotationReferenceSymbol<'t> { } } +/// Reference to a managed procedure symbol (`S_LMANPROC` or `S_GMANPROC`). +/// +/// Symbol kind `S_TOKENREF`. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct TokenReferenceSymbol<'t> { + /// SUC of the name. + pub sum_name: u32, + /// Symbol index of the referenced [`ManagedProcedureSymbol`]. + /// + /// Note that this symbol might be located in a different module. + pub symbol_index: SymbolIndex, + /// Index of the module in [`DebugInformation::modules`](crate::DebugInformation::modules) + /// containing the actual symbol. + pub module: Option, + /// Name of the procedure reference. + pub name: RawString<'t>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for TokenReferenceSymbol <'t> { + type Error = Error; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { + let mut buf = ParseBuffer::from(this); + + let symbol = TokenReferenceSymbol { + sum_name: buf.parse()?, + symbol_index: buf.parse()?, + module: buf.parse::()?.checked_sub(1).map(usize::from), + name: parse_symbol_name(&mut buf, kind)?, + }; + + Ok((symbol, buf.pos())) + } +} + /// Subtype of [`TrampolineSymbol`]. #[non_exhaustive] #[derive(Clone, Copy, Debug, Eq, PartialEq)] @@ -840,6 +883,66 @@ impl<'t> TryFromCtx<'t, SymbolKind> for ProcedureSymbol<'t> { } } +/// A managed procedure, such as a function or method. 
+/// +/// Symbol kinds: +/// - `S_GMANPROC`, `S_GMANPROCIA64` for global procedures +/// - `S_LMANPROC`, `S_LMANPROCIA64` for local procedures +/// +/// `S_GMANPROCIA64` and `S_LMANPROCIA64` are only mentioned, there is no available source. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct ManagedProcedureSymbol<'t> { + /// Whether this is a global or local procedure. + pub global: bool, + /// The parent scope that this procedure is nested in. + pub parent: Option, + /// The end symbol of this procedure. + pub end: SymbolIndex, + /// The next procedure symbol. + pub next: Option, + /// The length of the code block covered by this procedure. + pub len: u32, + /// Start offset of the procedure's body code, which marks the end of the prologue. + pub dbg_start_offset: u32, + /// End offset of the procedure's body code, which marks the start of the epilogue. + pub dbg_end_offset: u32, + /// COM+ metadata token + pub token: COMToken, + /// Code offset of the start of this procedure. + pub offset: PdbInternalSectionOffset, + /// Detailed flags of this procedure. + pub flags: ProcedureFlags, + /// Register return value is in (may not be used for all archs). + pub return_register: u16, + /// Optional name of the procedure. + pub name: Option>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for ManagedProcedureSymbol<'t> { + type Error = Error; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { + let mut buf = ParseBuffer::from(this); + + let symbol = ManagedProcedureSymbol { + global: matches!(kind, S_GMANPROC), + parent: parse_optional_index(&mut buf)?, + end: buf.parse()?, + next: parse_optional_index(&mut buf)?, + len: buf.parse()?, + dbg_start_offset: buf.parse()?, + dbg_end_offset: buf.parse()?, + token: buf.parse()?, + offset: buf.parse()?, + flags: buf.parse()?, + return_register: buf.parse()?, + name: parse_optional_name(&mut buf, kind)?, + }; + + Ok((symbol, buf.pos())) + } +} + /// The callsite of an inlined function. 
/// /// Symbol kind `S_INLINESITE`, or `S_INLINESITE2`. From 556fce602a0c1c059855474d49edc19a0b2c4a52 Mon Sep 17 00:00:00 2001 From: michal-kapala Date: Fri, 17 Feb 2023 02:06:24 +0100 Subject: [PATCH 09/19] Add OEM symbols Added base for S_OEM parsing, needs improvements in future. --- src/symbol/mod.rs | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index c0b1483..54e6f85 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -225,6 +225,8 @@ pub enum SymbolData<'t> { Thunk(ThunkSymbol<'t>), /// A block of separated code. SeparatedCode(SeparatedCodeSymbol), + /// OEM information. + OEM(OemSymbol<'t>), } impl<'t> SymbolData<'t> { @@ -260,6 +262,7 @@ impl<'t> SymbolData<'t> { Self::RegisterRelative(data) => Some(data.name), Self::Thunk(data) => Some(data.name), Self::SeparatedCode(_) => None, + Self::OEM(_) => None, } } } @@ -315,6 +318,7 @@ impl<'t> TryFromCtx<'t> for SymbolData<'t> { S_REGREL32 => SymbolData::RegisterRelative(buf.parse_with(kind)?), S_THUNK32 | S_THUNK32_ST => SymbolData::Thunk(buf.parse_with(kind)?), S_SEPCODE => SymbolData::SeparatedCode(buf.parse_with(kind)?), + S_OEM => SymbolData::OEM(buf.parse_with(kind)?), other => return Err(Error::UnimplementedSymbolKind(other)), }; @@ -1602,6 +1606,37 @@ impl<'t> TryFromCtx<'t, SymbolKind> for SeparatedCodeSymbol { } } +/// An OEM symbol. +/// +/// Symbol kind `S_OEM`. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct OemSymbol<'t> { + /// OEM's identifier (16B GUID). + pub id_oem: RawString<'t>, + /// Type index. + pub type_index: TypeIndex, + /// User data with forced 4B-alignment. + /// + /// An array of variable size, currently only the first 4B are parsed. 
+ pub rgl: u32, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for OemSymbol<'t> { + type Error = Error; + + fn try_from_ctx(this: &'t [u8], _kind: SymbolKind) -> Result<(Self, usize)> { + let mut buf = ParseBuffer::from(this); + + let symbol = OemSymbol { + id_oem: buf.parse_cstring()?, + type_index: buf.parse()?, + rgl: buf.parse()?, + }; + + Ok((symbol, buf.pos())) + } +} + /// PDB symbol tables contain names, locations, and metadata about functions, global/static data, /// constants, data types, and more. /// From 3c8f497b039234f945f44cca7b7c0c676e7706a6 Mon Sep 17 00:00:00 2001 From: michal-kapala Date: Fri, 17 Feb 2023 02:56:43 +0100 Subject: [PATCH 10/19] Add support for managed local variable slots Added support for: - S_MANSLOT - S_MANSLOT_ST --- examples/pdb_symbols.rs | 3 +++ src/symbol/mod.rs | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/examples/pdb_symbols.rs b/examples/pdb_symbols.rs index b132bbb..0d75027 100644 --- a/examples/pdb_symbols.rs +++ b/examples/pdb_symbols.rs @@ -34,6 +34,9 @@ fn print_symbol(symbol: &pdb::Symbol<'_>) -> pdb::Result<()> { Some(name) => print_row(data.offset, "function", name), } } + pdb::SymbolData::ManagedSlot(data) => { + print_row(data.offset, "data", data.name); + } _ => { // ignore everything else } diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index 54e6f85..c3b49f7 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -207,6 +207,8 @@ pub enum SymbolData<'t> { Export(ExportSymbol<'t>), /// A local symbol in optimized code. Local(LocalSymbol<'t>), + /// A managed local variable slot. + ManagedSlot(ManagedSlotSymbol<'t>), /// Reference to build information. BuildInfo(BuildInfoSymbol), /// The callsite of an inlined function. 
@@ -253,6 +255,7 @@ impl<'t> SymbolData<'t> { Self::Trampoline(_) => None, Self::Export(data) => Some(data.name), Self::Local(data) => Some(data.name), + Self::ManagedSlot(data) => Some(data.name), Self::InlineSite(_) => None, Self::BuildInfo(_) => None, Self::InlineSiteEnd => None, @@ -309,6 +312,7 @@ impl<'t> TryFromCtx<'t> for SymbolData<'t> { S_TOKENREF => SymbolData::TokenReference(buf.parse_with(kind)?), S_EXPORT => SymbolData::Export(buf.parse_with(kind)?), S_LOCAL => SymbolData::Local(buf.parse_with(kind)?), + S_MANSLOT | S_MANSLOT_ST => SymbolData::ManagedSlot(buf.parse_with(kind)?), S_BUILDINFO => SymbolData::BuildInfo(buf.parse_with(kind)?), S_INLINESITE | S_INLINESITE2 => SymbolData::InlineSite(buf.parse_with(kind)?), S_INLINESITE_END => SymbolData::InlineSiteEnd, @@ -1276,6 +1280,41 @@ impl<'t> TryFromCtx<'t, SymbolKind> for LocalSymbol<'t> { } } +/// A managed local variable slot. +/// +/// Symbol kind `S_MANSLOT`. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct ManagedSlotSymbol<'t> { + /// Slot index. + pub slot: u32, + /// Type index or metadata token. + pub type_index: TypeIndex, + /// First code address where var is live. + pub offset: PdbInternalSectionOffset, + /// Local variable flags. + pub flags: LocalVariableFlags, + /// Length-prefixed name of the variable. + pub name: RawString<'t>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for ManagedSlotSymbol<'t> { + type Error = Error; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { + let mut buf = ParseBuffer::from(this); + + let symbol = ManagedSlotSymbol { + slot: buf.parse()?, + type_index: buf.parse()?, + offset: buf.parse()?, + flags: buf.parse()?, + name: parse_symbol_name(&mut buf, kind)?, + }; + + Ok((symbol, buf.pos())) + } +} + // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L4456 /// Flags of an [`ExportSymbol`]. 
#[non_exhaustive] From 0c4f58f17e6144e2d774f21620ba228b5fabea25 Mon Sep 17 00:00:00 2001 From: michal-kapala Date: Sun, 14 May 2023 20:13:14 +0200 Subject: [PATCH 11/19] S_ENVBLOCK support + support for env block symbol --- src/symbol/mod.rs | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index c3b49f7..3bff6ae 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -229,6 +229,8 @@ pub enum SymbolData<'t> { SeparatedCode(SeparatedCodeSymbol), /// OEM information. OEM(OemSymbol<'t>), + /// Environment block split off from S_COMPILE2. + EnvBlock(EnvBlockSymbol<'t>) } impl<'t> SymbolData<'t> { @@ -266,6 +268,7 @@ impl<'t> SymbolData<'t> { Self::Thunk(data) => Some(data.name), Self::SeparatedCode(_) => None, Self::OEM(_) => None, + Self::EnvBlock(_) => None, } } } @@ -323,6 +326,7 @@ impl<'t> TryFromCtx<'t> for SymbolData<'t> { S_THUNK32 | S_THUNK32_ST => SymbolData::Thunk(buf.parse_with(kind)?), S_SEPCODE => SymbolData::SeparatedCode(buf.parse_with(kind)?), S_OEM => SymbolData::OEM(buf.parse_with(kind)?), + S_ENVBLOCK => SymbolData::EnvBlock(buf.parse_with(kind)?), other => return Err(Error::UnimplementedSymbolKind(other)), }; @@ -1676,6 +1680,39 @@ impl<'t> TryFromCtx<'t, SymbolKind> for OemSymbol<'t> { } } +/// Environment block split off from `S_COMPILE2`. +/// +/// Symbol kind `S_ENVBLOCK`. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct EnvBlockSymbol<'t> { + /// EC flag (previously called `rev`). + pub edit_and_continue: bool, + /// Sequence of zero-terminated command strings. 
+ pub rgsz: Vec>, +} + +impl<'t> TryFromCtx<'t, SymbolKind> for EnvBlockSymbol <'t> { + type Error = Error; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { + let mut buf = ParseBuffer::from(this); + let flags: u8 = buf.parse()?; + + let mut strings: Vec> = Vec::new(); + + while !buf.is_empty() { + strings.push(parse_symbol_name(&mut buf, kind)?); + } + + let symbol = EnvBlockSymbol { + edit_and_continue: flags & 1 != 0, + rgsz: strings, + }; + + Ok((symbol, buf.pos())) + } +} + /// PDB symbol tables contain names, locations, and metadata about functions, global/static data, /// constants, data types, and more. /// From fc65faa1ea7ff89d32ca5c91ba81a5de2bd62298 Mon Sep 17 00:00:00 2001 From: michal-kapala Date: Sun, 14 May 2023 21:04:41 +0200 Subject: [PATCH 12/19] S_SECTION support + support for PE section symbol --- src/symbol/mod.rs | 48 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index 3bff6ae..b256702 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -230,7 +230,9 @@ pub enum SymbolData<'t> { /// OEM information. OEM(OemSymbol<'t>), /// Environment block split off from S_COMPILE2. - EnvBlock(EnvBlockSymbol<'t>) + EnvBlock(EnvBlockSymbol<'t>), + /// A COFF section in a PE executable. 
+ Section(SectionSymbol<'t>), } impl<'t> SymbolData<'t> { @@ -269,6 +271,7 @@ impl<'t> SymbolData<'t> { Self::SeparatedCode(_) => None, Self::OEM(_) => None, Self::EnvBlock(_) => None, + Self::Section(data) => Some(data.name), } } } @@ -327,6 +330,7 @@ impl<'t> TryFromCtx<'t> for SymbolData<'t> { S_SEPCODE => SymbolData::SeparatedCode(buf.parse_with(kind)?), S_OEM => SymbolData::OEM(buf.parse_with(kind)?), S_ENVBLOCK => SymbolData::EnvBlock(buf.parse_with(kind)?), + S_SECTION => SymbolData::Section(buf.parse_with(kind)?), other => return Err(Error::UnimplementedSymbolKind(other)), }; @@ -1713,6 +1717,48 @@ impl<'t> TryFromCtx<'t, SymbolKind> for EnvBlockSymbol <'t> { } } +/// A COFF section in a PE executable. +/// +/// Symbol kind `S_SECTION`. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct SectionSymbol<'t> { + /// Section number. + pub isec: u16, + /// Alignment of this section (power of 2). + pub align: u8, + /// Reserved. Must be zero. + pub reserved: u8, + /// Section's RVA. + pub rva: u32, + /// Section's CB. + pub cb: u32, + /// Section characteristics. + pub characteristics: u32, + /// Section name. + pub name: RawString<'t> + +} + +impl<'t> TryFromCtx<'t, SymbolKind> for SectionSymbol <'t> { + type Error = Error; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { + let mut buf = ParseBuffer::from(this); + + let symbol = SectionSymbol { + isec: buf.parse()?, + align: buf.parse()?, + reserved: buf.parse()?, + rva: buf.parse()?, + cb: buf.parse()?, + characteristics: buf.parse()?, + name: parse_symbol_name(&mut buf, kind)? + }; + + Ok((symbol, buf.pos())) + } +} + /// PDB symbol tables contain names, locations, and metadata about functions, global/static data, /// constants, data types, and more. 
/// From 93ab8724862139f44420ff750a862818b156bc2c Mon Sep 17 00:00:00 2001 From: michal-kapala Date: Sun, 14 May 2023 21:21:48 +0200 Subject: [PATCH 13/19] S_COFFGROUP support + support for COFF group symbol --- src/symbol/mod.rs | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index b256702..28fa16f 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -233,6 +233,8 @@ pub enum SymbolData<'t> { EnvBlock(EnvBlockSymbol<'t>), /// A COFF section in a PE executable. Section(SectionSymbol<'t>), + /// A COFF group. + CoffGroup(CoffGroupSymbol<'t>), } impl<'t> SymbolData<'t> { @@ -272,6 +274,7 @@ impl<'t> SymbolData<'t> { Self::OEM(_) => None, Self::EnvBlock(_) => None, Self::Section(data) => Some(data.name), + Self::CoffGroup(data) => Some(data.name), } } } @@ -331,6 +334,7 @@ impl<'t> TryFromCtx<'t> for SymbolData<'t> { S_OEM => SymbolData::OEM(buf.parse_with(kind)?), S_ENVBLOCK => SymbolData::EnvBlock(buf.parse_with(kind)?), S_SECTION => SymbolData::Section(buf.parse_with(kind)?), + S_COFFGROUP => SymbolData::CoffGroup(buf.parse_with(kind)?), other => return Err(Error::UnimplementedSymbolKind(other)), }; @@ -1759,6 +1763,42 @@ impl<'t> TryFromCtx<'t, SymbolKind> for SectionSymbol <'t> { } } +/// A COFF section in a PE executable. +/// +/// Symbol kind `S_SECTION`. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct CoffGroupSymbol<'t> { + /// COFF group's CB. + pub cb: u32, + /// COFF group characteristics. + pub characteristics: u32, + /// Symbol offset. + pub offset: PdbInternalSectionOffset, + /// Symbol segment. + pub segment: u16, + /// COFF group name. 
+ pub name: RawString<'t> + +} + +impl<'t> TryFromCtx<'t, SymbolKind> for CoffGroupSymbol <'t> { + type Error = Error; + + fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { + let mut buf = ParseBuffer::from(this); + + let symbol = CoffGroupSymbol { + cb: buf.parse()?, + characteristics: buf.parse()?, + offset: buf.parse()?, + segment: buf.parse()?, + name: parse_symbol_name(&mut buf, kind)? + }; + + Ok((symbol, buf.pos())) + } +} + /// PDB symbol tables contain names, locations, and metadata about functions, global/static data, /// constants, data types, and more. /// From a4ea364038514934d35e48d20bd994de49e73cbc Mon Sep 17 00:00:00 2001 From: ergrelet Date: Sun, 19 Feb 2023 04:07:47 +0100 Subject: [PATCH 14/19] Add support for char8_t --- src/tpi/primitive.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/tpi/primitive.rs b/src/tpi/primitive.rs index 26a0f99..4ab68ac 100644 --- a/src/tpi/primitive.rs +++ b/src/tpi/primitive.rs @@ -60,6 +60,9 @@ pub enum PrimitiveKind { /// "Really a 32-bit char" RChar32, + /// UTF-8 character + Char8, + /// Signed 8-bit integer I8, @@ -225,6 +228,7 @@ pub fn type_data_for_primitive(index: TypeIndex) -> Result> { 0x71 => PrimitiveKind::WChar, 0x7a => PrimitiveKind::RChar16, 0x7b => PrimitiveKind::RChar32, + 0x7c => PrimitiveKind::Char8, 0x11 => PrimitiveKind::Short, 0x21 => PrimitiveKind::UShort, From 922cdb98b94420c5266b8ef01e958df80f923333 Mon Sep 17 00:00:00 2001 From: Walnut <39544927+Walnut356@users.noreply.github.com> Date: Tue, 1 Oct 2024 01:16:55 -0500 Subject: [PATCH 15/19] add new SourceLanguage variants --- src/symbol/constants.rs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/symbol/constants.rs b/src/symbol/constants.rs index dd8f9ac..8698de2 100644 --- a/src/symbol/constants.rs +++ b/src/symbol/constants.rs @@ -525,6 +525,19 @@ pub enum SourceLanguage { MSIL = 0x0f, /// Application language is High Level Shader Language. 
HLSL = 0x10, + /// Application language is Objective-C. + ObjC = 0x11, + /// Application language is Objective-C++. + ObjCXX = 0x12, + /// Application language is Swift. + Swift = 0x13, + /// Application is a module generated by the aliasobj tool. + AliasObj = 0x14, + /// Application language is Rust. + Rust = 0x15, + /// Application language is Go. + Go = 0x16, + /// The DMD compiler emits 'D' for the CV source language. Microsoft doesn't /// have an enumerator for it yet. @@ -551,6 +564,12 @@ impl fmt::Display for SourceLanguage { Self::JScript => write!(f, "JScript"), Self::MSIL => write!(f, "MSIL"), Self::HLSL => write!(f, "HLSL"), + Self::ObjC => write!(f, "ObjC"), + Self::ObjCXX => write!(f, "ObjCXX"), + Self::Swift => write!(f, "Swift"), + Self::AliasObj => write!(f, "AliasObj"), + Self::Rust => write!(f, "Rust"), + Self::Go => write!(f, "Go"), Self::D => write!(f, "D"), } } @@ -576,6 +595,12 @@ impl From for SourceLanguage { 0x0e => Self::JScript, 0x0f => Self::MSIL, 0x10 => Self::HLSL, + 0x11 => Self::ObjC, + 0x12 => Self::ObjCXX, + 0x13 => Self::Swift, + 0x14 => Self::AliasObj, + 0x16 => Self::Rust, + 0x17 => Self::Go, 0x44 => Self::D, _ => Self::Masm, // There is no unknown, so we just force to Masm as the default. } From da27cc9ecee5967fa689030a14a49329b5775b10 Mon Sep 17 00:00:00 2001 From: Walnut <39544927+Walnut356@users.noreply.github.com> Date: Tue, 1 Oct 2024 01:18:25 -0500 Subject: [PATCH 16/19] update doc link --- src/symbol/constants.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/symbol/constants.rs b/src/symbol/constants.rs index 8698de2..27c02c3 100644 --- a/src/symbol/constants.rs +++ b/src/symbol/constants.rs @@ -483,7 +483,7 @@ impl<'a> TryFromCtx<'a, Endian> for CPUType { } /// These values correspond to the CV_CFL_LANG enumeration, and are documented -/// [on MSDN](https://msdn.microsoft.com/en-us/library/bw3aekw6.aspx). 
+/// [on MSDN](https://learn.microsoft.com/en-us/visualstudio/debugger/debug-interface-access/cv-cfl-lang?view=vs-2022). #[non_exhaustive] #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum SourceLanguage { From 8404d2b00755a9ca9a9d46be0cb8fb18498d82f7 Mon Sep 17 00:00:00 2001 From: Walnut <39544927+Walnut356@users.noreply.github.com> Date: Tue, 1 Oct 2024 01:52:21 -0500 Subject: [PATCH 17/19] consolidate macro usage in SourceLanguage::Display --- src/symbol/constants.rs | 56 ++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/src/symbol/constants.rs b/src/symbol/constants.rs index 27c02c3..5b6851a 100644 --- a/src/symbol/constants.rs +++ b/src/symbol/constants.rs @@ -537,8 +537,6 @@ pub enum SourceLanguage { Rust = 0x15, /// Application language is Go. Go = 0x16, - - /// The DMD compiler emits 'D' for the CV source language. Microsoft doesn't /// have an enumerator for it yet. D = 0x44, @@ -546,32 +544,34 @@ pub enum SourceLanguage { impl fmt::Display for SourceLanguage { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::C => write!(f, "C"), - Self::Cpp => write!(f, "Cpp"), - Self::Fortran => write!(f, "Fortran"), - Self::Masm => write!(f, "Masm"), - Self::Pascal => write!(f, "Pascal"), - Self::Basic => write!(f, "Basic"), - Self::Cobol => write!(f, "Cobol"), - Self::Link => write!(f, "Link"), - Self::Cvtres => write!(f, "Cvtres"), - Self::Cvtpgd => write!(f, "Cvtpgd"), - Self::CSharp => write!(f, "CSharp"), - Self::VB => write!(f, "VB"), - Self::ILAsm => write!(f, "ILAsm"), - Self::Java => write!(f, "Java"), - Self::JScript => write!(f, "JScript"), - Self::MSIL => write!(f, "MSIL"), - Self::HLSL => write!(f, "HLSL"), - Self::ObjC => write!(f, "ObjC"), - Self::ObjCXX => write!(f, "ObjCXX"), - Self::Swift => write!(f, "Swift"), - Self::AliasObj => write!(f, "AliasObj"), - Self::Rust => write!(f, "Rust"), - Self::Go => write!(f, "Go"), - Self::D => write!(f, "D"), - } + let 
str_repr = match self { + Self::C => "C", + Self::Cpp => "Cpp", + Self::Fortran => "Fortran", + Self::Masm => "Masm", + Self::Pascal => "Pascal", + Self::Basic => "Basic", + Self::Cobol => "Cobol", + Self::Link => "Link", + Self::Cvtres => "Cvtres", + Self::Cvtpgd => "Cvtpgd", + Self::CSharp => "CSharp", + Self::VB => "VB", + Self::ILAsm => "ILAsm", + Self::Java => "Java", + Self::JScript => "JScript", + Self::MSIL => "MSIL", + Self::HLSL => "HLSL", + Self::ObjC => "ObjC", + Self::ObjCXX => "ObjCXX", + Self::Swift => "Swift", + Self::AliasObj => "AliasObj", + Self::Rust => "Rust", + Self::Go => "Go", + Self::D => "D", + }; + + write!(f, "{str_repr}") } } From 5e2de147378655ceb2c8ca7c43c35389299368d2 Mon Sep 17 00:00:00 2001 From: Camden Smallwood Date: Mon, 21 Oct 2024 17:45:29 -0400 Subject: [PATCH 18/19] Add initial small MSF support --- src/msf/mod.rs | 202 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 198 insertions(+), 4 deletions(-) diff --git a/src/msf/mod.rs b/src/msf/mod.rs index 338fa32..045fb6d 100644 --- a/src/msf/mod.rs +++ b/src/msf/mod.rs @@ -344,8 +344,201 @@ mod big { } mod small { - pub const MAGIC: &[u8] = b"Microsoft C/C++ program database 2.00\r\n\x1a\x4a\x47"; - // TODO: implement SmallMSF + use super::*; + + pub const MAGIC: &[u8; 44] = b"Microsoft C/C++ program database 2.00\r\n\x1a\x4a\x47\x00\x00"; + + /// The PDB header as stored on disk. 
+ /// + /// See the Microsoft code for reference: + #[repr(C)] + #[derive(Debug, Copy, Clone)] + struct RawHeader { + magic: [u8; 44], + page_size: u32, + free_page_map: u16, + pages_used: u16, + directory_size: u32, + _reserved: u32, + } + + impl<'t> TryFromCtx<'t, Endian> for RawHeader { + type Error = scroll::Error; + + fn try_from_ctx( + this: &'t [u8], + le: Endian, + ) -> std::result::Result<(Self, usize), Self::Error> { + let mut offset = 0; + let data = Self { + magic: { + let mut tmp = [0; 44]; + this.gread_inout_with(&mut offset, &mut tmp, le)?; + tmp + }, + page_size: this.gread_with(&mut offset, le)?, + free_page_map: this.gread_with(&mut offset, le)?, + pages_used: this.gread_with(&mut offset, le)?, + directory_size: this.gread_with(&mut offset, le)?, + _reserved: this.gread_with(&mut offset, le)?, + }; + Ok((data, offset)) + } + } + + #[derive(Debug)] + pub struct SmallMSF<'s, S> { + header: Header, + source: S, + stream_table: StreamTable<'s>, + } + + impl<'s, S: Source<'s>> SmallMSF<'s, S> { + pub fn new(mut source: S, header_view: Box>) -> Result> { + let mut buf = ParseBuffer::from(header_view.as_slice()); + + let header: RawHeader = buf.parse()?; + + if &header.magic != MAGIC { + return Err(Error::UnrecognizedFileFormat); + } + + // TODO: check if this is correct for small MSF + if header.page_size.count_ones() != 1 + || header.page_size < 0x100 + || header.page_size > (128 * 0x10000) + { + return Err(Error::InvalidPageSize(header.page_size as _)); + } + + let header_object = Header { + page_size: header.page_size as _, + maximum_valid_page_number: header.pages_used as _, + }; + + // build the stream table page list + let mut stream_table_page_list = PageList::new(header_object.page_size); + let mut i = 0; + + while i < header.directory_size { + let n = buf.parse_u16()? 
as u32; + stream_table_page_list.push(header_object.validate_page_number(n)?); + i += header.page_size; + } + + // truncate the stream table page list to the correct size + stream_table_page_list.truncate(header.directory_size as _); + + let stream_table_view = view(&mut source, &stream_table_page_list)?; + + Ok(SmallMSF { + header: header_object, + source, + stream_table: StreamTable::Available { stream_table_view }, + }) + } + + fn look_up_stream(&mut self, stream_number: u32) -> Result { + // ensure the stream table is available + let StreamTable::Available { + ref stream_table_view, + } = self.stream_table else { + unreachable!() + }; + + let stream_table_slice = stream_table_view.as_slice(); + let mut stream_table = ParseBuffer::from(stream_table_slice); + + // the stream table is structured as: + // stream_count: u16 + // reserved: u16 + // for _ in 0..stream_count: + // size of stream in bytes: u32 (0xffffffff indicating "stream does not exist") + // reserved: u32 + // stream 0: PageNumber: u16 + // stream 1: PageNumber: u16, PageNumber: u16 + // stream 2: PageNumber: u16, PageNumber: u16, PageNumber; u16, PageNumber: u16, PageNumber: u16 + // stream 3: PageNumber: u16, PageNumber: u16, PageNumber; u16, PageNumber: u16 + // (number of pages determined by number of bytes) + + let stream_count = stream_table.parse_u16()? 
as u32; + let _reserved = stream_table.parse_u16()?; + + // check if we've already outworn our welcome + if stream_number >= stream_count { + return Err(Error::StreamNotFound(stream_number)); + } + + // we now have {stream_count} u32s describing the length of each stream + + // walk over the streams before the requested stream + // we need to pay attention to how big each one is, since their page numbers come + // before our page numbers in the stream table + let mut page_numbers_to_skip: usize = 0; + + for _ in 0..stream_number { + let bytes = stream_table.parse_u32()?; + let _reserved = stream_table.parse_u32()?; + + if bytes == u32::max_value() { + // stream is not present, ergo nothing to skip + } else { + page_numbers_to_skip += self.header.pages_needed_to_store(bytes as usize); + } + } + + // read our stream's size + let bytes_in_stream = stream_table.parse_u32()?; + let _reserved = stream_table.parse_u32()?; + + if bytes_in_stream == u32::max_value() { + return Err(Error::StreamNotFound(stream_number)); + } + + let pages_in_stream = self.header.pages_needed_to_store(bytes_in_stream as usize); + + // skip the remaining streams' byte counts + let _ = stream_table.take((stream_count - stream_number - 1) as usize * 8)?; + + // skip the preceding streams' page numbers + let _ = stream_table.take(page_numbers_to_skip * 2)?; + + // we're now at the list of pages for our stream + // accumulate them into a PageList + let mut page_list = PageList::new(self.header.page_size); + + for _ in 0..pages_in_stream { + let page_number = stream_table.parse_u16()? as u32; + page_list.push(self.header.validate_page_number(page_number)?); + } + + // truncate to the size of the stream + page_list.truncate(bytes_in_stream as usize); + + // done! 
+ Ok(page_list) + } + } + + impl<'s, S: Source<'s>> Msf<'s, S> for SmallMSF<'s, S> { + fn get(&mut self, stream_number: u32, limit: Option) -> Result> { + // look up the stream + let mut page_list = self.look_up_stream(stream_number)?; + + // apply any limits we have + if let Some(limit) = limit { + page_list.truncate(limit); + } + + // now that we know where this stream lives, we can view it + let view = view(&mut self.source, &page_list)?; + + // pack it into a Stream + let stream = Stream { source_view: view }; + + Ok(stream) + } + } } /// Represents a single Stream within the multi-stream file. @@ -414,8 +607,9 @@ pub fn open_msf<'s, S: Source<'s> + Send + 's>( } if header_matches(header_view.as_slice(), small::MAGIC) { - // sorry - return Err(Error::UnimplementedFeature("small MSF file format")); + // claimed! + let smallmsf = small::SmallMSF::new(source, header_view)?; + return Ok(Box::new(smallmsf)); } Err(Error::UnrecognizedFileFormat) From ebecb2b8e9075d0ebc3273267c9dfc50c240c580 Mon Sep 17 00:00:00 2001 From: Daniel Thaler Date: Fri, 22 Nov 2024 20:49:30 +0100 Subject: [PATCH 19/19] Fixes and improvements for the code review - The From implementation for SourceLanguage used the wrong constants for Rust and Go. - characteristics of the SectionSymbol are of type SectionCharacteristics instead of u32. - The comment for CoffGroupSymbol has been fixed. - CoffGroupSymbol was defined incorrectly: the segment is already part of the PdbInternalSectionOffset and should not appear separately. - A unit test for S_COFFGROUP / CoffGroupSymbol has been added, to verify the handling of the segment - VirtualTableShapeType now contains a Vec of VirtualTableShapeDescriptor, instead of u8.
An internal conversion function from u8 to VirtualTableShapeDescriptor was added --- src/symbol/constants.rs | 4 +- src/symbol/mod.rs | 121 ++++++++++++++++++++++++++++------------ src/tpi/data.rs | 41 +++++++++++--- 3 files changed, 119 insertions(+), 47 deletions(-) diff --git a/src/symbol/constants.rs b/src/symbol/constants.rs index 5b6851a..9a02051 100644 --- a/src/symbol/constants.rs +++ b/src/symbol/constants.rs @@ -599,8 +599,8 @@ impl From for SourceLanguage { 0x12 => Self::ObjCXX, 0x13 => Self::Swift, 0x14 => Self::AliasObj, - 0x16 => Self::Rust, - 0x17 => Self::Go, + 0x15 => Self::Rust, + 0x16 => Self::Go, 0x44 => Self::D, _ => Self::Masm, // There is no unknown, so we just force to Masm as the default. } diff --git a/src/symbol/mod.rs b/src/symbol/mod.rs index 28fa16f..e78dfd0 100644 --- a/src/symbol/mod.rs +++ b/src/symbol/mod.rs @@ -12,6 +12,7 @@ use scroll::{ctx::TryFromCtx, Endian, Pread, LE}; use crate::common::*; use crate::msf::*; use crate::FallibleIterator; +use crate::SectionCharacteristics; mod annotations; mod constants; @@ -521,12 +522,19 @@ impl<'t> TryFromCtx<'t, SymbolKind> for ProcedureReferenceSymbol<'t> { fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); + let global = matches!(kind, S_PROCREF | S_PROCREF_ST); + let sum_name = buf.parse()?; + let symbol_index = buf.parse()?; + // 1-based module index in the input - presumably 0 means invalid / not present + let module = buf.parse::()?.checked_sub(1).map(usize::from); + let name = parse_optional_name(&mut buf, kind)?; + let symbol = ProcedureReferenceSymbol { - global: matches!(kind, S_PROCREF | S_PROCREF_ST), - sum_name: buf.parse()?, - symbol_index: buf.parse()?, - module: buf.parse::()?.checked_sub(1).map(usize::from), - name: parse_optional_name(&mut buf, kind)?, + global, + sum_name, + symbol_index, + module, + name, }; Ok((symbol, buf.pos())) @@ -557,11 +565,17 @@ impl<'t> TryFromCtx<'t, SymbolKind> for 
DataReferenceSymbol<'t> { fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); + let sum_name = buf.parse()?; + let symbol_index = buf.parse()?; + // 1-based module index in the input - presumably 0 means invalid / not present + let module = buf.parse::()?.checked_sub(1).map(usize::from); + let name = parse_optional_name(&mut buf, kind)?; + let symbol = DataReferenceSymbol { - sum_name: buf.parse()?, - symbol_index: buf.parse()?, - module: buf.parse::()?.checked_sub(1).map(usize::from), - name: parse_optional_name(&mut buf, kind)?, + sum_name, + symbol_index, + module, + name, }; Ok((symbol, buf.pos())) @@ -592,11 +606,17 @@ impl<'t> TryFromCtx<'t, SymbolKind> for AnnotationReferenceSymbol<'t> { fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); + let sum_name = buf.parse()?; + let symbol_index = buf.parse()?; + // 1-based module index in the input - presumably 0 means invalid / not present + let module = buf.parse::()?.checked_sub(1).map(usize::from); + let name = parse_symbol_name(&mut buf, kind)?; + let symbol = AnnotationReferenceSymbol { - sum_name: buf.parse()?, - symbol_index: buf.parse()?, - module: buf.parse::()?.checked_sub(1).map(usize::from), - name: parse_symbol_name(&mut buf, kind)?, + sum_name, + symbol_index, + module, + name, }; Ok((symbol, buf.pos())) @@ -621,17 +641,23 @@ pub struct TokenReferenceSymbol<'t> { pub name: RawString<'t>, } -impl<'t> TryFromCtx<'t, SymbolKind> for TokenReferenceSymbol <'t> { +impl<'t> TryFromCtx<'t, SymbolKind> for TokenReferenceSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); + let sum_name = buf.parse()?; + let symbol_index = buf.parse()?; + // 1-based module index in the input - presumably 0 means invalid / not present + let module = buf.parse::()?.checked_sub(1).map(usize::from); + let name 
= parse_symbol_name(&mut buf, kind)?; + let symbol = TokenReferenceSymbol { - sum_name: buf.parse()?, - symbol_index: buf.parse()?, - module: buf.parse::()?.checked_sub(1).map(usize::from), - name: parse_symbol_name(&mut buf, kind)?, + sum_name, + symbol_index, + module, + name, }; Ok((symbol, buf.pos())) @@ -904,11 +930,11 @@ impl<'t> TryFromCtx<'t, SymbolKind> for ProcedureSymbol<'t> { } /// A managed procedure, such as a function or method. -/// +/// /// Symbol kinds: /// - `S_GMANPROC`, `S_GMANPROCIA64` for global procedures /// - `S_LMANPROC`, `S_LMANPROCIA64` for local procedures -/// +/// /// `S_GMANPROCIA64` and `S_LMANPROCIA64` are only mentioned, there is no available source. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ManagedProcedureSymbol<'t> { @@ -1293,7 +1319,7 @@ impl<'t> TryFromCtx<'t, SymbolKind> for LocalSymbol<'t> { } /// A managed local variable slot. -/// +/// /// Symbol kind `S_MANSLOT`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ManagedSlotSymbol<'t> { @@ -1658,7 +1684,7 @@ impl<'t> TryFromCtx<'t, SymbolKind> for SeparatedCodeSymbol { } /// An OEM symbol. -/// +/// /// Symbol kind `S_OEM`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct OemSymbol<'t> { @@ -1667,7 +1693,7 @@ pub struct OemSymbol<'t> { /// Type index. pub type_index: TypeIndex, /// User data with forced 4B-alignment. - /// + /// /// An array of variable size, currently only the first 4B are parsed. pub rgl: u32, } @@ -1699,7 +1725,7 @@ pub struct EnvBlockSymbol<'t> { pub rgsz: Vec>, } -impl<'t> TryFromCtx<'t, SymbolKind> for EnvBlockSymbol <'t> { +impl<'t> TryFromCtx<'t, SymbolKind> for EnvBlockSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { @@ -1737,13 +1763,12 @@ pub struct SectionSymbol<'t> { /// Section's CB. pub cb: u32, /// Section characteristics. - pub characteristics: u32, + pub characteristics: SectionCharacteristics, /// Section name. 
- pub name: RawString<'t> - + pub name: RawString<'t>, } -impl<'t> TryFromCtx<'t, SymbolKind> for SectionSymbol <'t> { +impl<'t> TryFromCtx<'t, SymbolKind> for SectionSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { @@ -1756,7 +1781,7 @@ impl<'t> TryFromCtx<'t, SymbolKind> for SectionSymbol <'t> { rva: buf.parse()?, cb: buf.parse()?, characteristics: buf.parse()?, - name: parse_symbol_name(&mut buf, kind)? + name: parse_symbol_name(&mut buf, kind)?, }; Ok((symbol, buf.pos())) @@ -1765,7 +1790,7 @@ impl<'t> TryFromCtx<'t, SymbolKind> for SectionSymbol <'t> { /// A COFF section in a PE executable. /// -/// Symbol kind `S_SECTION`. +/// Symbol kind `S_COFFGROUP`. #[derive(Clone, Debug, Eq, PartialEq)] pub struct CoffGroupSymbol<'t> { /// COFF group's CB. @@ -1774,14 +1799,11 @@ pub struct CoffGroupSymbol<'t> { pub characteristics: u32, /// Symbol offset. pub offset: PdbInternalSectionOffset, - /// Symbol segment. - pub segment: u16, /// COFF group name. - pub name: RawString<'t> - + pub name: RawString<'t>, } -impl<'t> TryFromCtx<'t, SymbolKind> for CoffGroupSymbol <'t> { +impl<'t> TryFromCtx<'t, SymbolKind> for CoffGroupSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { @@ -1791,8 +1813,7 @@ impl<'t> TryFromCtx<'t, SymbolKind> for CoffGroupSymbol <'t> { cb: buf.parse()?, characteristics: buf.parse()?, offset: buf.parse()?, - segment: buf.parse()?, - name: parse_symbol_name(&mut buf, kind)? 
+ name: parse_symbol_name(&mut buf, kind)?, }; Ok((symbol, buf.pos())) @@ -2468,6 +2489,32 @@ mod tests { ); } + #[test] + fn kind_1137() { + // 0x1137 is S_COFFGROUP + let data = &[ + 55, 17, 160, 17, 0, 0, 64, 0, 0, 192, 0, 0, 0, 0, 3, 0, 46, 100, 97, 116, 97, 0, + ]; + + let symbol = Symbol { + data, + index: SymbolIndex(0), + }; + assert_eq!(symbol.raw_kind(), 0x1137); + assert_eq!( + symbol.parse().expect("parse"), + SymbolData::CoffGroup(CoffGroupSymbol { + cb: 4512, + characteristics: 0xc000_0040, + offset: PdbInternalSectionOffset { + section: 0x3, + offset: 0 + }, + name: ".data".into(), + }) + ); + } + #[test] fn kind_113c() { let data = &[ diff --git a/src/tpi/data.rs b/src/tpi/data.rs index 59d4684..261dc58 100644 --- a/src/tpi/data.rs +++ b/src/tpi/data.rs @@ -362,9 +362,14 @@ pub(crate) fn parse_type_data<'t>(buf: &mut ParseBuffer<'t>) -> Result> 4); + vtshape + .descriptors + .push(VirtualTableShapeDescriptor::from_u4(desc >> 4)); } } Ok(TypeData::VirtualTableShape(vtshape)) @@ -387,6 +392,7 @@ pub(crate) fn parse_type_data<'t>(buf: &mut ParseBuffer<'t>) -> Result bool { matches!(self.method_properties(), 0x04 | 0x06) } - + #[inline] pub fn is_pseudo(self) -> bool { self.0 & 0x0020 != 0 } - + #[inline] pub fn noinherit(self) -> bool { self.0 & 0x0040 != 0 } - + #[inline] pub fn noconstruct(self) -> bool { self.0 & 0x0080 != 0 } - + #[inline] pub fn is_compgenx(self) -> bool { self.0 & 0x0100 != 0 } - + #[inline] pub fn sealed(self) -> bool { self.0 & 0x0200 != 0 @@ -913,6 +919,25 @@ pub enum VirtualTableShapeDescriptor { Unused = 0x07, } +impl VirtualTableShapeDescriptor { + // Convert a 4-bit "u4" value in a u8 to a VirtualTableShapeDescriptor + // This is used in the VirtualTableShapeType parsing and is not public + // so we can safely assume that the value is in the correct range. 
+ pub(crate) fn from_u4(val: u8) -> Self { + match val { + 0x00 => VirtualTableShapeDescriptor::Near, + 0x01 => VirtualTableShapeDescriptor::Far, + 0x02 => VirtualTableShapeDescriptor::Thin, + 0x03 => VirtualTableShapeDescriptor::Outer, + 0x04 => VirtualTableShapeDescriptor::Meta, + 0x05 => VirtualTableShapeDescriptor::Near32, + 0x06 => VirtualTableShapeDescriptor::Far32, + 0x07 => VirtualTableShapeDescriptor::Unused, + _ => unreachable!(), + } + } +} + /// The information parsed from a type record with kind /// `LF_CLASS`, `LF_CLASS_ST`, `LF_STRUCTURE`, `LF_STRUCTURE_ST` or `LF_INTERFACE`. // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L1631 @@ -1037,7 +1062,7 @@ pub struct VirtualFunctionTablePointerType { /// The information parsed from a type record with kind `LF_VTSHAPE`. #[derive(Debug, Clone, PartialEq, Eq)] pub struct VirtualTableShapeType { - pub descriptors: Vec, + pub descriptors: Vec, } /// The information parsed from a type record with kind `LF_VFTABLE`.