From 3c1c6d8051dc56f8dccc3be333d14c3217029b65 Mon Sep 17 00:00:00 2001 From: Haresh Khanna Date: Mon, 10 Nov 2025 15:19:50 +0000 Subject: [PATCH 1/4] Bump prost to 0.14 --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 6779a22..1e84e97 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,8 +19,8 @@ thiserror = "2" linux-perf-event-reader = "0.10.0" # linux-perf-event-reader = { path = "../linux-perf-event-reader" } linear-map = "1.2.0" -prost = { version = "0.13", default-features = false, features = ["std"] } -prost-derive = "0.13" +prost = { version = "0.14", default-features = false, features = ["std"] } +prost-derive = "0.14" [dev-dependencies] yaxpeax-arch = { version = "0.3", default-features = false } From b10c53d7898b34a537555dbd66e9cd9abc21c8e1 Mon Sep 17 00:00:00 2001 From: Markus Stange Date: Tue, 11 Nov 2025 19:56:24 -0500 Subject: [PATCH 2/4] chore: Release linux-perf-data version 0.12.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 1e84e97..fdbadf7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "linux-perf-data" -version = "0.11.1" +version = "0.12.0" edition = "2021" license = "MIT OR Apache-2.0" authors = ["Markus Stange "] From 2323e5b5398cf30668b5d5979966fd0d5f85fb89 Mon Sep 17 00:00:00 2001 From: Arthur Pastel Date: Wed, 17 Dec 2025 22:13:28 +0100 Subject: [PATCH 3/4] Support perf pipe data format --- README.md | 45 +++++- examples/perfpipeinfo.rs | 185 ++++++++++++++++++++++++ src/file_reader.rs | 304 +++++++++++++++++++++++++++++++++------ src/header.rs | 28 ++++ src/lib.rs | 27 +++- src/record.rs | 77 +++++++++- 6 files changed, 613 insertions(+), 53 deletions(-) create mode 100644 examples/perfpipeinfo.rs diff --git a/README.md b/README.md index 534d215..432bbd1 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,16 @@ records" from perf / simpleperf. 
This crate also contains parsing code for jitdump files, which are used in conjunction with perf.data files when profiling JIT runtimes. -# Example +## File Modes + +This crate supports two modes for reading perf.data files: + +- **File mode** (`parse_file`) - For reading regular perf.data files from disk. Requires `Read + Seek`. +- **Pipe mode** (`parse_pipe`) - For streaming perf.data from pipes, stdin, or network streams. Only requires `Read`. + +# Examples + +## File Mode Example ```rust use linux_perf_data::{AttributeDescription, PerfFileReader, PerfFileRecord}; @@ -43,6 +52,40 @@ while let Some(record) = record_iter.next_record(&mut perf_file)? { } ``` +## Pipe Mode Example + +Read perf.data from stdin or a pipe (no seeking required): + +```rust +use linux_perf_data::{PerfFileReader, PerfFileRecord}; + +// Read from stdin +let stdin = std::io::stdin(); +let PerfFileReader { mut perf_file, mut record_iter } = PerfFileReader::parse_pipe(stdin)?; + +println!("Events: {}", perf_file.event_attributes().len()); + +while let Some(record) = record_iter.next_record(&mut perf_file)? { + match record { + PerfFileRecord::EventRecord { attr_index, record } => { + // Process event records + } + PerfFileRecord::UserRecord(record) => { + // Process user records + } + } +} +``` + +**Command-line usage:** +```bash +# Stream directly from perf record +perf record -o - sleep 1 | cargo run --example perfpipeinfo + +# Or pipe an existing file +cat perf.data | cargo run --example perfpipeinfo +``` + ## Jitdump example ```rust diff --git a/examples/perfpipeinfo.rs b/examples/perfpipeinfo.rs new file mode 100644 index 0000000..4e45e16 --- /dev/null +++ b/examples/perfpipeinfo.rs @@ -0,0 +1,185 @@ +//! Example: Read and analyze perf.data from stdin in pipe mode +//! +//! This demonstrates the `parse_pipe()` API which works with streams (Read only). +//! Compare with perfdatainfo.rs which uses `parse_file()` (requires Read + Seek). +//! +//! Usage: +//! 
# Stream directly from perf record: +//! perf record -o - sleep 1 | cargo run --example perfpipeinfo +//! +//! # Or pipe an existing file: +//! cat perf.data | cargo run --example perfpipeinfo +//! +//! # Or from a network stream: +//! nc server 1234 | cargo run --example perfpipeinfo + +use std::collections::HashMap; + +use linux_perf_data::{PerfFileReader, PerfFileRecord}; +#[allow(unused)] +use linux_perf_event_reader::RecordType; + +fn main() { + let stdin = std::io::stdin(); + let PerfFileReader { + mut perf_file, + mut record_iter, + } = match PerfFileReader::parse_pipe(stdin) { + Ok(reader) => reader, + Err(e) => { + println!("ERROR when creating PerfFileReader: {:?}", e); + return; + } + }; + + if let Ok(Some(arch)) = perf_file.arch() { + println!("Arch: {arch}"); + } + if let Ok(Some(cmdline)) = perf_file.cmdline() { + println!("CmdLine: {cmdline:?}"); + } + if let Ok(Some(cpu_desc)) = perf_file.cpu_desc() { + println!("CPU Desc: {cpu_desc}"); + } + if let Ok(Some(perf_version)) = perf_file.perf_version() { + println!("Perf version: {perf_version}"); + } + if let Ok(Some(frequency)) = perf_file.clock_frequency() { + println!("Clock frequency: {frequency} ns per tick"); + } + if let Ok(Some(clock_data)) = perf_file.clock_data() { + println!("Clock data: {clock_data:?}"); + } + + // Print the feature sections. 
+ let features = perf_file.features(); + let features: String = features + .iter() + .map(|f| format!("{f}")) + .collect::>() + .join(", "); + println!("Features: {features}"); + println!(); + if let Ok(Some(simpleperf_meta_info)) = perf_file.simpleperf_meta_info() { + println!("Simpleperf meta info:"); + for (k, v) in simpleperf_meta_info { + println!(" {k}: {v}"); + } + println!(); + } + if let Ok(Some(simpleperf_file_symbols)) = perf_file.simpleperf_symbol_tables() { + println!("Simpleperf symbol tables for the following files:"); + for f in &simpleperf_file_symbols { + println!(" - {}", f.path); + // println!("{f:#?}"); + } + println!(); + } + + // for event in perf_file.event_attributes() { + // println!("Event: {event:#?}"); + // } + + let mut event_record_map = HashMap::new(); + let mut user_record_map = HashMap::new(); + + while let Some(record) = record_iter.next_record(&mut perf_file).unwrap() { + match record { + PerfFileRecord::EventRecord { attr_index, record } => { + let record_type = record.record_type; + *event_record_map + .entry(attr_index) + .or_insert_with(HashMap::new) + .entry(record_type) + .or_insert(0) += 1; + match record.parse() { + Ok(parsed_record) => { + // let is_interesting = matches!(record_type, RecordType::FORK | RecordType::COMM | RecordType::MMAP| RecordType::MMAP2); + let is_interesting = false; + if !is_interesting { + continue; + } + + if let Some(timestamp) = + record.common_data().ok().and_then(|cd| cd.timestamp) + { + println!( + "{:?} at {} for event {}: {:?}", + record_type, timestamp, attr_index, parsed_record + ); + } else { + println!( + "{:?} for event {}: {:?}", + record_type, attr_index, parsed_record + ); + } + } + Err(e) => { + println!( + "ERROR when parsing {:?} for event {}: {:?}", + record_type, attr_index, e + ); + } + } + } + PerfFileRecord::UserRecord(record) => { + let record_type = record.record_type; + *user_record_map.entry(record_type).or_insert(0) += 1; + match record.parse() { + Ok(_parsed_record) 
=> { + // println!("{:?}: {:?}", record_type, parsed_record); + } + Err(e) => { + println!("ERROR when parsing {:?}: {:?}", record_type, e); + } + } + } + } + } + + let mut event_record_map = event_record_map + .into_iter() + .map(|(attr_index, histogram)| { + let sum = histogram.values().sum::(); + (attr_index, histogram, sum) + }) + .collect::>(); + event_record_map.sort_by_key(|(_attr_index, _histogram, sum)| -(*sum as i64)); + let sum = event_record_map + .iter() + .map(|(_attr_index, _histogram, sum)| sum) + .sum::(); + + println!("Event records: {sum} records"); + println!(); + + for (attr_index, record_counts, sum) in event_record_map { + let mut record_counts = record_counts.into_iter().collect::>(); + record_counts.sort_by_key(|(_record_type, count)| -(*count as i64)); + println!( + " event {} ({}): {} records", + attr_index, + perf_file.event_attributes()[attr_index] + .name() + .unwrap_or(""), + sum + ); + for (record_type, count) in record_counts { + println!(" {:?}: {}", record_type, count); + } + println!(); + } + + let mut user_record_counts = user_record_map.into_iter().collect::>(); + user_record_counts.sort_by_key(|(_record_type, count)| -(*count as i64)); + let sum = user_record_counts + .iter() + .map(|(_record_type, count)| count) + .sum::(); + + println!("User records: {sum} records"); + println!(); + for (record_type, count) in user_record_counts { + println!(" {:?}: {}", record_type, count); + } +} diff --git a/src/file_reader.rs b/src/file_reader.rs index 72f6179..5880760 100644 --- a/src/file_reader.rs +++ b/src/file_reader.rs @@ -12,9 +12,9 @@ use std::io::{Cursor, Read, Seek, SeekFrom}; use super::error::{Error, ReadError}; use super::feature_sections::AttributeDescription; use super::features::Feature; -use super::header::PerfHeader; +use super::header::{PerfHeader, PerfPipeHeader}; use super::perf_file::PerfFile; -use super::record::{PerfFileRecord, RawUserRecord, UserRecordType}; +use super::record::{HeaderAttr, HeaderFeature, 
PerfFileRecord, RawUserRecord, UserRecordType}; use super::section::PerfFileSection; use super::simpleperf; use super::sorter::Sorter; @@ -191,10 +191,181 @@ impl PerfFileReader { parse_infos, event_id_to_attr_index, read_offset: 0, - record_data_len: header.data_section.size, + record_data_len: Some(header.data_section.size), sorter: Sorter::new(), buffers_for_recycling: VecDeque::new(), current_event_body: Vec::new(), + pending_first_record: None, + }; + + Ok(Self { + perf_file, + record_iter, + }) + } +} + +impl PerfFileReader { + /// Parse a perf.data file in pipe mode (streaming format). + /// + /// Pipe mode is designed for streaming and does not require seeking. + /// Metadata (attributes and features) is embedded in the stream as + /// synthesized records (PERF_RECORD_HEADER_ATTR, PERF_RECORD_HEADER_FEATURE). + pub fn parse_pipe(mut reader: R) -> Result { + let pipe_header = PerfPipeHeader::parse(&mut reader)?; + match &pipe_header.magic { + b"PERFILE2" => Self::parse_pipe_impl::(reader, Endianness::LittleEndian), + b"2ELIFREP" => Self::parse_pipe_impl::(reader, Endianness::BigEndian), + _ => Err(Error::UnrecognizedMagicValue(pipe_header.magic)), + } + } + + fn parse_pipe_impl(mut reader: R, endian: Endianness) -> Result { + let mut attributes = Vec::new(); + let mut feature_sections = LinearMap::new(); + let mut pending_first_record: Option<(PerfEventHeader, Vec)> = None; + + // Read records from the stream until we hit a non-metadata record or EOF + loop { + let header = match PerfEventHeader::parse::<_, T>(&mut reader) { + Ok(header) => header, + Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => { + // Stream ended with only metadata records - this is valid + break; + } + Err(e) => return Err(e.into()), + }; + + let size = header.size as usize; + if size < PerfEventHeader::STRUCT_SIZE { + return Err(Error::InvalidPerfEventSize); + } + + let event_body_len = size - PerfEventHeader::STRUCT_SIZE; + let mut buffer = vec![0; event_body_len]; + match 
reader.read_exact(&mut buffer) { + Ok(()) => {} + Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => { + // Incomplete record at end of stream + return Err(e.into()); + } + Err(e) => return Err(e.into()), + } + + let record_type = RecordType(header.type_); + + match UserRecordType::try_from(record_type) { + Some(UserRecordType::PERF_HEADER_ATTR) => { + let data = RawData::from(&buffer[..]); + let header_attr = HeaderAttr::parse::(data)?; + + attributes.push(AttributeDescription { + attr: header_attr.attr, + name: None, + event_ids: header_attr.ids, + }); + } + Some(UserRecordType::PERF_HEADER_FEATURE) => { + let data = RawData::from(&buffer[..]); + let header_feature = HeaderFeature::parse::(data)?; + + feature_sections.insert(header_feature.feature, header_feature.data); + } + _ => { + // Not a metadata record - this is the first real event + pending_first_record = Some((header, buffer)); + break; + } + } + } + + if attributes.is_empty() { + return Err(Error::NoAttributes); + } + + if let Some(event_desc_data) = feature_sections.get(&Feature::EVENT_DESC) { + let event_desc_attrs = AttributeDescription::parse_event_desc_section::<_, T>( + Cursor::new(&event_desc_data[..]), + )?; + + // Match attributes by event IDs and update names + for attr in attributes.iter_mut() { + // Find matching event in EVENT_DESC by comparing event IDs + if let Some(event_desc_attr) = event_desc_attrs.iter().find(|desc| { + !desc.event_ids.is_empty() + && !attr.event_ids.is_empty() + && desc.event_ids[0] == attr.event_ids[0] + }) { + attr.name = event_desc_attr.name.clone(); + } + } + } + + let mut event_id_to_attr_index = HashMap::new(); + for (attr_index, AttributeDescription { event_ids, .. 
}) in attributes.iter().enumerate() { + for event_id in event_ids { + event_id_to_attr_index.insert(*event_id, attr_index); + } + } + + let parse_infos: Vec<_> = attributes + .iter() + .map(|attr| RecordParseInfo::new(&attr.attr, endian)) + .collect(); + + let first_attr = attributes.first().ok_or(Error::NoAttributes)?; + let first_has_sample_id_all = first_attr.attr.flags.contains(AttrFlags::SAMPLE_ID_ALL); + let (first_parse_info, remaining_parse_infos) = parse_infos.split_first().unwrap(); + + let id_parse_infos = if remaining_parse_infos.is_empty() { + IdParseInfos::OnlyOneEvent + } else if remaining_parse_infos + .iter() + .all(|parse_info| parse_info.id_parse_info == first_parse_info.id_parse_info) + { + IdParseInfos::Same(first_parse_info.id_parse_info) + } else { + for (attr_index, AttributeDescription { attr, .. }) in attributes.iter().enumerate() { + if !attr.sample_format.contains(SampleFormat::IDENTIFIER) { + return Err(Error::NoIdentifierDespiteMultiEvent(attr_index)); + } + if attr.flags.contains(AttrFlags::SAMPLE_ID_ALL) != first_has_sample_id_all { + return Err(Error::InconsistentSampleIdAllWithMultiEvent(attr_index)); + } + } + IdParseInfos::PerAttribute(first_has_sample_id_all) + }; + + // Infer features from the feature_sections we collected + let mut features_array = [0u64; 4]; + for feature in feature_sections.keys() { + let feature_bit = feature.0; + if feature_bit < 256 { + let chunk_index = (feature_bit / 64) as usize; + let bit_in_chunk = feature_bit % 64; + features_array[chunk_index] |= 1u64 << bit_in_chunk; + } + } + + let perf_file = PerfFile { + endian, + features: super::features::FeatureSet(features_array), + feature_sections, + attributes, + }; + + let record_iter = PerfRecordIter { + reader, + endian, + id_parse_infos, + parse_infos, + event_id_to_attr_index, + read_offset: 0, + record_data_len: None, // Unbounded for pipes + sorter: Sorter::new(), + buffers_for_recycling: VecDeque::new(), + current_event_body: Vec::new(), + 
pending_first_record, }; Ok(Self { @@ -209,7 +380,8 @@ pub struct PerfRecordIter { reader: R, endian: Endianness, read_offset: u64, - record_data_len: u64, + /// None for pipe mode + record_data_len: Option, current_event_body: Vec, id_parse_infos: IdParseInfos, /// Guaranteed to have at least one element @@ -217,6 +389,8 @@ pub struct PerfRecordIter { event_id_to_attr_index: HashMap, sorter: Sorter, buffers_for_recycling: VecDeque>, + /// For pipe mode: the first non-metadata record that was read during initialization + pending_first_record: Option<(PerfEventHeader, Vec)>, } impl PerfRecordIter { @@ -253,9 +427,32 @@ impl PerfRecordIter { /// Reads events into self.sorter until a FINISHED_ROUND record is found /// and self.sorter is non-empty, or until we've run out of records to read. fn read_next_round_impl(&mut self) -> Result<(), Error> { - while self.read_offset < self.record_data_len { + // Handle pending first record from pipe mode initialization + if let Some((pending_header, pending_buffer)) = self.pending_first_record.take() { + self.process_record::(pending_header, pending_buffer, self.read_offset)?; + self.read_offset += u64::from(pending_header.size); + } + + while self + .record_data_len + .is_none_or(|len| self.read_offset < len) + { let offset = self.read_offset; - let header = PerfEventHeader::parse::<_, T>(&mut self.reader)?; + + // Try to parse the next header. For pipe mode (unbounded), EOF is normal termination. 
+ let header = match PerfEventHeader::parse::<_, T>(&mut self.reader) { + Ok(header) => header, + Err(e) => { + // For pipe mode with unbounded length, EOF just means end of stream + if self.record_data_len.is_none() + && e.kind() == std::io::ErrorKind::UnexpectedEof + { + break; + } + return Err(e.into()); + } + }; + let size = header.size as usize; if size < PerfEventHeader::STRUCT_SIZE { return Err(Error::InvalidPerfEventSize); @@ -279,45 +476,22 @@ impl PerfRecordIter { let event_body_len = size - PerfEventHeader::STRUCT_SIZE; let mut buffer = self.buffers_for_recycling.pop_front().unwrap_or_default(); buffer.resize(event_body_len, 0); - self.reader - .read_exact(&mut buffer) - .map_err(|_| ReadError::PerfEventData)?; - - let data = RawData::from(&buffer[..]); - let record_type = RecordType(header.type_); - let (attr_index, timestamp) = if record_type.is_builtin_type() { - let attr_index = match &self.id_parse_infos { - IdParseInfos::OnlyOneEvent => 0, - IdParseInfos::Same(id_parse_info) => { - get_record_id::(record_type, data, id_parse_info) - .and_then(|id| self.event_id_to_attr_index.get(&id).cloned()) - .unwrap_or(0) - } - IdParseInfos::PerAttribute(sample_id_all) => { - // We have IDENTIFIER (guaranteed by PerAttribute). - get_record_identifier::(record_type, data, *sample_id_all) - .and_then(|id| self.event_id_to_attr_index.get(&id).cloned()) - .unwrap_or(0) + // Try to read the event body. For pipe mode, EOF here also means end of stream. 
+ match self.reader.read_exact(&mut buffer) { + Ok(()) => {} + Err(e) => { + // For pipe mode with unbounded length, EOF just means end of stream + if self.record_data_len.is_none() + && e.kind() == std::io::ErrorKind::UnexpectedEof + { + break; } - }; - let parse_info = self.parse_infos[attr_index]; - let timestamp = get_record_timestamp::(record_type, data, &parse_info); - (Some(attr_index), timestamp) - } else { - // user type - (None, None) - }; + return Err(ReadError::PerfEventData.into()); + } + } - let sort_key = RecordSortKey { timestamp, offset }; - let misc = header.misc; - let pending_record = PendingRecord { - record_type, - misc, - buffer, - attr_index, - }; - self.sorter.insert_unordered(sort_key, pending_record); + self.process_record::(header, buffer, offset)?; } // Everything has been read. @@ -326,6 +500,52 @@ impl PerfRecordIter { Ok(()) } + /// Process a single record and add it to the sorter + fn process_record( + &mut self, + header: PerfEventHeader, + buffer: Vec, + offset: u64, + ) -> Result<(), Error> { + let data = RawData::from(&buffer[..]); + let record_type = RecordType(header.type_); + + let (attr_index, timestamp) = if record_type.is_builtin_type() { + let attr_index = match &self.id_parse_infos { + IdParseInfos::OnlyOneEvent => 0, + IdParseInfos::Same(id_parse_info) => { + get_record_id::(record_type, data, id_parse_info) + .and_then(|id| self.event_id_to_attr_index.get(&id).cloned()) + .unwrap_or(0) + } + IdParseInfos::PerAttribute(sample_id_all) => { + // We have IDENTIFIER (guaranteed by PerAttribute). 
+ get_record_identifier::(record_type, data, *sample_id_all) + .and_then(|id| self.event_id_to_attr_index.get(&id).cloned()) + .unwrap_or(0) + } + }; + let parse_info = self.parse_infos[attr_index]; + let timestamp = get_record_timestamp::(record_type, data, &parse_info); + (Some(attr_index), timestamp) + } else { + // user type + (None, None) + }; + + let sort_key = RecordSortKey { timestamp, offset }; + let misc = header.misc; + let pending_record = PendingRecord { + record_type, + misc, + buffer, + attr_index, + }; + self.sorter.insert_unordered(sort_key, pending_record); + + Ok(()) + } + /// Converts pending_record into an RawRecord which references the data in self.current_event_body. fn convert_pending_record(&mut self, pending_record: PendingRecord) -> PerfFileRecord<'_> { let PendingRecord { diff --git a/src/header.rs b/src/header.rs index 7552086..9000e4d 100644 --- a/src/header.rs +++ b/src/header.rs @@ -65,3 +65,31 @@ impl PerfHeader { }) } } + +/// `perf_pipe_file_header` +/// +/// A minimal header used in pipe mode to avoid seeking. +/// In pipe mode, metadata is embedded in the stream via synthesized events +/// (PERF_RECORD_HEADER_ATTR, PERF_RECORD_HEADER_FEATURE) instead of using +/// file sections. +#[derive(Debug, Clone, Copy)] +pub struct PerfPipeHeader { + pub magic: [u8; 8], + /// size of the header (should be 16) + #[allow(dead_code)] + pub size: u64, +} + +impl PerfPipeHeader { + pub fn parse(mut reader: R) -> Result { + let mut magic = [0; 8]; + reader.read_exact(&mut magic)?; + + let size = if magic[0] == b'P' { + reader.read_u64::()? + } else { + reader.read_u64::()? + }; + Ok(Self { magic, size }) + } +} diff --git a/src/lib.rs b/src/lib.rs index d52f944..fc95d4e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,9 +10,11 @@ //! The [`jitdump`] module lets you parse jitdump files, which are used in //! conjunction with perf.data files when profiling JIT runtimes. //! -//! # Example +//! # File Mode Example //! -//! ``` +//! 
Parse a perf.data file from disk (requires `Seek`): +//! +//! ```no_run //! use linux_perf_data::{AttributeDescription, PerfFileReader, PerfFileRecord}; //! //! # fn wrapper() -> Result<(), linux_perf_data::Error> { @@ -40,6 +42,25 @@ //! # Ok(()) //! # } //! ``` +//! +//! # Pipe Mode Example +//! +//! Parse perf.data from a stream (e.g., stdin, pipe - requires only `Read`): +//! +//! ```no_run +//! use linux_perf_data::{PerfFileReader, PerfFileRecord}; +//! +//! # fn wrapper() -> Result<(), linux_perf_data::Error> { +//! let stdin = std::io::stdin(); +//! let PerfFileReader { mut perf_file, mut record_iter } = PerfFileReader::parse_pipe(stdin)?; +//! +//! while let Some(record) = record_iter.next_record(&mut perf_file)? { +//! // Process records... +//! # let _ = record; +//! } +//! # Ok(()) +//! # } +//! ``` mod build_id_event; mod constants; @@ -74,7 +95,7 @@ pub use feature_sections::{AttributeDescription, NrCpus, SampleTimeRange}; pub use features::{Feature, FeatureSet, FeatureSetIter}; pub use file_reader::{PerfFileReader, PerfRecordIter}; pub use perf_file::PerfFile; -pub use record::{PerfFileRecord, RawUserRecord, UserRecord, UserRecordType}; +pub use record::{HeaderAttr, HeaderFeature, PerfFileRecord, RawUserRecord, UserRecord, UserRecordType}; pub use simpleperf::{ simpleperf_dso_type, SimpleperfDexFileInfo, SimpleperfElfFileInfo, SimpleperfFileRecord, SimpleperfKernelModuleInfo, SimpleperfSymbol, SimpleperfTypeSpecificInfo, diff --git a/src/record.rs b/src/record.rs index eb9cf47..5c3a587 100644 --- a/src/record.rs +++ b/src/record.rs @@ -1,8 +1,9 @@ -use byteorder::{BigEndian, ByteOrder, LittleEndian}; +use byteorder::{BigEndian, ByteOrder, LittleEndian, ReadBytesExt}; use linux_perf_event_reader::RawEventRecord; -use linux_perf_event_reader::{Endianness, RawData, RecordType}; +use linux_perf_event_reader::{Endianness, PerfEventAttr, RawData, RecordType}; use crate::constants::*; +use crate::features::Feature; use crate::thread_map::ThreadMap; /// A 
record from a perf.data file's data stream. @@ -26,6 +27,8 @@ pub enum PerfFileRecord<'a> { #[non_exhaustive] pub enum UserRecord<'a> { ThreadMap(ThreadMap<'a>), + HeaderAttr(HeaderAttr), + HeaderFeature(HeaderFeature), Raw(RawUserRecord<'a>), } @@ -141,7 +144,15 @@ impl<'a> RawUserRecord<'a> { let record_type = self.record_type; let record = match record_type { - // UserRecordType::PERF_HEADER_ATTR => {}, + UserRecordType::PERF_HEADER_ATTR => { + UserRecord::HeaderAttr(HeaderAttr::parse::(self.data)?) + } + UserRecordType::PERF_THREAD_MAP => { + UserRecord::ThreadMap(ThreadMap::parse::(self.data)?) + } + UserRecordType::PERF_HEADER_FEATURE => { + UserRecord::HeaderFeature(HeaderFeature::parse::(self.data)?) + } // UserRecordType::PERF_HEADER_EVENT_TYPE => {}, // UserRecordType::PERF_HEADER_TRACING_DATA => {}, // UserRecordType::PERF_HEADER_BUILD_ID => {}, @@ -150,16 +161,12 @@ impl<'a> RawUserRecord<'a> { // UserRecordType::PERF_AUXTRACE_INFO => {}, // UserRecordType::PERF_AUXTRACE => {}, // UserRecordType::PERF_AUXTRACE_ERROR => {}, - UserRecordType::PERF_THREAD_MAP => { - UserRecord::ThreadMap(ThreadMap::parse::(self.data)?) - } // UserRecordType::PERF_CPU_MAP => {}, // UserRecordType::PERF_STAT_CONFIG => {}, // UserRecordType::PERF_STAT => {}, // UserRecordType::PERF_STAT_ROUND => {}, // UserRecordType::PERF_EVENT_UPDATE => {}, // UserRecordType::PERF_TIME_CONV => {}, - // UserRecordType::PERF_HEADER_FEATURE => {}, // UserRecordType::PERF_COMPRESSED => {}, // UserRecordType::SIMPLEPERF_KERNEL_SYMBOL => {}, // UserRecordType::SIMPLEPERF_DSO => {}, @@ -175,3 +182,59 @@ impl<'a> RawUserRecord<'a> { Ok(record) } } + +/// PERF_RECORD_HEADER_ATTR - Contains event attribute and associated event IDs +/// +/// Used in pipe mode to transmit event attribute information that would +/// otherwise be in the attrs section of a regular perf.data file. 
+#[derive(Debug, Clone)] +pub struct HeaderAttr { + pub attr: PerfEventAttr, + pub ids: Vec, +} + +impl HeaderAttr { + pub fn parse(data: RawData) -> Result { + let mut cursor = std::io::Cursor::new(data.as_slice()); + + // Parse the perf_event_attr + let (attr, _attr_size) = PerfEventAttr::parse::<_, T>(&mut cursor)?; + + // Remaining data is the array of event IDs + let mut ids = Vec::new(); + while cursor.position() < data.len() as u64 { + ids.push(cursor.read_u64::()?); + } + + Ok(Self { attr, ids }) + } +} + +/// PERF_RECORD_HEADER_FEATURE - Contains feature section data +/// +/// Used in pipe mode to transmit feature data that would otherwise be in +/// the feature sections at the end of a regular perf.data file. +#[derive(Debug, Clone)] +pub struct HeaderFeature { + pub feature: Feature, + pub data: Vec, +} + +impl HeaderFeature { + pub fn parse(data: RawData) -> Result { + let mut cursor = std::io::Cursor::new(data.as_slice()); + + // First 8 bytes is the feature type + let feature_type = cursor.read_u64::()? as u32; + let feature = Feature(feature_type); + + // Remaining data is the feature data itself + let start_pos = cursor.position() as usize; + let feature_data = data.as_slice()[start_pos..].to_vec(); + + Ok(Self { + feature, + data: feature_data, + }) + } +} From 9729e913fa5a2f412ed060a85e44c5ed50a5f94b Mon Sep 17 00:00:00 2001 From: Arthur Pastel Date: Sat, 20 Dec 2025 00:11:23 +0100 Subject: [PATCH 4/4] Allow using parse_file to consume pipe mode data --- src/error.rs | 6 ++++++ src/file_reader.rs | 10 +++++++++- src/header.rs | 24 ++++++++++++++++++------ 3 files changed, 33 insertions(+), 7 deletions(-) diff --git a/src/error.rs b/src/error.rs index 74e980e..12baee2 100644 --- a/src/error.rs +++ b/src/error.rs @@ -63,6 +63,12 @@ pub enum Error { #[error("The specified size in the perf event header was smaller than the header itself")] InvalidPerfEventSize, + + #[error("Cannot parse non-streaming perf.data file with parse_pipe. 
Use parse_file instead.")] + FileFormatDetectedInPipeMode, + + #[error("Detected pipe format in file mode")] + PipeFormatDetectedInFileMode, } impl From for Error { diff --git a/src/file_reader.rs b/src/file_reader.rs index 5880760..279ff95 100644 --- a/src/file_reader.rs +++ b/src/file_reader.rs @@ -58,7 +58,15 @@ pub struct PerfFileReader { impl PerfFileReader { pub fn parse_file(mut cursor: C) -> Result { - let header = PerfHeader::parse(&mut cursor)?; + let header = match PerfHeader::parse(&mut cursor) { + Ok(header) => header, + Err(Error::PipeFormatDetectedInFileMode) => { + // Rewind and parse as pipe format instead + cursor.seek(SeekFrom::Start(0))?; + return Self::parse_pipe(cursor); + } + Err(e) => return Err(e), + }; match &header.magic { b"PERFILE2" => { Self::parse_file_impl::(cursor, header, Endianness::LittleEndian) diff --git a/src/header.rs b/src/header.rs index 9000e4d..1849746 100644 --- a/src/header.rs +++ b/src/header.rs @@ -2,6 +2,7 @@ use std::io::Read; use byteorder::{ByteOrder, ReadBytesExt}; +use super::error::Error; use super::features::FeatureSet; use super::section::PerfFileSection; @@ -28,7 +29,7 @@ pub struct PerfHeader { } impl PerfHeader { - pub fn parse(mut reader: R) -> Result { + pub fn parse(mut reader: R) -> Result { let mut magic = [0; 8]; reader.read_exact(&mut magic)?; @@ -39,11 +40,14 @@ impl PerfHeader { } } - fn parse_impl( - mut reader: R, - magic: [u8; 8], - ) -> Result { + fn parse_impl(mut reader: R, magic: [u8; 8]) -> Result { let header_size = reader.read_u64::()?; + + // Detect if this is actually a pipe format instead of file format. 
+ if header_size == std::mem::size_of::() as u64 { + return Err(Error::PipeFormatDetectedInFileMode); + } + let attr_size = reader.read_u64::()?; let attr_section = PerfFileSection::parse::<_, T>(&mut reader)?; let data_section = PerfFileSection::parse::<_, T>(&mut reader)?; @@ -81,7 +85,7 @@ pub struct PerfPipeHeader { } impl PerfPipeHeader { - pub fn parse(mut reader: R) -> Result { + pub fn parse(mut reader: R) -> Result { let mut magic = [0; 8]; reader.read_exact(&mut magic)?; @@ -90,6 +94,14 @@ impl PerfPipeHeader { } else { reader.read_u64::()? }; + + // Detect if this is actually a file format instead of pipe format. + if size > std::mem::size_of::() as u64 + && size == std::mem::size_of::() as u64 + { + return Err(Error::FileFormatDetectedInPipeMode); + } + Ok(Self { magic, size }) } }