diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4de6a72b..29d89345 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,7 +44,7 @@ jobs: rustc --version cargo --version - name: 'Build RRG executable' - run: cargo build --features 'action-get_file_contents_kmx action-get_filesystem_timeline_tsk' + run: cargo build --features 'action-get_file_contents_kmx action-get_file_metadata_kmx action-get_filesystem_timeline_tsk' # TODO: Add a step that runs tests with all action features disabled. - name: 'Run RRG tests' - run: cargo test --features 'test-chattr test-setfattr test-fuse test-wtmp test-libguestfs action-get_file_contents_kmx action-get_filesystem_timeline_tsk' + run: cargo test --features 'test-chattr test-setfattr test-fuse test-wtmp test-libguestfs action-get_file_contents_kmx action-get_file_metadata_kmx action-get_filesystem_timeline_tsk' diff --git a/crates/rrg-proto/build.rs b/crates/rrg-proto/build.rs index 2de8f443..e518233f 100644 --- a/crates/rrg-proto/build.rs +++ b/crates/rrg-proto/build.rs @@ -19,6 +19,7 @@ const PROTOS: &'static [&'static str] = &[ "../../proto/rrg/action/get_file_contents_kmx.proto", "../../proto/rrg/action/get_file_sha256.proto", "../../proto/rrg/action/get_file_metadata.proto", + "../../proto/rrg/action/get_file_metadata_kmx.proto", "../../proto/rrg/action/get_filesystem_timeline.proto", "../../proto/rrg/action/get_filesystem_timeline_tsk.proto", "../../proto/rrg/action/get_system_metadata.proto", diff --git a/crates/rrg/Cargo.toml b/crates/rrg/Cargo.toml index 30805f60..6d3c6470 100644 --- a/crates/rrg/Cargo.toml +++ b/crates/rrg/Cargo.toml @@ -34,6 +34,7 @@ action-get_file_metadata = [] action-get_file_metadata-md5 = ["action-get_file_metadata", "dep:md-5"] action-get_file_metadata-sha1 = ["action-get_file_metadata", "dep:sha1"] action-get_file_metadata-sha256 = ["action-get_file_metadata", "dep:sha2"] +action-get_file_metadata_kmx = ["dep:keramics-core", "dep:keramics-datetime", 
"dep:keramics-formats", "dep:keramics-types", "dep:keramics-vfs"] action-get_file_contents = ["dep:sha2"] action-get_file_contents_kmx = ["dep:keramics-core", "dep:keramics-formats", "dep:keramics-types"] action-get_file_sha256 = ["dep:sha2"] @@ -120,6 +121,10 @@ optional = true version = "0.0.0" optional = true +[dependencies.keramics-datetime] +version = "0.0.0" +optional = true + [dependencies.keramics-formats] version = "0.0.0" optional = true @@ -128,6 +133,10 @@ optional = true version = "0.0.0" optional = true +[dependencies.keramics-vfs] +version = "0.0.0" +optional = true + [dependencies.md-5] version = "0.10.6" optional = true diff --git a/crates/rrg/src/action.rs b/crates/rrg/src/action.rs index 8ef0c59f..2094b55c 100644 --- a/crates/rrg/src/action.rs +++ b/crates/rrg/src/action.rs @@ -21,6 +21,9 @@ pub mod get_system_metadata; #[cfg(feature = "action-get_file_metadata")] pub mod get_file_metadata; +#[cfg(feature = "action-get_file_metadata_kmx")] +pub mod get_file_metadata_kmx; + #[cfg(feature = "action-get_file_contents")] pub mod get_file_contents; @@ -109,6 +112,10 @@ where GetFileMetadata => { handle(session, request, self::get_file_metadata::handle) } + #[cfg(feature = "action-get_file_metadata_kmx")] + GetFileMetadataKmx => { + handle(session, request, self::get_file_metadata_kmx::handle) + } #[cfg(feature = "action-get_file_contents")] GetFileContents => { handle(session, request, self::get_file_contents::handle) diff --git a/crates/rrg/src/action/get_file_metadata_kmx.rs b/crates/rrg/src/action/get_file_metadata_kmx.rs new file mode 100644 index 00000000..3bc0f49e --- /dev/null +++ b/crates/rrg/src/action/get_file_metadata_kmx.rs @@ -0,0 +1,469 @@ +// Copyright 2025 Google LLC +// +// Use of this source code is governed by an MIT-style license that can be found +// in the LICENSE file or at https://opensource.org/licenses/MIT. + +/// Arguments of the `get_file_metadata_kmx` action. 
+pub struct Args { + volume_path: Option, + path: keramics_formats::ntfs::NtfsPath, +} + +/// Result of the `get_file_metadata_kmx` action. +pub struct Item { + path: keramics_formats::ntfs::NtfsPath, + len: u64, +} + +/// Handles invocations of the `get_file_metadata_kmx` action. +pub fn handle(session: &mut S, args: Args) -> crate::session::Result<()> +where + S: crate::session::Session, +{ + // TODO: Add support for inferring the volume from path. + let Some(volume_path) = args.volume_path else { + return Err(crate::session::Error::action(std::io::Error::new( + std::io::ErrorKind::Unsupported, + "volume path must be provided", + ))); + }; + + log::debug!("opening NTFS volume at '{}'", volume_path.display()); + + let volume = std::fs::File::open(&volume_path) + .map_err(|error| crate::session::Error::action(error))?; + let volume_data_stream: keramics_core::DataStreamReference = { + std::sync::Arc::new(std::sync::RwLock::new(volume)) + }; + + log::debug!("parsing NTFS volume at '{}'", volume_path.display()); + + let mut ntfs = keramics_formats::ntfs::NtfsFileSystem::new(); + ntfs.read_data_stream(&volume_data_stream) + .map_err(|error| crate::session::Error::action(error))?; + + log::debug!("collecting metadata for '{:?}'", args.path); + + let file_entry = match ntfs.get_file_entry_by_path(&args.path) { + Ok(Some(file_entry)) => file_entry, + Ok(None) => { + log::error! { + "no metadata for '{:?}'", + args.path, + }; + return Ok(()) + } + Err(error) => { + log::error! { + "failed to collect metadata for '{:?}': {error}", + args.path, + }; + return Ok(()) + } + }; + + log::debug!("sending metadata for '{:?}'", args.path); + + session.reply(Item { + path: args.path, + len: file_entry.get_size(), + })?; + + Ok(()) +} + +impl crate::request::Args for Args { + + type Proto = rrg_proto::get_file_metadata_kmx::Args; + + fn from_proto(proto: Self::Proto) -> Result { + use crate::request::ParseArgsError; + + // TODO: Do not go through UTF-8 conversion. 
+ let path = str::from_utf8(proto.path().raw_bytes()) + .map_err(|error| ParseArgsError::invalid_field("path", error))?; + let path = keramics_formats::ntfs::NtfsPath::from(path); + + Ok(Args { + volume_path: None, + path, + }) + } +} + +impl crate::response::Item for Item { + + type Proto = rrg_proto::get_file_metadata_kmx::Result; + + fn into_proto(self) -> Self::Proto { + // TODO: Use lossless conversion (preferably in Keramics directly). + let path = std::path::PathBuf::from_iter( + self.path.components.iter() + .map(|comp| String::from_utf16_lossy(&comp.elements)) + ); + + let mut proto = rrg_proto::get_file_metadata_kmx::Result::new(); + proto.set_path(path.into()); + proto.mut_metadata().set_size(self.len); + + proto + } +} + +#[cfg(test)] +mod tests { + + use super::*; + + #[cfg_attr(not(all(target_os = "linux", feature = "test-libguestfs")), ignore)] + #[test] + fn handle_non_existent() { + let ntfs_file = ntfs_temp_file(|_| Ok(())) + .unwrap(); + + let args = Args { + volume_path: Some(ntfs_file.path().to_path_buf()), + path: keramics_formats::ntfs::NtfsPath::from("\\idonotexist"), + }; + + let mut session = crate::session::FakeSession::new(); + assert!(handle(&mut session, args).is_ok()); + + assert_eq!(session.reply_count(), 0); + } + + #[cfg_attr(not(all(target_os = "linux", feature = "test-libguestfs")), ignore)] + #[test] + fn handle_regular_file() { + let ntfs_file = ntfs_temp_file(|ntfs_path| { + std::fs::write(ntfs_path.join("foo"), b"Lorem ipsum.")?; + + Ok(()) + }).unwrap(); + + let args = Args { + volume_path: Some(ntfs_file.path().to_path_buf()), + path: keramics_formats::ntfs::NtfsPath::from("\\foo"), + }; + + let mut session = crate::session::FakeSession::new(); + assert!(handle(&mut session, args).is_ok()); + + assert_eq!(session.reply_count(), 1); + + let item = session.reply::(0); + assert_eq!(item.path, keramics_formats::ntfs::NtfsPath::from("\\foo")); + assert_eq!(item.len, b"Lorem ipsum.".len() as u64); + // TODO: Add assertions about 
the file type. + } + + #[cfg_attr(not(all(target_os = "linux", feature = "test-libguestfs")), ignore)] + #[test] + fn handle_dir() { + let ntfs_file = ntfs_temp_file(|ntfs_path| { + std::fs::create_dir(ntfs_path.join("foo"))?; + + Ok(()) + }).unwrap(); + + let args = Args { + volume_path: Some(ntfs_file.path().to_path_buf()), + path: keramics_formats::ntfs::NtfsPath::from("\\foo"), + }; + + let mut session = crate::session::FakeSession::new(); + assert!(handle(&mut session, args).is_ok()); + + assert_eq!(session.reply_count(), 1); + + let item = session.reply::(0); + assert_eq!(item.path, keramics_formats::ntfs::NtfsPath::from("\\foo")); + // TODO: Add assertions about the file type. + } + + fn ntfs_temp_file(init: F) -> std::io::Result + where + F: FnOnce(&std::path::Path) -> std::io::Result<()>, + { + use std::io::Write as _; + + let mut file = tempfile::NamedTempFile::new()?; + // We initialize the file to have 2 MiB. Minimum size of NTFS image is + // 1 MiB, so we use 2 MiB just to be on the safe side. + file.write_all(&vec![0; 2 * 1024 * 1024])?; + file.flush()?; + + let output = std::process::Command::new("mkfs.ntfs") + .arg("--force") + .arg(file.path()) + .output()?; + if !output.status.success() { + return Err(std::io::Error::new(std::io::ErrorKind::Other, format! 
{ + "failed to run `mkfs.ntfs` (stdout: {:?}, stderr: {:?})", + String::from_utf8_lossy(&output.stdout).as_ref(), + String::from_utf8_lossy(&output.stderr).as_ref(), + })) + } + + let mountpoint = tempfile::tempdir()?; + + let mount = GuestMount::new(file.path(), mountpoint.path())?; + init(mountpoint.path())?; + mount.unmount()?; + + Ok(file) + } + + #[cfg_attr(not(all(target_os = "linux", feature = "test-libguestfs")), ignore)] + #[test] + fn ntfs_temp_file_empty() { + let file = ntfs_temp_file(|_| Ok(())) + .unwrap(); + + let data_stream: keramics_core::DataStreamReference = { + std::sync::Arc::new(std::sync::RwLock::new(NamedTempFileWrapper(file))) + }; + + let mut ntfs = keramics_formats::ntfs::NtfsFileSystem::new(); + ntfs.read_data_stream(&data_stream) + .unwrap(); + + assert!(ntfs.get_root_directory().is_ok()); + } + + #[cfg_attr(not(all(target_os = "linux", feature = "test-libguestfs")), ignore)] + #[test] + fn ntfs_temp_file_files() { + let file = ntfs_temp_file(|path| { + std::fs::write(path.join("foo"), b"Lorem ipsum.") + .unwrap(); + std::fs::write(path.join("bar"), b"Dolor sit amet.") + .unwrap(); + + Ok(()) + }).unwrap(); + + let data_stream: keramics_core::DataStreamReference = { + std::sync::Arc::new(std::sync::RwLock::new(NamedTempFileWrapper(file))) + }; + + let mut ntfs = keramics_formats::ntfs::NtfsFileSystem::new(); + ntfs.read_data_stream(&data_stream) + .unwrap(); + + let mut entry_root = ntfs.get_root_directory() + .unwrap(); + + let entry_foo = entry_root.get_sub_file_entry_by_name(&keramics_types::Ucs2String::from("foo")) + .unwrap().unwrap(); + assert_eq!(entry_foo.get_size(), b"Lorem ipsum.".len() as u64); + + let entry_bar = entry_root.get_sub_file_entry_by_name(&keramics_types::Ucs2String::from("bar")) + .unwrap().unwrap(); + assert_eq!(entry_bar.get_size(), b"Dolor sit amet.".len() as u64); + } + + // TODO: Keramics defines its own `DataStream` type rather than using + // standard interfaces. 
Thus, we wrap `NamedTempFile` to provide our own + // implementation of it. + struct NamedTempFileWrapper(tempfile::NamedTempFile); + impl keramics_core::DataStream for NamedTempFileWrapper { + + fn get_size(&mut self) -> Result { + self.0.as_file_mut().get_size() + } + + fn read(&mut self, buf: &mut [u8]) -> Result { + self.0.as_file_mut().read(buf) + } + + fn seek(&mut self, pos: std::io::SeekFrom) -> Result { + self.0.as_file_mut().seek(pos) + } + } + + struct GuestMount { + mountpoint: std::path::PathBuf, + pid: Option, + is_mounted: bool, + } + + impl GuestMount { + + fn new(image: PI, mountpoint: PM) -> std::io::Result + where + PI: AsRef, + PM: AsRef, + { + // `guestmount` spawns a separate process to serve the files. When + // we call `guestunmount` to unmount, even though the call returns, + // the background process still flushes the file in the background. + // To only finish the unmount after everything is properly flushed, + // we wait until the background process is gone [1]. + // + // The only way to get the PID of the background process seems to be + // through a "PID file" which is written by `guestmount`, so we use + // a temporary file for that. + // + // [1]: https://libguestfs.org/guestmount.1.html#race-conditions-possible-when-shutting-down-the-connection + let pid_file = tempfile::NamedTempFile::new()?; + + let output = std::process::Command::new("guestmount") + .arg("--add").arg(image.as_ref().as_os_str()) + .arg("--mount").arg("/dev/sda:/::ntfs") + .arg("--pid-file").arg(pid_file.path().as_os_str()) + .arg(mountpoint.as_ref().as_os_str()) + .output()?; + if !output.status.success() { + return Err(std::io::Error::new(std::io::ErrorKind::Other, format! 
{ + "failed to run `guestmount` (stdout: {:?}, stderr: {:?})", + String::from_utf8_lossy(&output.stdout).as_ref(), + String::from_utf8_lossy(&output.stderr).as_ref(), + })) + } + + // At this point we successfully created the mount but we have not + // parsed the PID file yet, which we may fail to do. But even if + // we cannot read the PID file, we should still clean the mount when + // returning an error. + // + // + // Thus we create a `GuestMount` instance here (without PID) and in + // case of an error, RAII will take care of running `guestunmount`. + let mut mount = GuestMount { + mountpoint: mountpoint.as_ref().to_path_buf(), + pid: None, + is_mounted: true, + }; + + let pid = || -> Result> { + let pid_string = String::from_utf8(std::fs::read(pid_file.path())?)?; + Ok(pid_string.trim().parse::()?) + }().map_err(|error| std::io::Error::new(std::io::ErrorKind::InvalidData, format! { + "invalid PID file contents: {error}" + }))?; + mount.pid = Some(pid); + + Ok(mount) + } + + fn unmount(mut self) -> std::io::Result<()> { + assert!(self.is_mounted); + // We set this bit even before the mount is actually unmounted (which + // may fail and leave the image mounted!). This is because in case + // unmounting fails, we don't want to allow unmounting again. We need + // this behaviour especially because of the `drop` method that is + // bound to run eventually, attempting to unmount again anything that + // is still marked as mounted. + self.is_mounted = false; + + let output = std::process::Command::new("guestunmount") + .arg(self.mountpoint.as_os_str()) + .output()?; + if !output.status.success() { + return Err(std::io::Error::new(std::io::ErrorKind::Other, format! { + "failed to run `guestunmount` (stdout: {:?}, stderr: {:?})", + String::from_utf8_lossy(&output.stdout).as_ref(), + String::from_utf8_lossy(&output.stderr).as_ref(), + })) + } + + // See the constructor and [1] for more information about this PID. 
+ // Note that we might not have the PID available and still want to + // unmount (e.g. in case `guestmount` succeeded but parsing + // the PID file failed). + // + // We use procfs [2] to determine whether the background process is + // done. We do a bit of busy waiting here but this involves a system + // call, so we should not waste too much time. + // + // [1]: https://libguestfs.org/guestmount.1.html#race-conditions-possible-when-shutting-down-the-connection + // [2]: https://en.wikipedia.org/wiki/Procfs + if let Some(pid) = self.pid { + let pid_path = format!("/proc/{}", pid); + while std::fs::exists(&pid_path)? { + std::thread::yield_now(); + } + } + + Ok(()) + } + } + + impl Drop for GuestMount { + + fn drop(&mut self) { + if self.is_mounted { + // `unmount` takes an owned value, so we replace `self` with a + // dummy unmounted instance (it being unmounted is important to avoid + // infinite recursion) and then call explicit unmount on the obtained + // owned value. + let unmounted = GuestMount { + mountpoint: std::path::PathBuf::new(), + pid: None, + is_mounted: false, + }; + + std::mem::replace(self, unmounted).unmount() + .expect("failed to unmount"); + } + } + } + + #[cfg_attr(not(all(target_os = "linux", feature = "test-libguestfs")), ignore)] + #[test] + fn guest_mount_new_and_unmount() { + use std::io::Write as _; + + let mut image = tempfile::NamedTempFile::new() + .unwrap(); + // We initialize the file to have 2 MiB. Minimum size of NTFS image is + // 1 MiB, so we use 2 MiB just to be on the safe side. 
+ image.write_all(&vec![0; 2 * 1024 * 1024]) + .unwrap(); + image.flush() + .unwrap(); + std::process::Command::new("mkfs.ntfs") + .arg("--force") + .arg(image.path()) + .output() + .unwrap(); + + let mountpoint = tempfile::tempdir() + .unwrap(); + + let mount = GuestMount::new(&image, &mountpoint) + .unwrap(); + + mount.unmount() + .unwrap(); + } + + #[cfg_attr(not(all(target_os = "linux", feature = "test-libguestfs")), ignore)] + #[test] + fn guest_mount_new_and_drop() { + use std::io::Write as _; + + let mut image = tempfile::NamedTempFile::new() + .unwrap(); + // We initialize the file to have 2 MiB. Minimum size of NTFS image is + // 1 MiB, so we use 2 MiB just to be on the safe side. + image.write_all(&vec![0; 2 * 1024 * 1024]) + .unwrap(); + image.flush() + .unwrap(); + std::process::Command::new("mkfs.ntfs") + .arg("--force") + .arg(image.path()) + .output() + .unwrap(); + + let mountpoint = tempfile::tempdir() + .unwrap(); + + let mount = GuestMount::new(&image, &mountpoint) + .unwrap(); + + drop(mount) + } +} diff --git a/crates/rrg/src/request.rs b/crates/rrg/src/request.rs index 38d7e767..47780f0b 100644 --- a/crates/rrg/src/request.rs +++ b/crates/rrg/src/request.rs @@ -17,6 +17,8 @@ pub enum Action { GetSystemMetadata, /// Get metadata about the specified file. GetFileMetadata, + /// Get metadata about the specified file using Keramics. + GetFileMetadataKmx, /// Get contents of the specified file. GetFileContents, /// Get contents of the specified file using Keramics. 
@@ -67,6 +69,7 @@ impl std::fmt::Display for Action { match *self { Action::GetSystemMetadata => write!(fmt, "get_system_metadata"), Action::GetFileMetadata => write!(fmt, "get_file_metadata"), + Action::GetFileMetadataKmx => write!(fmt, "get_file_metadata_kmx"), Action::GetFileContents => write!(fmt, "get_file_contents"), Action::GetFileContentsKmx => write!(fmt, "get_file_contents_kmx"), Action::GetFileSha256 => write!(fmt, "get_file_sha256"), @@ -122,6 +125,7 @@ impl TryFrom for Action { match proto { GET_SYSTEM_METADATA => Ok(Action::GetSystemMetadata), GET_FILE_METADATA => Ok(Action::GetFileMetadata), + GET_FILE_METADATA_KMX => Ok(Action::GetFileMetadataKmx), GET_FILE_CONTENTS => Ok(Action::GetFileContents), GET_FILE_CONTENTS_KMX => Ok(Action::GetFileContentsKmx), GET_FILE_SHA256 => Ok(Action::GetFileSha256), diff --git a/proto/rrg.proto b/proto/rrg.proto index 36704252..483254c0 100644 --- a/proto/rrg.proto +++ b/proto/rrg.proto @@ -60,6 +60,8 @@ enum Action { SCAN_PROCESS_MEMORY_YARA = 22; // Get contents of the specified file using Keramics. GET_FILE_CONTENTS_KMX = 23; + // Get metadata of the specified file using Keramics. + GET_FILE_METADATA_KMX = 24; // TODO: Define more actions that should be supported. diff --git a/proto/rrg/action/get_file_metadata_kmx.proto b/proto/rrg/action/get_file_metadata_kmx.proto new file mode 100644 index 00000000..e084ab68 --- /dev/null +++ b/proto/rrg/action/get_file_metadata_kmx.proto @@ -0,0 +1,33 @@ +// Copyright 2025 Google LLC +// +// Use of this source code is governed by an MIT-style license that can be found +// in the LICENSE file or at https://opensource.org/licenses/MIT. +syntax = "proto3"; + +package rrg.action.get_file_metadata_kmx; + +import "rrg/fs.proto"; + +message Args { + // Root path to the file to get the metadata for. + // + // Note that if a path points to a symbolic link, the metadata associated + // with the link itself will be returned, not the metadata of the file that + // the link points to. 
+ rrg.fs.Path path = 1; // TODO: Add support for multiple paths. + + // TODO: Add the remaining fields that original `get_file_metadata` supports. +} + +message Result { + // Path to the file. + // + // This is the original root path of the file as specified in the arguments, + // possibly with some suffix in case of child files. + rrg.fs.Path path = 1; + + // Metadata of the file. + rrg.fs.FileMetadata metadata = 2; + + // TODO: Add the remaining fields that original `get_file_metadata` supports. +}