From d7a7f152f339a82a9065e46cf726186defec9907 Mon Sep 17 00:00:00 2001 From: nokyan Date: Sat, 14 Dec 2024 19:19:38 +0100 Subject: [PATCH 1/8] Add support for AMD NPUs --- lib/process_data/src/lib.rs | 218 +++++++++++++++++++++++++++--------- src/ui/window.rs | 21 +++- src/utils/app.rs | 47 ++++++++ src/utils/npu/amd.rs | 98 ++++++++++++++++ src/utils/npu/intel.rs | 8 +- src/utils/npu/mod.rs | 57 +++++++--- src/utils/npu/other.rs | 8 +- src/utils/process.rs | 36 +++++- 8 files changed, 414 insertions(+), 79 deletions(-) create mode 100644 src/utils/npu/amd.rs diff --git a/lib/process_data/src/lib.rs b/lib/process_data/src/lib.rs index 4bc932f..74f9cf8 100644 --- a/lib/process_data/src/lib.rs +++ b/lib/process_data/src/lib.rs @@ -46,11 +46,17 @@ static RE_IO_READ: Lazy = lazy_regex!(r"read_bytes:\s*(\d+)"); static RE_IO_WRITE: Lazy = lazy_regex!(r"write_bytes:\s*(\d+)"); +static RE_DRM_DRIVER: Lazy = lazy_regex!(r"drm-driver:\s*(.+)"); + static RE_DRM_PDEV: Lazy = lazy_regex!(r"drm-pdev:\s*([0-9A-Fa-f]{4}:[0-9A-Fa-f]{2}:[0-9A-Fa-f]{2}\.[0-9A-Fa-f])"); static RE_DRM_CLIENT_ID: Lazy = lazy_regex!(r"drm-client-id:\s*(\d+)"); +// AMD only +static RE_DRM_ENGINE_NPU_AMDXDNA: Lazy = + lazy_regex!(r"drm-engine-npu-amdxdna:\s*(\d+)\s*ns"); + // AMD only static RE_DRM_ENGINE_GFX: Lazy = lazy_regex!(r"drm-engine-gfx:\s*(\d+)\s*ns"); @@ -75,6 +81,8 @@ static RE_DRM_ENGINE_RENDER: Lazy = lazy_regex!(r"drm-engine-render:\s*(\ // Intel only static RE_DRM_ENGINE_VIDEO: Lazy = lazy_regex!(r"drm-engine-video:\s*(\d+)\s*ns"); +static RE_DRM_TOTAL_MEMORY: Lazy = lazy_regex!(r"drm-total-memory:\s*(\d+)\s*KiB"); + static NVML: Lazy> = Lazy::new(Nvml::init); static NVML_DEVICES: Lazy> = Lazy::new(|| { @@ -135,6 +143,13 @@ pub struct GpuUsageStats { pub nvidia: bool, } +/// Represents NPU usage statistics per-process. 
+#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize, Copy)] +pub struct NpuUsageStats { + pub usage: u64, + pub mem: u64, +} + /// Data that could be transferred using `resources-processes`, separated from /// `Process` mainly due to `Icon` not being able to derive `Serialize` and /// `Deserialize`. @@ -159,6 +174,7 @@ pub struct ProcessData { pub timestamp: u64, /// Key: PCI Slot ID of the GPU pub gpu_usage_stats: BTreeMap, + pub npu_usage_stats: BTreeMap, } impl ProcessData { @@ -365,6 +381,8 @@ impl ProcessData { let gpu_usage_stats = Self::gpu_usage_stats(proc_path, pid); + let npu_usage_stats = Self::npu_usage_stats(proc_path, pid).unwrap_or_default(); + let timestamp = unix_as_millis(); Ok(Self { @@ -386,6 +404,7 @@ impl ProcessData { write_bytes, timestamp, gpu_usage_stats, + npu_usage_stats, }) } @@ -396,6 +415,68 @@ impl ProcessData { other_stats } + /// Returns the fd_num and the plausibility of whether this file might contain drm fdinfo data. + /// This function is cautious and will signal plausibility if there's an error during evaluation. 
+ fn drm_fdinfo_plausible>( + fdinfo_path: P, + pid: libc::pid_t, + seen_fds: &HashSet, + ) -> (bool, usize) { + let fdinfo_path = fdinfo_path.as_ref(); + + // if our fd is 0, 1 or 2 it's probably just a std stream so skip it + let fd_num = fdinfo_path + .file_name() + .and_then(|osstr| osstr.to_str()) + .unwrap_or("0") + .parse::() + .unwrap_or(0); + if fd_num <= 2 { + return (false, fd_num); + } + + let _file = std::fs::File::open(&fdinfo_path); + if _file.is_err() { + return (true, fd_num); + } + let file = _file.unwrap(); + + let _metadata = file.metadata(); + if _metadata.is_err() { + return (true, fd_num); + } + let metadata = _metadata.unwrap(); + + if !metadata.is_file() { + return (false, fd_num); + } + + // Adapted from nvtop's `is_drm_fd()` + // https://github.com/Syllo/nvtop/blob/master/src/extract_processinfo_fdinfo.c + let fd_path = fdinfo_path.to_str().map(|s| s.replace("fdinfo", "fd")); + if let Some(fd_path) = fd_path { + if let Ok(fd_metadata) = std::fs::metadata(fd_path) { + let major = unsafe { libc::major(fd_metadata.st_rdev()) }; + if (fd_metadata.st_mode() & libc::S_IFMT) != libc::S_IFCHR || major != 226 { + return (false, fd_num); + } + } + } + + // Adapted from nvtop's `processinfo_sweep_fdinfos()` + // https://github.com/Syllo/nvtop/blob/master/src/extract_processinfo_fdinfo.c + // if we've already seen the file this fd refers to, skip + let not_unique = seen_fds.iter().any(|seen_fd| unsafe { + syscalls::syscall!(syscalls::Sysno::kcmp, pid, pid, 0, fd_num, *seen_fd).unwrap_or(0) + == 0 + }); + if not_unique { + return (false, fd_num); + } + + (true, fd_num) + } + fn other_gpu_usage_stats( proc_path: &Path, pid: i32, @@ -409,60 +490,14 @@ impl ProcessData { let entry = entry?; let fdinfo_path = entry.path(); - let _file = std::fs::File::open(&fdinfo_path); - if _file.is_err() { - continue; - } - let mut file = _file.unwrap(); - - let _metadata = file.metadata(); - if _metadata.is_err() { - continue; - } - let metadata = _metadata.unwrap(); 
- - // if our fd is 0, 1 or 2 it's probably just a std stream so skip it - let fd_num = fdinfo_path - .file_name() - .and_then(|osstr| osstr.to_str()) - .unwrap_or("0") - .parse::() - .unwrap_or(0); - if fd_num <= 2 { - continue; - } - - if !metadata.is_file() { - continue; - } - - // Adapted from nvtop's `is_drm_fd()` - // https://github.com/Syllo/nvtop/blob/master/src/extract_processinfo_fdinfo.c - let fd_path = fdinfo_path.to_str().map(|s| s.replace("fdinfo", "fd")); - if let Some(fd_path) = fd_path { - if let Ok(fd_metadata) = std::fs::metadata(fd_path) { - let major = unsafe { libc::major(fd_metadata.st_rdev()) }; - if (fd_metadata.st_mode() & libc::S_IFMT) != libc::S_IFCHR || major != 226 { - continue; - } - } - } - - // Adapted from nvtop's `processinfo_sweep_fdinfos()` - // https://github.com/Syllo/nvtop/blob/master/src/extract_processinfo_fdinfo.c - // if we've already seen the file this fd refers to, skip - let not_unique = seen_fds.iter().any(|seen_fd| unsafe { - syscalls::syscall!(syscalls::Sysno::kcmp, pid, pid, 0, fd_num, *seen_fd) - .unwrap_or(0) - == 0 - }); - if not_unique { + let (plausible, fd_num) = Self::drm_fdinfo_plausible(&fdinfo_path, pid, &seen_fds); + if !plausible { continue; } seen_fds.insert(fd_num); - if let Ok(stats) = Self::read_fdinfo(&mut file, metadata.len() as usize) { + if let Ok(stats) = Self::read_gpu_fdinfo(&fdinfo_path) { return_map .entry(stats.0) .and_modify(|existing_value: &mut GpuUsageStats| { @@ -486,11 +521,86 @@ impl ProcessData { Ok(return_map) } - fn read_fdinfo( - fdinfo_file: &mut File, - file_size: usize, - ) -> Result<(PciSlot, GpuUsageStats, i64)> { - let mut content = String::with_capacity(file_size); + fn npu_usage_stats(proc_path: &Path, pid: i32) -> Result> { + let fdinfo_dir = proc_path.join("fdinfo"); + + let mut seen_fds = HashSet::new(); + + let mut return_map = BTreeMap::new(); + for entry in std::fs::read_dir(fdinfo_dir)? 
{ + let entry = entry?; + let fdinfo_path = entry.path(); + + let (plausible, fd_num) = Self::drm_fdinfo_plausible(&fdinfo_path, pid, &seen_fds); + if !plausible { + continue; + } + + seen_fds.insert(fd_num); + + if let Ok((pci_slot, stats)) = Self::read_npu_fdinfo(&fdinfo_path) { + return_map + .entry(pci_slot) + .and_modify(|existing_value: &mut NpuUsageStats| { + if stats.usage > existing_value.usage { + existing_value.usage = stats.usage; + } + if stats.mem > existing_value.mem { + existing_value.mem = stats.mem; + } + }) + .or_insert(stats); + } + } + + Ok(return_map) + } + + fn read_npu_fdinfo>(fdinfo_path: P) -> Result<(PciSlot, NpuUsageStats)> { + let mut content = String::new(); + let mut fdinfo_file = File::open(fdinfo_path.as_ref())?; + fdinfo_file.read_to_string(&mut content)?; + fdinfo_file.flush()?; + + let driver = RE_DRM_DRIVER + .captures(&content) + .and_then(|captures| captures.get(1)) + .map(|capture| capture.as_str()); + + if driver.is_some() { + let pci_slot = RE_DRM_PDEV + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| PciSlot::from_str(capture.as_str()).ok()) + .unwrap_or_default(); + + let usage = RE_DRM_ENGINE_NPU_AMDXDNA + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| capture.as_str().parse::().ok()) + .unwrap_or_default(); + + let total_memory = RE_DRM_TOTAL_MEMORY + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| capture.as_str().parse::().ok()) + .unwrap_or_default() + .saturating_mul(1024); + + let stats = NpuUsageStats { + usage, + mem: total_memory, + }; + + return Ok((pci_slot, stats)); + } + + bail!("unable to find gpu information in this fdinfo"); + } + + fn read_gpu_fdinfo>(fdinfo_path: P) -> Result<(PciSlot, GpuUsageStats, i64)> { + let mut content = String::new(); + let mut fdinfo_file = File::open(fdinfo_path.as_ref())?; fdinfo_file.read_to_string(&mut content)?; fdinfo_file.flush()?; diff --git a/src/ui/window.rs 
b/src/ui/window.rs index 8e7862a..89b7976 100644 --- a/src/ui/window.rs +++ b/src/ui/window.rs @@ -578,17 +578,32 @@ impl MainWindow { page.refresh_page(&gpu_data); } - std::mem::drop(apps_context); - /* * Npu */ let npu_pages = imp.npu_pages.borrow(); - for ((_, page), npu_data) in npu_pages.values().zip(npu_data) { + for ((_, page), mut npu_data) in npu_pages.values().zip(npu_data) { let page = page.content().and_downcast::().unwrap(); + + let processes_npu_fraction = apps_context.npu_fraction(npu_data.pci_slot); + npu_data.usage_fraction = Some(f64::max( + npu_data.usage_fraction.unwrap_or(0.0), + processes_npu_fraction.into(), + )); + + if npu_data.total_memory.is_some() { + let processes_npu_memory_fraction = apps_context.npu_mem(npu_data.pci_slot); + npu_data.used_memory = Some(usize::max( + npu_data.used_memory.unwrap_or(0), + processes_npu_memory_fraction as usize, + )); + } + page.refresh_page(&npu_data); } + std::mem::drop(apps_context); + /* * Cpu */ diff --git a/src/utils/app.rs b/src/utils/app.rs index 3e684a9..ad1d6fb 100644 --- a/src/utils/app.rs +++ b/src/utils/app.rs @@ -638,6 +638,52 @@ impl AppsContext { .clamp(0.0, 1.0) } + pub fn npu_fraction(&self, pci_slot: PciSlot) -> f32 { + self.processes_iter() + .map(|process| { + ( + &process.data.npu_usage_stats, + &process.npu_usage_stats_last, + process.data.timestamp, + process.timestamp_last, + ) + }) + .map(|(new, old, timestamp, timestamp_last)| { + ( + new.get(&pci_slot), + old.get(&pci_slot), + timestamp, + timestamp_last, + ) + }) + .filter_map(|(new, old, timestamp, timestamp_last)| match (new, old) { + (Some(new), Some(old)) => Some((new, old, timestamp, timestamp_last)), + _ => None, + }) + .map(|(new, old, timestamp, timestamp_last)| { + if old.usage == 0 { + 0.0 + } else { + ((new.usage.saturating_sub(old.usage) as f32) + / (timestamp.saturating_sub(timestamp_last) as f32)) + .finite_or_default() + / 1_000_000.0 + } + }) + .sum::() + .clamp(0.0, 1.0) + } + + pub fn npu_mem(&self, 
pci_slot: PciSlot) -> u64 { + self.processes_iter() + .map(|process| process.data.npu_usage_stats.get(&pci_slot)) + .map(|stats| match stats { + Some(stats) => stats.mem, + None => 0, + }) + .sum() + } + fn app_associated_with_process(&self, process: &Process) -> Option { // TODO: tidy this up // ↓ look for whether we can find an ID in the cgroup @@ -787,6 +833,7 @@ impl AppsContext { old_process.read_bytes_last = old_process.data.read_bytes; old_process.write_bytes_last = old_process.data.write_bytes; old_process.gpu_usage_stats_last = old_process.data.gpu_usage_stats.clone(); + old_process.npu_usage_stats_last = old_process.data.npu_usage_stats.clone(); old_process.data = process_data.clone(); } else { diff --git a/src/utils/npu/amd.rs b/src/utils/npu/amd.rs new file mode 100644 index 0000000..8eba9c9 --- /dev/null +++ b/src/utils/npu/amd.rs @@ -0,0 +1,98 @@ +use anyhow::Result; +use process_data::pci_slot::PciSlot; + +use std::path::PathBuf; + +use crate::utils::pci::Device; + +use super::NpuImpl; + +#[derive(Debug, Clone, Default)] + +pub struct AmdNpu { + pub device: Option<&'static Device>, + pub pci_slot: PciSlot, + pub driver: String, + sysfs_path: PathBuf, + first_hwmon_path: Option, +} + +impl AmdNpu { + pub fn new( + device: Option<&'static Device>, + pci_slot: PciSlot, + driver: String, + sysfs_path: PathBuf, + first_hwmon_path: Option, + ) -> Self { + Self { + device, + pci_slot, + driver, + sysfs_path, + first_hwmon_path, + } + } +} + +impl NpuImpl for AmdNpu { + fn device(&self) -> Option<&'static Device> { + self.device + } + + fn pci_slot(&self) -> PciSlot { + self.pci_slot + } + + fn driver(&self) -> String { + self.driver.clone() + } + + fn sysfs_path(&self) -> PathBuf { + self.sysfs_path.clone() + } + + fn first_hwmon(&self) -> Option { + self.first_hwmon_path.clone() + } + + fn name(&self) -> Result { + self.drm_name() + } + + fn usage(&self) -> Result { + self.drm_usage().map(|usage| usage as f64 / 100.0) + } + + fn used_memory(&self) -> 
Result { + self.drm_used_memory().map(|usage| usage as usize) + } + + fn total_memory(&self) -> Result { + self.drm_total_memory().map(|usage| usage as usize) + } + + fn temperature(&self) -> Result { + self.hwmon_temperature() + } + + fn power_usage(&self) -> Result { + self.hwmon_power_usage() + } + + fn core_frequency(&self) -> Result { + self.hwmon_core_frequency() + } + + fn memory_frequency(&self) -> Result { + self.hwmon_vram_frequency() + } + + fn power_cap(&self) -> Result { + self.hwmon_power_cap() + } + + fn power_cap_max(&self) -> Result { + self.hwmon_power_cap_max() + } +} diff --git a/src/utils/npu/intel.rs b/src/utils/npu/intel.rs index bf9990a..62029c0 100644 --- a/src/utils/npu/intel.rs +++ b/src/utils/npu/intel.rs @@ -82,12 +82,12 @@ impl NpuImpl for IntelNpu { Ok((delta_busy_time / delta_timestamp) / 1000.0) } - fn used_vram(&self) -> Result { - self.drm_used_vram().map(|usage| usage as usize) + fn used_memory(&self) -> Result { + self.drm_used_memory().map(|usage| usage as usize) } - fn total_vram(&self) -> Result { - self.drm_total_vram().map(|usage| usage as usize) + fn total_memory(&self) -> Result { + self.drm_total_memory().map(|usage| usage as usize) } fn temperature(&self) -> Result { diff --git a/src/utils/npu/mod.rs b/src/utils/npu/mod.rs index 97e80f5..d2c6339 100644 --- a/src/utils/npu/mod.rs +++ b/src/utils/npu/mod.rs @@ -1,6 +1,8 @@ +mod amd; mod intel; mod other; +use amd::AmdNpu; use anyhow::{bail, Context, Result}; use log::{debug, info}; use process_data::pci_slot::PciSlot; @@ -21,6 +23,7 @@ use self::{intel::IntelNpu, other::OtherNpu}; use super::pci::Vendor; +pub const VID_AMD: u16 = 4098; pub const VID_INTEL: u16 = 0x8086; #[derive(Debug)] @@ -48,8 +51,8 @@ impl NpuData { let usage_fraction = npu.usage().ok(); - let total_memory = npu.total_vram().ok(); - let used_memory = npu.used_vram().ok(); + let total_memory = npu.total_memory().ok(); + let used_memory = npu.used_memory().ok(); let clock_speed = 
npu.core_frequency().ok(); let vram_speed = npu.memory_frequency().ok(); @@ -77,6 +80,7 @@ impl NpuData { #[derive(Debug, Clone)] pub enum Npu { + Amd(AmdNpu), Intel(IntelNpu), Other(OtherNpu), } @@ -96,8 +100,8 @@ pub trait NpuImpl { fn name(&self) -> Result; fn usage(&self) -> Result; - fn used_vram(&self) -> Result; - fn total_vram(&self) -> Result; + fn used_memory(&self) -> Result; + fn total_memory(&self) -> Result; fn temperature(&self) -> Result; fn power_usage(&self) -> Result; fn core_frequency(&self) -> Result; @@ -141,14 +145,17 @@ pub trait NpuImpl { } fn drm_usage(&self) -> Result { - bail!("usage fallback not implemented") + // This is purely a guess for the future since no NPU driver has actually implemented this statistic + self.read_device_int("npu_busy_percent") } - fn drm_used_vram(&self) -> Result { + fn drm_used_memory(&self) -> Result { + // This is purely a guess for the future since no NPU driver has actually implemented this statistic self.read_device_int("mem_info_vram_used") } - fn drm_total_vram(&self) -> Result { + fn drm_total_memory(&self) -> Result { + // This is purely a guess for the future since no NPU driver has actually implemented this statistic self.read_device_int("mem_info_vram_total") } @@ -256,6 +263,17 @@ impl Npu { )), "Intel", ) + } else if vid == VID_AMD || driver == "amdxdna" { + ( + Npu::Amd(AmdNpu::new( + device, + pci_slot, + driver, + path, + hwmon_vec.first().cloned(), + )), + "AMD", + ) } else { ( Npu::Other(OtherNpu::new( @@ -280,6 +298,7 @@ impl Npu { pub fn get_vendor(&self) -> Result<&'static Vendor> { Ok(match self { + Npu::Amd(npu) => npu.device(), Npu::Intel(npu) => npu.device(), Npu::Other(npu) => npu.device(), } @@ -289,6 +308,7 @@ impl Npu { pub fn pci_slot(&self) -> PciSlot { match self { + Npu::Amd(npu) => npu.pci_slot(), Npu::Intel(npu) => npu.pci_slot(), Npu::Other(npu) => npu.pci_slot(), } @@ -296,6 +316,7 @@ impl Npu { pub fn driver(&self) -> String { match self { + Npu::Amd(npu) => 
npu.driver(), Npu::Intel(npu) => npu.driver(), Npu::Other(npu) => npu.driver(), } @@ -303,6 +324,7 @@ impl Npu { pub fn name(&self) -> Result { match self { + Npu::Amd(npu) => npu.name(), Npu::Intel(npu) => npu.name(), Npu::Other(npu) => npu.name(), } @@ -310,27 +332,31 @@ impl Npu { pub fn usage(&self) -> Result { match self { + Npu::Amd(npu) => npu.usage(), Npu::Intel(npu) => npu.usage(), Npu::Other(npu) => npu.usage(), } } - pub fn used_vram(&self) -> Result { + pub fn used_memory(&self) -> Result { match self { - Npu::Intel(npu) => npu.used_vram(), - Npu::Other(npu) => npu.used_vram(), + Npu::Amd(npu) => npu.used_memory(), + Npu::Intel(npu) => npu.used_memory(), + Npu::Other(npu) => npu.used_memory(), } } - pub fn total_vram(&self) -> Result { + pub fn total_memory(&self) -> Result { match self { - Npu::Intel(npu) => npu.total_vram(), - Npu::Other(npu) => npu.total_vram(), + Npu::Amd(npu) => npu.total_memory(), + Npu::Intel(npu) => npu.total_memory(), + Npu::Other(npu) => npu.total_memory(), } } pub fn temperature(&self) -> Result { match self { + Npu::Amd(npu) => npu.temperature(), Npu::Intel(npu) => npu.temperature(), Npu::Other(npu) => npu.temperature(), } @@ -338,6 +364,7 @@ impl Npu { pub fn power_usage(&self) -> Result { match self { + Npu::Amd(npu) => npu.power_usage(), Npu::Intel(npu) => npu.power_usage(), Npu::Other(npu) => npu.power_usage(), } @@ -345,6 +372,7 @@ impl Npu { pub fn core_frequency(&self) -> Result { match self { + Npu::Amd(npu) => npu.core_frequency(), Npu::Intel(npu) => npu.core_frequency(), Npu::Other(npu) => npu.core_frequency(), } @@ -352,6 +380,7 @@ impl Npu { pub fn memory_frequency(&self) -> Result { match self { + Npu::Amd(npu) => npu.memory_frequency(), Npu::Intel(npu) => npu.memory_frequency(), Npu::Other(npu) => npu.memory_frequency(), } @@ -359,6 +388,7 @@ impl Npu { pub fn power_cap(&self) -> Result { match self { + Npu::Amd(npu) => npu.power_cap(), Npu::Intel(npu) => npu.power_cap(), Npu::Other(npu) => npu.power_cap(), } 
@@ -366,6 +396,7 @@ impl Npu { pub fn power_cap_max(&self) -> Result { match self { + Npu::Amd(npu) => npu.power_cap(), Npu::Intel(npu) => npu.power_cap_max(), Npu::Other(npu) => npu.power_cap_max(), } diff --git a/src/utils/npu/other.rs b/src/utils/npu/other.rs index 2104e30..8f85185 100644 --- a/src/utils/npu/other.rs +++ b/src/utils/npu/other.rs @@ -64,12 +64,12 @@ impl NpuImpl for OtherNpu { self.drm_usage().map(|usage| usage as f64 / 100.0) } - fn used_vram(&self) -> Result { - self.drm_used_vram().map(|usage| usage as usize) + fn used_memory(&self) -> Result { + self.drm_used_memory().map(|usage| usage as usize) } - fn total_vram(&self) -> Result { - self.drm_total_vram().map(|usage| usage as usize) + fn total_memory(&self) -> Result { + self.drm_total_memory().map(|usage| usage as usize) } fn temperature(&self) -> Result { diff --git a/src/utils/process.rs b/src/utils/process.rs index fa24ac0..0680f71 100644 --- a/src/utils/process.rs +++ b/src/utils/process.rs @@ -1,7 +1,7 @@ use anyhow::{bail, Context, Result}; use config::LIBEXECDIR; use log::{debug, error, info}; -use process_data::{pci_slot::PciSlot, GpuUsageStats, Niceness, ProcessData}; +use process_data::{pci_slot::PciSlot, GpuUsageStats, Niceness, NpuUsageStats, ProcessData}; use std::{ collections::BTreeMap, ffi::{OsStr, OsString}, @@ -69,6 +69,7 @@ pub struct Process { pub read_bytes_last: Option, pub write_bytes_last: Option, pub gpu_usage_stats_last: BTreeMap, + pub npu_usage_stats_last: BTreeMap, pub display_name: String, } @@ -152,6 +153,7 @@ impl Process { read_bytes_last, write_bytes_last, gpu_usage_stats_last: Default::default(), + npu_usage_stats_last: Default::default(), display_name, } } @@ -456,6 +458,38 @@ impl Process { .sum() } + #[must_use] + pub fn npu_usage(&self) -> f32 { + let mut returned_npu_usage = 0.0; + for (npu, usage) in &self.data.npu_usage_stats { + if let Some(old_usage) = self.npu_usage_stats_last.get(npu) { + let this_npu_usage = if old_usage.usage == 0 { + 0.0 + } 
else { + ((usage.usage.saturating_sub(old_usage.usage) as f32) + / (self.data.timestamp.saturating_sub(self.timestamp_last) as f32) + .finite_or_default()) + / 1_000_000.0 + }; + + if this_npu_usage > returned_npu_usage { + returned_npu_usage = this_npu_usage; + } + } + } + + returned_npu_usage + } + + #[must_use] + pub fn npu_mem_usage(&self) -> u64 { + self.data + .npu_usage_stats + .values() + .map(|stats| stats.mem) + .sum() + } + #[must_use] pub fn starttime(&self) -> f64 { self.data.starttime as f64 / *TICK_RATE as f64 From 40fe09b2c60d38098d635c7a884d09a18f77d08c Mon Sep 17 00:00:00 2001 From: nokyan Date: Sat, 14 Dec 2024 22:34:56 +0100 Subject: [PATCH 2/8] Use driver name to differentiate between NPU and GPU usage stats --- lib/process_data/src/lib.rs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/lib/process_data/src/lib.rs b/lib/process_data/src/lib.rs index 74f9cf8..6a97053 100644 --- a/lib/process_data/src/lib.rs +++ b/lib/process_data/src/lib.rs @@ -26,6 +26,9 @@ const STAT_SYSTEM_CPU_TIME: usize = 14 - STAT_OFFSET; const STAT_NICE: usize = 18 - STAT_OFFSET; const STAT_STARTTIME: usize = 21 - STAT_OFFSET; +const GPU_DRIVER_NAMES: &[&str] = &["amdgpu", "i915"]; +const NPU_DRIVER_NAMES: &[&str] = &["amdxdna_accel_driver"]; + static USERS_CACHE: LazyLock> = LazyLock::new(|| unsafe { uzers::all_users() .map(|user| (user.uid(), user.name().to_string_lossy().to_string())) @@ -567,7 +570,11 @@ impl ProcessData { .and_then(|captures| captures.get(1)) .map(|capture| capture.as_str()); - if driver.is_some() { + if let Some(driver) = driver { + if !NPU_DRIVER_NAMES.contains(&driver) { + bail!("this is not an NPU") + } + let pci_slot = RE_DRM_PDEV .captures(&content) .and_then(|captures| captures.get(1)) @@ -614,7 +621,16 @@ impl ProcessData { .and_then(|captures| captures.get(1)) .and_then(|capture| capture.as_str().parse::().ok()); + let driver = RE_DRM_DRIVER + .captures(&content) + .and_then(|captures| captures.get(1)) + 
.map(|capture| capture.as_str()); + if let (Some(pci_slot), Some(client_id)) = (pci_slot, client_id) { + if !GPU_DRIVER_NAMES.contains(&driver.unwrap_or_default()) { + bail!("this is not a GPU"); + } + let gfx = RE_DRM_ENGINE_GFX // amd .captures(&content) .and_then(|captures| captures.get(1)) From 6935f679ee003aa2aad287027064da9b04bfe778 Mon Sep 17 00:00:00 2001 From: nokyan Date: Sun, 15 Dec 2024 08:08:44 +0100 Subject: [PATCH 3/8] Small cleanups --- lib/process_data/src/lib.rs | 169 ++++++++++++++++-------------------- 1 file changed, 76 insertions(+), 93 deletions(-) diff --git a/lib/process_data/src/lib.rs b/lib/process_data/src/lib.rs index 6a97053..e9e0ccc 100644 --- a/lib/process_data/src/lib.rs +++ b/lib/process_data/src/lib.rs @@ -11,8 +11,6 @@ use nvml_wrapper::{Device, Nvml}; use pci_slot::PciSlot; use serde::{Deserialize, Serialize}; use std::collections::{BTreeMap, HashMap, HashSet}; -use std::fs::File; -use std::io::{Read, Write}; use std::os::linux::fs::MetadataExt; use std::path::Path; use std::str::FromStr; @@ -54,8 +52,6 @@ static RE_DRM_DRIVER: Lazy = lazy_regex!(r"drm-driver:\s*(.+)"); static RE_DRM_PDEV: Lazy = lazy_regex!(r"drm-pdev:\s*([0-9A-Fa-f]{4}:[0-9A-Fa-f]{2}:[0-9A-Fa-f]{2}\.[0-9A-Fa-f])"); -static RE_DRM_CLIENT_ID: Lazy = lazy_regex!(r"drm-client-id:\s*(\d+)"); - // AMD only static RE_DRM_ENGINE_NPU_AMDXDNA: Lazy = lazy_regex!(r"drm-engine-npu-amdxdna:\s*(\d+)\s*ns"); @@ -500,24 +496,24 @@ impl ProcessData { seen_fds.insert(fd_num); - if let Ok(stats) = Self::read_gpu_fdinfo(&fdinfo_path) { + if let Ok((pci_slot, stats)) = Self::read_gpu_fdinfo(&fdinfo_path) { return_map - .entry(stats.0) + .entry(pci_slot) .and_modify(|existing_value: &mut GpuUsageStats| { - if stats.1.gfx > existing_value.gfx { - existing_value.gfx = stats.1.gfx; + if stats.gfx > existing_value.gfx { + existing_value.gfx = stats.gfx; } - if stats.1.dec > existing_value.dec { - existing_value.dec = stats.1.dec; + if stats.dec > existing_value.dec { + 
existing_value.dec = stats.dec; } - if stats.1.enc > existing_value.enc { - existing_value.enc = stats.1.enc; + if stats.enc > existing_value.enc { + existing_value.enc = stats.enc; } - if stats.1.mem > existing_value.mem { - existing_value.mem = stats.1.mem; + if stats.mem > existing_value.mem { + existing_value.mem = stats.mem; } }) - .or_insert(stats.1); + .or_insert(stats); } } @@ -560,10 +556,7 @@ impl ProcessData { } fn read_npu_fdinfo>(fdinfo_path: P) -> Result<(PciSlot, NpuUsageStats)> { - let mut content = String::new(); - let mut fdinfo_file = File::open(fdinfo_path.as_ref())?; - fdinfo_file.read_to_string(&mut content)?; - fdinfo_file.flush()?; + let content = std::fs::read_to_string(fdinfo_path.as_ref())?; let driver = RE_DRM_DRIVER .captures(&content) @@ -605,96 +598,86 @@ impl ProcessData { bail!("unable to find gpu information in this fdinfo"); } - fn read_gpu_fdinfo>(fdinfo_path: P) -> Result<(PciSlot, GpuUsageStats, i64)> { - let mut content = String::new(); - let mut fdinfo_file = File::open(fdinfo_path.as_ref())?; - fdinfo_file.read_to_string(&mut content)?; - fdinfo_file.flush()?; + fn read_gpu_fdinfo>(fdinfo_path: P) -> Result<(PciSlot, GpuUsageStats)> { + let content = std::fs::read_to_string(fdinfo_path.as_ref())?; let pci_slot = RE_DRM_PDEV .captures(&content) .and_then(|captures| captures.get(1)) - .and_then(|capture| PciSlot::from_str(capture.as_str()).ok()); - - let client_id = RE_DRM_CLIENT_ID - .captures(&content) - .and_then(|captures| captures.get(1)) - .and_then(|capture| capture.as_str().parse::().ok()); + .and_then(|capture| PciSlot::from_str(capture.as_str()).ok()) + .context("can't parse PCI slot ID")?; let driver = RE_DRM_DRIVER .captures(&content) .and_then(|captures| captures.get(1)) - .map(|capture| capture.as_str()); - - if let (Some(pci_slot), Some(client_id)) = (pci_slot, client_id) { - if !GPU_DRIVER_NAMES.contains(&driver.unwrap_or_default()) { - bail!("this is not a GPU"); - } - - let gfx = RE_DRM_ENGINE_GFX // amd - 
.captures(&content) - .and_then(|captures| captures.get(1)) - .and_then(|capture| capture.as_str().parse::().ok()) - .or_else(|| { - // intel - RE_DRM_ENGINE_RENDER - .captures(&content) - .and_then(|captures| captures.get(1)) - .and_then(|capture| capture.as_str().parse::().ok()) - }) - .unwrap_or_default(); + .map(|capture| capture.as_str()) + .unwrap_or_default(); - let compute = RE_DRM_ENGINE_COMPUTE - .captures(&content) - .and_then(|captures| captures.get(1)) - .and_then(|capture| capture.as_str().parse::().ok()) - .unwrap_or_default(); + if !GPU_DRIVER_NAMES.contains(&driver) { + bail!("this is not a GPU"); + } - let enc = RE_DRM_ENGINE_ENC // amd - .captures(&content) - .and_then(|captures| captures.get(1)) - .and_then(|capture| capture.as_str().parse::().ok()) - .or_else(|| { - // intel - RE_DRM_ENGINE_VIDEO - .captures(&content) - .and_then(|captures| captures.get(1)) - .and_then(|capture| capture.as_str().parse::().ok()) - }) - .unwrap_or_default(); + let gfx = RE_DRM_ENGINE_GFX // amd + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| capture.as_str().parse::().ok()) + .or_else(|| { + // intel + RE_DRM_ENGINE_RENDER + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| capture.as_str().parse::().ok()) + }) + .unwrap_or_default(); - let dec = RE_DRM_ENGINE_DEC - .captures(&content) - .and_then(|captures| captures.get(1)) - .and_then(|capture| capture.as_str().parse::().ok()) - .unwrap_or_default(); + let compute = RE_DRM_ENGINE_COMPUTE + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| capture.as_str().parse::().ok()) + .unwrap_or_default(); - let vram = RE_DRM_MEMORY_VRAM - .captures(&content) - .and_then(|captures| captures.get(1)) - .and_then(|capture| capture.as_str().parse::().ok()) - .unwrap_or_default() - .saturating_mul(1024); + let enc = RE_DRM_ENGINE_ENC // amd + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| 
capture.as_str().parse::().ok()) + .or_else(|| { + // intel + RE_DRM_ENGINE_VIDEO + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| capture.as_str().parse::().ok()) + }) + .unwrap_or_default(); - let gtt = RE_DRM_MEMORY_GTT - .captures(&content) - .and_then(|captures| captures.get(1)) - .and_then(|capture| capture.as_str().parse::().ok()) - .unwrap_or_default() - .saturating_mul(1024); + let dec = RE_DRM_ENGINE_DEC + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| capture.as_str().parse::().ok()) + .unwrap_or_default(); - let stats = GpuUsageStats { - gfx: gfx.saturating_add(compute), - mem: vram.saturating_add(gtt), - enc, - dec, - nvidia: false, - }; + let vram = RE_DRM_MEMORY_VRAM + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| capture.as_str().parse::().ok()) + .unwrap_or_default() + .saturating_mul(1024); - return Ok((pci_slot, stats, client_id)); - } + let gtt = RE_DRM_MEMORY_GTT + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| capture.as_str().parse::().ok()) + .unwrap_or_default() + .saturating_mul(1024); + + let stats = GpuUsageStats { + gfx: gfx.saturating_add(compute), + mem: vram.saturating_add(gtt), + enc, + dec, + nvidia: false, + }; - bail!("unable to find gpu information in this fdinfo"); + return Ok((pci_slot, stats)); } fn nvidia_gpu_stats_all(pid: i32) -> BTreeMap { From 1514eee49ab5dbdd38f62a264070b1d8623c214a Mon Sep 17 00:00:00 2001 From: nokyan Date: Sun, 15 Dec 2024 08:20:49 +0100 Subject: [PATCH 4/8] Don't use '?' 
in for loops --- lib/process_data/src/lib.rs | 63 +++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 34 deletions(-) diff --git a/lib/process_data/src/lib.rs b/lib/process_data/src/lib.rs index e9e0ccc..506a359 100644 --- a/lib/process_data/src/lib.rs +++ b/lib/process_data/src/lib.rs @@ -485,8 +485,7 @@ impl ProcessData { let mut seen_fds = HashSet::new(); let mut return_map = BTreeMap::new(); - for entry in std::fs::read_dir(fdinfo_dir)? { - let entry = entry?; + for entry in std::fs::read_dir(fdinfo_dir)?.flatten() { let fdinfo_path = entry.path(); let (plausible, fd_num) = Self::drm_fdinfo_plausible(&fdinfo_path, pid, &seen_fds); @@ -526,8 +525,7 @@ impl ProcessData { let mut seen_fds = HashSet::new(); let mut return_map = BTreeMap::new(); - for entry in std::fs::read_dir(fdinfo_dir)? { - let entry = entry?; + for entry in std::fs::read_dir(fdinfo_dir)?.flatten() { let fdinfo_path = entry.path(); let (plausible, fd_num) = Self::drm_fdinfo_plausible(&fdinfo_path, pid, &seen_fds); @@ -558,44 +556,41 @@ impl ProcessData { fn read_npu_fdinfo>(fdinfo_path: P) -> Result<(PciSlot, NpuUsageStats)> { let content = std::fs::read_to_string(fdinfo_path.as_ref())?; - let driver = RE_DRM_DRIVER + let pci_slot = RE_DRM_PDEV .captures(&content) .and_then(|captures| captures.get(1)) - .map(|capture| capture.as_str()); - - if let Some(driver) = driver { - if !NPU_DRIVER_NAMES.contains(&driver) { - bail!("this is not an NPU") - } + .and_then(|capture| PciSlot::from_str(capture.as_str()).ok()) + .context("can't parse PCI slot ID")?; - let pci_slot = RE_DRM_PDEV - .captures(&content) - .and_then(|captures| captures.get(1)) - .and_then(|capture| PciSlot::from_str(capture.as_str()).ok()) - .unwrap_or_default(); + let driver = RE_DRM_DRIVER + .captures(&content) + .and_then(|captures| captures.get(1)) + .map(|capture| capture.as_str()) + .unwrap_or_default(); - let usage = RE_DRM_ENGINE_NPU_AMDXDNA - .captures(&content) - .and_then(|captures| captures.get(1)) - 
.and_then(|capture| capture.as_str().parse::().ok()) - .unwrap_or_default(); + if !NPU_DRIVER_NAMES.contains(&driver) { + bail!("this is not an NPU") + } - let total_memory = RE_DRM_TOTAL_MEMORY - .captures(&content) - .and_then(|captures| captures.get(1)) - .and_then(|capture| capture.as_str().parse::().ok()) - .unwrap_or_default() - .saturating_mul(1024); + let usage = RE_DRM_ENGINE_NPU_AMDXDNA + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| capture.as_str().parse::().ok()) + .unwrap_or_default(); - let stats = NpuUsageStats { - usage, - mem: total_memory, - }; + let total_memory = RE_DRM_TOTAL_MEMORY + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| capture.as_str().parse::().ok()) + .unwrap_or_default() + .saturating_mul(1024); - return Ok((pci_slot, stats)); - } + let stats = NpuUsageStats { + usage, + mem: total_memory, + }; - bail!("unable to find gpu information in this fdinfo"); + return Ok((pci_slot, stats)); } fn read_gpu_fdinfo>(fdinfo_path: P) -> Result<(PciSlot, GpuUsageStats)> { From 1d9b04e8859cedf2c24880ea76632353a04706cd Mon Sep 17 00:00:00 2001 From: nokyan Date: Sun, 15 Dec 2024 09:58:32 +0100 Subject: [PATCH 5/8] Add debug and trace prints for resources-processes --- Cargo.lock | 2 + lib/process_data/Cargo.toml | 2 + lib/process_data/src/lib.rs | 117 +++++++++++++++++++++++++++------ src/bin/resources-processes.rs | 11 ++++ src/utils/gpu/nvidia.rs | 12 +--- src/utils/process.rs | 17 ++++- 6 files changed, 128 insertions(+), 33 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e946152..8f347de 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1278,9 +1278,11 @@ dependencies = [ "glob", "lazy-regex", "libc", + "log", "num_cpus", "nutype", "nvml-wrapper", + "pretty_env_logger", "serde", "syscalls", "sysconf", diff --git a/lib/process_data/Cargo.toml b/lib/process_data/Cargo.toml index 52489eb..9de3bcc 100644 --- a/lib/process_data/Cargo.toml +++ b/lib/process_data/Cargo.toml @@ 
-21,9 +21,11 @@ anyhow = "1.0.94" glob = "0.3.1" lazy-regex = "3.3.0" libc = "0.2.167" +log = "0.4.22" num_cpus = "1.16.0" nutype = { version = "0.5.0", features = ["serde"] } nvml-wrapper = "0.10.0" +pretty_env_logger = "0.5" serde = { version = "1.0.215", features = ["serde_derive"] } syscalls = { version = "0.6.18", features = ["all"] } sysconf = "0.3.4" diff --git a/lib/process_data/src/lib.rs b/lib/process_data/src/lib.rs index 506a359..e2ffbf9 100644 --- a/lib/process_data/src/lib.rs +++ b/lib/process_data/src/lib.rs @@ -3,6 +3,7 @@ pub mod pci_slot; use anyhow::{bail, Context, Result}; use glob::glob; use lazy_regex::{lazy_regex, Lazy, Regex}; +use log::{debug, trace, warn}; use nutype::nutype; use nvml_wrapper::enums::device::UsedGpuMemory; use nvml_wrapper::error::NvmlError; @@ -27,10 +28,18 @@ const STAT_STARTTIME: usize = 21 - STAT_OFFSET; const GPU_DRIVER_NAMES: &[&str] = &["amdgpu", "i915"]; const NPU_DRIVER_NAMES: &[&str] = &["amdxdna_accel_driver"]; +const MAJOR: u32 = 226; + static USERS_CACHE: LazyLock> = LazyLock::new(|| unsafe { - uzers::all_users() - .map(|user| (user.uid(), user.name().to_string_lossy().to_string())) - .collect() + debug!("Initializing users cache…"); + let users: HashMap = uzers::all_users() + .map(|user| { + trace!("Found user {}", user.name().to_string_lossy()); + (user.uid(), user.name().to_string_lossy().to_string()) + }) + .collect(); + debug!("Found {} users", users.len()); + users }); static PAGESIZE: LazyLock = LazyLock::new(sysconf::pagesize); @@ -82,16 +91,25 @@ static RE_DRM_ENGINE_VIDEO: Lazy = lazy_regex!(r"drm-engine-video:\s*(\d+ static RE_DRM_TOTAL_MEMORY: Lazy = lazy_regex!(r"drm-total-memory:\s*(\d+)\s*KiB"); -static NVML: Lazy> = Lazy::new(Nvml::init); +static NVML: Lazy> = Lazy::new(|| { + debug!("Initializing connection to NVML…"); + Nvml::init().inspect_err(|err| warn!("Unable to connect to NVML: {err}")) +}); static NVML_DEVICES: Lazy> = Lazy::new(|| { if let Ok(nvml) = NVML.as_ref() { + debug!("Looking 
for NVIDIA devices…"); let device_count = nvml.device_count().unwrap_or(0); let mut return_vec = Vec::with_capacity(device_count as usize); for i in 0..device_count { if let Ok(gpu) = nvml.device_by_index(i) { if let Ok(pci_slot) = gpu.pci_info().map(|pci_info| pci_info.bus_id) { let pci_slot = PciSlot::from_str(&pci_slot).unwrap(); + debug!( + "Found {} at {}", + gpu.name().unwrap_or("N/A".into()), + pci_slot + ); return_vec.push((pci_slot, gpu)); } } @@ -253,13 +271,6 @@ impl ProcessData { pub fn try_from_path>(proc_path: P) -> Result { let proc_path = proc_path.as_ref(); - let stat = std::fs::read_to_string(proc_path.join("stat"))?; - let statm = std::fs::read_to_string(proc_path.join("statm"))?; - let status = std::fs::read_to_string(proc_path.join("status"))?; - let comm = std::fs::read_to_string(proc_path.join("comm"))?; - let commandline = std::fs::read_to_string(proc_path.join("cmdline"))?; - let io = std::fs::read_to_string(proc_path.join("io")).ok(); - let pid = proc_path .file_name() .context("proc_path terminates in ..")? @@ -267,6 +278,23 @@ impl ProcessData { .context("can't turn OsStr to str")? 
.parse()?; + trace!("Inspecting process {pid}…"); + + trace!("Reading info files…"); + let stat = std::fs::read_to_string(proc_path.join("stat")) + .inspect_err(|err| trace!("Error reading 'stat': {err}"))?; + let statm = std::fs::read_to_string(proc_path.join("statm")) + .inspect_err(|err| trace!("Error reading 'statm': {err}"))?; + let status = std::fs::read_to_string(proc_path.join("status")) + .inspect_err(|err| trace!("Error reading 'status': {err}"))?; + let comm = std::fs::read_to_string(proc_path.join("comm")) + .inspect_err(|err| trace!("Error reading 'comm': {err}"))?; + let commandline = std::fs::read_to_string(proc_path.join("cmdline")) + .inspect_err(|err| trace!("Error reading 'cmdline': {err}"))?; + let io = std::fs::read_to_string(proc_path.join("io")) + .inspect_err(|err| trace!("Error reading 'io': {err}")) + .ok(); + let user = USERS_CACHE .get(&Self::get_uid(proc_path)?) .cloned() @@ -275,7 +303,8 @@ impl ProcessData { let stat = stat .split(')') // since we don't care about the pid or the executable name, split after the executable name to make our life easier .last() - .context("stat doesn't have ')'")? + .context("stat doesn't have ')'") + .inspect_err(|err| trace!("Can't parse 'stat': {err}"))? 
.split(' ') .skip(1) // the first element would be a space, let's ignore that .collect::>(); @@ -288,23 +317,28 @@ impl ProcessData { let parent_pid = stat .get(STAT_PARENT_PID) .context("wrong stat file format") - .and_then(|x| x.parse().context("couldn't parse stat file content"))?; + .and_then(|x| x.parse().context("couldn't parse stat file content to int")) + .inspect_err(|err| trace!("Can't parse parent pid from 'stat': {err}"))?; let user_cpu_time = stat .get(STAT_USER_CPU_TIME) .context("wrong stat file format") - .and_then(|x| x.parse().context("couldn't parse stat file content"))?; + .and_then(|x| x.parse().context("couldn't parse stat file content to int")) + .inspect_err(|err| trace!("Can't parse user cpu time from 'stat': {err}"))?; let system_cpu_time = stat .get(STAT_SYSTEM_CPU_TIME) .context("wrong stat file format") - .and_then(|x| x.parse().context("couldn't parse stat file content"))?; + .and_then(|x| x.parse().context("couldn't parse stat file content to int")) + .inspect_err(|err| trace!("Can't parse system cpu time from 'stat': {err}"))?; let nice = stat .get(STAT_NICE) .context("wrong stat file format") - .and_then(|x| x.parse().context("couldn't parse stat file content"))?; + .and_then(|x| x.parse().context("couldn't parse stat file content to int")) + .inspect_err(|err| trace!("Can't parse nice from 'stat': {err}"))?; let starttime = stat .get(STAT_STARTTIME) .context("wrong stat file format") - .and_then(|x| x.parse().context("couldn't parse stat file content"))?; + .and_then(|x| x.parse().context("couldn't parse stat file content to int")) + .inspect_err(|err| trace!("Can't parse start time from 'stat': {err}"))?; let mut affinity = Vec::with_capacity(*NUM_CPUS); RE_AFFINITY @@ -340,7 +374,8 @@ impl ProcessData { .and_then(|x| { x.parse::() .context("couldn't parse statm file content") - })? + }) + .inspect_err(|err| trace!("Can't parse memory usage from 'statm': {err}"))? 
.saturating_sub( statm .get(2) @@ -353,6 +388,7 @@ impl ProcessData { .saturating_mul(*PAGESIZE); let cgroup = std::fs::read_to_string(proc_path.join("cgroup")) + .inspect_err(|err| trace!("Can't read cgroup: {err}")) .ok() .and_then(Self::sanitize_cgroup); @@ -408,6 +444,7 @@ impl ProcessData { } fn gpu_usage_stats(proc_path: &Path, pid: i32) -> BTreeMap { + trace!("Gathering GPU stats…"); let nvidia_stats = Self::nvidia_gpu_stats_all(pid); let mut other_stats = Self::other_gpu_usage_stats(proc_path, pid).unwrap_or_default(); other_stats.extend(nvidia_stats); @@ -431,22 +468,30 @@ impl ProcessData { .parse::() .unwrap_or(0); if fd_num <= 2 { + trace!( + "fdinfo {fd_num} deemed as not plausible. Reason: fd_num ≤ 2 (probably std stream)" + ); return (false, fd_num); } let _file = std::fs::File::open(&fdinfo_path); if _file.is_err() { - return (true, fd_num); + trace!("fdinfo {fd_num} deemed as not plausible. Reason: File can't be opened"); + return (false, fd_num); } let file = _file.unwrap(); let _metadata = file.metadata(); if _metadata.is_err() { - return (true, fd_num); + trace!( + "fdinfo {fd_num} deemed as not plausible. Reason: File's metadata can't be read" + ); + return (false, fd_num); } let metadata = _metadata.unwrap(); if !metadata.is_file() { + trace!("fdinfo {fd_num} deemed as not plausible. Reason: Not a file"); return (false, fd_num); } @@ -455,8 +500,15 @@ impl ProcessData { let fd_path = fdinfo_path.to_str().map(|s| s.replace("fdinfo", "fd")); if let Some(fd_path) = fd_path { if let Ok(fd_metadata) = std::fs::metadata(fd_path) { + if (fd_metadata.st_mode() & libc::S_IFMT) != libc::S_IFCHR { + trace!("fdinfo {fd_num} deemed as not plausible. Reason: Wrong st_mode"); + return (false, fd_num); + } let major = unsafe { libc::major(fd_metadata.st_rdev()) }; - if (fd_metadata.st_mode() & libc::S_IFMT) != libc::S_IFCHR || major != 226 { + if major != MAJOR { + trace!( + "fdinfo {fd_num} deemed as not plausible. 
Reason: Wrong major (expected: {MAJOR}, got: {major})" + ); return (false, fd_num); } } @@ -470,9 +522,11 @@ impl ProcessData { == 0 }); if not_unique { + trace!("fdinfo {fd_num} deemed as not plausible. Reason: kcmp indicated that we've already seen this file"); return (false, fd_num); } + trace!("fdinfo {fd_num} deemed as plausible"); (true, fd_num) } @@ -480,6 +534,7 @@ impl ProcessData { proc_path: &Path, pid: i32, ) -> Result> { + trace!("Gathering other GPU stats…"); let fdinfo_dir = proc_path.join("fdinfo"); let mut seen_fds = HashSet::new(); @@ -520,6 +575,7 @@ impl ProcessData { } fn npu_usage_stats(proc_path: &Path, pid: i32) -> Result> { + trace!("Gathering NPU stats…"); let fdinfo_dir = proc_path.join("fdinfo"); let mut seen_fds = HashSet::new(); @@ -554,6 +610,10 @@ impl ProcessData { } fn read_npu_fdinfo>(fdinfo_path: P) -> Result<(PciSlot, NpuUsageStats)> { + trace!( + "Reading and parsing {} for NPU stats…", + fdinfo_path.as_ref().to_string_lossy() + ); let content = std::fs::read_to_string(fdinfo_path.as_ref())?; let pci_slot = RE_DRM_PDEV @@ -569,6 +629,7 @@ impl ProcessData { .unwrap_or_default(); if !NPU_DRIVER_NAMES.contains(&driver) { + trace!("Driver '{driver}' is not known to be NPU-related, skipping"); bail!("this is not an NPU") } @@ -590,10 +651,16 @@ impl ProcessData { mem: total_memory, }; + trace!("Success reading GPU data for {pci_slot}: {stats:?}"); + return Ok((pci_slot, stats)); } fn read_gpu_fdinfo>(fdinfo_path: P) -> Result<(PciSlot, GpuUsageStats)> { + trace!( + "Reading and parsing {} for GPU stats…", + fdinfo_path.as_ref().to_string_lossy() + ); let content = std::fs::read_to_string(fdinfo_path.as_ref())?; let pci_slot = RE_DRM_PDEV @@ -609,6 +676,7 @@ impl ProcessData { .unwrap_or_default(); if !GPU_DRIVER_NAMES.contains(&driver) { + trace!("Driver {driver} is not known to be GPU-related, skipping"); bail!("this is not a GPU"); } @@ -672,10 +740,14 @@ impl ProcessData { nvidia: false, }; + trace!("Success reading GPU data for 
{pci_slot}: {stats:?}"); + return Ok((pci_slot, stats)); } fn nvidia_gpu_stats_all(pid: i32) -> BTreeMap { + trace!("Gathering NVIDIA GPU stats…"); + let mut return_map = BTreeMap::new(); for (pci_slot, _) in NVML_DEVICES.iter() { @@ -688,6 +760,7 @@ impl ProcessData { } fn nvidia_gpu_stats(pid: i32, pci_slot: PciSlot) -> Result { + trace!("Gathering GPU stats for NVIDIA GPU at {pci_slot}…"); let this_process_stats = NVIDIA_PROCESSES_STATS .read() .unwrap() @@ -722,6 +795,7 @@ impl ProcessData { } fn nvidia_process_infos() -> HashMap> { + trace!("Refreshing NVIDIA process infos…"); let mut return_map = HashMap::new(); for (pci_slot, gpu) in NVML_DEVICES.iter() { @@ -735,6 +809,7 @@ impl ProcessData { } fn nvidia_process_stats() -> HashMap> { + trace!("Refreshing NVIDIA process stats…"); let mut return_map = HashMap::new(); for (pci_slot, gpu) in NVML_DEVICES.iter() { diff --git a/src/bin/resources-processes.rs b/src/bin/resources-processes.rs index 6456971..154d01a 100644 --- a/src/bin/resources-processes.rs +++ b/src/bin/resources-processes.rs @@ -1,4 +1,5 @@ use anyhow::Result; +use log::{info, trace}; use process_data::ProcessData; use ron::ser::PrettyConfig; use std::io::{Read, Write}; @@ -18,6 +19,11 @@ struct Args { } fn main() -> Result<()> { + // Initialize logger + pretty_env_logger::init(); + + info!("Starting resources-processes…"); + let args = Args::parse(); if args.once { @@ -29,12 +35,14 @@ fn main() -> Result<()> { let mut buffer = [0; 1]; std::io::stdin().read_exact(&mut buffer)?; + trace!("Received character"); output(args.ron)?; } } fn output(ron: bool) -> Result<()> { + trace!("Gathering process data…"); let data = ProcessData::all_process_data()?; let encoded = if ron { @@ -50,10 +58,13 @@ fn output(ron: bool) -> Result<()> { let stdout = std::io::stdout(); let mut handle = stdout.lock(); + trace!("Sending content length ({})…", encoded.len()); handle.write_all(&len_byte_array)?; + trace!("Sending content…"); handle.write_all(&encoded)?; + 
trace!("Flushing…"); handle.flush()?; Ok(()) } diff --git a/src/utils/gpu/nvidia.rs b/src/utils/gpu/nvidia.rs index f1668f3..4f47cfd 100644 --- a/src/utils/gpu/nvidia.rs +++ b/src/utils/gpu/nvidia.rs @@ -10,15 +10,9 @@ use process_data::pci_slot::PciSlot; use std::{path::PathBuf, sync::LazyLock}; static NVML: LazyLock> = LazyLock::new(|| { - let nvml = Nvml::init(); - - if let Err(error) = nvml.as_ref() { - warn!("Connection to NVML failed, reason: {error}"); - } else { - debug!("Successfully connected to NVML"); - } - - nvml + Nvml::init() + .inspect_err(|err| warn!("Unable to connect to NVML: {err}")) + .inspect(|_| debug!("Successfully connected to NVML")) }); use crate::utils::pci::Device; diff --git a/src/utils/process.rs b/src/utils/process.rs index 0680f71..bf228d8 100644 --- a/src/utils/process.rs +++ b/src/utils/process.rs @@ -35,18 +35,29 @@ static OTHER_PROCESS: LazyLock> = LazyLock::new let child = if *IS_FLATPAK { debug!("Spawning resources-processes in Flatpak mode ({proxy_path})"); Command::new(FLATPAK_SPAWN) - .args(["--host", proxy_path.as_str()]) + .args([ + &format!( + "--env=RUST_LOG={}", + std::env::var("RUST_LOG=resources").unwrap_or("warn".into()) + ), + "--host", + proxy_path.as_str(), + ]) .stdin(Stdio::piped()) .stdout(Stdio::piped()) - .stderr(Stdio::null()) + .stderr(Stdio::inherit()) .spawn() .unwrap() } else { debug!("Spawning resources-processes in native mode ({proxy_path})"); Command::new(proxy_path) + .arg(&format!( + "--env=RUST_LOG={}", + std::env::var("RUST_LOG=resources").unwrap_or("warn".into()) + )) .stdin(Stdio::piped()) .stdout(Stdio::piped()) - .stderr(Stdio::null()) + .stderr(Stdio::inherit()) .spawn() .unwrap() }; From 5fb2982d8ad721779f62b4bfda53efb2a23d0d58 Mon Sep 17 00:00:00 2001 From: nokyan Date: Sun, 15 Dec 2024 10:11:49 +0100 Subject: [PATCH 6/8] Small typo --- lib/process_data/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/process_data/src/lib.rs b/lib/process_data/src/lib.rs 
index e2ffbf9..477e025 100644 --- a/lib/process_data/src/lib.rs +++ b/lib/process_data/src/lib.rs @@ -651,7 +651,7 @@ impl ProcessData { mem: total_memory, }; - trace!("Success reading GPU data for {pci_slot}: {stats:?}"); + trace!("Success reading NPU data for {pci_slot}: {stats:?}"); return Ok((pci_slot, stats)); } From f375eeb7c811672ea6a72af9b42cb132cfbf755a Mon Sep 17 00:00:00 2001 From: "Sv. Lockal" Date: Mon, 8 Dec 2025 00:41:14 +0800 Subject: [PATCH 7/8] Fix amdxdna collection and aggregation 1. fdinfo collection only accepted DRM devices (major 226), but amdxdna uses the accel subsystem (major 261) 2. Driver name in fdinfo is "amdxdna_accel_driver", not "amdxdna" 3. Memory usage was unavailable because amdxdna doesn't expose it via sysfs, only via per-process fdinfo 4. There is no "--env" parameter in resources-processes This change was tested against the latest release of the amdxdna driver, which is massively different from the driver shipped in the Linux kernel. The older driver may use "amdxdna" instead of "amdxdna_accel_driver". However, as new software works only with the new driver, I don't think it is even possible to test anything with the original driver. More details: https://wiki.gentoo.org/wiki/User:Lockal/AMDXDNA#Kernel Another note: Temperature, power, and frequency remain N/A as amdxdna does not expose a hwmon interface (at least on my Asus ROG Z13 device). Maybe it makes sense to remove these fields from the UI.
--- lib/process_data/src/lib.rs | 13 +++++++++---- src/ui/window.rs | 12 ++++++++++++ src/utils/process.rs | 13 +------------ 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/lib/process_data/src/lib.rs b/lib/process_data/src/lib.rs index 87884ca..bbe7504 100644 --- a/lib/process_data/src/lib.rs +++ b/lib/process_data/src/lib.rs @@ -36,6 +36,9 @@ const DRM_DRIVER: &str = "drm-driver"; const DRM_PDEV: &str = "drm-pdev"; +const DRM_MAJOR: u32 = 226; +const ACCEL_MAJOR: u32 = 261; + static USERS_CACHE: LazyLock> = LazyLock::new(|| unsafe { debug!("Initializing users cache…"); let users: HashMap = uzers::all_users() @@ -105,12 +108,12 @@ static DEC_NS_DRM_FIELDS: Lazy>> = Lazy::new(|| HashMap::from_iter([("amdgpu", vec!["drm-engine-dec"])])); static NPU_NS_FIELDS: Lazy>> = - Lazy::new(|| HashMap::from_iter([("amdxdna", vec!["drm-engine-npu-amdxdna"])])); + Lazy::new(|| HashMap::from_iter([("amdxdna_accel_driver", vec!["drm-engine-npu-amdxdna"])])); static MEM_DRM_FIELDS: Lazy>> = Lazy::new(|| { HashMap::from_iter([ ("amdgpu", vec!["drm-memory-gtt", "drm-memory-vram"]), - ("amdxdna", vec!["drm-total-memory"]), + ("amdxdna_accel_driver", vec!["drm-total-memory"]), ("i915", vec!["drm-total-local0", "drm-total-system0"]), ("v3d", vec!["drm-total-memory"]), ("xe", vec!["drm-total-gtt", "drm-total-vram0"]), @@ -508,7 +511,9 @@ impl ProcessData { if let Some(fd_path) = fd_path { if let Ok(fd_metadata) = std::fs::metadata(fd_path) { let major = libc::major(fd_metadata.st_rdev()); - if (fd_metadata.st_mode() & libc::S_IFMT) != libc::S_IFCHR || major != 226 { + if (fd_metadata.st_mode() & libc::S_IFMT) != libc::S_IFCHR + || (major != DRM_MAJOR && major != ACCEL_MAJOR) + { continue; } } @@ -578,7 +583,7 @@ impl ProcessData { .unwrap_or_default(); let stats = match driver.as_str() { - "amdxdna" => NpuUsageStats::AmdxdnaStats { + "amdxdna_accel_driver" => NpuUsageStats::AmdxdnaStats { usage_ns: NPU_NS_FIELDS .get(driver.as_str()) .map(|names| 
Self::parse_drm_fields(fdinfo, names, &RE_DRM_TIME)) diff --git a/src/ui/window.rs b/src/ui/window.rs index bda9072..6f6049d 100644 --- a/src/ui/window.rs +++ b/src/ui/window.rs @@ -591,6 +591,18 @@ impl MainWindow { .unwrap_or_default() }; + for npu_data_entry in &mut npu_data { + if npu_data_entry.used_memory.is_none() { + npu_data_entry.used_memory = Some( + process_data + .iter() + .filter_map(|p| p.npu_usage_stats.get(&npu_data_entry.pci_slot)) + .filter_map(|stats| stats.mem()) + .sum::() as usize, + ); + } + } + let refresh_data = RefreshData { cpu_data, mem_data, diff --git a/src/utils/process.rs b/src/utils/process.rs index f0431c1..61de356 100644 --- a/src/utils/process.rs +++ b/src/utils/process.rs @@ -41,14 +41,7 @@ static COMPANION_PROCESS: LazyLock> = LazyLock: let child = if *IS_FLATPAK { debug!("Spawning resources-processes in Flatpak mode ({proxy_path})"); Command::new(FLATPAK_SPAWN) - .args([ - &format!( - "--env=RUST_LOG={}", - std::env::var("RUST_LOG=resources").unwrap_or("warn".into()) - ), - "--host", - proxy_path.as_str(), - ]) + .args(["--host", proxy_path.as_str()]) .stdin(Stdio::piped()) .stdout(Stdio::piped()) .stderr(Stdio::inherit()) @@ -57,10 +50,6 @@ static COMPANION_PROCESS: LazyLock> = LazyLock: } else { debug!("Spawning resources-processes in native mode ({proxy_path})"); Command::new(proxy_path) - .arg(&format!( - "--env=RUST_LOG={}", - std::env::var("RUST_LOG=resources").unwrap_or("warn".into()) - )) .stdin(Stdio::piped()) .stdout(Stdio::piped()) .stderr(Stdio::inherit()) From 60d4287fff10edf66432cc4e1d84b3e8ec74a744 Mon Sep 17 00:00:00 2001 From: "Sv. Lockal" Date: Mon, 8 Dec 2025 01:47:43 +0800 Subject: [PATCH 8/8] Read frequencies for amdxdna The driver exposes 2 values via DRM_AMDXDNA_QUERY_CLOCK_METADATA: "MP-NPU clock" and "H clock". The device has multiple power profiles, each of which switches both clocks.
Examples: ``` [Power Mode] Mode ID: 1 Mode Name: POWERSAVER [Clock Frequencies] MP-NPU Clock: 396 MHz H Clock: 792 MHz ``` ``` [Power Mode] Mode ID: 4 Mode Name: TURBO [Clock Frequencies] MP-NPU Clock: 1267 MHz H Clock: 1800 MHz ``` There is no documentation for these abbreviations, but it looks like H stands for hardware and MP for memory port, so the H clock is mapped to core_frequency and the MP-NPU clock to memory_frequency. --- src/utils/npu/amd.rs | 81 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 78 insertions(+), 3 deletions(-) diff --git a/src/utils/npu/amd.rs b/src/utils/npu/amd.rs index 9d14bdd..1522e3c 100644 --- a/src/utils/npu/amd.rs +++ b/src/utils/npu/amd.rs @@ -1,12 +1,52 @@ -use anyhow::Result; +use anyhow::{Context, Result}; use process_data::pci_slot::PciSlot; +use std::fs::File; +use std::os::unix::io::AsRawFd; use std::path::{Path, PathBuf}; use crate::utils::pci::Device; use super::NpuImpl; +const DRM_IOCTL_BASE: u8 = b'd'; +const DRM_COMMAND_BASE: u8 = 0x40; +const DRM_AMDXDNA_GET_INFO: u8 = 7; +const DRM_AMDXDNA_QUERY_CLOCK_METADATA: u32 = 3; + +const IOC_WRITE: u32 = 1; +const IOC_READ: u32 = 2; + +const fn ioc(dir: u32, ty: u8, nr: u8, size: usize) -> libc::c_ulong { + ((dir << 30) | ((ty as u32) << 8) | (nr as u32) | ((size as u32) << 16)) as libc::c_ulong +} + +const fn iowr(ty: u8, nr: u8) -> libc::c_ulong { + ioc(IOC_READ | IOC_WRITE, ty, nr, std::mem::size_of::()) +} + +#[repr(C)] +struct AmdxdnaDrmGetInfo { + param: u32, + buffer_size: u32, + buffer: u64, +} + +#[repr(C)] +#[derive(Default)] +struct AmdxdnaDrmQueryClock { + name: [u8; 16], + freq_mhz: u32, + _pad: u32, +} + +#[repr(C)] +#[derive(Default)] +struct AmdxdnaDrmQueryClockMetadata { + mp_npu_clock: AmdxdnaDrmQueryClock, + h_clock: AmdxdnaDrmQueryClock, +} + #[derive(Debug, Clone, Default)] pub struct AmdNpu { @@ -33,6 +73,39 @@ impl AmdNpu { first_hwmon_path, } } + + fn query_clock_metadata(&self) -> Result<(u64, u64)> { + let accel_name = self + .sysfs_path + .file_name() +
.context("invalid sysfs path")? + .to_str() + .context("invalid accel name")?; + let dev_path = format!("/dev/accel/{accel_name}"); + + let file = File::open(&dev_path).context("failed to open accel device")?; + let fd = file.as_raw_fd(); + + let mut clock_metadata = AmdxdnaDrmQueryClockMetadata::default(); + let mut get_info = AmdxdnaDrmGetInfo { + param: DRM_AMDXDNA_QUERY_CLOCK_METADATA, + buffer_size: std::mem::size_of::() as u32, + buffer: &mut clock_metadata as *mut _ as u64, + }; + + let ioctl_cmd = + iowr::(DRM_IOCTL_BASE, DRM_COMMAND_BASE + DRM_AMDXDNA_GET_INFO); + + let ret = unsafe { libc::ioctl(fd, ioctl_cmd, &mut get_info) }; + if ret < 0 { + anyhow::bail!("ioctl failed: {}", std::io::Error::last_os_error()); + } + + let h_clock_hz = clock_metadata.h_clock.freq_mhz as u64 * 1_000_000; + let mp_npu_clock_hz = clock_metadata.mp_npu_clock.freq_mhz as u64 * 1_000_000; + + Ok((h_clock_hz, mp_npu_clock_hz)) + } } impl NpuImpl for AmdNpu { @@ -81,11 +154,13 @@ impl NpuImpl for AmdNpu { } fn core_frequency(&self) -> Result { - self.hwmon_core_frequency() + self.query_clock_metadata() + .map(|(h_clock_hz, _)| h_clock_hz as f64) } fn memory_frequency(&self) -> Result { - self.hwmon_memory_frequency() + self.query_clock_metadata() + .map(|(_, mp_npu_clock_hz)| mp_npu_clock_hz as f64) } fn power_cap(&self) -> Result {