diff --git a/Cargo.lock b/Cargo.lock index e7bbec9..bc90c9e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -984,9 +984,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.28" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "malloc_buf" @@ -1331,6 +1331,7 @@ dependencies = [ "glob", "lazy-regex", "libc", + "log", "num_cpus", "nutype", "nvml-wrapper", diff --git a/lib/process_data/Cargo.lock b/lib/process_data/Cargo.lock index 1d98f2c..640f2a4 100644 --- a/lib/process_data/Cargo.lock +++ b/lib/process_data/Cargo.lock @@ -212,9 +212,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.28" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "memchr" @@ -312,6 +312,7 @@ dependencies = [ "glob", "lazy-regex", "libc", + "log", "num_cpus", "nutype", "nvml-wrapper", diff --git a/lib/process_data/Cargo.toml b/lib/process_data/Cargo.toml index 6f06279..18af93d 100644 --- a/lib/process_data/Cargo.toml +++ b/lib/process_data/Cargo.toml @@ -21,6 +21,7 @@ anyhow = "1.0.100" glob = "0.3.3" lazy-regex = "3.4.2" libc = "0.2.177" +log = "0.4.29" num_cpus = "1.17.0" nutype = { version = "0.6.2", features = ["serde"] } nvml-wrapper = "0.11.0" diff --git a/lib/process_data/src/lib.rs b/lib/process_data/src/lib.rs index 9e2515e..bbe7504 100644 --- a/lib/process_data/src/lib.rs +++ b/lib/process_data/src/lib.rs @@ -1,9 +1,11 @@ pub mod gpu_usage; +pub mod npu_usage; pub mod pci_slot; use anyhow::{Context, Result, bail}; use glob::glob; use lazy_regex::{Lazy, Regex, lazy_regex}; +use log::{debug, trace, warn}; use nutype::nutype; use nvml_wrapper::enums::device::UsedGpuMemory; use nvml_wrapper::error::NvmlError; @@ -21,6 +23,7 @@ use std::sync::{LazyLock, RwLock}; use std::time::SystemTime; use crate::gpu_usage::{GpuIdentifier, GpuUsageStats, IntegerPercentage}; +use crate::npu_usage::NpuUsageStats; const STAT_OFFSET: usize = 2; // we split the stat contents where the executable name ends, which is the second element const STAT_PARENT_PID: usize = 3 - STAT_OFFSET; @@ -33,10 +36,19 @@ const DRM_DRIVER: &str = "drm-driver"; const DRM_PDEV: &str = "drm-pdev"; +const DRM_MAJOR: u32 = 226; +const ACCEL_MAJOR: u32 = 261; + static USERS_CACHE: LazyLock> = LazyLock::new(|| unsafe { - uzers::all_users() - .map(|user| (user.uid(), user.name().to_string_lossy().to_string())) - .collect() + debug!("Initializing users cache…"); + let users: HashMap = uzers::all_users() + .map(|user| { + trace!("Found user {}", user.name().to_string_lossy()); + (user.uid(), user.name().to_string_lossy().to_string()) + }) + .collect(); + debug!("Found {} users", users.len()); + users }); static PAGESIZE: LazyLock = LazyLock::new(sysconf::pagesize); @@ -95,25 +107,38 @@ static ENC_TOTAL_CYCLES_DRM_FIELDS: Lazy>> = static DEC_NS_DRM_FIELDS: Lazy>> = Lazy::new(|| HashMap::from_iter([("amdgpu", vec!["drm-engine-dec"])])); +static NPU_NS_FIELDS: Lazy>> = + Lazy::new(|| HashMap::from_iter([("amdxdna_accel_driver", vec!["drm-engine-npu-amdxdna"])])); + static MEM_DRM_FIELDS: Lazy>> = Lazy::new(|| { HashMap::from_iter([ ("amdgpu", vec!["drm-memory-gtt", "drm-memory-vram"]), + ("amdxdna_accel_driver", vec!["drm-total-memory"]), ("i915", vec!["drm-total-local0", "drm-total-system0"]), ("v3d", vec!["drm-total-memory"]), ("xe", vec!["drm-total-gtt", "drm-total-vram0"]), ]) }); -static NVML: Lazy> = Lazy::new(Nvml::init); +static NVML: Lazy> = Lazy::new(|| { + debug!("Initializing connection to NVML…"); + Nvml::init().inspect_err(|err| warn!("Unable to connect to NVML: {err}")) +}); static NVML_DEVICES: Lazy> = Lazy::new(|| { if let Ok(nvml) = NVML.as_ref() { + debug!("Looking for NVIDIA devices…"); let device_count = nvml.device_count().unwrap_or(0); let mut return_vec = Vec::with_capacity(device_count as usize); for i in 0..device_count { if let Ok(gpu) = nvml.device_by_index(i) { if let Ok(pci_slot) = gpu.pci_info().map(|pci_info| pci_info.bus_id) { let pci_slot = PciSlot::from_str(&pci_slot).unwrap(); + debug!( + "Found {} at {}", + gpu.name().unwrap_or("N/A".into()), + pci_slot + ); return_vec.push((pci_slot, gpu)); } } @@ -164,7 +189,7 @@ pub enum Containerization { Snap, } -/// Data that could be transferred us>ing `resources-processes`, separated from +/// Data that could be transferred using `resources-processes`, separated from /// `Process` mainly due to `Icon` not being able to derive `Serialize` and /// `Deserialize`. #[derive(Debug, Default, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)] @@ -187,6 +212,7 @@ pub struct ProcessData { pub write_bytes: Option, pub timestamp: u64, pub gpu_usage_stats: BTreeMap, + pub npu_usage_stats: BTreeMap, } impl ProcessData { @@ -266,13 +292,6 @@ impl ProcessData { pub fn try_from_path>(proc_path: P) -> Result { let proc_path = proc_path.as_ref(); - let stat = std::fs::read_to_string(proc_path.join("stat"))?; - let statm = std::fs::read_to_string(proc_path.join("statm"))?; - let status = std::fs::read_to_string(proc_path.join("status"))?; - let comm = std::fs::read_to_string(proc_path.join("comm"))?; - let commandline = std::fs::read_to_string(proc_path.join("cmdline"))?; - let io = std::fs::read_to_string(proc_path.join("io")).ok(); - let pid = proc_path .file_name() .context("proc_path terminates in ..")? @@ -280,6 +299,23 @@ impl ProcessData { .context("can't turn OsStr to str")? .parse()?; + trace!("Inspecting process {pid}…"); + + trace!("Reading info files…"); + let stat = std::fs::read_to_string(proc_path.join("stat")) + .inspect_err(|err| trace!("Error reading 'stat': {err}"))?; + let statm = std::fs::read_to_string(proc_path.join("statm")) + .inspect_err(|err| trace!("Error reading 'statm': {err}"))?; + let status = std::fs::read_to_string(proc_path.join("status")) + .inspect_err(|err| trace!("Error reading 'status': {err}"))?; + let comm = std::fs::read_to_string(proc_path.join("comm")) + .inspect_err(|err| trace!("Error reading 'comm': {err}"))?; + let commandline = std::fs::read_to_string(proc_path.join("cmdline")) + .inspect_err(|err| trace!("Error reading 'cmdline': {err}"))?; + let io = std::fs::read_to_string(proc_path.join("io")) + .inspect_err(|err| trace!("Error reading 'io': {err}")) + .ok(); + let user = USERS_CACHE .get(&Self::get_uid(proc_path)?) .cloned() @@ -288,7 +324,8 @@ impl ProcessData { let stat = stat .split(')') // since we don't care about the pid or the executable name, split after the executable name to make our life easier .last() - .context("stat doesn't have ')'")? + .context("stat doesn't have ')'") + .inspect_err(|err| trace!("Can't parse 'stat': {err}"))? .split(' ') .skip(1) // the first element would be a space, let's ignore that .collect::>(); @@ -300,23 +337,28 @@ impl ProcessData { let parent_pid = stat .get(STAT_PARENT_PID) .context("wrong stat file format") - .and_then(|x| x.parse().context("couldn't parse stat file content"))?; + .and_then(|x| x.parse().context("couldn't parse stat file content to int")) + .inspect_err(|err| trace!("Can't parse parent pid from 'stat': {err}"))?; let user_cpu_time = stat .get(STAT_USER_CPU_TIME) .context("wrong stat file format") - .and_then(|x| x.parse().context("couldn't parse stat file content"))?; + .and_then(|x| x.parse().context("couldn't parse stat file content to int")) + .inspect_err(|err| trace!("Can't parse user cpu time from 'stat': {err}"))?; let system_cpu_time = stat .get(STAT_SYSTEM_CPU_TIME) .context("wrong stat file format") - .and_then(|x| x.parse().context("couldn't parse stat file content"))?; + .and_then(|x| x.parse().context("couldn't parse stat file content to int")) + .inspect_err(|err| trace!("Can't parse system cpu time from 'stat': {err}"))?; let nice = stat .get(STAT_NICE) .context("wrong stat file format") - .and_then(|x| x.parse().context("couldn't parse stat file content"))?; + .and_then(|x| x.parse().context("couldn't parse stat file content to int")) + .inspect_err(|err| trace!("Can't parse nice from 'stat': {err}"))?; let starttime = stat .get(STAT_STARTTIME) .context("wrong stat file format") - .and_then(|x| x.parse().context("couldn't parse stat file content"))?; + .and_then(|x| x.parse().context("couldn't parse stat file content to int")) + .inspect_err(|err| trace!("Can't parse start time from 'stat': {err}"))?; let mut affinity = Vec::with_capacity(*NUM_CPUS); RE_AFFINITY @@ -352,7 +394,8 @@ impl ProcessData { .and_then(|x| { x.parse::() .context("couldn't parse statm file content") - })? + }) + .inspect_err(|err| trace!("Can't parse memory usage from 'statm': {err}"))? .saturating_sub( statm .get(2) @@ -365,6 +408,7 @@ impl ProcessData { .saturating_mul(*PAGESIZE); let cgroup = std::fs::read_to_string(proc_path.join("cgroup")) + .inspect_err(|err| trace!("Can't read cgroup: {err}")) .ok() .and_then(Self::sanitize_cgroup); @@ -396,6 +440,8 @@ impl ProcessData { let mut gpu_usage_stats = Self::other_gpu_usage_stats(&fdinfos).unwrap_or_default(); gpu_usage_stats.extend(nvidia_stats); + let npu_usage_stats = Self::npu_usage_stats(&fdinfos).unwrap_or_default(); + let timestamp = unix_as_millis(); Ok(Self { @@ -417,6 +463,7 @@ impl ProcessData { write_bytes, timestamp, gpu_usage_stats, + npu_usage_stats, }) } @@ -464,7 +511,9 @@ impl ProcessData { if let Some(fd_path) = fd_path { if let Ok(fd_metadata) = std::fs::metadata(fd_path) { let major = libc::major(fd_metadata.st_rdev()); - if (fd_metadata.st_mode() & libc::S_IFMT) != libc::S_IFCHR || major != 226 { + if (fd_metadata.st_mode() & libc::S_IFMT) != libc::S_IFCHR + || (major != DRM_MAJOR && major != ACCEL_MAJOR) + { continue; } } @@ -503,6 +552,59 @@ impl ProcessData { ) } + fn npu_usage_stats( + fdinfos: &[HashMap], + ) -> Result> { + let mut return_map = BTreeMap::new(); + + for fdinfo in fdinfos { + if let Ok((identifier, stats)) = Self::extract_npu_usage_from_fdinfo(fdinfo) { + return_map + .entry(identifier) + .and_modify(|existing_value: &mut NpuUsageStats| { + *existing_value = existing_value.greater(&stats) + }) + .or_insert(stats); + } + } + + Ok(return_map) + } + + fn extract_npu_usage_from_fdinfo( + fdinfo: &HashMap, + ) -> Result<(PciSlot, NpuUsageStats)> { + let driver = fdinfo.get(DRM_DRIVER); + + if let Some(driver) = driver { + let gpu_identifier = fdinfo + .get(DRM_PDEV) + .and_then(|field| PciSlot::from_str(field).ok()) + .unwrap_or_default(); + + let stats = match driver.as_str() { + "amdxdna_accel_driver" => NpuUsageStats::AmdxdnaStats { + usage_ns: NPU_NS_FIELDS + .get(driver.as_str()) + .map(|names| Self::parse_drm_fields(fdinfo, names, &RE_DRM_TIME)) + .unwrap_or_default(), + mem_bytes: MEM_DRM_FIELDS + .get(driver.as_str()) + .map(|names| { + Self::parse_drm_fields::(fdinfo, names, &RE_DRM_KIB) + .saturating_mul(1024) + }) + .unwrap_or_default(), + }, + _ => bail!("unable to read stats from driver"), + }; + + return Ok((gpu_identifier, stats)); + } + + bail!("unable to find gpu information in this fdinfo"); + } + fn other_gpu_usage_stats( fdinfos: &[HashMap], ) -> Result> { @@ -641,6 +743,8 @@ impl ProcessData { } fn nvidia_gpu_stats_all(pid: i32) -> BTreeMap { + trace!("Gathering NVIDIA GPU stats…"); + let mut return_map = BTreeMap::new(); for (pci_slot, _) in NVML_DEVICES.iter() { @@ -653,6 +757,7 @@ impl ProcessData { } fn nvidia_gpu_stats(pid: i32, pci_slot: PciSlot) -> Result { + trace!("Gathering GPU stats for NVIDIA GPU at {pci_slot}…"); let this_process_stats = NVIDIA_PROCESSES_STATS .read() .unwrap() @@ -692,6 +797,7 @@ impl ProcessData { } fn nvidia_process_infos() -> HashMap> { + trace!("Refreshing NVIDIA process infos…"); let mut return_map = HashMap::new(); for (pci_slot, gpu) in NVML_DEVICES.iter() { @@ -705,6 +811,7 @@ impl ProcessData { } fn nvidia_process_stats() -> HashMap> { + trace!("Refreshing NVIDIA process stats…"); let mut return_map = HashMap::new(); for (pci_slot, gpu) in NVML_DEVICES.iter() { diff --git a/lib/process_data/src/npu_usage.rs b/lib/process_data/src/npu_usage.rs new file mode 100644 index 0000000..c11c5b3 --- /dev/null +++ b/lib/process_data/src/npu_usage.rs @@ -0,0 +1,50 @@ +use serde::{Deserialize, Serialize}; + +/// Represents NPU usage statistics per-process. +#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize, Copy)] +pub enum NpuUsageStats { + AmdxdnaStats { usage_ns: u64, mem_bytes: u64 }, +} + +impl NpuUsageStats { + fn delta_ns(a: u64, b: u64, time_delta: u64) -> Option { + if time_delta == 0 { + None + } else { + Some(a.saturating_sub(b) as f32 / (time_delta * 1_000_000) as f32) + } + } + + pub fn usage_fraction(&self, old: &Self, time_delta: u64) -> Option { + match (self, old) { + ( + Self::AmdxdnaStats { usage_ns: a_ns, .. }, + Self::AmdxdnaStats { usage_ns: b_ns, .. }, + ) => Self::delta_ns(*a_ns, *b_ns, time_delta), + } + } + + pub fn mem(&self) -> Option { + match self { + Self::AmdxdnaStats { mem_bytes, .. } => Some(*mem_bytes), + } + } + + pub fn greater(&self, other: &Self) -> Self { + match (self, other) { + ( + Self::AmdxdnaStats { + usage_ns: a_ns, + mem_bytes: a_mem_bytes, + }, + Self::AmdxdnaStats { + usage_ns: b_ns, + mem_bytes: b_mem_bytes, + }, + ) => Self::AmdxdnaStats { + usage_ns: *a_ns.max(b_ns), + mem_bytes: *a_mem_bytes.max(b_mem_bytes), + }, + } + } +} diff --git a/src/bin/resources-processes.rs b/src/bin/resources-processes.rs index 6456971..154d01a 100644 --- a/src/bin/resources-processes.rs +++ b/src/bin/resources-processes.rs @@ -1,4 +1,5 @@ use anyhow::Result; +use log::{info, trace}; use process_data::ProcessData; use ron::ser::PrettyConfig; use std::io::{Read, Write}; @@ -18,6 +19,11 @@ struct Args { } fn main() -> Result<()> { + // Initialize logger + pretty_env_logger::init(); + + info!("Starting resources-processes…"); + let args = Args::parse(); if args.once { @@ -29,12 +35,14 @@ fn main() -> Result<()> { let mut buffer = [0; 1]; std::io::stdin().read_exact(&mut buffer)?; + trace!("Received character"); output(args.ron)?; } } fn output(ron: bool) -> Result<()> { + trace!("Gathering process data…"); let data = ProcessData::all_process_data()?; let encoded = if ron { @@ -50,10 +58,13 @@ fn output(ron: bool) -> Result<()> { let stdout = std::io::stdout(); let mut handle = stdout.lock(); + trace!("Sending content length ({})…", encoded.len()); handle.write_all(&len_byte_array)?; + trace!("Sending content…"); handle.write_all(&encoded)?; + trace!("Flushing…"); handle.flush()?; Ok(()) } diff --git a/src/ui/window.rs b/src/ui/window.rs index 0cc71d8..6f6049d 100644 --- a/src/ui/window.rs +++ b/src/ui/window.rs @@ -591,6 +591,18 @@ impl MainWindow { .unwrap_or_default() }; + for npu_data_entry in &mut npu_data { + if npu_data_entry.used_memory.is_none() { + npu_data_entry.used_memory = Some( + process_data + .iter() + .filter_map(|p| p.npu_usage_stats.get(&npu_data_entry.pci_slot)) + .filter_map(|stats| stats.mem()) + .sum::() as usize, + ); + } + } + let refresh_data = RefreshData { cpu_data, mem_data, @@ -703,17 +715,32 @@ impl MainWindow { page.refresh_page(&gpu_data); } - std::mem::drop(apps_context); - /* * Npu */ let npu_pages = imp.npu_pages.borrow(); - for ((_, page), npu_data) in npu_pages.values().zip(npu_data) { + for ((_, page), mut npu_data) in npu_pages.values().zip(npu_data) { let page = page.content().and_downcast::().unwrap(); + + let processes_npu_fraction = apps_context.npu_fraction(npu_data.pci_slot); + npu_data.usage_fraction = Some(f64::max( + npu_data.usage_fraction.unwrap_or(0.0), + processes_npu_fraction.into(), + )); + + if npu_data.total_memory.is_some() { + let processes_npu_memory_fraction = apps_context.npu_mem(npu_data.pci_slot); + npu_data.used_memory = Some(usize::max( + npu_data.used_memory.unwrap_or(0), + processes_npu_memory_fraction as usize, + )); + } + page.refresh_page(&npu_data); } + std::mem::drop(apps_context); + /* * Cpu */ diff --git a/src/utils/app.rs b/src/utils/app.rs index 5e15b01..fdc9b8b 100644 --- a/src/utils/app.rs +++ b/src/utils/app.rs @@ -15,6 +15,7 @@ use log::{debug, info, trace}; use process_data::{ Containerization, ProcessData, gpu_usage::{GpuIdentifier, GpuUsageStats}, + pci_slot::PciSlot, }; use crate::{i18n::i18n, utils::read_parsed}; @@ -614,6 +615,48 @@ impl AppsContext { .clamp(0.0, 1.0) } + pub fn npu_fraction(&self, pci_slot: PciSlot) -> f32 { + self.processes_iter() + .map(|process| { + ( + &process.data.npu_usage_stats, + &process.npu_usage_stats_last, + process.data.timestamp, + process.timestamp_last, + ) + }) + .map(|(new, old, timestamp, timestamp_last)| { + ( + new.get(&pci_slot), + old.get(&pci_slot), + timestamp, + timestamp_last, + ) + }) + .filter_map(|(new, old, timestamp, timestamp_last)| match (new, old) { + (Some(new), Some(old)) => Some((new, old, timestamp, timestamp_last)), + _ => None, + }) + .map(|(new, old, timestamp, timestamp_last)| { + let time_delta = timestamp.saturating_sub(timestamp_last); + new.usage_fraction(old, time_delta).unwrap_or_default() + }) + .sum::() + .clamp(0.0, 1.0) + } + + pub fn npu_mem(&self, pci_slot: PciSlot) -> u64 { + self.processes_iter() + .flat_map(|process| { + process + .data + .npu_usage_stats + .get(&pci_slot) + .and_then(|npu_usage_stats| npu_usage_stats.mem()) + }) + .sum() + } + pub fn vram_usage(&self, gpu_identifier: GpuIdentifier) -> u64 { self.processes_iter() .flat_map(|process| { @@ -621,7 +664,7 @@ impl AppsContext { .data .gpu_usage_stats .get(&gpu_identifier) - .and_then(|gpu_identifier| gpu_identifier.mem()) + .and_then(|gpu_usage_stats| gpu_usage_stats.mem()) }) .sum() } @@ -791,6 +834,7 @@ impl AppsContext { old_process.read_bytes_last = old_process.data.read_bytes; old_process.write_bytes_last = old_process.data.write_bytes; old_process.gpu_usage_stats_last = old_process.data.gpu_usage_stats.clone(); + old_process.npu_usage_stats_last = old_process.data.npu_usage_stats.clone(); old_process.data = process_data.clone(); } else { diff --git a/src/utils/gpu/nvidia.rs b/src/utils/gpu/nvidia.rs index 38f3f42..668e2eb 100644 --- a/src/utils/gpu/nvidia.rs +++ b/src/utils/gpu/nvidia.rs @@ -13,22 +13,16 @@ use std::{ }; static NVML: LazyLock> = LazyLock::new(|| { - let nvml = Nvml::init(); - - if let Err(error) = nvml.as_ref() { - warn!("Connection to NVML failed, reason: {error}"); - if *IS_FLATPAK { - warn!( - "This can occur when the version of the NVIDIA Flatpak runtime (org.freedesktop.Platform.GL.nvidia) \ - and the version of the natively installed NVIDIA driver do not match. Consider updating both your system \ - and Flatpak packages before opening an issue." - ); - } - } else { - debug!("Successfully connected to NVML"); - } - - nvml + Nvml::init() + .inspect_err(|err| { + warn!("Unable to connect to NVML: {err}"); + if *IS_FLATPAK { + warn!("This can occur when the version of the NVIDIA Flatpak runtime \ + (org.freedesktop.Platform.GL.nvidia) and the version of the natively installed NVIDIA driver do not \ + match. Consider updating both your system and Flatpak packages before opening an issue."); + } + }) + .inspect(|_| debug!("Successfully connected to NVML")) }); use crate::utils::{IS_FLATPAK, pci::Device}; diff --git a/src/utils/npu/amd.rs b/src/utils/npu/amd.rs new file mode 100644 index 0000000..1522e3c --- /dev/null +++ b/src/utils/npu/amd.rs @@ -0,0 +1,173 @@ +use anyhow::{Context, Result}; +use process_data::pci_slot::PciSlot; + +use std::fs::File; +use std::os::unix::io::AsRawFd; +use std::path::{Path, PathBuf}; + +use crate::utils::pci::Device; + +use super::NpuImpl; + +const DRM_IOCTL_BASE: u8 = b'd'; +const DRM_COMMAND_BASE: u8 = 0x40; +const DRM_AMDXDNA_GET_INFO: u8 = 7; +const DRM_AMDXDNA_QUERY_CLOCK_METADATA: u32 = 3; + +const IOC_WRITE: u32 = 1; +const IOC_READ: u32 = 2; + +const fn ioc(dir: u32, ty: u8, nr: u8, size: usize) -> libc::c_ulong { + ((dir << 30) | ((ty as u32) << 8) | (nr as u32) | ((size as u32) << 16)) as libc::c_ulong +} + +const fn iowr(ty: u8, nr: u8) -> libc::c_ulong { + ioc(IOC_READ | IOC_WRITE, ty, nr, std::mem::size_of::()) +} + +#[repr(C)] +struct AmdxdnaDrmGetInfo { + param: u32, + buffer_size: u32, + buffer: u64, +} + +#[repr(C)] +#[derive(Default)] +struct AmdxdnaDrmQueryClock { + name: [u8; 16], + freq_mhz: u32, + _pad: u32, +} + +#[repr(C)] +#[derive(Default)] +struct AmdxdnaDrmQueryClockMetadata { + mp_npu_clock: AmdxdnaDrmQueryClock, + h_clock: AmdxdnaDrmQueryClock, +} + +#[derive(Debug, Clone, Default)] + +pub struct AmdNpu { + pub device: Option<&'static Device>, + pub pci_slot: PciSlot, + pub driver: String, + sysfs_path: PathBuf, + first_hwmon_path: Option, +} + +impl AmdNpu { + pub fn new( + device: Option<&'static Device>, + pci_slot: PciSlot, + driver: String, + sysfs_path: PathBuf, + first_hwmon_path: Option, + ) -> Self { + Self { + device, + pci_slot, + driver, + sysfs_path, + first_hwmon_path, + } + } + + fn query_clock_metadata(&self) -> Result<(u64, u64)> { + let accel_name = self + .sysfs_path + .file_name() + .context("invalid sysfs path")? + .to_str() + .context("invalid accel name")?; + let dev_path = format!("/dev/accel/{accel_name}"); + + let file = File::open(&dev_path).context("failed to open accel device")?; + let fd = file.as_raw_fd(); + + let mut clock_metadata = AmdxdnaDrmQueryClockMetadata::default(); + let mut get_info = AmdxdnaDrmGetInfo { + param: DRM_AMDXDNA_QUERY_CLOCK_METADATA, + buffer_size: std::mem::size_of::() as u32, + buffer: &mut clock_metadata as *mut _ as u64, + }; + + let ioctl_cmd = + iowr::(DRM_IOCTL_BASE, DRM_COMMAND_BASE + DRM_AMDXDNA_GET_INFO); + + let ret = unsafe { libc::ioctl(fd, ioctl_cmd, &mut get_info) }; + if ret < 0 { + anyhow::bail!("ioctl failed: {}", std::io::Error::last_os_error()); + } + + let h_clock_hz = clock_metadata.h_clock.freq_mhz as u64 * 1_000_000; + let mp_npu_clock_hz = clock_metadata.mp_npu_clock.freq_mhz as u64 * 1_000_000; + + Ok((h_clock_hz, mp_npu_clock_hz)) + } +} + +impl NpuImpl for AmdNpu { + fn device(&self) -> Option<&'static Device> { + self.device + } + + fn pci_slot(&self) -> PciSlot { + self.pci_slot + } + + fn driver(&self) -> &str { + &self.driver + } + + fn sysfs_path(&self) -> &Path { + &self.sysfs_path + } + + fn first_hwmon(&self) -> Option<&Path> { + self.first_hwmon_path.as_deref() + } + + fn name(&self) -> Result { + self.drm_name() + } + + fn usage(&self) -> Result { + self.drm_usage().map(|usage| usage as f64 / 100.0) + } + + fn used_memory(&self) -> Result { + self.drm_used_memory().map(|usage| usage as usize) + } + + fn total_memory(&self) -> Result { + self.drm_total_memory().map(|usage| usage as usize) + } + + fn temperature(&self) -> Result { + self.hwmon_temperature() + } + + fn power_usage(&self) -> Result { + self.hwmon_power_usage() + } + + fn core_frequency(&self) -> Result { + self.query_clock_metadata() + .map(|(h_clock_hz, _)| h_clock_hz as f64) + } + + fn memory_frequency(&self) -> Result { + self.query_clock_metadata() + .map(|(_, mp_npu_clock_hz)| mp_npu_clock_hz as f64) + } + + fn power_cap(&self) -> Result { + self.hwmon_power_cap() + } + + fn power_cap_max(&self) -> Result { + self.hwmon_power_cap_max() + } +} diff --git a/src/utils/npu/intel.rs b/src/utils/npu/intel.rs index d334088..c923bd6 100644 --- a/src/utils/npu/intel.rs +++ b/src/utils/npu/intel.rs @@ -83,11 +83,11 @@ impl NpuImpl for IntelNpu { Ok(delta_busy_time / delta_timestamp) } - fn used_vram(&self) -> Result { + fn used_memory(&self) -> Result { self.drm_used_memory().map(|usage| usage as usize) } - fn total_vram(&self) -> Result { + fn total_memory(&self) -> Result { self.drm_total_memory().map(|usage| usage as usize) } diff --git a/src/utils/npu/mod.rs b/src/utils/npu/mod.rs index 547ace3..57f9bce 100644 --- a/src/utils/npu/mod.rs +++ b/src/utils/npu/mod.rs @@ -1,6 +1,8 @@ +mod amd; mod intel; mod other; +use amd::AmdNpu; use anyhow::{Context, Result, bail}; use log::{debug, info, trace}; use process_data::pci_slot::PciSlot; @@ -14,16 +16,17 @@ use glob::glob; use crate::{ i18n::i18n, - utils::{pci::Device, read_parsed, read_uevent}, + utils::{ + pci::{Device, Vendor}, + read_parsed, read_uevent, + }, }; use self::{intel::IntelNpu, other::OtherNpu}; -use super::{ - link::{Link, LinkData}, - pci::Vendor, -}; +use super::link::{Link, LinkData}; +pub const VID_AMD: u16 = 0x1002; pub const VID_INTEL: u16 = 0x8086; #[derive(Debug)] @@ -55,8 +58,8 @@ impl NpuData { let usage_fraction = npu.usage().ok(); - let total_memory = npu.total_vram().ok(); - let used_memory = npu.used_vram().ok(); + let total_memory = npu.total_memory().ok(); + let used_memory = npu.used_memory().ok(); let clock_speed = npu.core_frequency().ok(); let vram_speed = npu.memory_frequency().ok(); @@ -91,6 +94,7 @@ impl NpuData { #[derive(Debug, Clone)] pub enum Npu { + Amd(AmdNpu), Intel(IntelNpu), Other(OtherNpu), } @@ -110,8 +114,8 @@ pub trait NpuImpl { fn name(&self) -> Result; fn usage(&self) -> Result; - fn used_vram(&self) -> Result; - fn total_vram(&self) -> Result; + fn used_memory(&self) -> Result; + fn total_memory(&self) -> Result; fn temperature(&self) -> Result; fn power_usage(&self) -> Result; fn core_frequency(&self) -> Result; @@ -181,6 +185,7 @@ impl std::ops::Deref for Npu { fn deref(&self) -> &Self::Target { match self { + Npu::Amd(npu) => npu, Npu::Intel(npu) => npu, Npu::Other(npu) => npu, } @@ -265,6 +270,17 @@ impl Npu { )), "Intel", ) + } else if vid == VID_AMD || driver == "amdxdna" { + ( + Npu::Amd(AmdNpu::new( + device, + pci_slot, + driver, + path.to_path_buf(), + hwmon_vec.first().cloned(), + )), + "AMD", + ) } else { ( Npu::Other(OtherNpu::new( diff --git a/src/utils/npu/other.rs b/src/utils/npu/other.rs index f78fdd7..c8f10a2 100644 --- a/src/utils/npu/other.rs +++ b/src/utils/npu/other.rs @@ -64,11 +64,11 @@ impl NpuImpl for OtherNpu { self.drm_usage().map(|usage| usage as f64 / 100.0) } - fn used_vram(&self) -> Result { + fn used_memory(&self) -> Result { self.drm_used_memory().map(|usage| usage as usize) } - fn total_vram(&self) -> Result { + fn total_memory(&self) -> Result { self.drm_total_memory().map(|usage| usage as usize) } diff --git a/src/utils/process.rs b/src/utils/process.rs index c7fd807..61de356 100644 --- a/src/utils/process.rs +++ b/src/utils/process.rs @@ -4,6 +4,8 @@ use log::{debug, error, info, trace}; use process_data::{ Niceness, ProcessData, gpu_usage::{GpuIdentifier, GpuUsageStats}, + npu_usage::NpuUsageStats, + pci_slot::PciSlot, }; use std::{ collections::BTreeMap, @@ -42,7 +44,7 @@ static COMPANION_PROCESS: LazyLock> = LazyLock: .args(["--host", proxy_path.as_str()]) .stdin(Stdio::piped()) .stdout(Stdio::piped()) - .stderr(Stdio::null()) + .stderr(Stdio::inherit()) .spawn() .unwrap() } else { @@ -50,7 +52,7 @@ static COMPANION_PROCESS: LazyLock> = LazyLock: Command::new(proxy_path) .stdin(Stdio::piped()) .stdout(Stdio::piped()) - .stderr(Stdio::null()) + .stderr(Stdio::inherit()) .spawn() .unwrap() }; @@ -73,6 +75,7 @@ pub struct Process { pub read_bytes_last: Option, pub write_bytes_last: Option, pub gpu_usage_stats_last: BTreeMap, + pub npu_usage_stats_last: BTreeMap, pub display_name: String, } @@ -175,6 +178,7 @@ impl Process { read_bytes_last, write_bytes_last, gpu_usage_stats_last: Default::default(), + npu_usage_stats_last: Default::default(), display_name, } } @@ -451,6 +455,30 @@ impl Process { .sum() } + #[must_use] + pub fn npu_usage(&self) -> f32 { + let mut returned_npu_usage = 0.0; + for (npu, usage) in &self.data.npu_usage_stats { + if let Some(old_usage) = self.npu_usage_stats_last.get(npu) { + let time_delta = self.data.timestamp.saturating_sub(self.timestamp_last); + returned_npu_usage += usage + .usage_fraction(old_usage, time_delta) + .unwrap_or_default(); + } + } + + returned_npu_usage + } + + #[must_use] + pub fn npu_mem_usage(&self) -> u64 { + self.data + .npu_usage_stats + .values() + .map(|stats| stats.mem().unwrap_or_default()) + .sum() + } + #[must_use] pub fn starttime(&self) -> f64 { self.data.starttime as f64 / *TICK_RATE as f64