diff --git a/Cargo.lock b/Cargo.lock
index e7bbec9..bc90c9e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -984,9 +984,9 @@ dependencies = [
 
 [[package]]
 name = "log"
-version = "0.4.28"
+version = "0.4.29"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"
+checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
 
 [[package]]
 name = "malloc_buf"
@@ -1331,6 +1331,7 @@ dependencies = [
  "glob",
  "lazy-regex",
  "libc",
+ "log",
  "num_cpus",
  "nutype",
  "nvml-wrapper",
diff --git a/lib/process_data/Cargo.lock b/lib/process_data/Cargo.lock
index 1d98f2c..640f2a4 100644
--- a/lib/process_data/Cargo.lock
+++ b/lib/process_data/Cargo.lock
@@ -212,9 +212,9 @@ dependencies = [
 
 [[package]]
 name = "log"
-version = "0.4.28"
+version = "0.4.29"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"
+checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
 
 [[package]]
 name = "memchr"
@@ -312,6 +312,7 @@ dependencies = [
  "glob",
  "lazy-regex",
  "libc",
+ "log",
  "num_cpus",
  "nutype",
  "nvml-wrapper",
diff --git a/lib/process_data/Cargo.toml b/lib/process_data/Cargo.toml
index 6f06279..18af93d 100644
--- a/lib/process_data/Cargo.toml
+++ b/lib/process_data/Cargo.toml
@@ -21,6 +21,7 @@ anyhow = "1.0.100"
 glob = "0.3.3"
 lazy-regex = "3.4.2"
 libc = "0.2.177"
+log = "0.4.29"
 num_cpus = "1.17.0"
 nutype = { version = "0.6.2", features = ["serde"] }
 nvml-wrapper = "0.11.0"
diff --git a/lib/process_data/src/lib.rs b/lib/process_data/src/lib.rs
index 9e2515e..bbe7504 100644
--- a/lib/process_data/src/lib.rs
+++ b/lib/process_data/src/lib.rs
@@ -1,9 +1,11 @@
 pub mod gpu_usage;
+pub mod npu_usage;
 pub mod pci_slot;
 
 use anyhow::{Context, Result, bail};
 use glob::glob;
 use lazy_regex::{Lazy, Regex, lazy_regex};
+use log::{debug, trace, warn};
 use nutype::nutype;
 use nvml_wrapper::enums::device::UsedGpuMemory;
 use nvml_wrapper::error::NvmlError;
@@ -21,6 +23,7 @@ use std::sync::{LazyLock, RwLock};
 use std::time::SystemTime;
 
 use crate::gpu_usage::{GpuIdentifier, GpuUsageStats, IntegerPercentage};
+use crate::npu_usage::NpuUsageStats;
 
 const STAT_OFFSET: usize = 2; // we split the stat contents where the executable name ends, which is the second element
 const STAT_PARENT_PID: usize = 3 - STAT_OFFSET;
@@ -33,10 +36,19 @@ const DRM_DRIVER: &str = "drm-driver";
 
 const DRM_PDEV: &str = "drm-pdev";
 
+const DRM_MAJOR: u32 = 226; // device major number of DRM character devices
+const ACCEL_MAJOR: u32 = 261; // device major number of accel character devices
+
 static USERS_CACHE: LazyLock<HashMap<libc::uid_t, String>> = LazyLock::new(|| unsafe {
-    uzers::all_users()
-        .map(|user| (user.uid(), user.name().to_string_lossy().to_string()))
-        .collect()
+    debug!("Initializing users cache…");
+    let users: HashMap<libc::uid_t, String> = uzers::all_users()
+        .map(|user| {
+            trace!("Found user {}", user.name().to_string_lossy());
+            (user.uid(), user.name().to_string_lossy().to_string())
+        })
+        .collect();
+    debug!("Found {} users", users.len());
+    users
 });
 
 static PAGESIZE: LazyLock<usize> = LazyLock::new(sysconf::pagesize);
@@ -95,25 +107,38 @@ static ENC_TOTAL_CYCLES_DRM_FIELDS: Lazy<HashMap<&str, Vec<&str>>> =
 static DEC_NS_DRM_FIELDS: Lazy<HashMap<&str, Vec<&str>>> =
     Lazy::new(|| HashMap::from_iter([("amdgpu", vec!["drm-engine-dec"])]));
 
+static NPU_NS_FIELDS: Lazy<HashMap<&str, Vec<&str>>> =
+    Lazy::new(|| HashMap::from_iter([("amdxdna_accel_driver", vec!["drm-engine-npu-amdxdna"])]));
+
 static MEM_DRM_FIELDS: Lazy<HashMap<&str, Vec<&str>>> = Lazy::new(|| {
     HashMap::from_iter([
         ("amdgpu", vec!["drm-memory-gtt", "drm-memory-vram"]),
+        ("amdxdna_accel_driver", vec!["drm-total-memory"]),
         ("i915", vec!["drm-total-local0", "drm-total-system0"]),
         ("v3d", vec!["drm-total-memory"]),
         ("xe", vec!["drm-total-gtt", "drm-total-vram0"]),
     ])
 });
 
-static NVML: Lazy<Result<Nvml, NvmlError>> = Lazy::new(Nvml::init);
+static NVML: Lazy<Result<Nvml, NvmlError>> = Lazy::new(|| {
+    debug!("Initializing connection to NVML…");
+    Nvml::init().inspect_err(|err| warn!("Unable to connect to NVML: {err}"))
+});
 
 static NVML_DEVICES: Lazy<Vec<(PciSlot, Device)>> = Lazy::new(|| {
     if let Ok(nvml) = NVML.as_ref() {
+        debug!("Looking for NVIDIA devices…");
         let device_count = nvml.device_count().unwrap_or(0);
         let mut return_vec = Vec::with_capacity(device_count as usize);
         for i in 0..device_count {
             if let Ok(gpu) = nvml.device_by_index(i) {
                 if let Ok(pci_slot) = gpu.pci_info().map(|pci_info| pci_info.bus_id) {
                     let pci_slot = PciSlot::from_str(&pci_slot).unwrap();
+                    debug!(
+                        "Found {} at {}",
+                        gpu.name().unwrap_or("N/A".into()),
+                        pci_slot
+                    );
                     return_vec.push((pci_slot, gpu));
                 }
             }
@@ -164,7 +189,7 @@ pub enum Containerization {
     Snap,
 }
 
-/// Data that could be transferred us>ing `resources-processes`, separated from
+/// Data that could be transferred using `resources-processes`, separated from
 /// `Process` mainly due to `Icon` not being able to derive `Serialize` and
 /// `Deserialize`.
 #[derive(Debug, Default, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
@@ -187,6 +212,7 @@ pub struct ProcessData {
     pub write_bytes: Option<u64>,
     pub timestamp: u64,
     pub gpu_usage_stats: BTreeMap<GpuIdentifier, GpuUsageStats>,
+    pub npu_usage_stats: BTreeMap<PciSlot, NpuUsageStats>,
 }
 
 impl ProcessData {
@@ -266,13 +292,6 @@ impl ProcessData {
 
     pub fn try_from_path<P: AsRef<Path>>(proc_path: P) -> Result<Self> {
         let proc_path = proc_path.as_ref();
-        let stat = std::fs::read_to_string(proc_path.join("stat"))?;
-        let statm = std::fs::read_to_string(proc_path.join("statm"))?;
-        let status = std::fs::read_to_string(proc_path.join("status"))?;
-        let comm = std::fs::read_to_string(proc_path.join("comm"))?;
-        let commandline = std::fs::read_to_string(proc_path.join("cmdline"))?;
-        let io = std::fs::read_to_string(proc_path.join("io")).ok();
-
         let pid = proc_path
             .file_name()
             .context("proc_path terminates in ..")?
@@ -280,6 +299,23 @@ impl ProcessData {
             .context("can't turn OsStr to str")?
             .parse()?;
 
+        trace!("Inspecting process {pid}…");
+
+        trace!("Reading info files…");
+        let stat = std::fs::read_to_string(proc_path.join("stat"))
+            .inspect_err(|err| trace!("Error reading 'stat': {err}"))?;
+        let statm = std::fs::read_to_string(proc_path.join("statm"))
+            .inspect_err(|err| trace!("Error reading 'statm': {err}"))?;
+        let status = std::fs::read_to_string(proc_path.join("status"))
+            .inspect_err(|err| trace!("Error reading 'status': {err}"))?;
+        let comm = std::fs::read_to_string(proc_path.join("comm"))
+            .inspect_err(|err| trace!("Error reading 'comm': {err}"))?;
+        let commandline = std::fs::read_to_string(proc_path.join("cmdline"))
+            .inspect_err(|err| trace!("Error reading 'cmdline': {err}"))?;
+        let io = std::fs::read_to_string(proc_path.join("io"))
+            .inspect_err(|err| trace!("Error reading 'io': {err}"))
+            .ok();
+
         let user = USERS_CACHE
             .get(&Self::get_uid(proc_path)?)
             .cloned()
@@ -288,7 +324,8 @@ impl ProcessData {
         let stat = stat
             .split(')') // since we don't care about the pid or the executable name, split after the executable name to make our life easier
             .last()
-            .context("stat doesn't have ')'")?
+            .context("stat doesn't have ')'")
+            .inspect_err(|err| trace!("Can't parse 'stat': {err}"))?
             .split(' ')
             .skip(1) // the first element would be a space, let's ignore that
             .collect::<Vec<_>>();
@@ -300,23 +337,28 @@ impl ProcessData {
         let parent_pid = stat
             .get(STAT_PARENT_PID)
             .context("wrong stat file format")
-            .and_then(|x| x.parse().context("couldn't parse stat file content"))?;
+            .and_then(|x| x.parse().context("couldn't parse stat file content to int"))
+            .inspect_err(|err| trace!("Can't parse parent pid from 'stat': {err}"))?;
         let user_cpu_time = stat
             .get(STAT_USER_CPU_TIME)
             .context("wrong stat file format")
-            .and_then(|x| x.parse().context("couldn't parse stat file content"))?;
+            .and_then(|x| x.parse().context("couldn't parse stat file content to int"))
+            .inspect_err(|err| trace!("Can't parse user cpu time from 'stat': {err}"))?;
         let system_cpu_time = stat
             .get(STAT_SYSTEM_CPU_TIME)
             .context("wrong stat file format")
-            .and_then(|x| x.parse().context("couldn't parse stat file content"))?;
+            .and_then(|x| x.parse().context("couldn't parse stat file content to int"))
+            .inspect_err(|err| trace!("Can't parse system cpu time from 'stat': {err}"))?;
         let nice = stat
             .get(STAT_NICE)
             .context("wrong stat file format")
-            .and_then(|x| x.parse().context("couldn't parse stat file content"))?;
+            .and_then(|x| x.parse().context("couldn't parse stat file content to int"))
+            .inspect_err(|err| trace!("Can't parse nice from 'stat': {err}"))?;
         let starttime = stat
             .get(STAT_STARTTIME)
             .context("wrong stat file format")
-            .and_then(|x| x.parse().context("couldn't parse stat file content"))?;
+            .and_then(|x| x.parse().context("couldn't parse stat file content to int"))
+            .inspect_err(|err| trace!("Can't parse start time from 'stat': {err}"))?;
 
         let mut affinity = Vec::with_capacity(*NUM_CPUS);
         RE_AFFINITY
@@ -352,7 +394,8 @@ impl ProcessData {
             .and_then(|x| {
                 x.parse::<usize>()
                     .context("couldn't parse statm file content")
-            })?
+            })
+            .inspect_err(|err| trace!("Can't parse memory usage from 'statm': {err}"))?
             .saturating_sub(
                 statm
                     .get(2)
@@ -365,6 +408,7 @@ impl ProcessData {
             .saturating_mul(*PAGESIZE);
 
         let cgroup = std::fs::read_to_string(proc_path.join("cgroup"))
+            .inspect_err(|err| trace!("Can't read cgroup: {err}"))
             .ok()
             .and_then(Self::sanitize_cgroup);
 
@@ -396,6 +440,8 @@ impl ProcessData {
         let mut gpu_usage_stats = Self::other_gpu_usage_stats(&fdinfos).unwrap_or_default();
         gpu_usage_stats.extend(nvidia_stats);
 
+        let npu_usage_stats = Self::npu_usage_stats(&fdinfos).unwrap_or_default();
+
         let timestamp = unix_as_millis();
 
         Ok(Self {
@@ -417,6 +463,7 @@ impl ProcessData {
             write_bytes,
             timestamp,
             gpu_usage_stats,
+            npu_usage_stats,
         })
     }
 
@@ -464,7 +511,9 @@ impl ProcessData {
             if let Some(fd_path) = fd_path {
                 if let Ok(fd_metadata) = std::fs::metadata(fd_path) {
                     let major = libc::major(fd_metadata.st_rdev());
-                    if (fd_metadata.st_mode() & libc::S_IFMT) != libc::S_IFCHR || major != 226 {
+                    if (fd_metadata.st_mode() & libc::S_IFMT) != libc::S_IFCHR
+                        || (major != DRM_MAJOR && major != ACCEL_MAJOR)
+                    {
                         continue;
                     }
                 }
@@ -503,6 +552,71 @@ impl ProcessData {
         )
     }
 
+    /// Collects NPU usage statistics from the fdinfo entries of a process, keyed by PCI slot.
+    ///
+    /// Entries that don't describe a supported NPU are skipped. If several entries refer to the
+    /// same slot, the larger value of each counter is kept (see `NpuUsageStats::greater`).
+    /// The `Result` return type mirrors `other_gpu_usage_stats`; this function does not
+    /// currently produce an error itself.
+    fn npu_usage_stats(
+        fdinfos: &[HashMap<String, String>],
+    ) -> Result<BTreeMap<PciSlot, NpuUsageStats>> {
+        let mut return_map = BTreeMap::new();
+
+        for fdinfo in fdinfos {
+            if let Ok((pci_slot, stats)) = Self::extract_npu_usage_from_fdinfo(fdinfo) {
+                return_map
+                    .entry(pci_slot)
+                    .and_modify(|existing_value: &mut NpuUsageStats| {
+                        *existing_value = existing_value.greater(&stats)
+                    })
+                    .or_insert(stats);
+            }
+        }
+
+        Ok(return_map)
+    }
+
+    /// Extracts the PCI slot and usage counters of an NPU from a single parsed fdinfo entry.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the entry has no `drm-driver` field or names a driver without NPU support.
+    fn extract_npu_usage_from_fdinfo(
+        fdinfo: &HashMap<String, String>,
+    ) -> Result<(PciSlot, NpuUsageStats)> {
+        let Some(driver) = fdinfo.get(DRM_DRIVER) else {
+            bail!("unable to find npu information in this fdinfo");
+        };
+
+        // Match the driver first so entries of other drivers (e.g. GPUs) are rejected before any
+        // further parsing happens.
+        let stats = match driver.as_str() {
+            "amdxdna_accel_driver" => NpuUsageStats::AmdxdnaStats {
+                usage_ns: NPU_NS_FIELDS
+                    .get(driver.as_str())
+                    .map(|names| Self::parse_drm_fields(fdinfo, names, &RE_DRM_TIME))
+                    .unwrap_or_default(),
+                mem_bytes: MEM_DRM_FIELDS
+                    .get(driver.as_str())
+                    .map(|names| {
+                        Self::parse_drm_fields::<u64, _>(fdinfo, names, &RE_DRM_KIB)
+                            .saturating_mul(1024)
+                    })
+                    .unwrap_or_default(),
+            },
+            _ => bail!("unable to read stats from driver"),
+        };
+
+        // An absent or unparsable `drm-pdev` field falls back to the default `PciSlot`.
+        let pci_slot = fdinfo
+            .get(DRM_PDEV)
+            .and_then(|field| PciSlot::from_str(field).ok())
+            .unwrap_or_default();
+
+        Ok((pci_slot, stats))
+    }
+
     fn other_gpu_usage_stats(
         fdinfos: &[HashMap<String, String>],
     ) -> Result<BTreeMap<GpuIdentifier, GpuUsageStats>> {
@@ -641,6 +743,8 @@ impl ProcessData {
     }
 
     fn nvidia_gpu_stats_all(pid: i32) -> BTreeMap<GpuIdentifier, GpuUsageStats> {
+        trace!("Gathering NVIDIA GPU stats…");
+
         let mut return_map = BTreeMap::new();
 
         for (pci_slot, _) in NVML_DEVICES.iter() {
@@ -653,6 +757,7 @@ impl ProcessData {
     }
 
     fn nvidia_gpu_stats(pid: i32, pci_slot: PciSlot) -> Result<GpuUsageStats> {
+        trace!("Gathering GPU stats for NVIDIA GPU at {pci_slot}…");
         let this_process_stats = NVIDIA_PROCESSES_STATS
             .read()
             .unwrap()
@@ -692,6 +797,7 @@ impl ProcessData {
     }
 
     fn nvidia_process_infos() -> HashMap<PciSlot, Vec<ProcessInfo>> {
+        trace!("Refreshing NVIDIA process infos…");
         let mut return_map = HashMap::new();
 
         for (pci_slot, gpu) in NVML_DEVICES.iter() {
@@ -705,6 +811,7 @@ impl ProcessData {
     }
 
     fn nvidia_process_stats() -> HashMap<PciSlot, Vec<ProcessUtilizationSample>> {
+        trace!("Refreshing NVIDIA process stats…");
         let mut return_map = HashMap::new();
 
         for (pci_slot, gpu) in NVML_DEVICES.iter() {
diff --git a/lib/process_data/src/npu_usage.rs b/lib/process_data/src/npu_usage.rs
new file mode 100644
index 0000000..c11c5b3
--- /dev/null
+++ b/lib/process_data/src/npu_usage.rs
@@ -0,0 +1,68 @@
+use serde::{Deserialize, Serialize};
+
+/// Represents NPU usage statistics per-process.
+///
+/// Each variant holds the counters reported by one driver. `usage_ns` is a running total, so a
+/// usage fraction only follows from two samples (see [`NpuUsageStats::usage_fraction`]).
+#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize, Copy)]
+pub enum NpuUsageStats {
+    /// Counters of the amdxdna driver: busy time in nanoseconds and memory in bytes.
+    AmdxdnaStats { usage_ns: u64, mem_bytes: u64 },
+}
+
+impl NpuUsageStats {
+    /// Returns the share of `time_delta` (milliseconds) covered by a nanosecond counter growing
+    /// from `b` to `a`, or `None` if `time_delta` is zero.
+    ///
+    /// A counter that went backwards (`a < b`) counts as no growth.
+    fn delta_ns(a: u64, b: u64, time_delta: u64) -> Option<f32> {
+        if time_delta == 0 {
+            return None;
+        }
+
+        // Convert milliseconds to nanoseconds in floating point: the integer product
+        // `time_delta * 1_000_000` overflows `u64` for very large deltas, which panics in debug
+        // builds and wraps to a bogus divisor when overflow checks are off.
+        let elapsed_ns = time_delta as f32 * 1_000_000.0;
+        Some(a.saturating_sub(b) as f32 / elapsed_ns)
+    }
+
+    /// Fraction of the interval between `old` and `self` during which the NPU was busy.
+    ///
+    /// `time_delta` is the time between the two samples in milliseconds. Returns `None` when
+    /// `time_delta` is zero.
+    pub fn usage_fraction(&self, old: &Self, time_delta: u64) -> Option<f32> {
+        match (self, old) {
+            (
+                Self::AmdxdnaStats { usage_ns: a_ns, .. },
+                Self::AmdxdnaStats { usage_ns: b_ns, .. },
+            ) => Self::delta_ns(*a_ns, *b_ns, time_delta),
+        }
+    }
+
+    /// Memory attributed to the process, in bytes.
+    pub fn mem(&self) -> Option<u64> {
+        match self {
+            Self::AmdxdnaStats { mem_bytes, .. } => Some(*mem_bytes),
+        }
+    }
+
+    /// Combines two samples by taking the larger value of every field.
+    pub fn greater(&self, other: &Self) -> Self {
+        match (self, other) {
+            (
+                Self::AmdxdnaStats {
+                    usage_ns: a_ns,
+                    mem_bytes: a_mem_bytes,
+                },
+                Self::AmdxdnaStats {
+                    usage_ns: b_ns,
+                    mem_bytes: b_mem_bytes,
+                },
+            ) => Self::AmdxdnaStats {
+                usage_ns: *a_ns.max(b_ns),
+                mem_bytes: *a_mem_bytes.max(b_mem_bytes),
+            },
+        }
+    }
+}
diff --git a/src/bin/resources-processes.rs b/src/bin/resources-processes.rs
index 6456971..154d01a 100644
--- a/src/bin/resources-processes.rs
+++ b/src/bin/resources-processes.rs
@@ -1,4 +1,5 @@
 use anyhow::Result;
+use log::{info, trace};
 use process_data::ProcessData;
 use ron::ser::PrettyConfig;
 use std::io::{Read, Write};
@@ -18,6 +19,11 @@ struct Args {
 }
 
 fn main() -> Result<()> {
+    // Initialize logger
+    pretty_env_logger::init();
+
+    info!("Starting resources-processes…");
+
     let args = Args::parse();
 
     if args.once {
@@ -29,12 +35,14 @@ fn main() -> Result<()> {
         let mut buffer = [0; 1];
 
         std::io::stdin().read_exact(&mut buffer)?;
+        trace!("Received character");
 
         output(args.ron)?;
     }
 }
 
 fn output(ron: bool) -> Result<()> {
+    trace!("Gathering process data…");
     let data = ProcessData::all_process_data()?;
 
     let encoded = if ron {
@@ -50,10 +58,13 @@ fn output(ron: bool) -> Result<()> {
     let stdout = std::io::stdout();
     let mut handle = stdout.lock();
 
+    trace!("Sending content length ({})…", encoded.len());
     handle.write_all(&len_byte_array)?;
 
+    trace!("Sending content…");
     handle.write_all(&encoded)?;
 
+    trace!("Flushing…");
     handle.flush()?;
     Ok(())
 }
diff --git a/src/ui/window.rs b/src/ui/window.rs
index 0cc71d8..6f6049d 100644
--- a/src/ui/window.rs
+++ b/src/ui/window.rs
@@ -591,6 +591,18 @@ impl MainWindow {
                 .unwrap_or_default()
         };
 
+        for npu_data_entry in &mut npu_data {
+            if npu_data_entry.used_memory.is_none() {
+                npu_data_entry.used_memory = Some(
+                    process_data
+                        .iter()
+                        .filter_map(|p| p.npu_usage_stats.get(&npu_data_entry.pci_slot))
+                        .filter_map(|stats| stats.mem())
+                        .sum::<u64>() as usize,
+                );
+            }
+        }
+
         let refresh_data = RefreshData {
             cpu_data,
             mem_data,
@@ -703,17 +715,32 @@ impl MainWindow {
             page.refresh_page(&gpu_data);
         }
 
-        std::mem::drop(apps_context);
-
         /*
          * Npu
          */
         let npu_pages = imp.npu_pages.borrow();
-        for ((_, page), npu_data) in npu_pages.values().zip(npu_data) {
+        for ((_, page), mut npu_data) in npu_pages.values().zip(npu_data) {
             let page = page.content().and_downcast::<ResNPU>().unwrap();
+
+            let processes_npu_fraction = apps_context.npu_fraction(npu_data.pci_slot);
+            npu_data.usage_fraction = Some(f64::max(
+                npu_data.usage_fraction.unwrap_or(0.0),
+                processes_npu_fraction.into(),
+            ));
+
+            if npu_data.total_memory.is_some() {
+                let processes_npu_memory_fraction = apps_context.npu_mem(npu_data.pci_slot);
+                npu_data.used_memory = Some(usize::max(
+                    npu_data.used_memory.unwrap_or(0),
+                    processes_npu_memory_fraction as usize,
+                ));
+            }
+
             page.refresh_page(&npu_data);
         }
 
+        std::mem::drop(apps_context);
+
         /*
          * Cpu
          */
diff --git a/src/utils/app.rs b/src/utils/app.rs
index 5e15b01..fdc9b8b 100644
--- a/src/utils/app.rs
+++ b/src/utils/app.rs
@@ -15,6 +15,7 @@ use log::{debug, info, trace};
 use process_data::{
     Containerization, ProcessData,
     gpu_usage::{GpuIdentifier, GpuUsageStats},
+    pci_slot::PciSlot,
 };
 
 use crate::{i18n::i18n, utils::read_parsed};
@@ -614,6 +615,31 @@ impl AppsContext {
             .clamp(0.0, 1.0)
     }
 
+    /// Sums the NPU usage fractions of all processes for the NPU at `pci_slot`.
+    ///
+    /// Only processes with a sample for that NPU in both the current and the previous refresh
+    /// contribute. The total is clamped to `0.0..=1.0`.
+    pub fn npu_fraction(&self, pci_slot: PciSlot) -> f32 {
+        self.processes_iter()
+            .filter_map(|process| {
+                let current = process.data.npu_usage_stats.get(&pci_slot)?;
+                let previous = process.npu_usage_stats_last.get(&pci_slot)?;
+                let time_delta = process.data.timestamp.saturating_sub(process.timestamp_last);
+
+                Some(current.usage_fraction(previous, time_delta).unwrap_or_default())
+            })
+            .sum::<f32>()
+            .clamp(0.0, 1.0)
+    }
+
+    /// Sums the memory reported by all processes for the NPU at `pci_slot`, in bytes.
+    pub fn npu_mem(&self, pci_slot: PciSlot) -> u64 {
+        self.processes_iter()
+            .filter_map(|process| process.data.npu_usage_stats.get(&pci_slot))
+            .filter_map(|npu_usage_stats| npu_usage_stats.mem())
+            .sum()
+    }
+
     pub fn vram_usage(&self, gpu_identifier: GpuIdentifier) -> u64 {
         self.processes_iter()
             .flat_map(|process| {
@@ -621,7 +664,7 @@ impl AppsContext {
                     .data
                     .gpu_usage_stats
                     .get(&gpu_identifier)
-                    .and_then(|gpu_identifier| gpu_identifier.mem())
+                    .and_then(|gpu_usage_stats| gpu_usage_stats.mem())
             })
             .sum()
     }
@@ -791,6 +834,7 @@ impl AppsContext {
                 old_process.read_bytes_last = old_process.data.read_bytes;
                 old_process.write_bytes_last = old_process.data.write_bytes;
                 old_process.gpu_usage_stats_last = old_process.data.gpu_usage_stats.clone();
+                old_process.npu_usage_stats_last = old_process.data.npu_usage_stats.clone();
 
                 old_process.data = process_data.clone();
             } else {
diff --git a/src/utils/gpu/nvidia.rs b/src/utils/gpu/nvidia.rs
index 38f3f42..668e2eb 100644
--- a/src/utils/gpu/nvidia.rs
+++ b/src/utils/gpu/nvidia.rs
@@ -13,22 +13,21 @@ use std::{
 };
 
+/// Shared NVML handle, initialized on first access. If initialization fails, the error is
+/// logged and kept as the stored `Err`.
 static NVML: LazyLock<Result<Nvml, NvmlError>> = LazyLock::new(|| {
-    let nvml = Nvml::init();
-
-    if let Err(error) = nvml.as_ref() {
-        warn!("Connection to NVML failed, reason: {error}");
-        if *IS_FLATPAK {
-            warn!(
-                "This can occur when the version of the NVIDIA Flatpak runtime (org.freedesktop.Platform.GL.nvidia) \
-            and the version of the natively installed NVIDIA driver do not match. Consider updating both your system \
-            and Flatpak packages before opening an issue."
-            );
-        }
-    } else {
-        debug!("Successfully connected to NVML");
-    }
-
-    nvml
+    Nvml::init()
+        .inspect_err(|err| {
+            warn!("Unable to connect to NVML: {err}");
+            if *IS_FLATPAK {
+                warn!(
+                    "This can occur when the version of the NVIDIA Flatpak runtime \
+                    (org.freedesktop.Platform.GL.nvidia) and the version of the natively installed NVIDIA driver do not \
+                    match. Consider updating both your system and Flatpak packages before opening an issue."
+                );
+            }
+        })
+        .inspect(|_| debug!("Successfully connected to NVML"))
 });
 
 use crate::utils::{IS_FLATPAK, pci::Device};
diff --git a/src/utils/npu/amd.rs b/src/utils/npu/amd.rs
new file mode 100644
index 0000000..1522e3c
--- /dev/null
+++ b/src/utils/npu/amd.rs
@@ -0,0 +1,198 @@
+use anyhow::{Context, Result};
+use process_data::pci_slot::PciSlot;
+
+use std::fs::File;
+use std::os::unix::io::AsRawFd;
+use std::path::{Path, PathBuf};
+
+use crate::utils::pci::Device;
+
+use super::NpuImpl;
+
+// Building blocks of the request number for the amdxdna `GET_INFO` ioctl. The values are meant
+// to mirror the kernel's DRM/amdxdna uAPI definitions and must stay in sync with them.
+const DRM_IOCTL_BASE: u8 = b'd';
+const DRM_COMMAND_BASE: u8 = 0x40;
+const DRM_AMDXDNA_GET_INFO: u8 = 7;
+const DRM_AMDXDNA_QUERY_CLOCK_METADATA: u32 = 3;
+
+// Direction bits of an ioctl request number.
+const IOC_WRITE: u32 = 1;
+const IOC_READ: u32 = 2;
+
+/// Packs direction, type, command number and argument size into an ioctl request number:
+/// `dir` from bit 30, `size` from bit 16, `ty` from bit 8 and `nr` in the lowest byte.
+const fn ioc(dir: u32, ty: u8, nr: u8, size: usize) -> libc::c_ulong {
+    ((dir << 30) | ((ty as u32) << 8) | (nr as u32) | ((size as u32) << 16)) as libc::c_ulong
+}
+
+/// Builds the request number of a read/write ioctl whose argument has the size of `T`.
+const fn iowr<T>(ty: u8, nr: u8) -> libc::c_ulong {
+    ioc(IOC_READ | IOC_WRITE, ty, nr, std::mem::size_of::<T>())
+}
+
+/// Argument of the `GET_INFO` ioctl: `param` selects the query and `buffer` holds the address
+/// of a caller-owned result buffer that is `buffer_size` bytes long.
+#[repr(C)]
+struct AmdxdnaDrmGetInfo {
+    param: u32,
+    buffer_size: u32,
+    buffer: u64,
+}
+
+/// One clock entry of [`AmdxdnaDrmQueryClockMetadata`].
+#[repr(C)]
+#[derive(Default)]
+struct AmdxdnaDrmQueryClock {
+    name: [u8; 16],
+    freq_mhz: u32,
+    _pad: u32,
+}
+
+/// Result buffer filled by the `DRM_AMDXDNA_QUERY_CLOCK_METADATA` query.
+#[repr(C)]
+#[derive(Default)]
+struct AmdxdnaDrmQueryClockMetadata {
+    mp_npu_clock: AmdxdnaDrmQueryClock,
+    h_clock: AmdxdnaDrmQueryClock,
+}
+
+/// An AMD NPU, described by its PCI slot, driver name, sysfs path and optional hwmon path.
+#[derive(Debug, Clone, Default)]
+pub struct AmdNpu {
+    pub device: Option<&'static Device>,
+    pub pci_slot: PciSlot,
+    pub driver: String,
+    sysfs_path: PathBuf,
+    first_hwmon_path: Option<PathBuf>,
+}
+
+impl AmdNpu {
+    /// Creates an `AmdNpu` from already discovered device information.
+    pub fn new(
+        device: Option<&'static Device>,
+        pci_slot: PciSlot,
+        driver: String,
+        sysfs_path: PathBuf,
+        first_hwmon_path: Option<PathBuf>,
+    ) -> Self {
+        Self {
+            device,
+            pci_slot,
+            driver,
+            sysfs_path,
+            first_hwmon_path,
+        }
+    }
+
+    /// Asks the driver for the current clock frequencies.
+    ///
+    /// Opens `/dev/accel/<name>`, where `<name>` is the last component of the sysfs path, and
+    /// issues the `GET_INFO` ioctl with the clock metadata query.
+    ///
+    /// Returns `(h_clock, mp_npu_clock)` in Hz.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the sysfs path has no final component, the device node can't be opened or the
+    /// ioctl reports an error.
+    fn query_clock_metadata(&self) -> Result<(u64, u64)> {
+        let accel_name = self.sysfs_path.file_name().context("invalid sysfs path")?;
+        // Join path components instead of formatting a string; this also drops the needless
+        // requirement that the name is valid UTF-8.
+        let dev_path = Path::new("/dev/accel").join(accel_name);
+
+        let file = File::open(&dev_path).context("failed to open accel device")?;
+        let fd = file.as_raw_fd();
+
+        let mut clock_metadata = AmdxdnaDrmQueryClockMetadata::default();
+        let mut get_info = AmdxdnaDrmGetInfo {
+            param: DRM_AMDXDNA_QUERY_CLOCK_METADATA,
+            buffer_size: std::mem::size_of::<AmdxdnaDrmQueryClockMetadata>() as u32,
+            buffer: &mut clock_metadata as *mut _ as u64,
+        };
+
+        let ioctl_cmd =
+            iowr::<AmdxdnaDrmGetInfo>(DRM_IOCTL_BASE, DRM_COMMAND_BASE + DRM_AMDXDNA_GET_INFO);
+
+        // SAFETY: `fd` belongs to `file`, which is still open here. `get_info` is a live
+        // `#[repr(C)]` value, and its `buffer`/`buffer_size` fields describe `clock_metadata`,
+        // which outlives the call.
+        let ret = unsafe { libc::ioctl(fd, ioctl_cmd, &mut get_info) };
+        if ret < 0 {
+            anyhow::bail!("ioctl failed: {}", std::io::Error::last_os_error());
+        }
+
+        // Convert MHz to Hz.
+        let h_clock_hz = u64::from(clock_metadata.h_clock.freq_mhz) * 1_000_000;
+        let mp_npu_clock_hz = u64::from(clock_metadata.mp_npu_clock.freq_mhz) * 1_000_000;
+
+        Ok((h_clock_hz, mp_npu_clock_hz))
+    }
+}
+
+impl NpuImpl for AmdNpu {
+    fn device(&self) -> Option<&'static Device> {
+        self.device
+    }
+
+    fn pci_slot(&self) -> PciSlot {
+        self.pci_slot
+    }
+
+    fn driver(&self) -> &str {
+        &self.driver
+    }
+
+    fn sysfs_path(&self) -> &Path {
+        &self.sysfs_path
+    }
+
+    fn first_hwmon(&self) -> Option<&Path> {
+        self.first_hwmon_path.as_deref()
+    }
+
+    fn name(&self) -> Result<String> {
+        self.drm_name()
+    }
+
+    fn usage(&self) -> Result<f64> {
+        self.drm_usage().map(|usage| usage as f64 / 100.0)
+    }
+
+    fn used_memory(&self) -> Result<usize> {
+        self.drm_used_memory().map(|usage| usage as usize)
+    }
+
+    fn total_memory(&self) -> Result<usize> {
+        self.drm_total_memory().map(|usage| usage as usize)
+    }
+
+    fn temperature(&self) -> Result<f64> {
+        self.hwmon_temperature()
+    }
+
+    fn power_usage(&self) -> Result<f64> {
+        self.hwmon_power_usage()
+    }
+
+    /// Reports the driver's `h_clock` frequency in Hz.
+    fn core_frequency(&self) -> Result<f64> {
+        self.query_clock_metadata()
+            .map(|(h_clock_hz, _)| h_clock_hz as f64)
+    }
+
+    /// Reports the driver's `mp_npu_clock` frequency in Hz.
+    fn memory_frequency(&self) -> Result<f64> {
+        self.query_clock_metadata()
+            .map(|(_, mp_npu_clock_hz)| mp_npu_clock_hz as f64)
+    }
+
+    fn power_cap(&self) -> Result<f64> {
+        self.hwmon_power_cap()
+    }
+
+    fn power_cap_max(&self) -> Result<f64> {
+        self.hwmon_power_cap_max()
+    }
+}
diff --git a/src/utils/npu/intel.rs b/src/utils/npu/intel.rs
index d334088..c923bd6 100644
--- a/src/utils/npu/intel.rs
+++ b/src/utils/npu/intel.rs
@@ -83,11 +83,11 @@ impl NpuImpl for IntelNpu {
         Ok(delta_busy_time / delta_timestamp)
     }
 
-    fn used_vram(&self) -> Result<usize> {
+    fn used_memory(&self) -> Result<usize> {
         self.drm_used_memory().map(|usage| usage as usize)
     }
 
-    fn total_vram(&self) -> Result<usize> {
+    fn total_memory(&self) -> Result<usize> {
         self.drm_total_memory().map(|usage| usage as usize)
     }
 
diff --git a/src/utils/npu/mod.rs b/src/utils/npu/mod.rs
index 547ace3..57f9bce 100644
--- a/src/utils/npu/mod.rs
+++ b/src/utils/npu/mod.rs
@@ -1,6 +1,8 @@
+mod amd;
 mod intel;
 mod other;
 
+use amd::AmdNpu;
 use anyhow::{Context, Result, bail};
 use log::{debug, info, trace};
 use process_data::pci_slot::PciSlot;
@@ -14,16 +16,17 @@ use glob::glob;
 
 use crate::{
     i18n::i18n,
-    utils::{pci::Device, read_parsed, read_uevent},
+    utils::{
+        pci::{Device, Vendor},
+        read_parsed, read_uevent,
+    },
 };
 
 use self::{intel::IntelNpu, other::OtherNpu};
 
-use super::{
-    link::{Link, LinkData},
-    pci::Vendor,
-};
+use super::link::{Link, LinkData};
 
+pub const VID_AMD: u16 = 0x1022; // PCI vendor ID of AMD itself; 0x1002 is AMD/ATI (GPUs)
 pub const VID_INTEL: u16 = 0x8086;
 
 #[derive(Debug)]
@@ -55,8 +58,8 @@ impl NpuData {
 
         let usage_fraction = npu.usage().ok();
 
-        let total_memory = npu.total_vram().ok();
-        let used_memory = npu.used_vram().ok();
+        let total_memory = npu.total_memory().ok();
+        let used_memory = npu.used_memory().ok();
 
         let clock_speed = npu.core_frequency().ok();
         let vram_speed = npu.memory_frequency().ok();
@@ -91,6 +94,7 @@ impl NpuData {
 
 #[derive(Debug, Clone)]
 pub enum Npu {
+    Amd(AmdNpu),
     Intel(IntelNpu),
     Other(OtherNpu),
 }
@@ -110,8 +114,8 @@ pub trait NpuImpl {
 
     fn name(&self) -> Result<String>;
     fn usage(&self) -> Result<f64>;
-    fn used_vram(&self) -> Result<usize>;
-    fn total_vram(&self) -> Result<usize>;
+    fn used_memory(&self) -> Result<usize>;
+    fn total_memory(&self) -> Result<usize>;
     fn temperature(&self) -> Result<f64>;
     fn power_usage(&self) -> Result<f64>;
     fn core_frequency(&self) -> Result<f64>;
@@ -181,6 +185,7 @@ impl std::ops::Deref for Npu {
 
     fn deref(&self) -> &Self::Target {
         match self {
+            Npu::Amd(npu) => npu,
             Npu::Intel(npu) => npu,
             Npu::Other(npu) => npu,
         }
@@ -265,6 +270,17 @@ impl Npu {
                 )),
                 "Intel",
             )
+        } else if vid == VID_AMD || driver == "amdxdna" {
+            (
+                Npu::Amd(AmdNpu::new(
+                    device,
+                    pci_slot,
+                    driver,
+                    path.to_path_buf(),
+                    hwmon_vec.first().cloned(),
+                )),
+                "AMD",
+            )
         } else {
             (
                 Npu::Other(OtherNpu::new(
diff --git a/src/utils/npu/other.rs b/src/utils/npu/other.rs
index f78fdd7..c8f10a2 100644
--- a/src/utils/npu/other.rs
+++ b/src/utils/npu/other.rs
@@ -64,11 +64,11 @@ impl NpuImpl for OtherNpu {
         self.drm_usage().map(|usage| usage as f64 / 100.0)
     }
 
-    fn used_vram(&self) -> Result<usize> {
+    fn used_memory(&self) -> Result<usize> {
         self.drm_used_memory().map(|usage| usage as usize)
     }
 
-    fn total_vram(&self) -> Result<usize> {
+    fn total_memory(&self) -> Result<usize> {
         self.drm_total_memory().map(|usage| usage as usize)
     }
 
diff --git a/src/utils/process.rs b/src/utils/process.rs
index c7fd807..61de356 100644
--- a/src/utils/process.rs
+++ b/src/utils/process.rs
@@ -4,6 +4,8 @@ use log::{debug, error, info, trace};
 use process_data::{
     Niceness, ProcessData,
     gpu_usage::{GpuIdentifier, GpuUsageStats},
+    npu_usage::NpuUsageStats,
+    pci_slot::PciSlot,
 };
 use std::{
     collections::BTreeMap,
@@ -42,7 +44,7 @@ static COMPANION_PROCESS: LazyLock<Mutex<(ChildStdin, ChildStdout)>> = LazyLock:
             .args(["--host", proxy_path.as_str()])
             .stdin(Stdio::piped())
             .stdout(Stdio::piped())
-            .stderr(Stdio::null())
+            .stderr(Stdio::inherit())
             .spawn()
             .unwrap()
     } else {
@@ -50,7 +52,7 @@ static COMPANION_PROCESS: LazyLock<Mutex<(ChildStdin, ChildStdout)>> = LazyLock:
         Command::new(proxy_path)
             .stdin(Stdio::piped())
             .stdout(Stdio::piped())
-            .stderr(Stdio::null())
+            .stderr(Stdio::inherit())
             .spawn()
             .unwrap()
     };
@@ -73,6 +75,7 @@ pub struct Process {
     pub read_bytes_last: Option<u64>,
     pub write_bytes_last: Option<u64>,
     pub gpu_usage_stats_last: BTreeMap<GpuIdentifier, GpuUsageStats>,
+    pub npu_usage_stats_last: BTreeMap<PciSlot, NpuUsageStats>,
     pub display_name: String,
 }
 
@@ -175,6 +178,7 @@ impl Process {
             read_bytes_last,
             write_bytes_last,
             gpu_usage_stats_last: Default::default(),
+            npu_usage_stats_last: Default::default(),
             display_name,
         }
     }
@@ -451,6 +455,34 @@ impl Process {
             .sum()
     }
 
+    /// Sum of this process' usage fractions across all NPUs it has been sampled on.
+    ///
+    /// NPUs without a sample from the previous refresh contribute nothing.
+    #[must_use]
+    pub fn npu_usage(&self) -> f32 {
+        let time_delta = self.data.timestamp.saturating_sub(self.timestamp_last);
+
+        self.data
+            .npu_usage_stats
+            .iter()
+            .filter_map(|(npu, usage)| {
+                let old_usage = self.npu_usage_stats_last.get(npu)?;
+                Some(usage.usage_fraction(old_usage, time_delta).unwrap_or_default())
+            })
+            // Start from `0.0` so a process without NPU samples reports exactly `0.0`.
+            .fold(0.0, |total, fraction| total + fraction)
+    }
+
+    /// Total NPU memory attributed to this process across all NPUs, in bytes.
+    #[must_use]
+    pub fn npu_mem_usage(&self) -> u64 {
+        self.data
+            .npu_usage_stats
+            .values()
+            .filter_map(|stats| stats.mem())
+            .sum()
+    }
+
     #[must_use]
     pub fn starttime(&self) -> f64 {
         self.data.starttime as f64 / *TICK_RATE as f64