From 544bea88829d3f81a308108a0ee843303afe7582 Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Tue, 2 Sep 2025 10:54:19 +0200 Subject: [PATCH 1/5] refactor(pci): move (de)allocation BAR methods out of PciDevice PciDevice trait was defining (optional) methods to allocate and deallocate BAR regions for devices. This logic is quite device specific and when invoked we typically know what device it refers to exactly. Currently, we only implement the allocate API for VirtIO devices (no deallocation whatsoever). Simplify things by dropping these APIs from the trait definition. Also, remove the 32bit allocator argument from allocate_bars(); we always only allocate a single 64-bit BAR for VirtIO devices. Signed-off-by: Babis Chalios --- Cargo.lock | 1 - src/pci/Cargo.toml | 1 - src/pci/src/device.rs | 22 ----- src/vmm/src/device_manager/pci_mngr.rs | 7 +- .../devices/virtio/transport/pci/device.rs | 83 +++++++++---------- 5 files changed, 43 insertions(+), 71 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index de60175ed79..61455f5ac13 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1043,7 +1043,6 @@ dependencies = [ "serde", "serde_test", "thiserror 2.0.16", - "vm-allocator", "vm-device", "vm-memory", "vmm-sys-util", diff --git a/src/pci/Cargo.toml b/src/pci/Cargo.toml index 27a1e9918c7..d724bf7b47c 100644 --- a/src/pci/Cargo.toml +++ b/src/pci/Cargo.toml @@ -18,7 +18,6 @@ libc = "0.2.175" log = "0.4.28" serde = { version = "1.0.219", features = ["derive"] } thiserror = "2.0.16" -vm-allocator = "0.1.3" vm-device = { path = "../vm-device" } vm-memory = { version = "0.16.1", features = [ "backend-mmap", diff --git a/src/pci/src/device.rs b/src/pci/src/device.rs index 11db4f478a5..d2105f26cc4 100644 --- a/src/pci/src/device.rs +++ b/src/pci/src/device.rs @@ -8,8 +8,6 @@ use std::sync::{Arc, Barrier}; use std::{io, result}; -use vm_allocator::AddressAllocator; - use crate::configuration::{self, PciBarRegionType}; #[derive(Debug, thiserror::Error, displaydoc::Display)] @@ -23,7 +21,6 @@ pub enum Error { /// Expected resource not found. MissingResource, } -pub type Result = std::result::Result; #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct BarReprogrammingParams { @@ -34,25 +31,6 @@ pub struct BarReprogrammingParams { } pub trait PciDevice: Send { - /// Allocates the needed PCI BARs space using the `allocate` function which takes a size and - /// returns an address. Returns a Vec of (GuestAddress, GuestUsize) tuples. - fn allocate_bars( - &mut self, - _mmio32_allocator: &mut AddressAllocator, - _mmio64_allocator: &mut AddressAllocator, - ) -> Result<()> { - Ok(()) - } - - /// Frees the PCI BARs previously allocated with a call to allocate_bars(). - fn free_bars( - &mut self, - _mmio32_allocator: &mut AddressAllocator, - _mmio64_allocator: &mut AddressAllocator, - ) -> Result<()> { - Ok(()) - } - /// Sets a register in the configuration space. /// * `reg_idx` - The index of the config register to modify. /// * `offset` - Offset into the register. diff --git a/src/vmm/src/device_manager/pci_mngr.rs b/src/vmm/src/device_manager/pci_mngr.rs index f1ec39ab1d5..1e93d2abeae 100644 --- a/src/vmm/src/device_manager/pci_mngr.rs +++ b/src/vmm/src/device_manager/pci_mngr.rs @@ -8,7 +8,7 @@ use std::sync::{Arc, Mutex}; use event_manager::{MutEventSubscriber, SubscriberOps}; use log::{debug, error, warn}; -use pci::{PciBarRegionType, PciDevice, PciDeviceError, PciRootError}; +use pci::{PciBarRegionType, PciDeviceError, PciRootError}; use serde::{Deserialize, Serialize}; use vm_device::BusError; @@ -130,10 +130,7 @@ impl PciDevices { let mut resource_allocator_lock = vm.resource_allocator(); let resource_allocator = resource_allocator_lock.deref_mut(); - virtio_device.allocate_bars( - &mut resource_allocator.mmio32_memory, - &mut resource_allocator.mmio64_memory, - )?; + virtio_device.allocate_bars(&mut resource_allocator.mmio64_memory)?; let virtio_device = Arc::new(Mutex::new(virtio_device)); pci_segment diff --git a/src/vmm/src/devices/virtio/transport/pci/device.rs b/src/vmm/src/devices/virtio/transport/pci/device.rs index 3d6e4aee6a8..6784e93d4cb 100644 --- a/src/vmm/src/devices/virtio/transport/pci/device.rs +++ b/src/vmm/src/devices/virtio/transport/pci/device.rs @@ -359,6 +359,47 @@ impl VirtioPciDevice { Ok(msix_config) } + pub fn allocate_bars( + &mut self, + mmio64_allocator: &mut AddressAllocator, + ) -> std::result::Result<(), PciDeviceError> { + let device_clone = self.device.clone(); + let device = device_clone.lock().unwrap(); + + // Allocate the virtio-pci capability BAR. + // See http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-740004 + let virtio_pci_bar_addr = mmio64_allocator + .allocate( + CAPABILITY_BAR_SIZE, + CAPABILITY_BAR_SIZE, + AllocPolicy::FirstMatch, + ) + .unwrap() + .start(); + + let bar = PciBarConfiguration { + addr: virtio_pci_bar_addr, + size: CAPABILITY_BAR_SIZE, + idx: VIRTIO_COMMON_BAR_INDEX, + region_type: PciBarRegionType::Memory64BitRegion, + prefetchable: pci::PciBarPrefetchable::NotPrefetchable, + }; + + // The creation of the PCI BAR and its associated capabilities must + // happen only during the creation of a brand new VM. When a VM is + // restored from a known state, the BARs are already created with the + // right content, therefore we don't need to go through this codepath. + self.configuration + .add_pci_bar(&bar) + .map_err(|e| PciDeviceError::IoRegistrationFailed(virtio_pci_bar_addr, e))?; + + // Once the BARs are allocated, the capabilities can be added to the PCI configuration. + self.add_pci_capabilities()?; + self.bar_region = bar; + + Ok(()) + } + /// Constructs a new PCI transport for the given virtio device. pub fn new( id: String, @@ -798,48 +839,6 @@ impl PciDevice for VirtioPciDevice { self.configuration.detect_bar_reprogramming(reg_idx, data) } - fn allocate_bars( - &mut self, - mmio32_allocator: &mut AddressAllocator, - mmio64_allocator: &mut AddressAllocator, - ) -> std::result::Result<(), PciDeviceError> { - let device_clone = self.device.clone(); - let device = device_clone.lock().unwrap(); - - // Allocate the virtio-pci capability BAR. - // See http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-740004 - let virtio_pci_bar_addr = mmio64_allocator - .allocate( - CAPABILITY_BAR_SIZE, - CAPABILITY_BAR_SIZE, - AllocPolicy::FirstMatch, - ) - .unwrap() - .start(); - - let bar = PciBarConfiguration { - addr: virtio_pci_bar_addr, - size: CAPABILITY_BAR_SIZE, - idx: VIRTIO_COMMON_BAR_INDEX, - region_type: PciBarRegionType::Memory64BitRegion, - prefetchable: pci::PciBarPrefetchable::NotPrefetchable, - }; - - // The creation of the PCI BAR and its associated capabilities must - // happen only during the creation of a brand new VM. When a VM is - // restored from a known state, the BARs are already created with the - // right content, therefore we don't need to go through this codepath. - self.configuration - .add_pci_bar(&bar) - .map_err(|e| PciDeviceError::IoRegistrationFailed(virtio_pci_bar_addr, e))?; - - // Once the BARs are allocated, the capabilities can be added to the PCI configuration. - self.add_pci_capabilities()?; - self.bar_region = bar; - - Ok(()) - } - fn move_bar( &mut self, old_base: u64, From 304d992ed7a1a1a92f209654d566ee2b5a65ac49 Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Tue, 2 Sep 2025 12:36:05 +0200 Subject: [PATCH 2/5] refactor(pci): assume we only have a single 64-bit BAR In our case, only VirtIO PCI devices use a BAR region. Moreover, this region has a fixed size and it's always a 64-bit region. This means we don't handle 32-bit or IO BARs and we don't support a ROM bar. Encode these assumptions to the logic that adds BAR regions and detects BAR reprogramming attempts from the guest side so that we simplify the code. Signed-off-by: Babis Chalios --- src/pci/src/bus.rs | 36 +- src/pci/src/configuration.rs | 697 ++++++------------ src/pci/src/device.rs | 8 - src/vmm/src/device_manager/pci_mngr.rs | 19 +- .../devices/virtio/transport/pci/device.rs | 74 +- src/vmm/src/vstate/vm.rs | 1 - 6 files changed, 273 insertions(+), 562 deletions(-) diff --git a/src/pci/src/bus.rs b/src/pci/src/bus.rs index 12188ee7ba5..7739e8d9ef8 100644 --- a/src/pci/src/bus.rs +++ b/src/pci/src/bus.rs @@ -213,7 +213,6 @@ impl PciConfigIo { params.new_base, params.len, device.deref_mut(), - params.region_type, ) { error!( "Failed moving device BAR: {}: 0x{:x}->0x{:x}(0x{:x})", @@ -348,7 +347,6 @@ impl PciConfigMmio { params.new_base, params.len, device.deref_mut(), - params.region_type, ) { error!( "Failed moving device BAR: {}: 0x{:x}->0x{:x}(0x{:x})", @@ -445,8 +443,8 @@ mod tests { use super::{PciBus, PciConfigIo, PciConfigMmio, PciRoot}; use crate::bus::{DEVICE_ID_INTEL_VIRT_PCIE_HOST, VENDOR_ID_INTEL}; use crate::{ - DeviceRelocation, PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, PciClassCode, - PciConfiguration, PciDevice, PciHeaderType, PciMassStorageSubclass, + DeviceRelocation, PciClassCode, PciConfiguration, PciDevice, PciHeaderType, + PciMassStorageSubclass, }; #[derive(Debug, Default)] @@ -467,7 +465,6 @@ mod tests { _new_base: u64, _len: u64, _pci_dev: &mut dyn crate::PciDevice, - _region_type: crate::PciBarRegionType, ) -> std::result::Result<(), std::io::Error> { self.reloc_cnt .fetch_add(1, std::sync::atomic::Ordering::SeqCst); @@ -493,15 +490,7 @@ mod tests { None, ); - config - .add_pci_bar(&PciBarConfiguration { - addr: 0x1000, - size: 0x1000, - idx: 0, - region_type: PciBarRegionType::Memory32BitRegion, - prefetchable: PciBarPrefetchable::Prefetchable, - }) - .unwrap(); + config.add_pci_bar(0, 0x1000, 0x1000); PciDevMock(config) } @@ -920,6 +909,8 @@ mod tests { read_mmio_config(&mut mmio_config, 0, 1, 0, 0x4, 0, &mut buffer); let old_addr = u32::from_le_bytes(buffer) & 0xffff_fff0; assert_eq!(old_addr, 0x1000); + + // Writing the lower 32bits first should not trigger any reprogramming write_mmio_config( &mut mmio_config, 0, @@ -927,16 +918,23 @@ mod tests { 0, 0x4, 0, - &u32::to_le_bytes(0x1312_1110), + &u32::to_le_bytes(0x1312_0000), ); read_mmio_config(&mut mmio_config, 0, 1, 0, 0x4, 0, &mut buffer); let new_addr = u32::from_le_bytes(buffer) & 0xffff_fff0; - assert_eq!(new_addr, 0x1312_1110); - assert_eq!(mock.cnt(), 1); + assert_eq!(new_addr, 0x1312_0000); + assert_eq!(mock.cnt(), 0); - // BAR1 should not be used, so reading its address should return all 0s + // Writing the upper 32bits first should now trigger the reprogramming logic + write_mmio_config(&mut mmio_config, 0, 1, 0, 0x5, 0, &u32::to_le_bytes(0x1110)); read_mmio_config(&mut mmio_config, 0, 1, 0, 0x5, 0, &mut buffer); + let new_addr = u32::from_le_bytes(buffer); + assert_eq!(new_addr, 0x1110); + assert_eq!(mock.cnt(), 1); + + // BAR2 should not be used, so reading its address should return all 0s + read_mmio_config(&mut mmio_config, 0, 1, 0, 0x6, 0, &mut buffer); assert_eq!(buffer, [0x0, 0x0, 0x0, 0x0]); // and reprogramming shouldn't have any effect @@ -950,7 +948,7 @@ mod tests { &u32::to_le_bytes(0x1312_1110), ); - read_mmio_config(&mut mmio_config, 0, 1, 0, 0x5, 0, &mut buffer); + read_mmio_config(&mut mmio_config, 0, 1, 0, 0x6, 0, &mut buffer); assert_eq!(buffer, [0x0, 0x0, 0x0, 0x0]); } } diff --git a/src/pci/src/configuration.rs b/src/pci/src/configuration.rs index fd1e3958ec8..22754c0f9dc 100644 --- a/src/pci/src/configuration.rs +++ b/src/pci/src/configuration.rs @@ -5,7 +5,6 @@ // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause -use std::fmt::{self, Display}; use std::sync::{Arc, Mutex}; use byteorder::{ByteOrder, LittleEndian}; @@ -21,7 +20,6 @@ const STATUS_REG: usize = 1; const STATUS_REG_CAPABILITIES_USED_MASK: u32 = 0x0010_0000; const BAR0_REG: usize = 4; const ROM_BAR_REG: usize = 12; -const BAR_IO_ADDR_MASK: u32 = 0xffff_fffc; const BAR_MEM_ADDR_MASK: u32 = 0xffff_fff0; const ROM_BAR_ADDR_MASK: u32 = 0xffff_f800; const MSI_CAPABILITY_REGISTER_MASK: u32 = 0x0071_0000; @@ -365,36 +363,22 @@ pub trait PciCapability { fn id(&self) -> PciCapabilityId; } -fn encode_32_bits_bar_size(bar_size: u32) -> Option { - if bar_size > 0 { - return Some(!(bar_size - 1)); - } - None -} - -fn decode_32_bits_bar_size(bar_size: u32) -> Option { - if bar_size > 0 { - return Some(!bar_size + 1); - } - None -} - -fn encode_64_bits_bar_size(bar_size: u64) -> Option<(u32, u32)> { - if bar_size > 0 { - let result = !(bar_size - 1); - let result_hi = (result >> 32) as u32; - let result_lo = (result & 0xffff_ffff) as u32; - return Some((result_hi, result_lo)); - } - None +// This encodes the BAR size as expected by the software running inside the guest. +// It assumes that bar_size is not 0 +fn encode_64_bits_bar_size(bar_size: u64) -> (u32, u32) { + assert_ne!(bar_size, 0); + let result = !(bar_size - 1); + let result_hi = (result >> 32) as u32; + let result_lo = (result & 0xffff_ffff) as u32; + (result_hi, result_lo) } -fn decode_64_bits_bar_size(bar_size_hi: u32, bar_size_lo: u32) -> Option { +// This decoes the BAR size from the value stored in the BAR registers. +fn decode_64_bits_bar_size(bar_size_hi: u32, bar_size_lo: u32) -> u64 { let bar_size: u64 = ((bar_size_hi as u64) << 32) | (bar_size_lo as u64); - if bar_size > 0 { - return Some(!bar_size + 1); - } - None + let size = !bar_size + 1; + assert_ne!(size, 0); + size } #[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)] @@ -466,56 +450,6 @@ pub struct PciBarConfiguration { pub prefetchable: PciBarPrefetchable, } -#[derive(Debug)] -pub enum Error { - BarAddressInvalid(u64, u64), - BarInUse(usize), - BarInUse64(usize), - BarInvalid(usize), - BarInvalid64(usize), - BarSizeInvalid(u64), - CapabilitySpaceFull(usize), - Decode32BarSize, - Decode64BarSize, - Encode32BarSize, - Encode64BarSize, - RomBarAddressInvalid(u64, u64), - RomBarInUse(usize), - RomBarInvalid(usize), - RomBarSizeInvalid(u64), -} -pub type Result = std::result::Result; - -impl std::error::Error for Error {} - -impl Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - use self::Error::*; - match self { - BarAddressInvalid(a, s) => write!(f, "address {a} size {s} too big"), - BarInUse(b) => write!(f, "bar {b} already used"), - BarInUse64(b) => write!(f, "64bit bar {b} already used(requires two regs)"), - BarInvalid(b) => write!(f, "bar {} invalid, max {}", b, NUM_BAR_REGS - 1), - BarInvalid64(b) => write!( - f, - "64bitbar {} invalid, requires two regs, max {}", - b, - NUM_BAR_REGS - 1 - ), - BarSizeInvalid(s) => write!(f, "bar address {s} not a power of two"), - CapabilitySpaceFull(s) => write!(f, "capability of size {s} doesn't fit"), - Decode32BarSize => write!(f, "failed to decode 32 bits BAR size"), - Decode64BarSize => write!(f, "failed to decode 64 bits BAR size"), - Encode32BarSize => write!(f, "failed to encode 32 bits BAR size"), - Encode64BarSize => write!(f, "failed to encode 64 bits BAR size"), - RomBarAddressInvalid(a, s) => write!(f, "address {a} size {s} too big"), - RomBarInUse(b) => write!(f, "rom bar {b} already used"), - RomBarInvalid(b) => write!(f, "rom bar {} invalid, max {}", b, NUM_BAR_REGS - 1), - RomBarSizeInvalid(s) => write!(f, "rom bar address {s} not a power of two"), - } - } -} - impl PciConfiguration { #[allow(clippy::too_many_arguments)] pub fn new( @@ -695,93 +629,66 @@ impl PciConfiguration { } } - /// Adds a region specified by `config`. Configures the specified BAR(s) to - /// report this region and size to the guest kernel. Enforces a few constraints - /// (i.e, region size must be power of two, register not already used). - pub fn add_pci_bar(&mut self, config: &PciBarConfiguration) -> Result<()> { - let bar_idx = config.idx; + /// Add the [addr, addr + size) BAR region. + /// + /// Configures the specified BAR to report this region and size to the guest kernel. + /// Enforces a few constraints (i.e, region size must be power of two, register not already + /// used). + pub fn add_pci_bar(&mut self, bar_idx: usize, addr: u64, size: u64) { let reg_idx = BAR0_REG + bar_idx; - if bar_idx >= NUM_BAR_REGS { - return Err(Error::BarInvalid(bar_idx)); - } - - if self.bars[bar_idx].used { - return Err(Error::BarInUse(bar_idx)); - } - - if !config.size.is_power_of_two() { - return Err(Error::BarSizeInvalid(config.size)); - } - - let end_addr = config - .addr - .checked_add(config.size - 1) - .ok_or(Error::BarAddressInvalid(config.addr, config.size))?; - match config.region_type { - PciBarRegionType::Memory32BitRegion | PciBarRegionType::IoRegion => { - if end_addr > u64::from(u32::MAX) { - return Err(Error::BarAddressInvalid(config.addr, config.size)); - } - - // Encode the BAR size as expected by the software running in - // the guest. - self.bars[bar_idx].size = - encode_32_bits_bar_size(config.size as u32).ok_or(Error::Encode32BarSize)?; - } - PciBarRegionType::Memory64BitRegion => { - if bar_idx + 1 >= NUM_BAR_REGS { - return Err(Error::BarInvalid64(bar_idx)); - } - - if self.bars[bar_idx + 1].used { - return Err(Error::BarInUse64(bar_idx + 1)); - } - - // Encode the BAR size as expected by the software running in - // the guest. - let (bar_size_hi, bar_size_lo) = - encode_64_bits_bar_size(config.size).ok_or(Error::Encode64BarSize)?; - - self.registers[reg_idx + 1] = (config.addr >> 32) as u32; - self.writable_bits[reg_idx + 1] = 0xffff_ffff; - self.bars[bar_idx + 1].addr = self.registers[reg_idx + 1]; - self.bars[bar_idx].size = bar_size_lo; - self.bars[bar_idx + 1].size = bar_size_hi; - self.bars[bar_idx + 1].used = true; - } - } - - let (mask, lower_bits) = match config.region_type { - PciBarRegionType::Memory32BitRegion | PciBarRegionType::Memory64BitRegion => ( - BAR_MEM_ADDR_MASK, - config.prefetchable as u32 | config.region_type as u32, - ), - PciBarRegionType::IoRegion => (BAR_IO_ADDR_MASK, config.region_type as u32), - }; - - self.registers[reg_idx] = ((config.addr as u32) & mask) | lower_bits; - self.writable_bits[reg_idx] = mask; + // These are a few constraints that are imposed due to the fact + // that only VirtIO devices are actually allocating a BAR. Moreover, this is + // a single 64-bit BAR. Not conforming to these requirements is an internal + // Firecracker bug. + + // We are only using BAR 0 + assert_eq!(bar_idx, 0); + // We shouldn't be trying to use the same BAR twice + assert!(!self.bars[0].used); + assert!(!self.bars[1].used); + // We can't have a size of 0 + assert_ne!(size, 0); + // BAR size needs to be a power of two + assert!(size.is_power_of_two()); + // We should not be overflowing the address space + addr.checked_add(size - 1).unwrap(); + + // Encode the BAR size as expected by the software running in + // the guest. + let (bar_size_hi, bar_size_lo) = encode_64_bits_bar_size(size); + + self.registers[reg_idx + 1] = (addr >> 32) as u32; + self.writable_bits[reg_idx + 1] = 0xffff_ffff; + self.bars[bar_idx + 1].addr = self.registers[reg_idx + 1]; + self.bars[bar_idx].size = bar_size_lo; + self.bars[bar_idx + 1].size = bar_size_hi; + self.bars[bar_idx + 1].used = true; + + // Addresses of memory BARs are 16-byte aligned so the lower 4 bits are always 0. Within + // the register we use this 4 bits to encode extra information about the BAR. The meaning + // of these bits is: + // + // | Bit 3 | Bits 2-1 | Bit 0 | + // | Prefetchable | type | Always 0 | + self.registers[reg_idx] = (((addr & 0xffff_ffff) as u32) & BAR_MEM_ADDR_MASK) + | (PciBarPrefetchable::NotPrefetchable as u32) + | (PciBarRegionType::Memory64BitRegion as u32); + self.writable_bits[reg_idx] = BAR_MEM_ADDR_MASK; self.bars[bar_idx].addr = self.registers[reg_idx]; self.bars[bar_idx].used = true; - self.bars[bar_idx].r#type = Some(config.region_type); - - Ok(()) } /// Returns the address of the given BAR region. - pub fn get_bar_addr(&self, bar_num: usize) -> u64 { - let bar_idx = BAR0_REG + bar_num; - - let mut addr = u64::from(self.bars[bar_num].addr & self.writable_bits[bar_idx]); + /// + /// This assumes that `bar_idx` is a valid BAR register. + pub fn get_bar_addr(&self, bar_idx: usize) -> u64 { + assert!(bar_idx < NUM_BAR_REGS); - if let Some(bar_type) = self.bars[bar_num].r#type { - if bar_type == PciBarRegionType::Memory64BitRegion { - addr |= u64::from(self.bars[bar_num + 1].addr) << 32; - } - } + let reg_idx = BAR0_REG + bar_idx; - addr + (u64::from(self.bars[bar_idx].addr & self.writable_bits[reg_idx])) + | (u64::from(self.bars[bar_idx + 1].addr) << 32) } /// Adds the capability `cap_data` to the list of capabilities. @@ -789,18 +696,18 @@ impl PciConfiguration { /// `cap_data` should not include the two-byte PCI capability header (type, next). /// Correct values will be generated automatically based on `cap_data.id()` and /// `cap_data.len()`. - pub fn add_capability(&mut self, cap_data: &dyn PciCapability) -> Result { + pub fn add_capability(&mut self, cap_data: &dyn PciCapability) -> usize { let total_len = cap_data.bytes().len() + 2; let (cap_offset, tail_offset) = match self.last_capability { Some((offset, len)) => (Self::next_dword(offset, len), offset + 1), None => (FIRST_CAPABILITY_OFFSET, CAPABILITY_LIST_HEAD_OFFSET), }; - let end_offset = cap_offset - .checked_add(total_len) - .ok_or(Error::CapabilitySpaceFull(total_len))?; - if end_offset > CAPABILITY_MAX_OFFSET { - return Err(Error::CapabilitySpaceFull(total_len)); - } + + // We know that the capabilities we are using have a valid size (doesn't overflow) and that + // we add capabilities that fit in the available space. If any of these requirements don't + // hold, this is due to a Firecracker bug. + let end_offset = cap_offset.checked_add(total_len).unwrap(); + assert!(end_offset <= CAPABILITY_MAX_OFFSET); self.registers[STATUS_REG] |= STATUS_REG_CAPABILITIES_USED_MASK; self.write_byte_internal(tail_offset, cap_offset as u8, false); self.write_byte_internal(cap_offset, cap_data.id() as u8, false); @@ -821,7 +728,7 @@ impl PciConfiguration { _ => {} } - Ok(cap_offset) + cap_offset } // Find the next aligned offset after the one given. @@ -882,111 +789,54 @@ impl PciConfiguration { let value = LittleEndian::read_u32(data); let mask = self.writable_bits[reg_idx]; - if (BAR0_REG..BAR0_REG + NUM_BAR_REGS).contains(®_idx) { - // Ignore the case where the BAR size is being asked for. - if value == 0xffff_ffff { - return None; - } + if !(BAR0_REG..BAR0_REG + NUM_BAR_REGS).contains(®_idx) { + return None; + } - let bar_idx = reg_idx - 4; - // Handle special case where the address being written is - // different from the address initially provided. This is a - // BAR reprogramming case which needs to be properly caught. - if let Some(bar_type) = self.bars[bar_idx].r#type { - // In case of 64 bits memory BAR, we don't do anything until - // the upper BAR is modified, otherwise we would be moving the - // BAR to a wrong location in memory. - if bar_type == PciBarRegionType::Memory64BitRegion { - return None; - } + // Ignore the case where the BAR size is being asked for. + if value == 0xffff_ffff { + return None; + } - // Ignore the case where the value is unchanged. - if (value & mask) == (self.bars[bar_idx].addr & mask) { - return None; - } + let bar_idx = reg_idx - 4; - info!( - "Detected BAR reprogramming: (BAR {}) 0x{:x}->0x{:x}", - reg_idx, self.registers[reg_idx], value - ); - let old_base = u64::from(self.bars[bar_idx].addr & mask); - let new_base = u64::from(value & mask); - let len = u64::from( - decode_32_bits_bar_size(self.bars[bar_idx].size) - .ok_or(Error::Decode32BarSize) - .unwrap(), - ); - let region_type = bar_type; - - self.bars[bar_idx].addr = value; - - return Some(BarReprogrammingParams { - old_base, - new_base, - len, - region_type, - }); - } else if (reg_idx > BAR0_REG) - && ( - // The lower BAR (of this 64bit BAR) has been reprogrammed to a different value - // than it used to be - (self.registers[reg_idx - 1] & self.writable_bits[reg_idx - 1]) + // Do not reprogram BARs we are not using + if !self.bars[bar_idx].used { + return None; + } + + // We are always using 64bit BARs, so two BAR registers. We don't do anything until + // the upper BAR is modified, otherwise we would be moving the BAR to a wrong + // location in memory. + if bar_idx == 0 { + return None; + } + + // The lower BAR (of this 64bit BAR) has been reprogrammed to a different value + // than it used to be + if (self.registers[reg_idx - 1] & self.writable_bits[reg_idx - 1]) != (self.bars[bar_idx - 1].addr & self.writable_bits[reg_idx - 1]) || // Or the lower BAR hasn't been changed but the upper one is being reprogrammed // now to a different value (value & mask) != (self.bars[bar_idx].addr & mask) - ) - { - info!( - "Detected BAR reprogramming: (BAR {}) 0x{:x}->0x{:x}", - reg_idx, self.registers[reg_idx], value - ); - let old_base = (u64::from(self.bars[bar_idx].addr & mask) << 32) - | u64::from(self.bars[bar_idx - 1].addr & self.writable_bits[reg_idx - 1]); - let new_base = (u64::from(value & mask) << 32) - | u64::from(self.registers[reg_idx - 1] & self.writable_bits[reg_idx - 1]); - let len = - decode_64_bits_bar_size(self.bars[bar_idx].size, self.bars[bar_idx - 1].size) - .ok_or(Error::Decode64BarSize) - .unwrap(); - let region_type = PciBarRegionType::Memory64BitRegion; - - self.bars[bar_idx].addr = value; - self.bars[bar_idx - 1].addr = self.registers[reg_idx - 1]; - - return Some(BarReprogrammingParams { - old_base, - new_base, - len, - region_type, - }); - } - } else if reg_idx == ROM_BAR_REG && (value & mask) != (self.rom_bar_addr & mask) { - // Ignore the case where the BAR size is being asked for. - if value & ROM_BAR_ADDR_MASK == ROM_BAR_ADDR_MASK { - return None; - } - + { info!( - "Detected ROM BAR reprogramming: (BAR {}) 0x{:x}->0x{:x}", + "Detected BAR reprogramming: (BAR {}) 0x{:x}->0x{:x}", reg_idx, self.registers[reg_idx], value ); - let old_base = u64::from(self.rom_bar_addr & mask); - let new_base = u64::from(value & mask); - let len = u64::from( - decode_32_bits_bar_size(self.rom_bar_size) - .ok_or(Error::Decode32BarSize) - .unwrap(), - ); - let region_type = PciBarRegionType::Memory32BitRegion; + let old_base = (u64::from(self.bars[bar_idx].addr & mask) << 32) + | u64::from(self.bars[bar_idx - 1].addr & self.writable_bits[reg_idx - 1]); + let new_base = (u64::from(value & mask) << 32) + | u64::from(self.registers[reg_idx - 1] & self.writable_bits[reg_idx - 1]); + let len = decode_64_bits_bar_size(self.bars[bar_idx].size, self.bars[bar_idx - 1].size); - self.rom_bar_addr = value; + self.bars[bar_idx].addr = value; + self.bars[bar_idx - 1].addr = self.registers[reg_idx - 1]; return Some(BarReprogrammingParams { old_base, new_base, len, - region_type, }); } @@ -1058,45 +908,38 @@ mod tests { } #[test] - fn add_capability() { - let mut cfg = PciConfiguration::new( - 0x1234, - 0x5678, - 0x1, - PciClassCode::MultimediaController, - &PciMultimediaSubclass::AudioController, - None, - PciHeaderType::Device, - 0xABCD, - 0x2468, - None, - None, - ); + #[should_panic] + fn test_too_big_capability() { + let mut cfg = default_config(); + cfg.add_capability(&BadCap::new(127)); + } + + #[test] + #[should_panic] + fn test_capability_space_overflow() { + let mut cfg = default_config(); + cfg.add_capability(&BadCap::new(62)); + cfg.add_capability(&BadCap::new(62)); + cfg.add_capability(&BadCap::new(0)); + } + + #[test] + fn test_add_capability() { + let mut cfg = default_config(); - // Bad size capabilities - assert!(matches!( - cfg.add_capability(&BadCap::new(127)), - Err(Error::CapabilitySpaceFull(129)) - )); - cfg.add_capability(&BadCap::new(62)).unwrap(); - cfg.add_capability(&BadCap::new(62)).unwrap(); - assert!(matches!( - cfg.add_capability(&BadCap::new(0)), - Err(Error::CapabilitySpaceFull(2)) - )); // Reset capabilities cfg.last_capability = None; // Add two capabilities with different contents. let cap1 = TestCap { len: 4, foo: 0xAA }; - let cap1_offset = cfg.add_capability(&cap1).unwrap(); + let cap1_offset = cfg.add_capability(&cap1); assert_eq!(cap1_offset % 4, 0); let cap2 = TestCap { len: 0x04, foo: 0x55, }; - let cap2_offset = cfg.add_capability(&cap2).unwrap(); + let cap2_offset = cfg.add_capability(&cap2); assert_eq!(cap2_offset % 4, 0); // The capability list head should be pointing to cap1. @@ -1119,19 +962,7 @@ mod tests { #[test] fn test_msix_capability() { - let mut cfg = PciConfiguration::new( - 0x1234, - 0x5678, - 0x1, - PciClassCode::MultimediaController, - &PciMultimediaSubclass::AudioController, - None, - PciHeaderType::Device, - 0xABCD, - 0x2468, - None, - None, - ); + let mut cfg = default_config(); // Information about the MSI-X capability layout: https://wiki.osdev.org/PCI#Enabling_MSI-X let msix_cap = MsixCap::new( @@ -1141,7 +972,7 @@ mod tests { 4, // BAR4 used for pending control bit 0x420, // Offset of pending bit array (PBA) inside BAR ); - cfg.add_capability(&msix_cap).unwrap(); + cfg.add_capability(&msix_cap); let cap_reg = FIRST_CAPABILITY_OFFSET / 4; let reg = cfg.read_reg(cap_reg); @@ -1255,12 +1086,19 @@ mod tests { } #[test] - fn test_bar_size_encoding() { - assert!(encode_32_bits_bar_size(0).is_none()); - assert!(decode_32_bits_bar_size(0).is_none()); - assert!(encode_64_bits_bar_size(0).is_none()); - assert!(decode_64_bits_bar_size(0, 0).is_none()); + #[should_panic] + fn test_encode_zero_sized_bar() { + encode_64_bits_bar_size(0); + } + #[test] + #[should_panic] + fn test_decode_zero_sized_bar() { + decode_64_bits_bar_size(0, 0); + } + + #[test] + fn test_bar_size_encoding() { // According to OSDev wiki (https://wiki.osdev.org/PCI#Address_and_size_of_the_BAR): // // > To determine the amount of address space needed by a PCI device, you must save the @@ -1272,22 +1110,16 @@ mod tests { // have BAR0 > filled with 0xFF000000 (0x1000000 after decoding) and you can only modify // the upper > 8-bits. // - // So we should be encoding an address like this: `addr` -> `!(addr - 1)` - let encoded = encode_32_bits_bar_size(0x0101_0101).unwrap(); - assert_eq!(encoded, 0xfefe_feff); - assert_eq!(decode_32_bits_bar_size(encoded), Some(0x0101_0101)); - - // Similarly we encode a 64 bits size and then store it as a 2 32bit addresses (we use + // So, we encode a 64 bits size and then store it as a 2 32bit addresses (we use // two BARs). - let (hi, lo) = encode_64_bits_bar_size(0xffff_ffff_ffff_fff0).unwrap(); + let (hi, lo) = encode_64_bits_bar_size(0xffff_ffff_ffff_fff0); assert_eq!(hi, 0); assert_eq!(lo, 0x0000_0010); - assert_eq!(decode_64_bits_bar_size(hi, lo), Some(0xffff_ffff_ffff_fff0)); + assert_eq!(decode_64_bits_bar_size(hi, lo), 0xffff_ffff_ffff_fff0); } - #[test] - fn test_add_pci_bar() { - let mut pci_config = PciConfiguration::new( + fn default_config() -> PciConfiguration { + PciConfiguration::new( 0x42, 0x0, 0x0, @@ -1299,120 +1131,72 @@ mod tests { 0x12, None, None, - ); + ) + } - // BAR size can only be a power of 2 - assert!(matches!( - pci_config.add_pci_bar(&PciBarConfiguration { - addr: 0x1000, - size: 0x1001, - idx: 0, - region_type: PciBarRegionType::Memory32BitRegion, - prefetchable: PciBarPrefetchable::Prefetchable, - }), - Err(Error::BarSizeInvalid(0x1001)) - )); - - // Invalid BAR index - assert!(matches!( - pci_config.add_pci_bar(&PciBarConfiguration { - addr: 0x1000, - size: 0x1000, - idx: NUM_BAR_REGS, - region_type: PciBarRegionType::Memory32BitRegion, - prefetchable: PciBarPrefetchable::Prefetchable - }), - Err(Error::BarInvalid(NUM_BAR_REGS)) - )); - // 64bit BARs need 2 BAR slots actually - assert!(matches!( - pci_config.add_pci_bar(&PciBarConfiguration { - addr: 0x1000, - size: 0x1000, - idx: NUM_BAR_REGS - 1, - region_type: PciBarRegionType::Memory64BitRegion, - prefetchable: PciBarPrefetchable::Prefetchable - }), - Err(Error::BarInvalid64(_)) - )); - - // Check for valid addresses - // Can't have an address that exceeds 32 bits for a 32bit BAR - assert!(matches!( - pci_config.add_pci_bar(&PciBarConfiguration { - addr: 0x1000_0000_0000_0000, - size: 0x1000, - idx: 0, - region_type: PciBarRegionType::Memory32BitRegion, - prefetchable: PciBarPrefetchable::Prefetchable - }), - Err(Error::BarAddressInvalid(0x1000_0000_0000_0000, 0x1000)) - )); - // Ensure that we handle properly overflows in 64bit BAR ranges - assert!(matches!( - pci_config.add_pci_bar(&PciBarConfiguration { - addr: u64::MAX, - size: 0x2, - idx: 0, - region_type: PciBarRegionType::Memory64BitRegion, - prefetchable: PciBarPrefetchable::Prefetchable - }), - Err(Error::BarAddressInvalid(u64::MAX, 2)) - )); - - // We can't reuse a BAR slot - pci_config - .add_pci_bar(&PciBarConfiguration { - addr: 0x1000, - size: 0x1000, - idx: 0, - region_type: PciBarRegionType::Memory32BitRegion, - prefetchable: PciBarPrefetchable::Prefetchable, - }) - .unwrap(); - assert!(matches!( - pci_config.add_pci_bar(&PciBarConfiguration { - addr: 0x1000, - size: 0x1000, - idx: 0, - region_type: PciBarRegionType::Memory32BitRegion, - prefetchable: PciBarPrefetchable::Prefetchable, - }), - Err(Error::BarInUse(0)) - )); - pci_config - .add_pci_bar(&PciBarConfiguration { - addr: 0x0000_0001_0000_0000, - size: 0x2000, - idx: 2, - region_type: PciBarRegionType::Memory64BitRegion, - prefetchable: PciBarPrefetchable::Prefetchable, - }) - .unwrap(); - // For 64bit BARs two BARs are used (in this case BARs 1 and 2) - assert!(matches!( - pci_config.add_pci_bar(&PciBarConfiguration { - addr: 0x0000_0001_0000_0000, - size: 0x1000, - idx: 2, - region_type: PciBarRegionType::Memory64BitRegion, - prefetchable: PciBarPrefetchable::Prefetchable, - }), - Err(Error::BarInUse(2)) - )); - assert!(matches!( - pci_config.add_pci_bar(&PciBarConfiguration { - addr: 0x0000_0001_0000_0000, - size: 0x1000, - idx: 1, - region_type: PciBarRegionType::Memory64BitRegion, - prefetchable: PciBarPrefetchable::Prefetchable, - }), - Err(Error::BarInUse64(2)) - )); - - assert_eq!(pci_config.get_bar_addr(0), 0x1000); - assert_eq!(pci_config.get_bar_addr(2), 0x1_0000_0000); + #[test] + #[should_panic] + fn test_bar_size_no_power_of_two() { + let mut pci_config = default_config(); + pci_config.add_pci_bar(0, 0x1000, 0x1001); + } + + #[test] + #[should_panic] + fn test_bad_bar_index() { + let mut pci_config = default_config(); + pci_config.add_pci_bar(NUM_BAR_REGS, 0x1000, 0x1000); + } + + #[test] + #[should_panic] + fn test_bad_64bit_bar_index() { + let mut pci_config = default_config(); + pci_config.add_pci_bar(NUM_BAR_REGS - 1, 0x1000, 0x1000); + } + + #[test] + #[should_panic] + fn test_bar_size_overflows() { + let mut pci_config = default_config(); + pci_config.add_pci_bar(0, u64::MAX, 0x2); + } + + #[test] + #[should_panic] + fn test_lower_bar_free_upper_used() { + let mut pci_config = default_config(); + pci_config.add_pci_bar(1, 0x1000, 0x1000); + pci_config.add_pci_bar(0, 0x1000, 0x1000); + } + + #[test] + #[should_panic] + fn test_lower_bar_used() { + let mut pci_config = default_config(); + pci_config.add_pci_bar(0, 0x1000, 0x1000); + pci_config.add_pci_bar(0, 0x1000, 0x1000); + } + + #[test] + #[should_panic] + fn test_upper_bar_used() { + let mut pci_config = default_config(); + pci_config.add_pci_bar(0, 0x1000, 0x1000); + pci_config.add_pci_bar(1, 0x1000, 0x1000); + } + + #[test] + fn test_add_pci_bar() { + let mut pci_config = default_config(); + + pci_config.add_pci_bar(0, 0x1_0000_0000, 0x1000); + + assert_eq!(pci_config.get_bar_addr(0), 0x1_0000_0000); + assert_eq!(pci_config.read_reg(BAR0_REG) & 0xffff_fff0, 0x0); + assert!(pci_config.bars[0].used); + assert_eq!(pci_config.read_reg(BAR0_REG + 1), 1); + assert!(pci_config.bars[0].used); } #[test] @@ -1503,91 +1287,44 @@ mod tests { .is_none()); } - // Reprogramming of a 32bit BAR - pci_config - .add_pci_bar(&PciBarConfiguration { - addr: 0x1000, - size: 0x1000, - idx: 0, - region_type: PciBarRegionType::Memory32BitRegion, - prefetchable: PciBarPrefetchable::Prefetchable, - }) - .unwrap(); - - assert_eq!( - pci_config.detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0x2000)), - Some(BarReprogrammingParams { - old_base: 0x1000, - new_base: 0x2000, - len: 0x1000, - region_type: PciBarRegionType::Memory32BitRegion - }) - ); - - pci_config.write_config_register(BAR0_REG, 0, &u32::to_le_bytes(0x2000)); - assert_eq!(pci_config.read_reg(BAR0_REG) & 0xffff_fff0, 0x2000); - - // Attempting to reprogram the BAR with the same address should not have any effect - assert!(pci_config - .detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0x2000)) - .is_none()); - // Reprogramming of a 64bit BAR - pci_config - .add_pci_bar(&PciBarConfiguration { - addr: 0x13_1200_0000, - size: 0x8000, - idx: 1, - region_type: PciBarRegionType::Memory64BitRegion, - prefetchable: PciBarPrefetchable::Prefetchable, - }) - .unwrap(); - - assert_eq!(pci_config.read_reg(BAR0_REG + 1) & 0xffff_fff0, 0x1200_0000); - assert_eq!( - pci_config.bars[1].r#type, - Some(PciBarRegionType::Memory64BitRegion) - ); - assert_eq!(pci_config.read_reg(BAR0_REG + 2), 0x13); - assert!(pci_config.bars[2].r#type.is_none()); + pci_config.add_pci_bar(0, 0x13_1200_0000, 0x8000); // First we write the lower 32 bits and this shouldn't cause any reprogramming assert!(pci_config - .detect_bar_reprogramming(BAR0_REG + 1, &u32::to_le_bytes(0x4200_0000)) + .detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0x4200_0000)) .is_none()); - pci_config.write_config_register(BAR0_REG + 1, 0, &u32::to_le_bytes(0x4200_0000)); + pci_config.write_config_register(BAR0_REG, 0, &u32::to_le_bytes(0x4200_0000)); // Writing the upper 32 bits should trigger the reprogramming assert_eq!( - pci_config.detect_bar_reprogramming(BAR0_REG + 2, &u32::to_le_bytes(0x84)), + pci_config.detect_bar_reprogramming(BAR0_REG + 1, &u32::to_le_bytes(0x84)), Some(BarReprogrammingParams { old_base: 0x13_1200_0000, new_base: 0x84_4200_0000, len: 0x8000, - region_type: PciBarRegionType::Memory64BitRegion }) ); - pci_config.write_config_register(BAR0_REG + 2, 0, &u32::to_le_bytes(0x84)); + pci_config.write_config_register(BAR0_REG + 1, 0, &u32::to_le_bytes(0x84)); // Trying to reprogram the upper bits directly (without first touching the lower bits) // should trigger a reprogramming assert_eq!( - pci_config.detect_bar_reprogramming(BAR0_REG + 2, &u32::to_le_bytes(0x1312)), + pci_config.detect_bar_reprogramming(BAR0_REG + 1, &u32::to_le_bytes(0x1312)), Some(BarReprogrammingParams { old_base: 0x84_4200_0000, new_base: 0x1312_4200_0000, len: 0x8000, - region_type: PciBarRegionType::Memory64BitRegion }) ); - pci_config.write_config_register(BAR0_REG + 2, 0, &u32::to_le_bytes(0x1312)); + pci_config.write_config_register(BAR0_REG + 1, 0, &u32::to_le_bytes(0x1312)); // Attempting to reprogram the BAR with the same address should not have any effect assert!(pci_config - .detect_bar_reprogramming(BAR0_REG + 1, &u32::to_le_bytes(0x4200_0000)) + .detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0x4200_0000)) .is_none()); assert!(pci_config - .detect_bar_reprogramming(BAR0_REG + 2, &u32::to_le_bytes(0x1312)) + .detect_bar_reprogramming(BAR0_REG + 1, &u32::to_le_bytes(0x1312)) .is_none()); } } diff --git a/src/pci/src/device.rs b/src/pci/src/device.rs index d2105f26cc4..fe9e676eb03 100644 --- a/src/pci/src/device.rs +++ b/src/pci/src/device.rs @@ -8,16 +8,10 @@ use std::sync::{Arc, Barrier}; use std::{io, result}; -use crate::configuration::{self, PciBarRegionType}; - #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum Error { - /// Setup of the device capabilities failed: {0}. - CapabilitiesSetup(configuration::Error), /// Allocating space for an IO BAR failed, size={0}. IoAllocationFailed(u64), - /// Registering an IO BAR at address {0} failed: {1} - IoRegistrationFailed(u64, configuration::Error), /// Expected resource not found. MissingResource, } @@ -27,7 +21,6 @@ pub struct BarReprogrammingParams { pub old_base: u64, pub new_base: u64, pub len: u64, - pub region_type: PciBarRegionType, } pub trait PciDevice: Send { @@ -78,6 +71,5 @@ pub trait DeviceRelocation: Send + Sync { new_base: u64, len: u64, pci_dev: &mut dyn PciDevice, - region_type: PciBarRegionType, ) -> result::Result<(), io::Error>; } diff --git a/src/vmm/src/device_manager/pci_mngr.rs b/src/vmm/src/device_manager/pci_mngr.rs index 1e93d2abeae..b8371d82a83 100644 --- a/src/vmm/src/device_manager/pci_mngr.rs +++ b/src/vmm/src/device_manager/pci_mngr.rs @@ -8,7 +8,7 @@ use std::sync::{Arc, Mutex}; use event_manager::{MutEventSubscriber, SubscriberOps}; use log::{debug, error, warn}; -use pci::{PciBarRegionType, PciDeviceError, PciRootError}; +use pci::{PciDeviceError, PciRootError}; use serde::{Deserialize, Serialize}; use vm_device::BusError; @@ -25,7 +25,7 @@ use crate::devices::virtio::net::persist::{NetConstructorArgs, NetState}; use crate::devices::virtio::rng::Entropy; use crate::devices::virtio::rng::persist::{EntropyConstructorArgs, EntropyState}; use crate::devices::virtio::transport::pci::device::{ - VirtioPciDevice, VirtioPciDeviceError, VirtioPciDeviceState, + CAPABILITY_BAR_SIZE, VirtioPciDevice, VirtioPciDeviceError, VirtioPciDeviceState, }; use crate::devices::virtio::vsock::persist::{ VsockConstructorArgs, VsockState, VsockUdsConstructorArgs, @@ -89,13 +89,16 @@ impl PciDevices { virtio_device: &Arc>, ) -> Result<(), PciManagerError> { let virtio_device_locked = virtio_device.lock().expect("Poisoned lock"); - let bar = &virtio_device_locked.bar_region; - assert_eq!(bar.region_type, PciBarRegionType::Memory64BitRegion); - debug!("Inserting MMIO BAR region: {:#x}:{:#x}", bar.addr, bar.size); - vm.common - .mmio_bus - .insert(virtio_device.clone(), bar.addr, bar.size)?; + debug!( + "Inserting MMIO BAR region: {:#x}:{:#x}", + virtio_device_locked.bar_address, CAPABILITY_BAR_SIZE + ); + vm.common.mmio_bus.insert( + virtio_device.clone(), + virtio_device_locked.bar_address, + CAPABILITY_BAR_SIZE, + )?; Ok(()) } diff --git a/src/vmm/src/devices/virtio/transport/pci/device.rs b/src/vmm/src/devices/virtio/transport/pci/device.rs index 6784e93d4cb..120b6094413 100644 --- a/src/vmm/src/devices/virtio/transport/pci/device.rs +++ b/src/vmm/src/devices/virtio/transport/pci/device.rs @@ -226,8 +226,8 @@ const MSIX_PBA_BAR_OFFSET: u64 = 0x48000; // The size is 2KiB because the Pending Bit Array has one bit per vector and it // can support up to 2048 vectors. const MSIX_PBA_SIZE: u64 = 0x800; -// The BAR size must be a power of 2. -const CAPABILITY_BAR_SIZE: u64 = 0x80000; +/// The BAR size must be a power of 2. +pub const CAPABILITY_BAR_SIZE: u64 = 0x80000; const VIRTIO_COMMON_BAR_INDEX: usize = 0; const VIRTIO_SHM_BAR_INDEX: usize = 2; @@ -246,7 +246,7 @@ pub struct VirtioPciDeviceState { pub pci_dev_state: VirtioPciCommonConfigState, pub msix_state: MsixConfigState, pub msi_vector_group: Vec, - pub bar_configuration: PciBarConfiguration, + pub bar_address: u64, } #[derive(Debug, thiserror::Error, displaydoc::Display)] @@ -295,8 +295,8 @@ pub struct VirtioPciDevice { // a device. cap_pci_cfg_info: VirtioPciCfgCapInfo, - // Details of BAR region - pub bar_region: PciBarConfiguration, + // Allocated address for the BAR + pub bar_address: u64, } impl Debug for VirtioPciDevice { @@ -359,6 +359,11 @@ impl VirtioPciDevice { Ok(msix_config) } + /// Allocate the PCI BAR for the VirtIO device and its associated capabilities. + /// + /// This must happen only during the creation of a brand new VM. When a VM is restored from a + /// known state, the BARs are already created with the right content, therefore we don't need + /// to go through this codepath. pub fn allocate_bars( &mut self, mmio64_allocator: &mut AddressAllocator, @@ -377,25 +382,15 @@ impl VirtioPciDevice { .unwrap() .start(); - let bar = PciBarConfiguration { - addr: virtio_pci_bar_addr, - size: CAPABILITY_BAR_SIZE, - idx: VIRTIO_COMMON_BAR_INDEX, - region_type: PciBarRegionType::Memory64BitRegion, - prefetchable: pci::PciBarPrefetchable::NotPrefetchable, - }; - - // The creation of the PCI BAR and its associated capabilities must - // happen only during the creation of a brand new VM. When a VM is - // restored from a known state, the BARs are already created with the - // right content, therefore we don't need to go through this codepath. - self.configuration - .add_pci_bar(&bar) - .map_err(|e| PciDeviceError::IoRegistrationFailed(virtio_pci_bar_addr, e))?; + self.configuration.add_pci_bar( + VIRTIO_COMMON_BAR_INDEX, + virtio_pci_bar_addr, + CAPABILITY_BAR_SIZE, + ); // Once the BARs are allocated, the capabilities can be added to the PCI configuration. self.add_pci_capabilities()?; - self.bar_region = bar; + self.bar_address = virtio_pci_bar_addr; Ok(()) } @@ -446,7 +441,7 @@ impl VirtioPciDevice { memory, interrupt_source_group: msi_vectors, cap_pci_cfg_info: VirtioPciCfgCapInfo::default(), - bar_region: PciBarConfiguration::default(), + bar_address: 0, }; Ok(virtio_pci_device) @@ -496,7 +491,7 @@ impl VirtioPciDevice { memory: memory.clone(), interrupt_source_group: msi_vectors, cap_pci_cfg_info, - bar_region: state.bar_configuration, + bar_address: state.bar_address, }; if state.device_activated { @@ -536,18 +531,14 @@ impl VirtioPciDevice { COMMON_CONFIG_BAR_OFFSET.try_into().unwrap(), COMMON_CONFIG_SIZE.try_into().unwrap(), ); - self.configuration - .add_capability(&common_cap) - .map_err(PciDeviceError::CapabilitiesSetup)?; + self.configuration.add_capability(&common_cap); let isr_cap = VirtioPciCap::new( PciCapabilityType::Isr, ISR_CONFIG_BAR_OFFSET.try_into().unwrap(), ISR_CONFIG_SIZE.try_into().unwrap(), ); - self.configuration - .add_capability(&isr_cap) - .map_err(PciDeviceError::CapabilitiesSetup)?; + self.configuration.add_capability(&isr_cap); // TODO(dgreid) - set based on device's configuration size? let device_cap = VirtioPciCap::new( @@ -555,9 +546,7 @@ impl VirtioPciDevice { DEVICE_CONFIG_BAR_OFFSET.try_into().unwrap(), DEVICE_CONFIG_SIZE.try_into().unwrap(), ); - self.configuration - .add_capability(&device_cap) - .map_err(PciDeviceError::CapabilitiesSetup)?; + self.configuration.add_capability(&device_cap); let notify_cap = VirtioPciNotifyCap::new( PciCapabilityType::Notify, @@ -565,16 +554,11 @@ impl VirtioPciDevice { NOTIFICATION_SIZE.try_into().unwrap(), Le32::from(NOTIFY_OFF_MULTIPLIER), ); - self.configuration - .add_capability(¬ify_cap) - .map_err(PciDeviceError::CapabilitiesSetup)?; + self.configuration.add_capability(¬ify_cap); let configuration_cap = VirtioPciCfgCap::new(); - self.cap_pci_cfg_info.offset = self - .configuration - .add_capability(&configuration_cap) - .map_err(PciDeviceError::CapabilitiesSetup)? - + VIRTIO_PCI_CAP_OFFSET; + self.cap_pci_cfg_info.offset = + self.configuration.add_capability(&configuration_cap) + VIRTIO_PCI_CAP_OFFSET; self.cap_pci_cfg_info.cap = configuration_cap; if self.msix_config.is_some() { @@ -585,9 +569,7 @@ impl VirtioPciDevice { VIRTIO_BAR_INDEX, MSIX_PBA_BAR_OFFSET.try_into().unwrap(), ); - self.configuration - .add_capability(&msix_cap) - .map_err(PciDeviceError::CapabilitiesSetup)?; + self.configuration.add_capability(&msix_cap); } Ok(()) @@ -691,7 +673,7 @@ impl VirtioPciDevice { .expect("Poisoned lock") .state(), msi_vector_group: self.interrupt_source_group.save(), - bar_configuration: self.bar_region, + bar_address: self.bar_address, } } } @@ -846,8 +828,8 @@ impl PciDevice for VirtioPciDevice { ) -> std::result::Result<(), std::io::Error> { // We only update our idea of the bar in order to support free_bars() above. // The majority of the reallocation is done inside DeviceManager. - if self.bar_region.addr == old_base { - self.bar_region.addr = new_base; + if self.bar_address == old_base { + self.bar_address = new_base; } Ok(()) diff --git a/src/vmm/src/vstate/vm.rs b/src/vmm/src/vstate/vm.rs index deef6710b90..27d43176367 100644 --- a/src/vmm/src/vstate/vm.rs +++ b/src/vmm/src/vstate/vm.rs @@ -669,7 +669,6 @@ impl DeviceRelocation for Vm { _new_base: u64, _len: u64, _pci_dev: &mut dyn pci::PciDevice, - _region_type: pci::PciBarRegionType, ) -> Result<(), std::io::Error> { error!("pci: device relocation not supported"); Err(std::io::Error::from(std::io::ErrorKind::Unsupported)) From 0e72d2032f17a0840be6c2065bb2d769b5a14f5c Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Tue, 2 Sep 2025 14:39:45 +0200 Subject: [PATCH 3/5] refactor(pci): remove ROM-related state from PCI configuration Drop any ROM-related state we were holding in PciConfiguration and ensure we are always returning a side of zero for the ROM BAR. Signed-off-by: Babis Chalios --- src/pci/src/configuration.rs | 131 ++++++++++++++++------------------- 1 file changed, 59 insertions(+), 72 deletions(-) diff --git a/src/pci/src/configuration.rs b/src/pci/src/configuration.rs index 22754c0f9dc..a3dbf38e15f 100644 --- a/src/pci/src/configuration.rs +++ b/src/pci/src/configuration.rs @@ -386,7 +386,6 @@ struct PciBar { addr: u32, size: u32, used: bool, - r#type: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -394,9 +393,6 @@ pub struct PciConfigurationState { registers: Vec, writable_bits: Vec, bars: Vec, - rom_bar_addr: u32, - rom_bar_size: u32, - rom_bar_used: bool, last_capability: Option<(usize, usize)>, msix_cap_reg_idx: Option, } @@ -409,9 +405,6 @@ pub struct PciConfiguration { registers: [u32; NUM_CONFIGURATION_REGISTERS], writable_bits: [u32; NUM_CONFIGURATION_REGISTERS], // writable bits for each register. bars: [PciBar; NUM_BAR_REGS], - rom_bar_addr: u32, - rom_bar_size: u32, - rom_bar_used: bool, // Contains the byte offset and size of the last capability. last_capability: Option<(usize, usize)>, msix_cap_reg_idx: Option, @@ -465,74 +458,57 @@ impl PciConfiguration { msix_config: Option>>, state: Option, ) -> Self { - let ( - registers, - writable_bits, - bars, - rom_bar_addr, - rom_bar_size, - rom_bar_used, - last_capability, - msix_cap_reg_idx, - ) = if let Some(state) = state { - ( - state.registers.try_into().unwrap(), - state.writable_bits.try_into().unwrap(), - state.bars.try_into().unwrap(), - state.rom_bar_addr, - state.rom_bar_size, - state.rom_bar_used, - state.last_capability, - state.msix_cap_reg_idx, - ) - } else { - let mut registers = [0u32; NUM_CONFIGURATION_REGISTERS]; - let mut writable_bits = [0u32; NUM_CONFIGURATION_REGISTERS]; - registers[0] = (u32::from(device_id) << 16) | u32::from(vendor_id); - // TODO(dverkamp): Status should be write-1-to-clear - writable_bits[1] = 0x0000_ffff; // Status (r/o), command (r/w) - let pi = if let Some(pi) = programming_interface { - pi.get_register_value() + let (registers, writable_bits, bars, last_capability, msix_cap_reg_idx) = + if let Some(state) = state { + ( + state.registers.try_into().unwrap(), + state.writable_bits.try_into().unwrap(), + state.bars.try_into().unwrap(), + state.last_capability, + state.msix_cap_reg_idx, + ) } else { - 0 - }; - registers[2] = (u32::from(class_code.get_register_value()) << 24) - | (u32::from(subclass.get_register_value()) << 16) - | (u32::from(pi) << 8) - | u32::from(revision_id); - writable_bits[3] = 0x0000_00ff; // Cacheline size (r/w) - match header_type { - PciHeaderType::Device => { - registers[3] = 0x0000_0000; // Header type 0 (device) - writable_bits[15] = 0x0000_00ff; // Interrupt line (r/w) - } - PciHeaderType::Bridge => { - registers[3] = 0x0001_0000; // Header type 1 (bridge) - writable_bits[9] = 0xfff0_fff0; // Memory base and limit - writable_bits[15] = 0xffff_00ff; // Bridge control (r/w), interrupt line (r/w) - } + let mut registers = [0u32; NUM_CONFIGURATION_REGISTERS]; + let mut writable_bits = [0u32; NUM_CONFIGURATION_REGISTERS]; + registers[0] = (u32::from(device_id) << 16) | u32::from(vendor_id); + // TODO(dverkamp): Status should be write-1-to-clear + writable_bits[1] = 0x0000_ffff; // Status (r/o), command (r/w) + let pi = if let Some(pi) = programming_interface { + pi.get_register_value() + } else { + 0 + }; + registers[2] = (u32::from(class_code.get_register_value()) << 24) + | (u32::from(subclass.get_register_value()) << 16) + | (u32::from(pi) << 8) + | u32::from(revision_id); + writable_bits[3] = 0x0000_00ff; // Cacheline size (r/w) + match header_type { + PciHeaderType::Device => { + registers[3] = 0x0000_0000; // Header type 0 (device) + writable_bits[15] = 0x0000_00ff; // IRQ line (r/w) + } + PciHeaderType::Bridge => { + registers[3] = 0x0001_0000; // Header type 1 (bridge) + writable_bits[9] = 0xfff0_fff0; // Memory base and limit + writable_bits[15] = 0xffff_00ff; // Bridge control (r/w), IRQ line (r/w) + } + }; + registers[11] = (u32::from(subsystem_id) << 16) | u32::from(subsystem_vendor_id); + + ( + registers, + writable_bits, + [PciBar::default(); NUM_BAR_REGS], + None, + None, + ) }; - registers[11] = (u32::from(subsystem_id) << 16) | u32::from(subsystem_vendor_id); - - ( - registers, - writable_bits, - [PciBar::default(); NUM_BAR_REGS], - 0, - 0, - false, - None, - None, - ) - }; PciConfiguration { registers, writable_bits, bars, - rom_bar_addr, - rom_bar_size, - rom_bar_used, last_capability, msix_cap_reg_idx, msix_config, @@ -544,9 +520,6 @@ impl PciConfiguration { registers: self.registers.to_vec(), writable_bits: self.writable_bits.to_vec(), bars: self.bars.to_vec(), - rom_bar_addr: self.rom_bar_addr, - rom_bar_size: self.rom_bar_size, - rom_bar_used: self.rom_bar_used, last_capability: self.last_capability, msix_cap_reg_idx: self.msix_cap_reg_idx, } @@ -572,7 +545,7 @@ impl PciConfiguration { // all 1's on bits 31-11 to retrieve the BAR size during next BAR // reading. if value & ROM_BAR_ADDR_MASK == ROM_BAR_ADDR_MASK { - mask &= self.rom_bar_size; + mask = 0; } } @@ -1327,4 +1300,18 @@ mod tests { .detect_bar_reprogramming(BAR0_REG + 1, &u32::to_le_bytes(0x1312)) .is_none()); } + + #[test] + fn test_rom_bar() { + let mut pci_config = default_config(); + + // ROM BAR address should always be 0 and writes to it shouldn't do anything + assert_eq!(pci_config.read_reg(ROM_BAR_REG), 0); + pci_config.write_reg(ROM_BAR_REG, 0x42); + assert_eq!(pci_config.read_reg(ROM_BAR_REG), 0); + + // Reading the size of the BAR should always return 0 as well + pci_config.write_reg(ROM_BAR_REG, 0xffff_ffff); + assert_eq!(pci_config.read_reg(ROM_BAR_REG), 0); + } } From 2a4ac6037e37c0a25a7eb011b0a8846c98a45246 Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Fri, 5 Sep 2025 12:39:56 +0200 Subject: [PATCH 4/5] refactor(pci): drop code for creating Bridge PCI configuration We only ever create type-0 (device) PCI configurations. The code that was handling Bridge types was effectively dead code. Drop it. Signed-off-by: Babis Chalios --- src/pci/src/configuration.rs | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/pci/src/configuration.rs b/src/pci/src/configuration.rs index a3dbf38e15f..b9607d76611 100644 --- a/src/pci/src/configuration.rs +++ b/src/pci/src/configuration.rs @@ -32,7 +32,7 @@ const CAPABILITY_MAX_OFFSET: usize = 192; pub const PCI_CONFIGURATION_ID: &str = "pci_configuration"; /// Represents the types of PCI headers allowed in the configuration registers. -#[derive(Copy, Clone)] +#[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum PciHeaderType { Device, Bridge, @@ -483,17 +483,11 @@ impl PciConfiguration { | (u32::from(pi) << 8) | u32::from(revision_id); writable_bits[3] = 0x0000_00ff; // Cacheline size (r/w) - match header_type { - PciHeaderType::Device => { - registers[3] = 0x0000_0000; // Header type 0 (device) - writable_bits[15] = 0x0000_00ff; // IRQ line (r/w) - } - PciHeaderType::Bridge => { - registers[3] = 0x0001_0000; // Header type 1 (bridge) - writable_bits[9] = 0xfff0_fff0; // Memory base and limit - writable_bits[15] = 0xffff_00ff; // Bridge control (r/w), IRQ line (r/w) - } - }; + + // We only every create device types. No bridges used at the moment + assert_eq!(header_type, PciHeaderType::Device); + registers[3] = 0x0000_0000; // Header type 0 (device) + writable_bits[15] = 0x0000_00ff; // IRQ line (r/w) registers[11] = (u32::from(subsystem_id) << 16) | u32::from(subsystem_vendor_id); ( From e8b0ef42f83f4c73ebc7716f632f9aa3b6e5ffb6 Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Mon, 15 Sep 2025 14:10:51 +0200 Subject: [PATCH 5/5] pci: make VirtIO logging less verbose Every time we were handling an MMIO write within a VirtIO device's BAR we were logging a debug-level message when the write was not leading to a device activation. This leads to way too verbose Firecracker logs up until a device was set up properly. These logs don't really provide any useful debugging information, so just drop it to keep logs readable. Signed-off-by: Babis Chalios --- src/vmm/src/devices/virtio/transport/pci/device.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/vmm/src/devices/virtio/transport/pci/device.rs b/src/vmm/src/devices/virtio/transport/pci/device.rs index 120b6094413..9fc5322e86f 100644 --- a/src/vmm/src/devices/virtio/transport/pci/device.rs +++ b/src/vmm/src/devices/virtio/transport/pci/device.rs @@ -938,8 +938,6 @@ impl PciDevice for VirtioPciDevice { let _ = interrupt.trigger(VirtioInterruptType::Config); } } - } else { - debug!("Device doesn't need activation"); } // Device has been reset by the driver