// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use std::mem::offset_of;
use std::sync::atomic::{Ordering, fence};

use acpi_tables::{Aml, aml};
use log::error;
use serde::{Deserialize, Serialize};
use vm_allocator::AllocPolicy;
use vm_memory::{Address, ByteValued, Bytes, GuestAddress, GuestMemoryError};

use crate::devices::acpi::generated::vmclock_abi::{
    VMCLOCK_COUNTER_INVALID, VMCLOCK_MAGIC, VMCLOCK_STATUS_UNKNOWN, vmclock_abi,
};
use crate::snapshot::Persist;
use crate::vstate::memory::GuestMemoryMmap;
use crate::vstate::resources::ResourceAllocator;

// SAFETY: `vmclock_abi` is a POD
unsafe impl ByteValued for vmclock_abi {}

// We are reserving a physical page to expose the [`VmClock`] data
const VMCLOCK_SIZE: u32 = 0x1000;

// Write the value of a `vmclock_abi` field both in the Firecracker-managed state
// and at the guest memory address that corresponds to it.
macro_rules! write_vmclock_field {
    ($vmclock:expr, $mem:expr, $field:ident, $value:expr) => {
        $vmclock.inner.$field = $value;
        if let Err(err) = $mem.write_obj(
            $vmclock.inner.$field,
            $vmclock
                .guest_address
                .unchecked_add(offset_of!(vmclock_abi, $field) as u64),
        ) {
            error!("vmclock: could not write field to guest memory: {err}");
        }
    };
}

/// VMclock device
///
/// This device emulates the VMclock device, which passes information to the guest about the
/// relation of the host CPU counter to the real-time clock, as well as about disruptive
/// events such as live migration.
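///
/// Updates to the exposed structure follow a seqlock-style protocol (see
/// [`VmClock::post_load_update`]): `seq_count` is made odd while fields are being written and
/// even again once the update completes, so the guest can detect and retry torn reads.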
#[derive(Debug)]
pub struct VmClock {
    /// Guest address in which we will write the VMclock struct
    pub guest_address: GuestAddress,
    /// The [`VmClock`] state we are exposing to the guest
    inner: vmclock_abi,
}

impl VmClock {
    /// Create a new [`VmClock`] device for a newly booted VM
    pub fn new(resource_allocator: &mut ResourceAllocator) -> VmClock {
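        // `AllocPolicy::LastMatch` places the page at the end of the system-memory range
        // (see `VMCLOCK_TEST_GUEST_ADDR` in the tests below).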
        let addr = resource_allocator
            .allocate_system_memory(
                VMCLOCK_SIZE as u64,
                VMCLOCK_SIZE as u64,
                AllocPolicy::LastMatch,
            )
            .inspect_err(|err| error!("vmclock: could not allocate guest memory for device: {err}"))
            .unwrap();

        let inner = vmclock_abi {
            magic: VMCLOCK_MAGIC,
            size: VMCLOCK_SIZE,
            version: u16::to_le(1),
            clock_status: VMCLOCK_STATUS_UNKNOWN,
            counter_id: VMCLOCK_COUNTER_INVALID,
            ..Default::default()
        };

        VmClock {
            guest_address: GuestAddress(addr),
            inner,
        }
    }

    /// Activate the [`VmClock`] device by writing its initial state into guest memory
    pub fn activate(&self, mem: &GuestMemoryMmap) -> Result<(), GuestMemoryError> {
        mem.write_slice(self.inner.as_slice(), self.guest_address)?;
        Ok(())
    }

    /// Notify the guest of a disruptive event by bumping the disruption marker
    pub fn post_load_update(&mut self, mem: &GuestMemoryMmap) {
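        // Make `seq_count` odd to signal the guest that an update is in progress; the guest
        // should retry its reads until it observes an even value again.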
        write_vmclock_field!(self, mem, seq_count, self.inner.seq_count | 1);

        // This fence ensures the guest sees all previous writes. It is matched to a
        // read barrier in the guest.
        fence(Ordering::Release);

        write_vmclock_field!(
            self,
            mem,
            disruption_marker,
            self.inner.disruption_marker.wrapping_add(1)
        );

        // This fence ensures the guest sees the `disruption_marker` update. It is matched to a
        // read barrier in the guest.
        fence(Ordering::Release);

        write_vmclock_field!(self, mem, seq_count, self.inner.seq_count.wrapping_add(1));
    }
}

/// (De)serialize-able state of the [`VmClock`]
///
/// We could avoid this and reuse [`VmClock`] itself if `GuestAddress` was `Serialize`/`Deserialize`
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
pub struct VmClockState {
    /// Guest address in which we write the [`VmClock`] info
    pub guest_address: u64,
    /// Data we expose to the guest
    pub inner: vmclock_abi,
}

impl<'a> Persist<'a> for VmClock {
    type State = VmClockState;
    type ConstructorArgs = &'a GuestMemoryMmap;
    type Error = GuestMemoryError;

    fn save(&self) -> Self::State {
        VmClockState {
            guest_address: self.guest_address.0,
            inner: self.inner,
        }
    }

    fn restore(
        constructor_args: Self::ConstructorArgs,
        state: &Self::State,
    ) -> Result<Self, Self::Error> {
        let mut vmclock = VmClock {
            guest_address: GuestAddress(state.guest_address),
            inner: state.inner,
        };
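        // Restoring from a snapshot is a disruptive event; bump the disruption marker so the
        // guest can re-evaluate any clock data it captured before the snapshot was taken.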
        vmclock.post_load_update(constructor_args);
        Ok(vmclock)
    }
}

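// The AML below corresponds roughly to the following ASL (an illustrative sketch, not the
// exact bytes emitted by `acpi_tables`):
//
//   Device (\_SB.VCLK) {
//       Name (_HID, "AMZNC10C")
//       Name (_CID, "VMCLOCK")
//       Name (_DDN, "VMCLOCK")
//       Method (_STA) { Return (0x0F) }
//       Name (_CRS, ResourceTemplate () {
//           // one cacheable memory page at `guest_address`
//       })
//   }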
impl Aml for VmClock {
    fn append_aml_bytes(&self, v: &mut Vec<u8>) -> Result<(), aml::AmlError> {
        aml::Device::new(
            "_SB_.VCLK".try_into()?,
            vec![
                &aml::Name::new("_HID".try_into()?, &"AMZNC10C")?,
                &aml::Name::new("_CID".try_into()?, &"VMCLOCK")?,
                &aml::Name::new("_DDN".try_into()?, &"VMCLOCK")?,
                &aml::Method::new(
                    "_STA".try_into()?,
                    0,
                    false,
                    vec![&aml::Return::new(&0x0fu8)],
                ),
                &aml::Name::new(
                    "_CRS".try_into()?,
                    &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
                        aml::AddressSpaceCacheable::Cacheable,
                        false,
                        self.guest_address.0,
                        self.guest_address.0 + VMCLOCK_SIZE as u64 - 1,
                    )?]),
                )?,
            ],
        )
        .append_aml_bytes(v)
    }
}

#[cfg(test)]
mod tests {
    use vm_memory::{Bytes, GuestAddress};

    use crate::arch;
    use crate::devices::acpi::generated::vmclock_abi::vmclock_abi;
    use crate::devices::acpi::vmclock::{VMCLOCK_SIZE, VmClock};
    use crate::snapshot::Persist;
    use crate::test_utils::single_region_mem;
    use crate::utils::u64_to_usize;
    use crate::vstate::resources::ResourceAllocator;

    // We are allocating memory from the end of the system memory portion
    const VMCLOCK_TEST_GUEST_ADDR: GuestAddress =
        GuestAddress(arch::SYSTEM_MEM_START + arch::SYSTEM_MEM_SIZE - VMCLOCK_SIZE as u64);

    fn default_vmclock() -> VmClock {
        let mut resource_allocator = ResourceAllocator::new();
        VmClock::new(&mut resource_allocator)
    }

    #[test]
    fn test_new_device() {
        let vmclock = default_vmclock();
        let mem = single_region_mem(
            u64_to_usize(arch::SYSTEM_MEM_START) + u64_to_usize(arch::SYSTEM_MEM_SIZE),
        );

        let guest_data: vmclock_abi = mem.read_obj(VMCLOCK_TEST_GUEST_ADDR).unwrap();
        assert_ne!(guest_data, vmclock.inner);

        vmclock.activate(&mem).unwrap();

        let guest_data: vmclock_abi = mem.read_obj(VMCLOCK_TEST_GUEST_ADDR).unwrap();
        assert_eq!(guest_data, vmclock.inner);
    }

    #[test]
    fn test_device_save_restore() {
        let vmclock = default_vmclock();
        let mem = single_region_mem(
            u64_to_usize(arch::SYSTEM_MEM_START) + u64_to_usize(arch::SYSTEM_MEM_SIZE),
        );

        vmclock.activate(&mem).unwrap();
        let guest_data: vmclock_abi = mem.read_obj(VMCLOCK_TEST_GUEST_ADDR).unwrap();
        assert_eq!(guest_data, vmclock.inner);

        let state = vmclock.save();
        let vmclock_new = VmClock::restore(&mem, &state).unwrap();

        let guest_data_new: vmclock_abi = mem.read_obj(VMCLOCK_TEST_GUEST_ADDR).unwrap();
        assert_ne!(guest_data_new, vmclock.inner);
        assert_eq!(guest_data_new, vmclock_new.inner);
        assert_eq!(
            vmclock.inner.disruption_marker + 1,
            vmclock_new.inner.disruption_marker
        );
    }
}