Skip to content

Commit 4247837

Browse files
committed
vmclock: add support for VMclock device for x86_64
Implement the VMClock device on x86_64 platforms. At the moment, we just allocate the memory region in the guest address space for exposing the device. We don't expose any clock from the host and since we don't support live migration, the device won't do anything at the moment, but we should still be able to see a `/dev/vmclock` inside the guest. We do support the `disruption_marker` field which notifies the guest to adjust clocks due to a time shifting event. Signed-off-by: Babis Chalios <bchalios@amazon.es>
1 parent 424b2e5 commit 4247837

File tree

7 files changed

+281
-2
lines changed

7 files changed

+281
-2
lines changed

src/vmm/src/builder.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,8 @@ pub fn build_microvm_for_boot(
256256
)?;
257257

258258
device_manager.attach_vmgenid_device(&vm)?;
259+
#[cfg(target_arch = "x86_64")]
260+
device_manager.attach_vmclock_device(&vm)?;
259261

260262
#[cfg(target_arch = "aarch64")]
261263
if vcpus[0].kvm_vcpu.supports_pvtime() {
@@ -943,6 +945,11 @@ pub(crate) mod tests {
943945
vmm.device_manager.attach_vmgenid_device(&vmm.vm).unwrap();
944946
}
945947

948+
#[cfg(target_arch = "x86_64")]
949+
pub(crate) fn insert_vmclock_device(vmm: &mut Vmm) {
950+
vmm.device_manager.attach_vmclock_device(&vmm.vm).unwrap();
951+
}
952+
946953
pub(crate) fn insert_balloon_device(
947954
vmm: &mut Vmm,
948955
cmdline: &mut Cmdline,

src/vmm/src/device_manager/acpi.rs

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ use acpi_tables::{Aml, aml};
55
use vm_memory::GuestMemoryError;
66

77
use crate::Vm;
8+
#[cfg(target_arch = "x86_64")]
9+
use crate::devices::acpi::vmclock::VmClock;
810
use crate::devices::acpi::vmgenid::VmGenId;
911
use crate::vstate::resources::ResourceAllocator;
1012

@@ -20,26 +22,41 @@ pub enum ACPIDeviceError {
2022
pub struct ACPIDeviceManager {
2123
/// VMGenID device
2224
pub vmgenid: VmGenId,
25+
/// VMclock device
26+
#[cfg(target_arch = "x86_64")]
27+
pub vmclock: VmClock,
2328
}
2429

2530
impl ACPIDeviceManager {
2631
/// Create a new ACPIDeviceManager object
2732
pub fn new(resource_allocator: &mut ResourceAllocator) -> Self {
28-
let vmgenid = VmGenId::new(resource_allocator);
29-
ACPIDeviceManager { vmgenid }
33+
ACPIDeviceManager {
34+
vmgenid: VmGenId::new(resource_allocator),
35+
#[cfg(target_arch = "x86_64")]
36+
vmclock: VmClock::new(resource_allocator),
37+
}
3038
}
3139

3240
pub fn attach_vmgenid(&self, vm: &Vm) -> Result<(), ACPIDeviceError> {
3341
vm.register_irq(&self.vmgenid.interrupt_evt, self.vmgenid.gsi)?;
3442
self.vmgenid.activate(vm.guest_memory())?;
3543
Ok(())
3644
}
45+
46+
#[cfg(target_arch = "x86_64")]
47+
pub fn attach_vmclock(&self, vm: &Vm) -> Result<(), ACPIDeviceError> {
48+
self.vmclock.activate(vm.guest_memory())?;
49+
Ok(())
50+
}
3751
}
3852

3953
impl Aml for ACPIDeviceManager {
4054
fn append_aml_bytes(&self, v: &mut Vec<u8>) -> Result<(), aml::AmlError> {
4155
// AML for [`VmGenId`] device.
4256
self.vmgenid.append_aml_bytes(v)?;
57+
// AML for [`VmClock`] device.
58+
#[cfg(target_arch = "x86_64")]
59+
self.vmclock.append_aml_bytes(v)?;
4360

4461
// Create the AML for the GED interrupt handler
4562
aml::Device::new(

src/vmm/src/device_manager/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,12 @@ impl DeviceManager {
237237
Ok(())
238238
}
239239

240+
#[cfg(target_arch = "x86_64")]
241+
pub(crate) fn attach_vmclock_device(&mut self, vm: &Vm) -> Result<(), AttachDeviceError> {
242+
self.acpi_devices.attach_vmclock(vm)?;
243+
Ok(())
244+
}
245+
240246
#[cfg(target_arch = "aarch64")]
241247
pub(crate) fn attach_legacy_devices_aarch64(
242248
&mut self,

src/vmm/src/device_manager/persist.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ use super::mmio::*;
1515
#[cfg(target_arch = "aarch64")]
1616
use crate::arch::DeviceType;
1717
use crate::device_manager::acpi::ACPIDeviceError;
18+
#[cfg(target_arch = "x86_64")]
19+
use crate::devices::acpi::vmclock::{VmClock, VmClockState};
1820
use crate::devices::acpi::vmgenid::{VMGenIDState, VmGenId};
1921
#[cfg(target_arch = "aarch64")]
2022
use crate::devices::legacy::RTCDevice;
@@ -158,6 +160,8 @@ impl fmt::Debug for MMIODevManagerConstructorArgs<'_> {
158160
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
159161
pub struct ACPIDeviceManagerState {
160162
vmgenid: VMGenIDState,
163+
#[cfg(target_arch = "x86_64")]
164+
vmclock: VmClockState,
161165
}
162166

163167
impl<'a> Persist<'a> for ACPIDeviceManager {
@@ -168,13 +172,18 @@ impl<'a> Persist<'a> for ACPIDeviceManager {
168172
fn save(&self) -> Self::State {
169173
ACPIDeviceManagerState {
170174
vmgenid: self.vmgenid.save(),
175+
#[cfg(target_arch = "x86_64")]
176+
vmclock: self.vmclock.save(),
171177
}
172178
}
173179

174180
fn restore(vm: Self::ConstructorArgs, state: &Self::State) -> Result<Self, Self::Error> {
175181
let acpi_devices = ACPIDeviceManager {
176182
// This can't fail
177183
vmgenid: VmGenId::restore(vm.guest_memory(), &state.vmgenid).unwrap(),
184+
// This can't fail
185+
#[cfg(target_arch = "x86_64")]
186+
vmclock: VmClock::restore(vm.guest_memory(), &state.vmclock).unwrap(),
178187
};
179188

180189
acpi_devices.attach_vmgenid(vm)?;

src/vmm/src/devices/acpi/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@
22
// SPDX-License-Identifier: Apache-2.0
33

44
mod generated;
5+
pub mod vmclock;
56
pub mod vmgenid;
Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
use std::mem::offset_of;
5+
use std::sync::atomic::{Ordering, fence};
6+
7+
use acpi_tables::{Aml, aml};
8+
use log::error;
9+
use serde::{Deserialize, Serialize};
10+
use vm_allocator::AllocPolicy;
11+
use vm_memory::{Address, ByteValued, Bytes, GuestAddress, GuestMemoryError};
12+
13+
use crate::devices::acpi::generated::vmclock_abi::{
14+
VMCLOCK_COUNTER_INVALID, VMCLOCK_MAGIC, VMCLOCK_STATUS_UNKNOWN, vmclock_abi,
15+
};
16+
use crate::snapshot::Persist;
17+
use crate::vstate::memory::GuestMemoryMmap;
18+
use crate::vstate::resources::ResourceAllocator;
19+
20+
// SAFETY: `vmclock_abi` is a POD
21+
unsafe impl ByteValued for vmclock_abi {}
22+
23+
// We are reserving a physical page to expose the [`VmClock`] data
24+
const VMCLOCK_SIZE: u32 = 0x1000;
25+
26+
// Write a value in `vmclock_abi` both in the Firecracker-managed state
// and inside guest memory address that corresponds to it.
//
// Arguments: the `VmClock` to update, the guest memory to write into, the name of the
// `vmclock_abi` field, and the new value for that field.
//
// The guest address of the field is the device base address plus the field's offset
// inside `vmclock_abi`. A failed guest memory write is logged instead of being silently
// dropped; the Firecracker-managed copy is updated in either case.
macro_rules! write_vmclock_field {
    ($vmclock:expr, $mem:expr, $field:ident, $value:expr) => {
        $vmclock.inner.$field = $value;
        if let Err(err) = $mem.write_obj(
            $vmclock.inner.$field,
            $vmclock
                .guest_address
                .unchecked_add(offset_of!(vmclock_abi, $field) as u64),
        ) {
            error!(
                "vmclock: could not write field `{}` to guest memory: {}",
                stringify!($field),
                err
            );
        }
    };
}
39+
40+
/// VMclock device
41+
///
42+
/// This device emulates the VMclock device which allows passing information to the guest related
43+
/// to the relation of the host CPU to real-time clock as well as information about disruptive
44+
/// events, such as live-migration.
45+
#[derive(Debug)]
46+
pub struct VmClock {
47+
/// Guest address in which we will write the VMclock struct
48+
pub guest_address: GuestAddress,
49+
/// The [`VmClock`] state we are exposing to the guest
50+
inner: vmclock_abi,
51+
}
52+
53+
impl VmClock {
54+
/// Create a new [`VmClock`] device for a newly booted VM
55+
pub fn new(resource_allocator: &mut ResourceAllocator) -> VmClock {
56+
let addr = resource_allocator
57+
.allocate_system_memory(
58+
VMCLOCK_SIZE as u64,
59+
VMCLOCK_SIZE as u64,
60+
AllocPolicy::LastMatch,
61+
)
62+
.inspect_err(|err| error!("vmclock: could not allocate guest memory for device: {err}"))
63+
.unwrap();
64+
65+
let mut inner = vmclock_abi {
66+
magic: VMCLOCK_MAGIC,
67+
size: VMCLOCK_SIZE,
68+
version: u16::to_le(1),
69+
clock_status: VMCLOCK_STATUS_UNKNOWN,
70+
counter_id: VMCLOCK_COUNTER_INVALID,
71+
..Default::default()
72+
};
73+
74+
VmClock {
75+
guest_address: GuestAddress(addr),
76+
inner,
77+
}
78+
}
79+
80+
/// Activate [`VmClock`] device
81+
pub fn activate(&self, mem: &GuestMemoryMmap) -> Result<(), GuestMemoryError> {
82+
mem.write_slice(self.inner.as_slice(), self.guest_address)?;
83+
Ok(())
84+
}
85+
86+
/// Bump the VM generation counter
87+
pub fn post_load_update(&mut self, mem: &GuestMemoryMmap) {
88+
write_vmclock_field!(self, mem, seq_count, self.inner.seq_count | 1);
89+
90+
// This fence ensures guest sees all previous writes. It is matched to a
91+
// read barrier in the guest.
92+
fence(Ordering::Release);
93+
94+
write_vmclock_field!(
95+
self,
96+
mem,
97+
disruption_marker,
98+
self.inner.disruption_marker.wrapping_add(1)
99+
);
100+
101+
// This fence ensures guest sees the `disruption_marker` update. It is matched to a
102+
// read barrier in the guest.
103+
fence(Ordering::Release);
104+
105+
write_vmclock_field!(self, mem, seq_count, self.inner.seq_count.wrapping_add(1));
106+
}
107+
}
108+
109+
/// (De)serialize-able state of the [`VmClock`]
110+
///
111+
/// We could avoid this and reuse [`VmClock`] itself if `GuestAddress` was `Serialize`/`Deserialize`
112+
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
113+
pub struct VmClockState {
114+
/// Guest address in which we write the [`VmClock`] info
115+
pub guest_address: u64,
116+
/// Data we expose to the guest
117+
pub inner: vmclock_abi,
118+
}
119+
120+
impl<'a> Persist<'a> for VmClock {
121+
type State = VmClockState;
122+
type ConstructorArgs = &'a GuestMemoryMmap;
123+
type Error = GuestMemoryError;
124+
125+
fn save(&self) -> Self::State {
126+
VmClockState {
127+
guest_address: self.guest_address.0,
128+
inner: self.inner,
129+
}
130+
}
131+
132+
fn restore(
133+
constructor_args: Self::ConstructorArgs,
134+
state: &Self::State,
135+
) -> Result<Self, Self::Error> {
136+
let mut vmclock = VmClock {
137+
guest_address: GuestAddress(state.guest_address),
138+
inner: state.inner,
139+
};
140+
vmclock.post_load_update(constructor_args);
141+
Ok(vmclock)
142+
}
143+
}
144+
145+
impl Aml for VmClock {
146+
fn append_aml_bytes(&self, v: &mut Vec<u8>) -> Result<(), aml::AmlError> {
147+
#[allow(clippy::cast_possible_truncation)]
148+
let addr_low = self.guest_address.0 as u32;
149+
let addr_high = (self.guest_address.0 >> 32) as u32;
150+
aml::Device::new(
151+
"_SB_.VCLK".try_into()?,
152+
vec![
153+
&aml::Name::new("_HID".try_into()?, &"AMZNC10C")?,
154+
&aml::Name::new("_CID".try_into()?, &"VMCLOCK")?,
155+
&aml::Name::new("_DDN".try_into()?, &"VMCLOCK")?,
156+
&aml::Method::new(
157+
"_STA".try_into()?,
158+
0,
159+
false,
160+
vec![&aml::Return::new(&0x0fu8)],
161+
),
162+
&aml::Name::new(
163+
"_CRS".try_into()?,
164+
&aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory(
165+
aml::AddressSpaceCacheable::Cacheable,
166+
false,
167+
self.guest_address.0,
168+
self.guest_address.0 + VMCLOCK_SIZE as u64 - 1,
169+
)?]),
170+
)?,
171+
],
172+
)
173+
.append_aml_bytes(v)
174+
}
175+
}
176+
177+
#[cfg(test)]
mod tests {
    use vm_memory::{Bytes, GuestAddress};

    use crate::arch;
    use crate::devices::acpi::generated::vmclock_abi::vmclock_abi;
    use crate::devices::acpi::vmclock::{VMCLOCK_SIZE, VmClock};
    use crate::snapshot::Persist;
    use crate::test_utils::single_region_mem;
    use crate::utils::u64_to_usize;
    use crate::vstate::resources::ResourceAllocator;

    // We are allocating memory from the end of the system memory portion
    const VMCLOCK_TEST_GUEST_ADDR: GuestAddress =
        GuestAddress(arch::SYSTEM_MEM_START + arch::SYSTEM_MEM_SIZE - VMCLOCK_SIZE as u64);

    // Build a `VmClock` from a fresh resource allocator.
    fn default_vmclock() -> VmClock {
        let mut resource_allocator = ResourceAllocator::new();
        VmClock::new(&mut resource_allocator)
    }

    // Activating the device must copy its state into guest memory.
    #[test]
    fn test_new_device() {
        let vmclock = default_vmclock();
        let mem = single_region_mem(
            u64_to_usize(arch::SYSTEM_MEM_START) + u64_to_usize(arch::SYSTEM_MEM_SIZE),
        );

        // Before activation guest memory does not hold the device state.
        let guest_data: vmclock_abi = mem.read_obj(VMCLOCK_TEST_GUEST_ADDR).unwrap();
        assert_ne!(guest_data, vmclock.inner);

        // Fail the test right here if the guest memory write fails.
        vmclock.activate(&mem).unwrap();

        let guest_data: vmclock_abi = mem.read_obj(VMCLOCK_TEST_GUEST_ADDR).unwrap();
        assert_eq!(guest_data, vmclock.inner);
    }

    // Restoring from saved state must bump `disruption_marker` and update guest memory.
    #[test]
    fn test_device_save_restore() {
        let vmclock = default_vmclock();
        let mem = single_region_mem(
            u64_to_usize(arch::SYSTEM_MEM_START) + u64_to_usize(arch::SYSTEM_MEM_SIZE),
        );

        vmclock.activate(&mem).unwrap();
        let guest_data: vmclock_abi = mem.read_obj(VMCLOCK_TEST_GUEST_ADDR).unwrap();
        // Sanity check: guest memory matches the device state before the restore.
        assert_eq!(guest_data, vmclock.inner);

        let state = vmclock.save();
        let vmclock_new = VmClock::restore(&mem, &state).unwrap();

        let guest_data_new: vmclock_abi = mem.read_obj(VMCLOCK_TEST_GUEST_ADDR).unwrap();
        assert_ne!(guest_data_new, vmclock.inner);
        assert_eq!(guest_data_new, vmclock_new.inner);
        assert_eq!(
            vmclock.inner.disruption_marker + 1,
            vmclock_new.inner.disruption_marker
        );
    }
}

src/vmm/src/persist.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,8 @@ mod tests {
576576
use super::*;
577577
use crate::Vmm;
578578
#[cfg(target_arch = "x86_64")]
579+
use crate::builder::tests::insert_vmclock_device;
580+
#[cfg(target_arch = "x86_64")]
579581
use crate::builder::tests::insert_vmgenid_device;
580582
use crate::builder::tests::{
581583
CustomBlockConfig, default_kernel_cmdline, default_vmm, insert_balloon_device,
@@ -638,6 +640,8 @@ mod tests {
638640

639641
#[cfg(target_arch = "x86_64")]
640642
insert_vmgenid_device(&mut vmm);
643+
#[cfg(target_arch = "x86_64")]
644+
insert_vmclock_device(&mut vmm);
641645

642646
vmm
643647
}

0 commit comments

Comments
 (0)