diff --git a/applications/tests/test_dvfs/Cargo.toml b/applications/tests/test_dvfs/Cargo.toml index 048f5ebb3..947b3fc6e 100644 --- a/applications/tests/test_dvfs/Cargo.toml +++ b/applications/tests/test_dvfs/Cargo.toml @@ -5,6 +5,9 @@ edition = "2024" [dependencies] log = "0.4" +array-macro = "2.1" +serde_json = { version = "1.0", default-features = false, features = ["alloc"] } +num-traits = { version = "0.2", default-features = false } [dependencies.awkernel_async_lib] path = "../../../awkernel_async_lib" diff --git a/applications/tests/test_dvfs/src/lib.rs b/applications/tests/test_dvfs/src/lib.rs index 74beface1..d5811bc02 100644 --- a/applications/tests/test_dvfs/src/lib.rs +++ b/applications/tests/test_dvfs/src/lib.rs @@ -1,62 +1,241 @@ #![no_std] -use core::time::Duration; +use core::{ + sync::atomic::{AtomicU64, AtomicUsize, Ordering, fence}, + time::Duration, +}; + +use alloc::{format, vec::Vec}; +use array_macro::array; +use awkernel_lib::{ + dvfs::DesiredPerformance, + sync::{mcs::MCSNode, mutex::Mutex}, +}; extern crate alloc; -const APP_NAME: &str = "test DVFS"; +mod nbody; -const NUM_LOOP: usize = 1000000; +const NUM_CPU: usize = 14; +const NUM_TRIALS_LATENCY: usize = 100; +const NUM_BUSY_LOOP: usize = 1000000000; + +static LATENCY: [[[AtomicU64; NUM_TRIALS_LATENCY]; 11]; NUM_CPU] = + array![_ => array![_ => array![_ => AtomicU64::new(0); NUM_TRIALS_LATENCY]; 11]; NUM_CPU]; + +static COUNT: [[AtomicUsize; 11]; NUM_CPU] = + array![_ => array![_ => AtomicUsize::new(0); 11]; NUM_CPU]; +static TOTAL_COUNT: AtomicUsize = AtomicUsize::new(0); pub async fn run() { - awkernel_async_lib::spawn( - APP_NAME.into(), - test_dvfs(), - awkernel_async_lib::scheduler::SchedulerType::FIFO, - ) - .await; + let mut waiter = Vec::with_capacity(awkernel_lib::cpu::num_cpu() - 2); + + for _ in 0..(awkernel_lib::cpu::num_cpu() - 2) { + let w = awkernel_async_lib::spawn( + "test_latency_diff".into(), + test_latency_diff(), + awkernel_async_lib::scheduler::SchedulerType::FIFO, + ) + .await; + + waiter.push(w); + } + + for w in waiter { + let _ = w.join().await; + } + + let mut waiter = Vec::with_capacity(awkernel_lib::cpu::num_cpu() - 2); + + for _ in 0..(awkernel_lib::cpu::num_cpu() - 2) { + let w = awkernel_async_lib::spawn( + "test_latency".into(), + test_latency(), + awkernel_async_lib::scheduler::SchedulerType::FIFO, + ) + .await; + + waiter.push(w); + } + + for w in waiter { + let _ = w.join().await; + } } -async fn test_dvfs() { - loop { - let max = awkernel_lib::dvfs::get_max_freq(); - let cpuid = awkernel_lib::cpu::cpu_id(); +async fn test_latency() { + let end_count = (awkernel_lib::cpu::num_cpu() - 1) * NUM_TRIALS_LATENCY * 11; + + while TOTAL_COUNT.load(Ordering::Relaxed) + 1 < end_count { + let cpu_id = awkernel_lib::cpu::cpu_id(); + + for i in 0..=10 { + awkernel_lib::dvfs::set_min_max_performance(10 * i); + awkernel_lib::dvfs::set_energy_efficiency(0); + awkernel_lib::dvfs::set_desired_performance(DesiredPerformance::Auto); + + warm_up(); + + let elapsed = workload(); + + log::debug!( + "CPU {cpu_id}: Performance {}: Elapsed: {} [us]", + i * 10, + elapsed.as_micros() + ); + + let count = + COUNT[cpu_id][i as usize].fetch_add(1, core::sync::atomic::Ordering::Relaxed); + if count < NUM_TRIALS_LATENCY { + LATENCY[cpu_id][i as usize][count].store( + elapsed.as_micros() as u64, + core::sync::atomic::Ordering::Relaxed, + ); + + let total_count = TOTAL_COUNT.fetch_add(1, core::sync::atomic::Ordering::Relaxed); + + log::debug!("progress: {total_count} / {end_count}"); + + if total_count + 1 == end_count { + print_latency(); + } + } + } + + awkernel_async_lib::r#yield().await; + } +} - // Maximum frequency. - awkernel_lib::dvfs::fix_freq(max); +fn warm_up() { + for _ in 0..(NUM_BUSY_LOOP) { + core::hint::black_box(()); + } +} - let start = awkernel_async_lib::time::Time::now(); +fn workload() -> Duration { + let t = awkernel_async_lib::time::Time::now(); + nbody::simulate(); + t.elapsed() +} - for _ in 0..NUM_LOOP { - core::hint::black_box(()); +fn print_latency() { + let mut result: [[Vec; 11]; NUM_CPU] = + array![_ => array![_ => Vec::with_capacity(NUM_TRIALS_LATENCY); 11]; NUM_CPU]; + + for (j, latency_cpu) in LATENCY.iter().enumerate() { + for (k, latency) in latency_cpu.iter().enumerate() { + let mut sum = 0; + let mut min = u64::MAX; + let mut max = 0; + for usec in latency.iter() { + let val = usec.load(core::sync::atomic::Ordering::Relaxed); + if min > val { + min = val; + } + if max < val { + max = val; + } + sum += val; + + result[j][k].push(val); + } + let avg = sum / NUM_TRIALS_LATENCY as u64; + + let msg = format!( + "CPU {j}: Performance {}: Average: {avg} us, Min: {min} us, Max: {max} us\r\n", + k * 10 + ); + awkernel_lib::console::print(&msg); } + } - let t = start.elapsed(); + let result_json = serde_json::to_string(&result).unwrap(); + let result_str = format!("{result_json}\r\n"); + awkernel_lib::console::print(&result_str); +} - let current = awkernel_lib::dvfs::get_curr_freq(); +const NUM_TRIALS_LATENCY_DIFF: usize = 20; +static FREQ_LATENCY: [[Mutex>; NUM_TRIALS_LATENCY_DIFF]; NUM_CPU] = + array![_ => array![_ => Mutex::new(Vec::new()); NUM_TRIALS_LATENCY_DIFF]; NUM_CPU]; +static TOTAL_COUNT_LATENCY_DIFF: AtomicUsize = AtomicUsize::new(0); +static N: usize = 500; - log::debug!( - "cpuid = {cpuid}, max = {max}, current = {current}, expected = {max}, time = {t:?}" - ); +async fn test_latency_diff() { + loop { + awkernel_lib::dvfs::set_min_max_performance(10); + awkernel_lib::dvfs::set_energy_efficiency(0); + awkernel_lib::dvfs::set_desired_performance(DesiredPerformance::Auto); + + workload(); + + let mut diff = Vec::with_capacity(N); + + awkernel_lib::dvfs::set_min_max_performance(100); + awkernel_lib::dvfs::set_energy_efficiency(0); + awkernel_lib::dvfs::set_desired_performance(DesiredPerformance::Auto); + + let t = awkernel_async_lib::time::Time::now(); + for _ in 0..N { + let start = unsafe { core::arch::x86_64::_rdtsc() }; + fence(Ordering::AcqRel); + for _ in 0..1000 { + core::hint::black_box(()); + } + fence(Ordering::AcqRel); + let end = unsafe { core::arch::x86_64::_rdtsc() }; + diff.push((t.elapsed(), (end - start) as i64)); + } - // Maximum / 2 frequency. - awkernel_lib::dvfs::fix_freq(max / 2); + let mut result = Vec::with_capacity(diff.len()); - let start = awkernel_async_lib::time::Time::now(); + for (t, d) in diff.iter() { + result.push((t.as_nanos() as u64, *d)); + } - for _ in 0..NUM_LOOP { - core::hint::black_box(()); + let cpu_id = awkernel_lib::cpu::cpu_id(); + for (i, r) in FREQ_LATENCY[cpu_id].iter().enumerate() { + let mut node = MCSNode::new(); + let mut guard = r.lock(&mut node); + if guard.is_empty() { + *guard = result; + drop(guard); + + let old_total = TOTAL_COUNT_LATENCY_DIFF.fetch_add(1, Ordering::Relaxed); + + log::debug!("{cpu_id}: {i}, {old_total}"); + + if old_total == (NUM_CPU - 1) * NUM_TRIALS_LATENCY_DIFF - 1 { + print_latency_diff(); + } + + break; + } } - let t = start.elapsed(); + let total = TOTAL_COUNT_LATENCY_DIFF.load(Ordering::Relaxed); - let current = awkernel_lib::dvfs::get_curr_freq(); + if total == (NUM_CPU - 1) * NUM_TRIALS_LATENCY_DIFF { + break; + } - log::debug!( - "cpuid = {cpuid}, max = {max}, current = {current}, expected = {}, time = {t:?}", - max / 2 - ); + awkernel_async_lib::r#yield().await; + } +} + +fn print_latency_diff() { + let mut result: [[Vec<(u64, i64)>; NUM_TRIALS_LATENCY_DIFF]; NUM_CPU] = + array![_ => array![_ => Vec::new(); NUM_TRIALS_LATENCY_DIFF]; NUM_CPU]; + + for (dst, src) in result.iter_mut().zip(FREQ_LATENCY.iter()) { + for (dst, src) in dst.iter_mut().zip(src.iter()) { + let mut node = MCSNode::new(); + let guard = src.lock(&mut node); - awkernel_async_lib::sleep(Duration::from_secs(1)).await; + *dst = guard.clone(); + } } + + let result_json = serde_json::to_string(&result).unwrap(); + let result_str = format!("{result_json}\r\n"); + awkernel_lib::console::print(&result_str); } diff --git a/applications/tests/test_dvfs/src/nbody.rs b/applications/tests/test_dvfs/src/nbody.rs new file mode 100644 index 000000000..836044571 --- /dev/null +++ b/applications/tests/test_dvfs/src/nbody.rs @@ -0,0 +1,102 @@ +use alloc::{vec, vec::Vec}; +use num_traits::float::Float; + +#[derive(Clone, Copy, Debug)] +struct Body { + x: f64, + y: f64, + vx: f64, + vy: f64, + mass: f64, +} + +impl Body { + fn update_velocity(&mut self, fx: f64, fy: f64, dt: f64) { + self.vx += fx / self.mass * dt; + self.vy += fy / self.mass * dt; + } + + fn update_position(&mut self, dt: f64) { + self.x += self.vx * dt; + self.y += self.vy * dt; + } +} + +fn compute_force(a: &Body, b: &Body, g: f64, eps: f64) -> (f64, f64) { + let dx = b.x - a.x; + let dy = b.y - a.y; + let dist_sq = dx * dx + dy * dy + eps * eps; // softening + let dist = dist_sq.sqrt(); + let f = g * a.mass * b.mass / dist_sq; + let fx = f * dx / dist; + let fy = f * dy / dist; + (fx, fy) +} + +fn nbody_step(bodies: &mut [Body], g: f64, dt: f64, eps: f64) { + let n = bodies.len(); + let mut forces = vec![(0.0, 0.0); n]; + + for i in 0..n { + for j in 0..n { + if i != j { + let (fx, fy) = compute_force(&bodies[i], &bodies[j], g, eps); + forces[i].0 += fx; + forces[i].1 += fy; + } + } + } + + for i in 0..n { + bodies[i].update_velocity(forces[i].0, forces[i].1, dt); + bodies[i].update_position(dt); + } +} + +pub fn simulate() { + const N: usize = 5000; + const STEPS: usize = 2; + const G: f64 = 6.67430e-11; + const DT: f64 = 0.1; + const EPS: f64 = 1e-3; + + let mut rnd = XorShift64::new(0x12345678); // 乱数生成器の初期化 + + // 初期化:ランダムにばら撒く(実用では乱数を使ってもよい) + let mut bodies = (0..N) + .map(|_| Body { + x: rnd.next_f64(), + y: rnd.next_f64(), + vx: 0.0, + vy: 0.0, + mass: rnd.next_f64(), + }) + .collect::>(); + + for _ in 0..STEPS { + nbody_step(&mut bodies, G, DT, EPS); + } +} + +pub struct XorShift64 { + state: u64, +} + +impl XorShift64 { + pub fn new(seed: u64) -> Self { + Self { state: seed } + } + + pub fn next(&mut self) -> u64 { + let mut x = self.state; + x ^= x << 13; + x ^= x >> 7; + x ^= x << 17; + self.state = x; + x + } + + pub fn next_f64(&mut self) -> f64 { + (self.next() as f64) / (u64::MAX as f64) + } +} diff --git a/awkernel_lib/src/arch/aarch64/dvfs.rs b/awkernel_lib/src/arch/aarch64/dvfs.rs index 63332b7d8..05710d5b4 100644 --- a/awkernel_lib/src/arch/aarch64/dvfs.rs +++ b/awkernel_lib/src/arch/aarch64/dvfs.rs @@ -2,21 +2,4 @@ use crate::dvfs::Dvfs; use super::AArch64; -impl Dvfs for AArch64 { - /// Fix the frequency of the current CPU. - fn fix_freq(_freq: u64) { - // TODO: Implement this. - } - - /// Get the maximum frequency of the current CPU. - fn get_max_freq() -> u64 { - // TODO: Implement this. - 0 - } - - /// Get the current frequency of the current CPU. - fn get_curr_freq() -> u64 { - // TODO: Implement this. - 0 - } -} +impl Dvfs for AArch64 {} diff --git a/awkernel_lib/src/arch/rv32/dvfs.rs b/awkernel_lib/src/arch/rv32/dvfs.rs index 42059b9bb..7c57905a1 100644 --- a/awkernel_lib/src/arch/rv32/dvfs.rs +++ b/awkernel_lib/src/arch/rv32/dvfs.rs @@ -2,21 +2,4 @@ use crate::dvfs::Dvfs; use super::RV32; -impl Dvfs for RV32 { - /// Fix the frequency of the current CPU. - fn fix_freq(_freq: u64) { - // TODO: Implement this. - } - - /// Get the maximum frequency of the current CPU. - fn get_max_freq() -> u64 { - // TODO: Implement this. - 0 - } - - /// Get the current frequency of the current CPU. - fn get_curr_freq() -> u64 { - // TODO: Implement this. - 0 - } -} +impl Dvfs for RV32 {} diff --git a/awkernel_lib/src/arch/rv64/dvfs.rs b/awkernel_lib/src/arch/rv64/dvfs.rs index 15c7906a8..ad2b9c6f4 100644 --- a/awkernel_lib/src/arch/rv64/dvfs.rs +++ b/awkernel_lib/src/arch/rv64/dvfs.rs @@ -2,21 +2,4 @@ use crate::dvfs::Dvfs; use super::RV64; -impl Dvfs for RV64 { - /// Fix the frequency of the current CPU. - fn fix_freq(_freq: u64) { - // TODO: Implement this. - } - - /// Get the maximum frequency of the current CPU. - fn get_max_freq() -> u64 { - // TODO: Implement this. - 0 - } - - /// Get the current frequency of the current CPU. - fn get_curr_freq() -> u64 { - // TODO: Implement this. - 0 - } -} +impl Dvfs for RV64 {} diff --git a/awkernel_lib/src/arch/std_common/dvfs.rs b/awkernel_lib/src/arch/std_common/dvfs.rs index 5e5098d59..39db1df81 100644 --- a/awkernel_lib/src/arch/std_common/dvfs.rs +++ b/awkernel_lib/src/arch/std_common/dvfs.rs @@ -2,21 +2,4 @@ use crate::dvfs::Dvfs; use super::StdCommon; -impl Dvfs for StdCommon { - /// Fix the frequency of the current CPU. - fn fix_freq(_freq: u64) { - // no operation - } - - /// Get the maximum frequency of the current CPU. - fn get_max_freq() -> u64 { - // no operation1 - 1 - } - - /// Get the current frequency of the current CPU. - fn get_curr_freq() -> u64 { - // no operation - 1 - } -} +impl Dvfs for StdCommon {} diff --git a/awkernel_lib/src/arch/x86_64.rs b/awkernel_lib/src/arch/x86_64.rs index 5b9ebf032..56907a169 100644 --- a/awkernel_lib/src/arch/x86_64.rs +++ b/awkernel_lib/src/arch/x86_64.rs @@ -4,7 +4,7 @@ use ::acpi::AcpiTables; pub mod acpi; pub mod cpu; pub mod delay; -pub(super) mod dvfs; +pub mod dvfs; pub mod fault; pub(super) mod interrupt; pub mod interrupt_remap; diff --git a/awkernel_lib/src/arch/x86_64/cpu.rs b/awkernel_lib/src/arch/x86_64/cpu.rs index 34e4f6bc6..9d402988b 100644 --- a/awkernel_lib/src/arch/x86_64/cpu.rs +++ b/awkernel_lib/src/arch/x86_64/cpu.rs @@ -7,6 +7,72 @@ struct RawCpuIdAndCpuId { cpu_id: usize, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CPUVendor { + Intel, + AMD, + Hygon, + Centaur, +} + +#[derive(Debug)] +struct CPUVendorStr { + vendor_id: CPUVendor, + vendor: &'static str, +} + +const CPU_VENDORS: [CPUVendorStr; 4] = [ + CPUVendorStr { + vendor_id: CPUVendor::Intel, + vendor: "GenuineIntel", + }, + CPUVendorStr { + vendor_id: CPUVendor::AMD, + vendor: "AuthenticAMD", + }, + CPUVendorStr { + vendor_id: CPUVendor::Hygon, + vendor: "HygonGenuine", + }, + CPUVendorStr { + vendor_id: CPUVendor::Centaur, + vendor: "CentaurHauls", + }, +]; + +#[allow(dead_code)] +#[derive(Debug, Clone, Copy)] +struct CPUId { + ebx: u32, + edx: u32, + ecx: u32, +} + +union CPUVendorData { + cpuid: CPUId, + vendor_string: [u8; 12], +} + +pub fn get_cpu_vendor() -> Option { + let cpuid = unsafe { core::arch::x86_64::__cpuid(0) }; + let cpuid = CPUId { + ebx: cpuid.ebx, + edx: cpuid.edx, + ecx: cpuid.ecx, + }; + + let vendor_data = CPUVendorData { cpuid }; + let vendor_str = unsafe { core::str::from_utf8(&vendor_data.vendor_string).unwrap() }; + + for vendor in CPU_VENDORS.iter() { + if vendor.vendor == vendor_str { + return Some(vendor.vendor_id); + } + } + + None +} + static mut CPU_ID_NUMA_ID: [u8; NUM_MAX_CPU] = [0; NUM_MAX_CPU]; static mut RAW_CPU_ID_AND_CPU_ID: [RawCpuIdAndCpuId; NUM_MAX_CPU] = [RawCpuIdAndCpuId { diff --git a/awkernel_lib/src/arch/x86_64/dvfs.rs b/awkernel_lib/src/arch/x86_64/dvfs.rs index 4c93c0895..f22bf30c3 100644 --- a/awkernel_lib/src/arch/x86_64/dvfs.rs +++ b/awkernel_lib/src/arch/x86_64/dvfs.rs @@ -4,7 +4,10 @@ use x86_64::registers::model_specific::Msr; use crate::{delay::wait_millisec, dvfs::Dvfs}; -use super::X86; +use super::{ + cpu::{self, CPUVendor}, + X86, +}; #[allow(dead_code)] // TODO: remove this later mod hwpstate_intel; @@ -17,7 +20,7 @@ const IA32_MISC_ENABLE: u32 = 0x1A0; impl Dvfs for X86 { /// Fix the frequency of the current CPU. - fn fix_freq(freq_mhz: u64) { + fn fix_freq(freq_mhz: u64) -> bool { unsafe { let mut misc_enable = Msr::new(IA32_MISC_ENABLE); let mut value = misc_enable.read(); @@ -43,26 +46,28 @@ impl Dvfs for X86 { value |= target_pstate; perf_ctl.write(value); } + + true } /// Get the maximum frequency of the current CPU. - fn get_max_freq() -> u64 { + fn get_max_freq() -> Option { unsafe { let platform_info = Msr::new(MSR_PLATFORM_INFO); let max_ratio = (platform_info.read() >> 8) & 0xFF; let bus_freq_mhz = (__cpuid(0x16).ecx & 0xffff) as u64; - max_ratio * bus_freq_mhz + Some(max_ratio * bus_freq_mhz) } } /// Get the current frequency of the current CPU. - fn get_curr_freq() -> u64 { + fn get_curr_freq() -> Option { // Check if the CPU supports the IA32_PERF_MPERF and IA32_PERF_APERF MSRs. let cpuid = unsafe { __cpuid(0x6) }; if (cpuid.ecx & 0x1) == 0 { log::warn!("The CPU does not support IA32_PERF_MPERF and IA32_PERF_APERF MSRs."); - return 0; + return None; } unsafe { @@ -76,7 +81,36 @@ impl Dvfs for X86 { let mperf_delta = mperf.read(); let aperf_delta = aperf.read(); - aperf_delta * Self::get_max_freq() / mperf_delta + Some(aperf_delta * Self::get_max_freq()? / mperf_delta) + } + } + + fn set_min_performance(min: u8) -> bool { + hwpstate_intel::HwPstateIntelImpl::set_min_performance(min) + } + + fn set_max_performance(max: u8) -> bool { + hwpstate_intel::HwPstateIntelImpl::set_max_performance(max) + } + + fn set_desired_performance(val: crate::dvfs::DesiredPerformance) -> bool { + hwpstate_intel::HwPstateIntelImpl::set_desired_performance(val) + } + + fn set_min_max_performance(min: u8) -> bool { + hwpstate_intel::HwPstateIntelImpl::set_min_max_performance(min) + } +} + +/// Initialize DVFS. +/// +/// # Safety +/// +/// This function must be called once by each CPU core. +pub unsafe fn init() { + if let Some(CPUVendor::Intel) = cpu::get_cpu_vendor() { + if !hwpstate_intel::init() { + log::warn!("Failed to initialize Intel Hardware-controlled Performance States."); } } } diff --git a/awkernel_lib/src/arch/x86_64/dvfs/hwpstate_intel.rs b/awkernel_lib/src/arch/x86_64/dvfs/hwpstate_intel.rs index 70435e0cc..6dcd76d91 100644 --- a/awkernel_lib/src/arch/x86_64/dvfs/hwpstate_intel.rs +++ b/awkernel_lib/src/arch/x86_64/dvfs/hwpstate_intel.rs @@ -1,8 +1,14 @@ use core::arch::x86_64::__cpuid; +use array_macro::array; +use awkernel_sync::{mcs::MCSNode, mutex::Mutex}; use x86_64::registers::model_specific::Msr; -use crate::{arch::x86_64::msr::*, cpu::cpu_id}; +use crate::{ + arch::x86_64::msr::*, + cpu::{cpu_id, NUM_MAX_CPU}, + dvfs::{DesiredPerformance, Dvfs}, +}; const CPUTPM1_HWP_NOTIFICATION: u32 = 0x00000100; const CPUTPM1_HWP_ACTIVITY_WINDOW: u32 = 0x00000200; @@ -87,18 +93,18 @@ impl HwPstateIntel { } if let Some(result) = rdmsr_safe(&hwp_req) { - log::error!("Failed to read HWP request MSR for cpu{}", cpu_id()); self.req = result; } else { + log::error!("Failed to read HWP request MSR for cpu{}", cpu_id()); return false; } let hwp_caps = Msr::new(MSR_IA32_HWP_CAPABILITIES); if let Some(result) = rdmsr_safe(&hwp_caps) { - log::error!("Failed to read HWP capabilities MSR for cpu{}", cpu_id()); caps = result; } else { + log::error!("Failed to read HWP capabilities MSR for cpu{}", cpu_id()); return false; } } @@ -184,7 +190,7 @@ impl HwPstateIntel { } /// Select Efficiency/Performance Preference. - /// (range from 0, most performant, through 100, most efficient) + /// (range from 0, most performance, through 100, most efficient) pub(super) fn epp_select(&mut self, epp: u8) -> bool { let epp = if epp > 100 { 100 } else { epp }; @@ -206,8 +212,24 @@ impl HwPstateIntel { } } + /// Select Desired Preference. + /// (range from 0, most performance, through 100, most efficient) + pub(super) fn desired_select(&mut self, percent: u8) -> bool { + let raw_max = ((self.req & IA32_HWP_REQUEST_MAXIMUM_PERFORMANCE) >> 8) as u8; + let raw_min = (self.req & IA32_HWP_MINIMUM_PERFORMANCE) as u8; + + let percent = if percent > 100 { 100 } else { percent }; + + let val = self.percent_to_raw_performance(percent); + let val = if val > raw_max { raw_max } else { val }; + let val = (if val < raw_min { raw_min } else { val }) as u64; + + self.req = (self.req & !IA32_HWP_DESIRED_PERFORMANCE) | (val << 16); + self.request() + } + /// Select Maximum Performance. - /// (range from 0, lowest performant, through 100, highest performance) + /// (range from 0, lowest performance, through 100, highest performance) /// /// If `max` is less than the minimum performance, /// this function sets the maximum performance to the minimum performance. @@ -224,7 +246,7 @@ impl HwPstateIntel { } /// Select Minimum Performance. - /// (range from 0, lowest performant, through 100, highest performance) + /// (range from 0, lowest performance, through 100, highest performance) /// /// If `min` is greater than the maximum performance, /// this function sets the minimum performance to the maximum performance. @@ -240,6 +262,19 @@ impl HwPstateIntel { self.request() } + /// Select Minimum and Maximum Performance. + /// (range from 0, lowest performance, through 100, highest performance) + fn min_max_peformance_select(&mut self, val: u8) -> bool { + let val = self.percent_to_raw_performance(val) as u64; + + self.req = (self.req + & !(IA32_HWP_MINIMUM_PERFORMANCE | IA32_HWP_REQUEST_MAXIMUM_PERFORMANCE)) + | (val << 8) + | val; + + self.request() + } + #[inline] fn request(&self) -> bool { if self.hwp_pkg_ctrl_en { @@ -279,6 +314,18 @@ impl HwPstateIntel { val as u8 } } + + /// Set Energy_Performance_Preference. + /// (range from 0, highest performance, through 100, highest energy efficient) + fn set_energy_performance_preference(&mut self, percent: u8) -> bool { + let percent = if percent > 100 { 100 } else { percent }; + let raw_val = percent_to_raw(percent as u64); + + self.req &= !IA32_HWP_REQUEST_ENERGY_PERFORMANCE_PREFERENCE; + self.req |= raw_val << 24; + + self.request() + } } /// Given x * 10 in [0, 1000], round to the integer nearest x. @@ -310,3 +357,98 @@ fn percent_to_raw_perf_bias(x: u64) -> u64 { assert!(x <= 100); ((0xf * x) + 50) / 100 } + +static HWPSTATE_INTEL: [Mutex>; NUM_MAX_CPU] = + array![_ => Mutex::new(None); NUM_MAX_CPU]; + +pub(super) struct HwPstateIntelImpl; + +impl Dvfs for HwPstateIntelImpl { + fn set_min_performance(min: u8) -> bool { + let cpu_id = cpu_id(); + + let mut node = MCSNode::new(); + let mut hwps = HWPSTATE_INTEL[cpu_id].lock(&mut node); + + if let Some(hwps) = hwps.as_mut() { + hwps.minimum_performance_select(min) + } else { + false + } + } + + fn set_max_performance(max: u8) -> bool { + let cpu_id = cpu_id(); + + let mut node = MCSNode::new(); + let mut hwps = HWPSTATE_INTEL[cpu_id].lock(&mut node); + + if let Some(hwps) = hwps.as_mut() { + hwps.maximum_performance_select(max) + } else { + false + } + } + + fn set_energy_efficiency(val: u8) -> bool { + let cpu_id = cpu_id(); + + let mut node = MCSNode::new(); + let mut hwps = HWPSTATE_INTEL[cpu_id].lock(&mut node); + + if let Some(hwps) = hwps.as_mut() { + hwps.set_energy_performance_preference(val) + } else { + false + } + } + + fn set_desired_performance(val: DesiredPerformance) -> bool { + let cpu_id = cpu_id(); + + let mut node = MCSNode::new(); + let mut hwps = HWPSTATE_INTEL[cpu_id].lock(&mut node); + + if let Some(hwps) = hwps.as_mut() { + match val { + DesiredPerformance::Desired(val) => hwps.desired_select(val), + DesiredPerformance::Auto => hwps.desired_select(0), + } + } else { + false + } + } + + fn set_min_max_performance(val: u8) -> bool { + let cpu_id = cpu_id(); + + let mut node = MCSNode::new(); + let mut hwps = HWPSTATE_INTEL[cpu_id].lock(&mut node); + + if let Some(hwps) = hwps.as_mut() { + hwps.min_max_peformance_select(val) + } else { + false + } + } +} + +/// Initialize Intel Hardware-controlled Performance States +/// This function should be called before the main loop on each CPU core. +/// +/// # Safety +/// +/// This function must be called once by each CPU core. +pub(super) unsafe fn init() -> bool { + let cpu_id = cpu_id(); + + let hwps = &HWPSTATE_INTEL[cpu_id]; + let mut node = MCSNode::new(); + let mut hwps = hwps.lock(&mut node); + + if hwps.is_none() { + *hwps = HwPstateIntel::new(); + } + + hwps.is_some() +} diff --git a/awkernel_lib/src/dvfs.rs b/awkernel_lib/src/dvfs.rs index 8b26212b0..927b439e2 100644 --- a/awkernel_lib/src/dvfs.rs +++ b/awkernel_lib/src/dvfs.rs @@ -1,12 +1,97 @@ +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DesiredPerformance { + Desired(u8), + Auto, +} + +#[allow(unused_variables)] pub trait Dvfs { /// Fix the frequency of the current CPU. - fn fix_freq(freq: u64); + /// + /// If current driver does not support this operation, + /// it will return `false`. + fn fix_freq(freq: u64) -> bool { + false + } /// Get the maximum frequency of the current CPU. - fn get_max_freq() -> u64; + /// + /// If current driver does not support this operation, + /// it will return `None`. + fn get_max_freq() -> Option { + None + } /// Get the frequency of the current CPU. - fn get_curr_freq() -> u64; + /// + /// If current driver does not support this operation, + /// it will return `None`. + fn get_curr_freq() -> Option { + None + } + + /// Select the Minimum Performance. + /// (range from 0, lowest performance, through 100, highest performance) + /// + /// If current driver does not support this operation, + /// it will return `false`. + fn set_min_performance(min: u8) -> bool { + false + } + + /// Get the Minimum Performance. + /// (range from 0, lowest performance, through 100, highest performance) + /// + /// If current driver does not support this operation, + /// it will return `None`. + fn get_min_performance() -> Option { + None + } + + /// Set the Maximum Performance. + /// (range from 0, lowest performance, through 100, highest performance) + /// + /// If current driver does not support this operation, + /// it will return `false`. + fn set_max_performance(max: u8) -> bool { + false + } + + /// Get the Maximum Performance. + /// (range from 0, lowest performance, through 100, highest performance) + /// + /// If current driver does not support this operation, + /// it will return `None`. + fn get_max_performance() -> Option { + None + } + + /// Select the Minimum and Maximum Performance. + /// (range from 0, lowest performance, through 100, highest performance) + /// + /// If current driver does not support this operation, + /// it will return `false`. + fn set_min_max_performance(min: u8) -> bool { + false + } + + /// Set the Energy Efficiency Preference. + /// (range from 0, highest performance, through 100, highest energy efficient) + /// + /// If current driver does not support this operation, + /// it will return `false`. + fn set_energy_efficiency(val: u8) -> bool { + false + } + + /// Set the Desired Performance. + /// (range from 0, lowest performance, through 100, highest performance) + /// + /// If current driver does not support this operation, + /// it will return `false`. + fn set_desired_performance(val: DesiredPerformance) -> bool { + false + } } /// Fix the frequency of the current CPU. @@ -17,12 +102,57 @@ pub fn fix_freq(freq: u64) { /// Get the maximum frequency of the current CPU. #[inline(always)] -pub fn get_max_freq() -> u64 { +pub fn get_max_freq() -> Option { crate::arch::ArchImpl::get_max_freq() } /// Get the frequency of the current CPU. #[inline(always)] -pub fn get_curr_freq() -> u64 { +pub fn get_curr_freq() -> Option { crate::arch::ArchImpl::get_curr_freq() } + +/// Set Maximum Performance. +/// (range from 0, lowest performance, through 100, highest performance) +/// +/// If the driver does not support `set_max_performance()`, `false` will be returned. +#[inline(always)] +pub fn set_max_performance(max: u8) -> bool { + crate::arch::ArchImpl::set_max_performance(max) +} + +/// Set Minimum Performance. +/// (range from 0, lowest performance, through 100, highest performance) +/// +/// If the driver does not support `set_min_performance()`, `false` will be returned. +#[inline(always)] +pub fn set_min_performance(min: u8) -> bool { + crate::arch::ArchImpl::set_min_performance(min) +} + +/// Set the Energy Efficiency Preference. +/// (range from 0, highest performance, through 100, highest energy efficient) +/// +/// If the driver does not support `set_energy_efficiency()`, `false` will be returned. +#[inline(always)] +pub fn set_energy_efficiency(val: u8) -> bool { + crate::arch::ArchImpl::set_energy_efficiency(val) +} + +/// Set the Desired Performance. +/// (range from 0, lowest performance, through 100, highest performance) +/// +/// If the driver does not support `set_desired_performance()`, `false` will be returned. +#[inline(always)] +pub fn set_desired_performance(val: DesiredPerformance) -> bool { + crate::arch::ArchImpl::set_desired_performance(val) +} + +/// Set Minimum and Maximum Performance. +/// (range from 0, lowest performance, through 100, highest performance) +/// +/// If the driver does not support `set_min_max_performance()`, `false` will be returned. +#[inline(always)] +pub fn set_min_max_performance(percent: u8) -> bool { + crate::arch::ArchImpl::set_min_max_performance(percent) +} diff --git a/kernel/src/arch/x86_64/kernel_main.rs b/kernel/src/arch/x86_64/kernel_main.rs index 25418ea03..6aa57d29f 100644 --- a/kernel/src/arch/x86_64/kernel_main.rs +++ b/kernel/src/arch/x86_64/kernel_main.rs @@ -96,7 +96,8 @@ const MPBOOT_REGION_END: u64 = 1024 * 1024; /// 16. Initialize PCIe devices. /// 17. Initialize interrupt handlers. /// 18. Synchronize TSC. -/// 19. Call `crate::main()`. +/// 19. Initialize DVFS. +/// 20. Call `crate::main()`. fn kernel_main(boot_info: &'static mut BootInfo) -> ! { unsafe { crate::config::init() }; // 0. Initialize the configuration. @@ -340,7 +341,10 @@ fn kernel_main2( num_cpu: non_primary_cpus.len() + 1, }; - // 19. Call `crate::main()`. + // 19. Initialize DVFS. + unsafe { awkernel_lib::arch::x86_64::dvfs::init() }; + + // 20. Call `crate::main()`. crate::main(kernel_info); } @@ -538,6 +542,8 @@ fn non_primary_kernel_main() -> ! { num_cpu, }; + unsafe { awkernel_lib::arch::x86_64::dvfs::init() }; + crate::main(kernel_info); // jump to userland wait_forever(); diff --git a/userland/Cargo.toml b/userland/Cargo.toml index 583de9ba2..009306018 100644 --- a/userland/Cargo.toml +++ b/userland/Cargo.toml @@ -83,7 +83,7 @@ path = "../applications/tests/test_voluntary_preemption" optional = true [features] -default = [] +default = ["test_dvfs"] perf = ["awkernel_services/perf"] # Evaluation applications @@ -104,4 +104,5 @@ test_measure_channel = ["dep:test_measure_channel"] test_measure_channel_heavy = ["dep:test_measure_channel_heavy"] test_sched_preempt = ["dep:test_sched_preempt"] test_dag = ["dep:test_dag"] +test_dvfs = ["dep:test_dvfs"] test_voluntary_preemption = ["dep:test_voluntary_preemption"]