From f699a7999bb7a9264811dfd64a3f190e32c61102 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Thu, 10 Jul 2025 20:08:21 +0200 Subject: [PATCH 01/10] update `Cargo.lock` --- Cargo.lock | 54 +++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 80f424dfdd..13cc938ef5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -73,20 +73,20 @@ version = "0.1.0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.102", + "syn 2.0.104", ] [[package]] name = "autocfg" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "cc" -version = "1.2.26" +version = "1.2.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "956a5e21988b87f372569b66183b78babf23ebc2e744b733e4350a752c4dafac" +checksum = "5c1599538de2394445747c8cf7935946e3cc27e9625f889d979bfb2aaf569362" dependencies = [ "shlex", ] @@ -99,9 +99,9 @@ checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" [[package]] name = "clap" -version = "4.5.40" +version = "4.5.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f" +checksum = "be92d32e80243a54711e5d7ce823c35c41c9d929dc4ab58e1276f625841aadf9" dependencies = [ "clap_builder", "clap_derive", @@ -109,9 +109,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.40" +version = "4.5.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e" +checksum = "707eab41e9622f9139419d573eca0900137718000c517d47da73045f54331c3d" dependencies = [ "anstream", "anstyle", @@ -121,14 +121,14 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.40" +version = "4.5.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2c7947ae4cc3d851207c1adb5b5e260ff0cca11446b1d6d1423788e442257ce" +checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491" dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.102", + "syn 2.0.104", ] [[package]] @@ -338,9 +338,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.9.0" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" dependencies = [ "equivalent", "hashbrown 0.15.4", @@ -403,9 +403,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.172" +version = "0.2.174" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" +checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" [[package]] name = "linked-hash-map" @@ -624,7 +624,7 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.102", + "syn 2.0.104", ] [[package]] @@ -685,7 +685,7 @@ version = "0.1.0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.102", + "syn 2.0.104", ] [[package]] @@ -742,7 +742,7 @@ dependencies = [ "quote", "serde", "serde_json", - "syn 2.0.102", + "syn 2.0.104", ] [[package]] @@ -780,9 +780,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.102" +version = "2.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6397daf94fa90f058bd0fd88429dd9e5738999cca8d701813c80723add80462" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" dependencies = [ "proc-macro2", "quote", @@ -838,7 +838,7 @@ version = "0.113.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "286049849b5a5bd09a8773171be96824afabffc7cc3df6caaf33a38db6cd07ae" dependencies = [ - "indexmap 2.9.0", + "indexmap 2.10.0", "semver", ] @@ -945,20 +945,20 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.25" +version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.25" +version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" dependencies = [ "proc-macro2", "quote", - "syn 2.0.102", + "syn 2.0.104", ] From b9cf31d72d6eaee032e7cc4243fb9041e47f8387 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Thu, 10 Jul 2025 20:08:49 +0200 Subject: [PATCH 02/10] on `aarch64_be-unknown-linux-gnu`, use `g++` as the linker --- ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile index 74f770556d..3eba02766b 100644 --- a/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile +++ b/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile @@ -24,7 +24,7 @@ RUN mkdir /toolchains && mv "./${TOOLCHAIN}" /toolchains ENV AARCH64_BE_TOOLCHAIN="/toolchains/${TOOLCHAIN}" ENV AARCH64_BE_LIBC="${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc" -ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-gcc" +ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-g++" ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64_be -cpu max -L ${AARCH64_BE_LIBC}" ENV OBJDUMP="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-objdump" ENV STDARCH_TEST_SKIP_FEATURE=tme From 9020e4c112b8e9a191bad70430b4e2d443d59d64 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Thu, 10 Jul 2025 20:09:46 +0200 Subject: [PATCH 03/10] pass the whole config to C compiler construction --- crates/intrinsic-test/src/arm/compile.rs | 57 ++++++++++++++---------- crates/intrinsic-test/src/arm/mod.rs | 13 +----- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/crates/intrinsic-test/src/arm/compile.rs b/crates/intrinsic-test/src/arm/compile.rs index 8276cd87c1..622c8358b6 100644 --- a/crates/intrinsic-test/src/arm/compile.rs +++ b/crates/intrinsic-test/src/arm/compile.rs @@ -1,23 +1,23 @@ +use crate::common::cli::ProcessedCli; use crate::common::compile_c::CompilationCommandBuilder; use crate::common::gen_c::compile_c_programs; -pub fn compile_c_arm( - intrinsics_name_list: &[String], - compiler: &str, - target: &str, - cxx_toolchain_dir: Option<&str>, -) -> bool { +pub fn compile_c_arm(config: &ProcessedCli, intrinsics_name_list: &[String]) -> bool { + let Some(ref cpp_compiler) = config.cpp_compiler else { + return true; + }; + // -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations let mut command = CompilationCommandBuilder::new() .add_arch_flags(vec!["armv8.6-a", "crypto", "crc", "dotprod", "fp16"]) - .set_compiler(compiler) - .set_target(target) + .set_compiler(cpp_compiler) + .set_target(&config.target) .set_opt_level("2") - .set_cxx_toolchain_dir(cxx_toolchain_dir) + .set_cxx_toolchain_dir(config.cxx_toolchain_dir.as_deref()) .set_project_root("c_programs") .add_extra_flags(vec!["-ffp-contract=off", "-Wno-narrowing"]); - if !target.contains("v7") { + if !config.target.contains("v7") { command = command.add_arch_flags(vec!["faminmax", "lut", "sha3"]); } @@ -30,22 +30,33 @@ pub fn compile_c_arm( * does not work as it gets caught up with `#include_next ` * not existing... */ - if target.contains("aarch64_be") { - command = command - .set_linker( - cxx_toolchain_dir.unwrap_or("").to_string() + "/bin/aarch64_be-none-linux-gnu-g++", + if config.target.contains("aarch64_be") { + let Some(ref cxx_toolchain_dir) = config.cxx_toolchain_dir else { + panic!( + "target `{}` must specify `cxx_toolchain_dir`", + config.target ) - .set_include_paths(vec![ - "/include", - "/aarch64_be-none-linux-gnu/include", - "/aarch64_be-none-linux-gnu/include/c++/14.2.1", - "/aarch64_be-none-linux-gnu/include/c++/14.2.1/aarch64_be-none-linux-gnu", - "/aarch64_be-none-linux-gnu/include/c++/14.2.1/backward", - "/aarch64_be-none-linux-gnu/libc/usr/include", - ]); + }; + + let linker = if let Some(ref linker) = config.linker { + linker.to_owned() + } else { + format!("{cxx_toolchain_dir}/bin/aarch64_be-none-linux-gnu-g++") + }; + + trace!("using linker: {linker}"); + + command = command.set_linker(linker).set_include_paths(vec![ + "/include", + "/aarch64_be-none-linux-gnu/include", + "/aarch64_be-none-linux-gnu/include/c++/14.2.1", + "/aarch64_be-none-linux-gnu/include/c++/14.2.1/aarch64_be-none-linux-gnu", + "/aarch64_be-none-linux-gnu/include/c++/14.2.1/backward", + "/aarch64_be-none-linux-gnu/libc/usr/include", + ]); } - if !compiler.contains("clang") { + if !cpp_compiler.contains("clang") { command = command.add_extra_flag("-flax-vector-conversions"); } diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index 6aaa49ff97..b9bfd56e31 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -11,7 +11,6 @@ use crate::common::gen_rust::compile_rust_programs; use crate::common::intrinsic::{Intrinsic, IntrinsicDefinition}; use crate::common::intrinsic_helpers::TypeKind; use crate::common::write_file::{write_c_testfiles, write_rust_testfiles}; -use compile::compile_c_arm; use config::{AARCH_CONFIGURATIONS, F16_FORMATTING_DEF, POLY128_OSTREAM_DEF, build_notices}; use intrinsic::ArmIntrinsicType; use json_parser::get_neon_intrinsics; @@ -51,9 +50,7 @@ impl SupportedArchitectureTest for ArmArchitectureTest { } fn build_c_file(&self) -> bool { - let compiler = self.cli_options.cpp_compiler.as_deref(); let target = &self.cli_options.target; - let cxx_toolchain_dir = self.cli_options.cxx_toolchain_dir.as_deref(); let c_target = "aarch64"; let intrinsics_name_list = write_c_testfiles( @@ -69,15 +66,7 @@ impl SupportedArchitectureTest for ArmArchitectureTest { &[POLY128_OSTREAM_DEF], ); - match compiler { - None => true, - Some(compiler) => compile_c_arm( - intrinsics_name_list.as_slice(), - compiler, - target, - cxx_toolchain_dir, - ), - } + compile::compile_c_arm(&self.cli_options, intrinsics_name_list.as_slice()) } fn build_rust_file(&self) -> bool { From d2a815eebffe23e5800ea0e07c5627b4c3a2ef62 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Thu, 10 Jul 2025 20:30:36 +0200 Subject: [PATCH 04/10] improve cpp compiler execution --- crates/intrinsic-test/src/arm/compile.rs | 22 +-- crates/intrinsic-test/src/arm/mod.rs | 4 +- crates/intrinsic-test/src/common/compile_c.rs | 169 ++++++++++++------ crates/intrinsic-test/src/common/gen_c.rs | 40 +++-- 4 files changed, 138 insertions(+), 97 deletions(-) diff --git a/crates/intrinsic-test/src/arm/compile.rs b/crates/intrinsic-test/src/arm/compile.rs index 622c8358b6..03079a89cf 100644 --- a/crates/intrinsic-test/src/arm/compile.rs +++ b/crates/intrinsic-test/src/arm/compile.rs @@ -1,11 +1,8 @@ use crate::common::cli::ProcessedCli; -use crate::common::compile_c::CompilationCommandBuilder; -use crate::common::gen_c::compile_c_programs; +use crate::common::compile_c::{CompilationCommandBuilder, CppCompilation}; -pub fn compile_c_arm(config: &ProcessedCli, intrinsics_name_list: &[String]) -> bool { - let Some(ref cpp_compiler) = config.cpp_compiler else { - return true; - }; +pub fn build_cpp_compilation(config: &ProcessedCli) -> Option { + let cpp_compiler = config.cpp_compiler.as_ref()?; // -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations let mut command = CompilationCommandBuilder::new() @@ -60,16 +57,5 @@ pub fn compile_c_arm(config: &ProcessedCli, intrinsics_name_list: &[String]) -> command = command.add_extra_flag("-flax-vector-conversions"); } - let compiler_commands = intrinsics_name_list - .iter() - .map(|intrinsic_name| { - command - .clone() - .set_input_name(intrinsic_name) - .set_output_name(intrinsic_name) - .make_string() - }) - .collect::>(); - - compile_c_programs(&compiler_commands) + Some(command.into_cpp_compilation()) } diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index b9bfd56e31..f40def4206 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -7,6 +7,7 @@ mod types; use crate::common::SupportedArchitectureTest; use crate::common::cli::ProcessedCli; use crate::common::compare::compare_outputs; +use crate::common::gen_c::compile_c_programs; use crate::common::gen_rust::compile_rust_programs; use crate::common::intrinsic::{Intrinsic, IntrinsicDefinition}; use crate::common::intrinsic_helpers::TypeKind; @@ -66,7 +67,8 @@ impl SupportedArchitectureTest for ArmArchitectureTest { &[POLY128_OSTREAM_DEF], ); - compile::compile_c_arm(&self.cli_options, intrinsics_name_list.as_slice()) + let pipeline = compile::build_cpp_compilation(&self.cli_options).unwrap(); + compile_c_programs(&pipeline, &intrinsics_name_list) } fn build_rust_file(&self) -> bool { diff --git a/crates/intrinsic-test/src/common/compile_c.rs b/crates/intrinsic-test/src/common/compile_c.rs index aebb7b111e..ed70155002 100644 --- a/crates/intrinsic-test/src/common/compile_c.rs +++ b/crates/intrinsic-test/src/common/compile_c.rs @@ -7,8 +7,6 @@ pub struct CompilationCommandBuilder { optimization: String, include_paths: Vec, project_root: Option, - output: String, - input: String, linker: Option, extra_flags: Vec, } @@ -23,8 +21,6 @@ impl CompilationCommandBuilder { optimization: "2".to_string(), include_paths: Vec::new(), project_root: None, - output: String::new(), - input: String::new(), linker: None, extra_flags: Vec::new(), } @@ -71,18 +67,6 @@ impl CompilationCommandBuilder { self } - /// The name of the output executable, without any suffixes - pub fn set_output_name(mut self, path: &str) -> Self { - self.output = path.to_string(); - self - } - - /// The name of the input C file, without any suffixes - pub fn set_input_name(mut self, path: &str) -> Self { - self.input = path.to_string(); - self - } - pub fn set_linker(mut self, linker: String) -> Self { self.linker = Some(linker); self @@ -100,55 +84,122 @@ impl CompilationCommandBuilder { } impl CompilationCommandBuilder { - pub fn make_string(self) -> String { - let arch_flags = self.arch_flags.join("+"); + pub fn into_cpp_compilation(self) -> CppCompilation { + let mut cpp_compiler = std::process::Command::new(self.compiler); + + if let Some(project_root) = self.project_root { + cpp_compiler.current_dir(project_root); + } + let flags = std::env::var("CPPFLAGS").unwrap_or("".into()); - let project_root = self.project_root.unwrap_or_default(); - let project_root_str = project_root.as_str(); - let mut output = self.output.clone(); - if self.linker.is_some() { - output += ".o" - }; - let mut command = format!( - "{} {flags} -march={arch_flags} \ - -O{} \ - -o {project_root}/{} \ - {project_root}/{}.cpp", - self.compiler, self.optimization, output, self.input, - ); - - command = command + " " + self.extra_flags.join(" ").as_str(); + cpp_compiler.args(flags.split_whitespace()); + + cpp_compiler.arg(format!("-march={}", self.arch_flags.join("+"))); + + cpp_compiler.arg(format!("-O{}", self.optimization)); + + cpp_compiler.args(self.extra_flags); if let Some(target) = &self.target { - command = command + " --target=" + target; + cpp_compiler.arg(format!("--target={target}")); } if let (Some(linker), Some(cxx_toolchain_dir)) = (&self.linker, &self.cxx_toolchain_dir) { - let include_args = self - .include_paths - .iter() - .map(|path| "--include-directory=".to_string() + cxx_toolchain_dir + path) - .collect::>() - .join(" "); - - command = command - + " -c " - + include_args.as_str() - + " && " - + linker - + " " - + project_root_str - + "/" - + &output - + " -o " - + project_root_str - + "/" - + &self.output - + " && rm " - + project_root_str - + "/" - + &output; + cpp_compiler.args( + self.include_paths + .iter() + .map(|path| "--include-directory=".to_string() + cxx_toolchain_dir + path), + ); + + CppCompilation::CustomLinker { + cpp_compiler, + linker: linker.to_owned(), + } + } else { + CppCompilation::Simple(cpp_compiler) + } + } +} + +pub enum CppCompilation { + Simple(std::process::Command), + CustomLinker { + cpp_compiler: std::process::Command, + linker: String, + }, +} + +fn clone_command(command: &std::process::Command) -> std::process::Command { + let mut cmd = std::process::Command::new(command.get_program()); + if let Some(current_dir) = command.get_current_dir() { + cmd.current_dir(current_dir); + } + cmd.args(command.get_args()); + + for (key, val) in command.get_envs() { + cmd.env(key, val.unwrap_or_default()); + } + + cmd +} + +impl CppCompilation { + pub fn run(&self, inputs: &[String], output: &str) -> std::io::Result { + match self { + CppCompilation::Simple(command) => { + let mut cmd = clone_command(command); + cmd.args(inputs); + cmd.args(["-o", output]); + + cmd.output() + } + CppCompilation::CustomLinker { + cpp_compiler, + linker, + } => { + let object_file = &format!("{output}.o"); + + // Build an object file using the cpp compiler. + let mut cmd = clone_command(cpp_compiler); + cmd.args(inputs); + cmd.args(["-c", "-o", object_file]); + + let cpp_output = cmd.output()?; + if !cpp_output.status.success() { + error!("c++ compilaton failed"); + return Ok(cpp_output); + } + + trace!("using custom linker"); + + // Use the custom linker to turn the object file into an executable. + let mut cmd = std::process::Command::new(linker); + cmd.args([object_file, "-o", output]); + + if let Some(current_dir) = cpp_compiler.get_current_dir() { + cmd.current_dir(current_dir); + } + + for (key, val) in cpp_compiler.get_envs() { + cmd.env(key, val.unwrap_or_default()); + } + + let linker_output = cmd.output()?; + if !linker_output.status.success() { + error!("custom linker failed"); + return Ok(linker_output); + } + + trace!("removing {object_file}"); + let object_file_path = match cpp_compiler.get_current_dir() { + Some(current_dir) => &format!("{}/{object_file}", current_dir.display()), + None => object_file, + }; + + std::fs::remove_file(object_file_path)?; + + Ok(cpp_output) + } } - command } } diff --git a/crates/intrinsic-test/src/common/gen_c.rs b/crates/intrinsic-test/src/common/gen_c.rs index 1cfb66c39b..84167f2f4a 100644 --- a/crates/intrinsic-test/src/common/gen_c.rs +++ b/crates/intrinsic-test/src/common/gen_c.rs @@ -1,7 +1,8 @@ use itertools::Itertools; use rayon::prelude::*; use std::collections::BTreeMap; -use std::process::Command; + +use crate::common::compile_c::CppCompilation; use super::argument::Argument; use super::indentation::Indentation; @@ -62,29 +63,30 @@ int main(int argc, char **argv) {{ ) } -pub fn compile_c_programs(compiler_commands: &[String]) -> bool { - compiler_commands +pub fn compile_c_programs(pipeline: &CppCompilation, intrinsics: &[String]) -> bool { + intrinsics .par_iter() - .map(|compiler_command| { - let output = Command::new("sh").arg("-c").arg(compiler_command).output(); - if let Ok(output) = output { - if output.status.success() { - true - } else { - error!( - "Failed to compile code for intrinsics: \n\nstdout:\n{}\n\nstderr:\n{}", + .map( + |intrinsic| match pipeline.run(&[format!("{intrinsic}.cpp")], intrinsic) { + Ok(output) if output.status.success() => Ok(()), + Ok(output) => { + let msg = format!( + "Failed to compile code for intrinsic `{intrinsic}`: \n\nstdout:\n{}\n\nstderr:\n{}", std::str::from_utf8(&output.stdout).unwrap_or(""), std::str::from_utf8(&output.stderr).unwrap_or("") ); - false + error!("{msg}"); + + Err(msg) } - } else { - error!("Command failed: {output:#?}"); - false - } - }) - .find_any(|x| !x) - .is_none() + Err(e) => { + error!("command for `{intrinsic}` failed with IO error: {e:?}"); + Err(e.to_string()) + } + }, + ) + .collect::>() + .is_ok() } // Creates directory structure and file path mappings From 92e803340baff2a520174041b4422795eb98f32b Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Fri, 11 Jul 2025 01:25:52 +0200 Subject: [PATCH 05/10] exclude generated rust programs from the workspace --- Cargo.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 0db26f31a2..5979096439 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,8 @@ members = [ "examples", ] exclude = [ - "crates/wasm-assert-instr-tests" + "crates/wasm-assert-instr-tests", + "rust_programs", ] [profile.release] From 9f07931bb7cd19563746f0447945f4ab269d176a Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Fri, 11 Jul 2025 01:26:34 +0200 Subject: [PATCH 06/10] `intrinsic-test`: combine C files for more efficient compilation --- crates/intrinsic-test/src/arm/mod.rs | 72 +++- crates/intrinsic-test/src/common/argument.rs | 30 +- crates/intrinsic-test/src/common/compare.rs | 2 +- crates/intrinsic-test/src/common/compile_c.rs | 36 +- crates/intrinsic-test/src/common/gen_c.rs | 325 +++++++++--------- .../intrinsic-test/src/common/write_file.rs | 33 -- 6 files changed, 264 insertions(+), 234 deletions(-) diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index f40def4206..84290d2fd5 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -4,15 +4,20 @@ mod intrinsic; mod json_parser; mod types; +use std::fs::File; + +use rayon::prelude::*; + +use crate::arm::config::POLY128_OSTREAM_DEF; use crate::common::SupportedArchitectureTest; use crate::common::cli::ProcessedCli; use crate::common::compare::compare_outputs; -use crate::common::gen_c::compile_c_programs; +use crate::common::gen_c::{write_main_cpp, write_mod_cpp}; use crate::common::gen_rust::compile_rust_programs; use crate::common::intrinsic::{Intrinsic, IntrinsicDefinition}; use crate::common::intrinsic_helpers::TypeKind; -use crate::common::write_file::{write_c_testfiles, write_rust_testfiles}; -use config::{AARCH_CONFIGURATIONS, F16_FORMATTING_DEF, POLY128_OSTREAM_DEF, build_notices}; +use crate::common::write_file::write_rust_testfiles; +use config::{AARCH_CONFIGURATIONS, F16_FORMATTING_DEF, build_notices}; use intrinsic::ArmIntrinsicType; use json_parser::get_neon_intrinsics; @@ -51,24 +56,57 @@ impl SupportedArchitectureTest for ArmArchitectureTest { } fn build_c_file(&self) -> bool { - let target = &self.cli_options.target; let c_target = "aarch64"; + let platform_headers = &["arm_neon.h", "arm_acle.h", "arm_fp16.h"]; - let intrinsics_name_list = write_c_testfiles( - &self - .intrinsics - .iter() - .map(|i| i as &dyn IntrinsicDefinition<_>) - .collect::>(), - target, - c_target, - &["arm_neon.h", "arm_acle.h", "arm_fp16.h"], - &build_notices("// "), - &[POLY128_OSTREAM_DEF], - ); + let available_parallelism = std::thread::available_parallelism().unwrap().get(); + let chunk_size = self.intrinsics.len().div_ceil(available_parallelism); let pipeline = compile::build_cpp_compilation(&self.cli_options).unwrap(); - compile_c_programs(&pipeline, &intrinsics_name_list) + + let notice = &build_notices("// "); + self.intrinsics + .par_chunks(chunk_size) + .enumerate() + .map(|(i, chunk)| { + let c_filename = format!("c_programs/mod_{i}.cpp"); + let mut file = File::create(&c_filename).unwrap(); + write_mod_cpp(&mut file, notice, c_target, platform_headers, chunk).unwrap(); + + // compile this cpp file into a .o file + let output = pipeline.run(&[], &[format!("mod_{i}.cpp")], &format!("mod_{i}.o"))?; + assert!(output.status.success()); + + Ok(()) + }) + .collect::>() + .unwrap(); + + let mut file = File::create("c_programs/main.cpp").unwrap(); + write_main_cpp( + &mut file, + c_target, + POLY128_OSTREAM_DEF, + self.intrinsics.iter().map(|i| i.name.as_str()), + ) + .unwrap(); + + // Files to include in the final link step. + let mut includes = vec![]; + for i in 0..Ord::min(available_parallelism, self.intrinsics.len()) { + includes.push(format!("mod_{i}.o")); + } + + let output = pipeline + .run( + &includes, + &["main.cpp".to_string()], + "intrinsic-test-programs", + ) + .unwrap(); + assert!(output.status.success()); + + true } fn build_rust_file(&self) -> bool { diff --git a/crates/intrinsic-test/src/common/argument.rs b/crates/intrinsic-test/src/common/argument.rs index 443ccb919f..338f0d344a 100644 --- a/crates/intrinsic-test/src/common/argument.rs +++ b/crates/intrinsic-test/src/common/argument.rs @@ -125,19 +125,23 @@ where /// Creates a line for each argument that initializes an array for C from which `loads` argument /// values can be loaded as a sliding window. /// e.g `const int32x2_t a_vals = {0x3effffff, 0x3effffff, 0x3f7fffff}`, if loads=2. - pub fn gen_arglists_c(&self, indentation: Indentation, loads: u32) -> String { - self.iter() - .filter(|&arg| !arg.has_constraint()) - .map(|arg| { - format!( - "{indentation}const {ty} {name}_vals[] = {values};", - ty = arg.ty.c_scalar_type(), - name = arg.name, - values = arg.ty.populate_random(indentation, loads, &Language::C) - ) - }) - .collect::>() - .join("\n") + pub fn gen_arglists_c( + &self, + w: &mut impl std::io::Write, + indentation: Indentation, + loads: u32, + ) -> std::io::Result<()> { + for arg in self.iter().filter(|&arg| !arg.has_constraint()) { + writeln!( + w, + "{indentation}const {ty} {name}_vals[] = {values};", + ty = arg.ty.c_scalar_type(), + name = arg.name, + values = arg.ty.populate_random(indentation, loads, &Language::C) + )? + } + + Ok(()) } /// Creates a line for each argument that initializes an array for Rust from which `loads` argument diff --git a/crates/intrinsic-test/src/common/compare.rs b/crates/intrinsic-test/src/common/compare.rs index 9e0cbe8cd6..0517437a89 100644 --- a/crates/intrinsic-test/src/common/compare.rs +++ b/crates/intrinsic-test/src/common/compare.rs @@ -13,7 +13,7 @@ pub fn compare_outputs( .filter_map(|intrinsic_name| { let c = Command::new("sh") .arg("-c") - .arg(format!("{runner} ./c_programs/{intrinsic_name}")) + .arg(format!("{runner} ./c_programs/intrinsic-test-programs {intrinsic_name}")) .output(); let rust = Command::new("sh") diff --git a/crates/intrinsic-test/src/common/compile_c.rs b/crates/intrinsic-test/src/common/compile_c.rs index ed70155002..0bd42c530d 100644 --- a/crates/intrinsic-test/src/common/compile_c.rs +++ b/crates/intrinsic-test/src/common/compile_c.rs @@ -144,14 +144,35 @@ fn clone_command(command: &std::process::Command) -> std::process::Command { } impl CppCompilation { - pub fn run(&self, inputs: &[String], output: &str) -> std::io::Result { - match self { - CppCompilation::Simple(command) => { - let mut cmd = clone_command(command); - cmd.args(inputs); - cmd.args(["-o", output]); + fn compile_cpp( + command: &std::process::Command, + includes: &[String], + inputs: &[String], + output: &str, + ) -> std::io::Result { + let mut cmd = clone_command(command); + cmd.args(includes); + cmd.args(inputs); + cmd.args(["-o", output]); + + if output.ends_with(".o") { + cmd.arg("-c"); + } - cmd.output() + cmd.output() + } + + pub fn run( + &self, + includes: &[String], + inputs: &[String], + output: &str, + ) -> std::io::Result { + match self { + CppCompilation::Simple(command) => Self::compile_cpp(command, includes, inputs, output), + CppCompilation::CustomLinker { cpp_compiler, .. } if output.ends_with(".o") => { + // No need to invoke that custom linker if we're creating an object file. + Self::compile_cpp(cpp_compiler, includes, inputs, output) } CppCompilation::CustomLinker { cpp_compiler, @@ -174,6 +195,7 @@ impl CppCompilation { // Use the custom linker to turn the object file into an executable. let mut cmd = std::process::Command::new(linker); + cmd.args(includes); cmd.args([object_file, "-o", output]); if let Some(current_dir) = cpp_compiler.get_current_dir() { diff --git a/crates/intrinsic-test/src/common/gen_c.rs b/crates/intrinsic-test/src/common/gen_c.rs index 84167f2f4a..905efb6d89 100644 --- a/crates/intrinsic-test/src/common/gen_c.rs +++ b/crates/intrinsic-test/src/common/gen_c.rs @@ -1,9 +1,3 @@ -use itertools::Itertools; -use rayon::prelude::*; -use std::collections::BTreeMap; - -use crate::common::compile_c::CppCompilation; - use super::argument::Argument; use super::indentation::Indentation; use super::intrinsic::IntrinsicDefinition; @@ -12,105 +6,16 @@ use super::intrinsic_helpers::IntrinsicTypeDefinition; // The number of times each intrinsic will be called. const PASSES: u32 = 20; -// Formats the main C program template with placeholders -pub fn format_c_main_template( - notices: &str, - header_files: &[&str], - arch_identifier: &str, - arch_specific_definitions: &[&str], - arglists: &str, - passes: &str, -) -> String { - format!( - r#"{notices}{header_files} -#include -#include -#include -#include - -template T1 cast(T2 x) {{ - static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same"); - T1 ret{{}}; - memcpy(&ret, &x, sizeof(T1)); - return ret; -}} - -std::ostream& operator<<(std::ostream& os, float16_t value) {{ - uint16_t temp = 0; - memcpy(&temp, &value, sizeof(float16_t)); - std::stringstream ss; - ss << "0x" << std::setfill('0') << std::setw(4) << std::hex << temp; - os << ss.str(); - return os; -}} - -#ifdef __{arch_identifier}__ -{arch_specific_definitions} -#endif - -{arglists} - -int main(int argc, char **argv) {{ -{passes} - return 0; -}}"#, - header_files = header_files - .iter() - .map(|header| format!("#include <{header}>")) - .collect::>() - .join("\n"), - arch_specific_definitions = arch_specific_definitions.join("\n"), - ) -} - -pub fn compile_c_programs(pipeline: &CppCompilation, intrinsics: &[String]) -> bool { - intrinsics - .par_iter() - .map( - |intrinsic| match pipeline.run(&[format!("{intrinsic}.cpp")], intrinsic) { - Ok(output) if output.status.success() => Ok(()), - Ok(output) => { - let msg = format!( - "Failed to compile code for intrinsic `{intrinsic}`: \n\nstdout:\n{}\n\nstderr:\n{}", - std::str::from_utf8(&output.stdout).unwrap_or(""), - std::str::from_utf8(&output.stderr).unwrap_or("") - ); - error!("{msg}"); - - Err(msg) - } - Err(e) => { - error!("command for `{intrinsic}` failed with IO error: {e:?}"); - Err(e.to_string()) - } - }, - ) - .collect::>() - .is_ok() -} - -// Creates directory structure and file path mappings -pub fn setup_c_file_paths(identifiers: &Vec) -> BTreeMap<&String, String> { - let _ = std::fs::create_dir("c_programs"); - identifiers - .par_iter() - .map(|identifier| { - let c_filename = format!(r#"c_programs/{identifier}.cpp"#); - - (identifier, c_filename) - }) - .collect::>() -} - pub fn generate_c_test_loop( + w: &mut impl std::io::Write, intrinsic: &dyn IntrinsicDefinition, indentation: Indentation, additional: &str, passes: u32, - _target: &str, -) -> String { +) -> std::io::Result<()> { let body_indentation = indentation.nested(); - format!( + writeln!( + w, "{indentation}for (int i=0; i<{passes}; i++) {{\n\ {loaded_args}\ {body_indentation}auto __return_value = {intrinsic_call}({args});\n\ @@ -123,78 +28,172 @@ pub fn generate_c_test_loop( ) } -pub fn generate_c_constraint_blocks( +pub fn generate_c_constraint_blocks<'a, T: IntrinsicTypeDefinition + 'a>( + w: &mut impl std::io::Write, intrinsic: &dyn IntrinsicDefinition, indentation: Indentation, - constraints: &[&Argument], + constraints: &mut (impl Iterator> + Clone), name: String, - target: &str, -) -> String { - if let Some((current, constraints)) = constraints.split_last() { - let range = current - .constraint - .iter() - .map(|c| c.to_range()) - .flat_map(|r| r.into_iter()); - - let body_indentation = indentation.nested(); - range - .map(|i| { - format!( - "{indentation}{{\n\ - {body_indentation}{ty} {name} = {val};\n\ - {pass}\n\ - {indentation}}}", - name = current.name, - ty = current.ty.c_type(), - val = i, - pass = generate_c_constraint_blocks( - intrinsic, - body_indentation, - constraints, - format!("{name}-{i}"), - target, - ) - ) - }) - .join("\n") - } else { - generate_c_test_loop(intrinsic, indentation, &name, PASSES, target) +) -> std::io::Result<()> { + let Some(current) = constraints.next() else { + return generate_c_test_loop(w, intrinsic, indentation, &name, PASSES); + }; + + let body_indentation = indentation.nested(); + for i in current.constraint.iter().flat_map(|c| c.to_range()) { + let ty = current.ty.c_type(); + + writeln!(w, "{indentation}{{")?; + writeln!(w, "{body_indentation}{ty} {} = {i};", current.name)?; + + generate_c_constraint_blocks( + w, + intrinsic, + body_indentation, + &mut constraints.clone(), + format!("{name}-{i}"), + )?; + + writeln!(w, "{indentation}}}")?; } + + Ok(()) } // Compiles C test programs using specified compiler -pub fn create_c_test_program( +pub fn create_c_test_function( + w: &mut impl std::io::Write, intrinsic: &dyn IntrinsicDefinition, - header_files: &[&str], - target: &str, - c_target: &str, - notices: &str, - arch_specific_definitions: &[&str], -) -> String { +) -> std::io::Result<()> { + let indentation = Indentation::default(); + + writeln!(w, "int run_{}() {{", intrinsic.name())?; + + // Define the arrays of arguments. let arguments = intrinsic.arguments(); - let constraints = arguments - .iter() - .filter(|&i| i.has_constraint()) - .collect_vec(); + arguments.gen_arglists_c(w, indentation.nested(), PASSES)?; - let indentation = Indentation::default(); - format_c_main_template( - notices, - header_files, - c_target, - arch_specific_definitions, - intrinsic - .arguments() - .gen_arglists_c(indentation, PASSES) - .as_str(), - generate_c_constraint_blocks( - intrinsic, - indentation.nested(), - constraints.as_slice(), - Default::default(), - target, - ) - .as_str(), - ) + generate_c_constraint_blocks( + w, + intrinsic, + indentation.nested(), + &mut arguments.iter().rev().filter(|&i| i.has_constraint()), + Default::default(), + )?; + + writeln!(w, " return 0;")?; + writeln!(w, "}}")?; + + Ok(()) +} + +pub fn write_mod_cpp( + w: &mut impl std::io::Write, + notice: &str, + architecture: &str, + platform_headers: &[&str], + intrinsics: &[impl IntrinsicDefinition], +) -> std::io::Result<()> { + write!(w, "{notice}")?; + + for header in platform_headers { + writeln!(w, "#include <{header}>")?; + } + + writeln!( + w, + r#" +#include +#include +#include +#include + +template T1 cast(T2 x) {{ + static_assert(sizeof(T1) == sizeof(T2), "sizeof T1 and T2 must be the same"); + T1 ret{{}}; + memcpy(&ret, &x, sizeof(T1)); + return ret; +}} + +std::ostream& operator<<(std::ostream& os, float16_t value); + + + +"# + )?; + + writeln!(w, "#ifdef __{architecture}__")?; + writeln!( + w, + "std::ostream& operator<<(std::ostream& os, poly128_t value);" + )?; + writeln!(w, "#endif")?; + + for intrinsic in intrinsics { + create_c_test_function(w, intrinsic)?; + } + + Ok(()) +} + +pub fn write_main_cpp<'a>( + w: &mut impl std::io::Write, + architecture: &str, + arch_specific_definitions: &str, + intrinsics: impl Iterator + Clone, +) -> std::io::Result<()> { + writeln!(w, "#include ")?; + writeln!(w, "#include ")?; + + for header in ["arm_neon.h", "arm_acle.h", "arm_fp16.h"] { + writeln!(w, "#include <{header}>")?; + } + + writeln!( + w, + r#" +#include +#include +#include + +std::ostream& operator<<(std::ostream& os, float16_t value) {{ + uint16_t temp = 0; + memcpy(&temp, &value, sizeof(float16_t)); + std::stringstream ss; + ss << "0x" << std::setfill('0') << std::setw(4) << std::hex << temp; + os << ss.str(); + return os; +}} +"# + )?; + + writeln!(w, "#ifdef __{architecture}__")?; + writeln!(w, "{arch_specific_definitions }")?; + writeln!(w, "#endif")?; + + for intrinsic in intrinsics.clone() { + writeln!(w, "extern int run_{intrinsic}(void);")?; + } + + writeln!(w, "int main(int argc, char **argv) {{")?; + writeln!(w, " std::string intrinsic_name = argv[1];")?; + + writeln!(w, " if (false) {{")?; + + for intrinsic in intrinsics { + writeln!(w, " }} else if (intrinsic_name == \"{intrinsic}\") {{")?; + writeln!(w, " return run_{intrinsic}();")?; + } + + writeln!(w, " }} else {{")?; + writeln!( + w, + " std::cerr << \"Unknown command: \" << intrinsic_name << \"\\n\";" + )?; + writeln!(w, " return -1;")?; + writeln!(w, " }}")?; + + writeln!(w, "}}")?; + + Ok(()) } diff --git a/crates/intrinsic-test/src/common/write_file.rs b/crates/intrinsic-test/src/common/write_file.rs index 0ba3e829a6..92dd70b7c5 100644 --- a/crates/intrinsic-test/src/common/write_file.rs +++ b/crates/intrinsic-test/src/common/write_file.rs @@ -1,5 +1,3 @@ -use super::gen_c::create_c_test_program; -use super::gen_c::setup_c_file_paths; use super::gen_rust::{create_rust_test_program, setup_rust_file_paths}; use super::intrinsic::IntrinsicDefinition; use super::intrinsic_helpers::IntrinsicTypeDefinition; @@ -11,37 +9,6 @@ pub fn write_file(filename: &String, code: String) { file.write_all(code.into_bytes().as_slice()).unwrap(); } -pub fn write_c_testfiles( - intrinsics: &Vec<&dyn IntrinsicDefinition>, - target: &str, - c_target: &str, - headers: &[&str], - notice: &str, - arch_specific_definitions: &[&str], -) -> Vec { - let intrinsics_name_list = intrinsics - .iter() - .map(|i| i.name().clone()) - .collect::>(); - let filename_mapping = setup_c_file_paths(&intrinsics_name_list); - - intrinsics.iter().for_each(|&i| { - let c_code = create_c_test_program( - i, - headers, - target, - c_target, - notice, - arch_specific_definitions, - ); - if let Some(filename) = filename_mapping.get(&i.name()) { - write_file(filename, c_code) - }; - }); - - intrinsics_name_list -} - pub fn write_rust_testfiles( intrinsics: Vec<&dyn IntrinsicDefinition>, rust_target: &str, From 2f92542592c551b63bd44a97b05c5e0fc14619ed Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Fri, 11 Jul 2025 12:54:04 +0200 Subject: [PATCH 07/10] combine rust files into one compilation --- crates/intrinsic-test/src/arm/mod.rs | 67 +++- crates/intrinsic-test/src/common/argument.rs | 34 +- crates/intrinsic-test/src/common/compare.rs | 37 ++- crates/intrinsic-test/src/common/compile_c.rs | 1 + crates/intrinsic-test/src/common/gen_rust.rs | 313 +++++++++--------- crates/intrinsic-test/src/common/mod.rs | 1 - .../intrinsic-test/src/common/write_file.rs | 33 -- 7 files changed, 244 insertions(+), 242 deletions(-) delete mode 100644 crates/intrinsic-test/src/common/write_file.rs diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index 84290d2fd5..3c33e89d34 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -13,10 +13,9 @@ use crate::common::SupportedArchitectureTest; use crate::common::cli::ProcessedCli; use crate::common::compare::compare_outputs; use crate::common::gen_c::{write_main_cpp, write_mod_cpp}; -use crate::common::gen_rust::compile_rust_programs; -use crate::common::intrinsic::{Intrinsic, IntrinsicDefinition}; +use crate::common::gen_rust::{compile_rust_programs, write_cargo_toml, write_main_rs}; +use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; -use crate::common::write_file::write_rust_testfiles; use config::{AARCH_CONFIGURATIONS, F16_FORMATTING_DEF, build_notices}; use intrinsic::ArmIntrinsicType; use json_parser::get_neon_intrinsics; @@ -110,30 +109,65 @@ impl SupportedArchitectureTest for ArmArchitectureTest { } fn build_rust_file(&self) -> bool { - let rust_target = if self.cli_options.target.contains("v7") { + std::fs::create_dir_all("rust_programs/src").unwrap(); + + let architecture = if self.cli_options.target.contains("v7") { "arm" } else { "aarch64" }; + + let available_parallelism = std::thread::available_parallelism().unwrap().get(); + let chunk_size = self.intrinsics.len().div_ceil(available_parallelism); + + let mut cargo = File::create("rust_programs/Cargo.toml").unwrap(); + write_cargo_toml(&mut cargo, &[]).unwrap(); + + let mut main_rs = File::create("rust_programs/src/main.rs").unwrap(); + write_main_rs( + &mut main_rs, + available_parallelism, + architecture, + AARCH_CONFIGURATIONS, + F16_FORMATTING_DEF, + self.intrinsics.iter().map(|i| i.name.as_str()), + ) + .unwrap(); + let target = &self.cli_options.target; let toolchain = self.cli_options.toolchain.as_deref(); let linker = self.cli_options.linker.as_deref(); - let intrinsics_name_list = write_rust_testfiles( - self.intrinsics - .iter() - .map(|i| i as &dyn IntrinsicDefinition<_>) - .collect::>(), - rust_target, - &build_notices("// "), - F16_FORMATTING_DEF, - AARCH_CONFIGURATIONS, - ); - compile_rust_programs(intrinsics_name_list, toolchain, target, linker) + let notice = &build_notices("// "); + self.intrinsics + .par_chunks(chunk_size) + .enumerate() + .map(|(i, chunk)| { + use std::io::Write; + + let rust_filename = format!("rust_programs/src/mod_{i}.rs"); + trace!("generating `{rust_filename}`"); + let mut file = File::create(rust_filename).unwrap(); + + write!(file, "{notice}")?; + + writeln!(file, "use core_arch::arch::{architecture}::*;")?; + writeln!(file, "use crate::{{debug_simd_finish, debug_f16}};")?; + + for intrinsic in chunk { + crate::common::gen_rust::create_rust_test_module(&mut file, intrinsic)?; + } + + Ok(()) + }) + .collect::>() + .unwrap(); + + compile_rust_programs(toolchain, target, linker) } fn compare_outputs(&self) -> bool { - if let Some(ref toolchain) = self.cli_options.toolchain { + if self.cli_options.toolchain.is_some() { let intrinsics_name_list = self .intrinsics .iter() @@ -142,7 +176,6 @@ impl SupportedArchitectureTest for ArmArchitectureTest { compare_outputs( &intrinsics_name_list, - toolchain, &self.cli_options.c_runner, &self.cli_options.target, ) diff --git a/crates/intrinsic-test/src/common/argument.rs b/crates/intrinsic-test/src/common/argument.rs index 338f0d344a..b72c954f4a 100644 --- a/crates/intrinsic-test/src/common/argument.rs +++ b/crates/intrinsic-test/src/common/argument.rs @@ -146,21 +146,25 @@ where /// Creates a line for each argument that initializes an array for Rust from which `loads` argument /// values can be loaded as a sliding window, e.g `const A_VALS: [u32; 20] = [...];` - pub fn gen_arglists_rust(&self, indentation: Indentation, loads: u32) -> String { - self.iter() - .filter(|&arg| !arg.has_constraint()) - .map(|arg| { - format!( - "{indentation}{bind} {name}: [{ty}; {load_size}] = {values};", - bind = arg.rust_vals_array_binding(), - name = arg.rust_vals_array_name(), - ty = arg.ty.rust_scalar_type(), - load_size = arg.ty.num_lanes() * arg.ty.num_vectors() + loads - 1, - values = arg.ty.populate_random(indentation, loads, &Language::Rust) - ) - }) - .collect::>() - .join("\n") + pub fn gen_arglists_rust( + &self, + w: &mut impl std::io::Write, + indentation: Indentation, + loads: u32, + ) -> std::io::Result<()> { + for arg in self.iter().filter(|&arg| !arg.has_constraint()) { + writeln!( + w, + "{indentation}{bind} {name}: [{ty}; {load_size}] = {values};", + bind = arg.rust_vals_array_binding(), + name = arg.rust_vals_array_name(), + ty = arg.ty.rust_scalar_type(), + load_size = arg.ty.num_lanes() * arg.ty.num_vectors() + loads - 1, + values = arg.ty.populate_random(indentation, loads, &Language::Rust) + )? + } + + Ok(()) } /// Creates a line for each argument that initializes the argument from an array `[arg]_vals` at diff --git a/crates/intrinsic-test/src/common/compare.rs b/crates/intrinsic-test/src/common/compare.rs index 0517437a89..183bb23ee0 100644 --- a/crates/intrinsic-test/src/common/compare.rs +++ b/crates/intrinsic-test/src/common/compare.rs @@ -2,27 +2,28 @@ use super::cli::FailureReason; use rayon::prelude::*; use std::process::Command; -pub fn compare_outputs( - intrinsic_name_list: &Vec, - toolchain: &str, - runner: &str, - target: &str, -) -> bool { +fn runner_command(runner: &str) -> Command { + let mut it = runner.split_whitespace(); + let mut cmd = Command::new(it.next().unwrap()); + cmd.args(it); + + cmd +} + +pub fn compare_outputs(intrinsic_name_list: &Vec, runner: &str, target: &str) -> bool { let intrinsics = intrinsic_name_list .par_iter() .filter_map(|intrinsic_name| { - let c = Command::new("sh") - .arg("-c") - .arg(format!("{runner} ./c_programs/intrinsic-test-programs {intrinsic_name}")) + + let c = runner_command(runner) + .arg("intrinsic-test-programs") + .arg(intrinsic_name) + .current_dir("c_programs") .output(); - let rust = Command::new("sh") - .current_dir("rust_programs") - .arg("-c") - .arg(format!( - "cargo {toolchain} run --target {target} --bin {intrinsic_name} --release", - )) - .env("RUSTFLAGS", "-Cdebuginfo=0") + let rust = runner_command(runner) + .arg(format!("target/{target}/release/intrinsic-test-programs")) + .arg(intrinsic_name) .output(); let (c, rust) = match (c, rust) { @@ -32,7 +33,7 @@ pub fn compare_outputs( if !c.status.success() { error!( - "Failed to run C program for intrinsic {intrinsic_name}\nstdout: {stdout}\nstderr: {stderr}", + "Failed to run C program for intrinsic `{intrinsic_name}`\nstdout: {stdout}\nstderr: {stderr}", stdout = std::str::from_utf8(&c.stdout).unwrap_or(""), stderr = std::str::from_utf8(&c.stderr).unwrap_or(""), ); @@ -41,7 +42,7 @@ pub fn compare_outputs( if !rust.status.success() { error!( - "Failed to run Rust program for intrinsic {intrinsic_name}\nstdout: {stdout}\nstderr: {stderr}", + "Failed to run Rust program for intrinsic `{intrinsic_name}`\nstdout: {stdout}\nstderr: {stderr}", stdout = std::str::from_utf8(&rust.stdout).unwrap_or(""), stderr = std::str::from_utf8(&rust.stderr).unwrap_or(""), ); diff --git a/crates/intrinsic-test/src/common/compile_c.rs b/crates/intrinsic-test/src/common/compile_c.rs index 0bd42c530d..87c1946b7b 100644 --- a/crates/intrinsic-test/src/common/compile_c.rs +++ b/crates/intrinsic-test/src/common/compile_c.rs @@ -209,6 +209,7 @@ impl CppCompilation { let linker_output = cmd.output()?; if !linker_output.status.success() { error!("custom linker failed"); + error!("{}", String::from_utf8_lossy(&linker_output.stderr)); return Ok(linker_output); } diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index 52bccaf905..fb88e87d7e 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -1,8 +1,4 @@ use itertools::Itertools; -use rayon::prelude::*; -use std::collections::BTreeMap; -use std::fs::File; -use std::io::Write; use std::process::Command; use super::argument::Argument; @@ -13,88 +9,103 @@ use super::intrinsic_helpers::IntrinsicTypeDefinition; // The number of times each intrinsic will be called. const PASSES: u32 = 20; -pub fn format_rust_main_template( - notices: &str, - definitions: &str, - configurations: &str, - arch_definition: &str, - arglists: &str, - passes: &str, -) -> String { - format!( - r#"{notices}#![feature(simd_ffi)] -#![feature(link_llvm_intrinsics)] -#![feature(f16)] -{configurations} -{definitions} - -use core_arch::arch::{arch_definition}::*; - -fn main() {{ -{arglists} -{passes} -}} -"#, - ) +pub fn write_cargo_toml(w: &mut impl std::io::Write, binaries: &[String]) -> std::io::Result<()> { + writeln!( + w, + concat!( + "[package]\n", + "name = \"intrinsic-test-programs\"\n", + "version = \"{version}\"\n", + "authors = [{authors}]\n", + "license = \"{license}\"\n", + "edition = \"2018\"\n", + "[workspace]\n", + "[dependencies]\n", + "core_arch = {{ path = \"../crates/core_arch\" }}", + ), + version = env!("CARGO_PKG_VERSION"), + authors = env!("CARGO_PKG_AUTHORS") + .split(":") + .format_with(", ", |author, fmt| fmt(&format_args!("\"{author}\""))), + license = env!("CARGO_PKG_LICENSE"), + )?; + + for binary in binaries { + writeln!( + w, + concat!( + "[[bin]]\n", + "name = \"{binary}\"\n", + "path = \"{binary}/main.rs\"\n", + ), + binary = binary, + )?; + } + + Ok(()) } -pub fn compile_rust_programs( - binaries: Vec, - toolchain: Option<&str>, - target: &str, - linker: Option<&str>, -) -> bool { - let mut cargo = File::create("rust_programs/Cargo.toml").unwrap(); - cargo - .write_all( - format!( - r#"[package] -name = "intrinsic-test-programs" -version = "{version}" -authors = [{authors}] -license = "{license}" -edition = "2018" -[workspace] -[dependencies] -core_arch = {{ path = "../crates/core_arch" }} -{binaries}"#, - version = env!("CARGO_PKG_VERSION"), - authors = env!("CARGO_PKG_AUTHORS") - .split(":") - .format_with(", ", |author, fmt| fmt(&format_args!("\"{author}\""))), - license = env!("CARGO_PKG_LICENSE"), - binaries = binaries - .iter() - .map(|binary| { - format!( - r#"[[bin]] -name = "{binary}" -path = "{binary}/main.rs""#, - ) - }) - .collect::>() - .join("\n") - ) - .into_bytes() - .as_slice(), - ) - .unwrap(); - - let toolchain = match toolchain { - None => return true, - Some(t) => t, - }; +pub fn write_main_rs<'a>( + w: &mut impl std::io::Write, + available_parallelism: usize, + architecture: &str, + cfg: &str, + definitions: &str, + intrinsics: impl Iterator + Clone, +) -> std::io::Result<()> { + writeln!(w, "#![feature(simd_ffi)]")?; + writeln!(w, "#![feature(f16)]")?; + writeln!(w, "#![allow(unused)]")?; + + // Cargo will spam the logs if these warnings are not silenced. + writeln!(w, "#![allow(non_upper_case_globals)]")?; + writeln!(w, "#![allow(non_camel_case_types)]")?; + writeln!(w, "#![allow(non_snake_case)]")?; + + writeln!(w, "{cfg}")?; + writeln!(w, "{definitions}")?; + + writeln!(w, "use core_arch::arch::{architecture}::*;")?; + + for module in 0..Ord::min(available_parallelism, intrinsics.clone().count()) { + writeln!(w, "mod mod_{module};")?; + writeln!(w, "use mod_{module}::*;")?; + } + + writeln!(w, "fn main() {{")?; + writeln!(w, " match std::env::args().nth(1).unwrap().as_str() {{")?; + + for binary in intrinsics { + writeln!(w, " \"{binary}\" => run_{binary}(),")?; + } + + writeln!( + w, + " other => panic!(\"unknown intrinsic `{{}}`\", other)," + )?; + + writeln!(w, " }}")?; + writeln!(w, "}}")?; + + Ok(()) +} + +pub fn compile_rust_programs(toolchain: Option<&str>, target: &str, linker: Option<&str>) -> bool { /* If there has been a linker explicitly set from the command line then * we want to set it via setting it in the RUSTFLAGS*/ - let cargo_command = format!("cargo {toolchain} build --target {target} --release"); + trace!("Building cargo command"); + + let mut cargo_command = Command::new("cargo"); + cargo_command.current_dir("rust_programs"); - let mut command = Command::new("sh"); - command - .current_dir("rust_programs") - .arg("-c") - .arg(cargo_command); + if let Some(toolchain) = toolchain + && !toolchain.is_empty() + { + cargo_command.arg(toolchain); + } + cargo_command.args(["build", "--target", target, "--release"]); let mut rust_flags = "-Cdebuginfo=0".to_string(); if let Some(linker) = linker { @@ -102,11 +113,20 @@ path = "{binary}/main.rs""#, rust_flags.push_str(linker); rust_flags.push_str(" -C link-args=-static"); - command.env("CPPFLAGS", "-fuse-ld=lld"); + cargo_command.env("CPPFLAGS", "-fuse-ld=lld"); } - command.env("RUSTFLAGS", rust_flags); - let output = command.output(); + cargo_command.env("RUSTFLAGS", rust_flags); + + trace!("running cargo"); + + if log::log_enabled!(log::Level::Trace) { + cargo_command.stdout(std::process::Stdio::inherit()); + cargo_command.stderr(std::process::Stdio::inherit()); + } + + let output = cargo_command.output(); + trace!("cargo is done"); if let Ok(output) = output { if output.status.success() { @@ -125,26 +145,13 @@ path = "{binary}/main.rs""#, } } -// Creates directory structure and file path mappings -pub fn setup_rust_file_paths(identifiers: &Vec) -> BTreeMap<&String, String> { - identifiers - .par_iter() - .map(|identifier| { - let rust_dir = format!("rust_programs/{identifier}"); - let _ = std::fs::create_dir_all(&rust_dir); - let rust_filename = format!("{rust_dir}/main.rs"); - - (identifier, rust_filename) - }) - .collect::>() -} - pub fn generate_rust_test_loop( + w: &mut impl std::io::Write, intrinsic: &dyn IntrinsicDefinition, indentation: Indentation, additional: &str, passes: u32, -) -> String { +) -> std::io::Result<()> { let constraints = intrinsic.arguments().as_constraint_parameters_rust(); let constraints = if !constraints.is_empty() { format!("::<{constraints}>") @@ -155,7 +162,8 @@ pub fn generate_rust_test_loop( let return_value = format_f16_return_value(intrinsic); let indentation2 = indentation.nested(); let indentation3 = indentation2.nested(); - format!( + writeln!( + w, "{indentation}for i in 0..{passes} {{\n\ {indentation2}unsafe {{\n\ {loaded_args}\ @@ -170,74 +178,63 @@ pub fn generate_rust_test_loop( ) } -pub fn generate_rust_constraint_blocks( +fn generate_rust_constraint_blocks<'a, T: IntrinsicTypeDefinition + 'a>( + w: &mut impl std::io::Write, intrinsic: &dyn IntrinsicDefinition, indentation: Indentation, - constraints: &[&Argument], + constraints: &mut (impl Iterator> + Clone), name: String, -) -> String { - if let Some((current, constraints)) = constraints.split_last() { - let range = current - .constraint - .iter() - .map(|c| c.to_range()) - .flat_map(|r| r.into_iter()); - - let body_indentation = indentation.nested(); - range - .map(|i| { - format!( - "{indentation}{{\n\ - {body_indentation}const {name}: {ty} = {val};\n\ - {pass}\n\ - {indentation}}}", - name = current.name, - ty = current.ty.rust_type(), - val = i, - pass = generate_rust_constraint_blocks( - intrinsic, - body_indentation, - constraints, - format!("{name}-{i}") - ) - ) - }) - .join("\n") - } else { - generate_rust_test_loop(intrinsic, indentation, &name, PASSES) +) -> std::io::Result<()> { + let Some(current) = constraints.next() else { + return generate_rust_test_loop(w, intrinsic, indentation, &name, PASSES); + }; + + let body_indentation = indentation.nested(); + for i in current.constraint.iter().flat_map(|c| c.to_range()) { + let ty = current.ty.rust_type(); + + writeln!(w, "{indentation}{{")?; + + writeln!(w, "{body_indentation}const {}: {ty} = {i};", current.name)?; + + generate_rust_constraint_blocks( + w, + intrinsic, + body_indentation, + &mut constraints.clone(), + format!("{name}-{i}"), + )?; + + writeln!(w, "{indentation}}}")?; } + + Ok(()) } // Top-level function to create complete test program -pub fn create_rust_test_program( +pub fn create_rust_test_module( + w: &mut impl std::io::Write, intrinsic: &dyn IntrinsicDefinition, - target: &str, - notice: &str, - definitions: &str, - cfg: &str, -) -> String { +) -> std::io::Result<()> { + trace!("generating `{}`", intrinsic.name()); + let indentation = Indentation::default(); + + writeln!(w, "pub fn run_{}() {{", intrinsic.name())?; + + // Define the arrays of arguments. let arguments = intrinsic.arguments(); - let constraints = arguments - .iter() - .filter(|i| i.has_constraint()) - .collect_vec(); + arguments.gen_arglists_rust(w, indentation.nested(), PASSES)?; - let indentation = Indentation::default(); - format_rust_main_template( - notice, - definitions, - cfg, - target, - intrinsic - .arguments() - .gen_arglists_rust(indentation.nested(), PASSES) - .as_str(), - generate_rust_constraint_blocks( - intrinsic, - indentation.nested(), - &constraints, - Default::default(), - ) - .as_str(), - ) + // Define any const generics as `const` items, then generate the actual test loop. + generate_rust_constraint_blocks( + w, + intrinsic, + indentation.nested(), + &mut arguments.iter().rev().filter(|i| i.has_constraint()), + Default::default(), + )?; + + writeln!(w, "}}")?; + + Ok(()) } diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs index 5d51d3460e..6c3154af38 100644 --- a/crates/intrinsic-test/src/common/mod.rs +++ b/crates/intrinsic-test/src/common/mod.rs @@ -11,7 +11,6 @@ pub mod indentation; pub mod intrinsic; pub mod intrinsic_helpers; pub mod values; -pub mod write_file; /// Architectures must support this trait /// to be successfully tested. diff --git a/crates/intrinsic-test/src/common/write_file.rs b/crates/intrinsic-test/src/common/write_file.rs deleted file mode 100644 index 92dd70b7c5..0000000000 --- a/crates/intrinsic-test/src/common/write_file.rs +++ /dev/null @@ -1,33 +0,0 @@ -use super::gen_rust::{create_rust_test_program, setup_rust_file_paths}; -use super::intrinsic::IntrinsicDefinition; -use super::intrinsic_helpers::IntrinsicTypeDefinition; -use std::fs::File; -use std::io::Write; - -pub fn write_file(filename: &String, code: String) { - let mut file = File::create(filename).unwrap(); - file.write_all(code.into_bytes().as_slice()).unwrap(); -} - -pub fn write_rust_testfiles( - intrinsics: Vec<&dyn IntrinsicDefinition>, - rust_target: &str, - notice: &str, - definitions: &str, - cfg: &str, -) -> Vec { - let intrinsics_name_list = intrinsics - .iter() - .map(|i| i.name().clone()) - .collect::>(); - let filename_mapping = setup_rust_file_paths(&intrinsics_name_list); - - intrinsics.iter().for_each(|&i| { - let rust_code = create_rust_test_program(i, rust_target, notice, definitions, cfg); - if let Some(filename) = filename_mapping.get(&i.name()) { - write_file(filename, rust_code) - } - }); - - intrinsics_name_list -} From 28e255cd9615d6a55cb89e184a6594c461b7c612 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Fri, 11 Jul 2025 18:49:52 +0200 Subject: [PATCH 08/10] `intrinsic-test`: simplify linker logic just always compile .cpp to .o to maximize thread utilization, and only invoke the (maybe custom) linker at the very end --- crates/intrinsic-test/src/arm/compile.rs | 2 +- crates/intrinsic-test/src/arm/mod.rs | 50 ++++++--- crates/intrinsic-test/src/common/compile_c.rs | 104 +++--------------- 3 files changed, 52 insertions(+), 104 deletions(-) diff --git a/crates/intrinsic-test/src/arm/compile.rs b/crates/intrinsic-test/src/arm/compile.rs index 03079a89cf..5572c240bc 100644 --- a/crates/intrinsic-test/src/arm/compile.rs +++ b/crates/intrinsic-test/src/arm/compile.rs @@ -1,7 +1,7 @@ use crate::common::cli::ProcessedCli; use crate::common::compile_c::{CompilationCommandBuilder, CppCompilation}; -pub fn build_cpp_compilation(config: &ProcessedCli) -> Option { +pub fn configure_cpp_compiler(config: &ProcessedCli) -> Option { let cpp_compiler = config.cpp_compiler.as_ref()?; // -ffp-contract=off emulates Rust's approach of not fusing separate mul-add operations diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index 3c33e89d34..a1ed43eb1c 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -59,9 +59,10 @@ impl SupportedArchitectureTest for ArmArchitectureTest { let platform_headers = &["arm_neon.h", "arm_acle.h", "arm_fp16.h"]; let available_parallelism = std::thread::available_parallelism().unwrap().get(); - let chunk_size = self.intrinsics.len().div_ceil(available_parallelism); + let chunk_count = Ord::min(available_parallelism, self.intrinsics.len()); + let chunk_size = self.intrinsics.len().div_ceil(chunk_count); - let pipeline = compile::build_cpp_compilation(&self.cli_options).unwrap(); + let cpp_compiler = compile::configure_cpp_compiler(&self.cli_options).unwrap(); let notice = &build_notices("// "); self.intrinsics @@ -69,18 +70,23 @@ impl SupportedArchitectureTest for ArmArchitectureTest { .enumerate() .map(|(i, chunk)| { let c_filename = format!("c_programs/mod_{i}.cpp"); + info!("writing {c_filename}"); let mut file = File::create(&c_filename).unwrap(); write_mod_cpp(&mut file, notice, c_target, platform_headers, chunk).unwrap(); // compile this cpp file into a .o file - let output = pipeline.run(&[], &[format!("mod_{i}.cpp")], &format!("mod_{i}.o"))?; + info!("compiling {c_filename}"); + let output = cpp_compiler + .compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o"))?; assert!(output.status.success()); + info!("done compiling {c_filename}"); Ok(()) }) .collect::>() .unwrap(); + info!("writing main.cpp"); let mut file = File::create("c_programs/main.cpp").unwrap(); write_main_cpp( &mut file, @@ -90,21 +96,35 @@ impl SupportedArchitectureTest for ArmArchitectureTest { ) .unwrap(); - // Files to include in the final link step. - let mut includes = vec![]; - for i in 0..Ord::min(available_parallelism, self.intrinsics.len()) { - includes.push(format!("mod_{i}.o")); - } - - let output = pipeline - .run( - &includes, - &["main.cpp".to_string()], - "intrinsic-test-programs", - ) + // compile this cpp file into a .o file + info!("compiling main.cpp"); + let output = cpp_compiler + .compile_object_file("main.cpp", "intrinsic-test-programs.o") .unwrap(); assert!(output.status.success()); + let object_files = (0..chunk_count) + .map(|i| format!("mod_{i}.o")) + .chain(["intrinsic-test-programs.o".to_owned()]); + + let output = match &self.cli_options.linker { + Some(custom_linker) => { + let mut linker = std::process::Command::new(custom_linker); + + linker.current_dir("c_programs"); + + linker.args(object_files); + linker.args(["-o", "intrinsic-test-programs"]); + + info!("linking final C binary with {custom_linker}"); + linker.output() + } + None => cpp_compiler.link_executable(object_files, "intrinsic-test-programs"), + }; + + let output = output.unwrap(); + assert!(output.status.success()); + true } diff --git a/crates/intrinsic-test/src/common/compile_c.rs b/crates/intrinsic-test/src/common/compile_c.rs index 87c1946b7b..e674e9642d 100644 --- a/crates/intrinsic-test/src/common/compile_c.rs +++ b/crates/intrinsic-test/src/common/compile_c.rs @@ -104,30 +104,19 @@ impl CompilationCommandBuilder { cpp_compiler.arg(format!("--target={target}")); } - if let (Some(linker), Some(cxx_toolchain_dir)) = (&self.linker, &self.cxx_toolchain_dir) { + if let Some(cxx_toolchain_dir) = &self.cxx_toolchain_dir { cpp_compiler.args( self.include_paths .iter() - .map(|path| "--include-directory=".to_string() + cxx_toolchain_dir + path), + .map(|path| format!("--include-directory={cxx_toolchain_dir}{path}")), ); - - CppCompilation::CustomLinker { - cpp_compiler, - linker: linker.to_owned(), - } - } else { - CppCompilation::Simple(cpp_compiler) } + + CppCompilation(cpp_compiler) } } -pub enum CppCompilation { - Simple(std::process::Command), - CustomLinker { - cpp_compiler: std::process::Command, - linker: String, - }, -} +pub struct CppCompilation(std::process::Command); fn clone_command(command: &std::process::Command) -> std::process::Command { let mut cmd = std::process::Command::new(command.get_program()); @@ -144,85 +133,24 @@ fn clone_command(command: &std::process::Command) -> std::process::Command { } impl CppCompilation { - fn compile_cpp( - command: &std::process::Command, - includes: &[String], - inputs: &[String], + pub fn compile_object_file( + &self, + input: &str, output: &str, ) -> std::io::Result { - let mut cmd = clone_command(command); - cmd.args(includes); - cmd.args(inputs); - cmd.args(["-o", output]); - - if output.ends_with(".o") { - cmd.arg("-c"); - } - + let mut cmd = clone_command(&self.0); + cmd.args([input, "-c", "-o", output]); cmd.output() } - pub fn run( + pub fn link_executable( &self, - includes: &[String], - inputs: &[String], + inputs: impl Iterator, output: &str, ) -> std::io::Result { - match self { - CppCompilation::Simple(command) => Self::compile_cpp(command, includes, inputs, output), - CppCompilation::CustomLinker { cpp_compiler, .. } if output.ends_with(".o") => { - // No need to invoke that custom linker if we're creating an object file. - Self::compile_cpp(cpp_compiler, includes, inputs, output) - } - CppCompilation::CustomLinker { - cpp_compiler, - linker, - } => { - let object_file = &format!("{output}.o"); - - // Build an object file using the cpp compiler. - let mut cmd = clone_command(cpp_compiler); - cmd.args(inputs); - cmd.args(["-c", "-o", object_file]); - - let cpp_output = cmd.output()?; - if !cpp_output.status.success() { - error!("c++ compilaton failed"); - return Ok(cpp_output); - } - - trace!("using custom linker"); - - // Use the custom linker to turn the object file into an executable. - let mut cmd = std::process::Command::new(linker); - cmd.args(includes); - cmd.args([object_file, "-o", output]); - - if let Some(current_dir) = cpp_compiler.get_current_dir() { - cmd.current_dir(current_dir); - } - - for (key, val) in cpp_compiler.get_envs() { - cmd.env(key, val.unwrap_or_default()); - } - - let linker_output = cmd.output()?; - if !linker_output.status.success() { - error!("custom linker failed"); - error!("{}", String::from_utf8_lossy(&linker_output.stderr)); - return Ok(linker_output); - } - - trace!("removing {object_file}"); - let object_file_path = match cpp_compiler.get_current_dir() { - Some(current_dir) => &format!("{}/{object_file}", current_dir.display()), - None => object_file, - }; - - std::fs::remove_file(object_file_path)?; - - Ok(cpp_output) - } - } + let mut cmd = clone_command(&self.0); + cmd.args(inputs); + cmd.args(["-o", output]); + cmd.output() } } From bb9d602b8961557a76d55e1aa980ce536a8dda12 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Fri, 11 Jul 2025 20:49:54 +0200 Subject: [PATCH 09/10] split rust code into crates so that we get more parallelism out of cargo --- crates/intrinsic-test/src/arm/config.rs | 1 - crates/intrinsic-test/src/arm/mod.rs | 41 +++++++++++++--- crates/intrinsic-test/src/common/compare.rs | 1 + crates/intrinsic-test/src/common/gen_rust.rs | 51 +++++++++++--------- 4 files changed, 63 insertions(+), 31 deletions(-) diff --git a/crates/intrinsic-test/src/arm/config.rs b/crates/intrinsic-test/src/arm/config.rs index cee80374ae..9a7b37253d 100644 --- a/crates/intrinsic-test/src/arm/config.rs +++ b/crates/intrinsic-test/src/arm/config.rs @@ -114,7 +114,6 @@ pub const AARCH_CONFIGURATIONS: &str = r#" #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fcma))] #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_dotprod))] #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_i8mm))] -#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sha3))] #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sm4))] #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))] #![feature(fmt_helpers_for_derive)] diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs index a1ed43eb1c..e5b4d72f7c 100644 --- a/crates/intrinsic-test/src/arm/mod.rs +++ b/crates/intrinsic-test/src/arm/mod.rs @@ -13,7 +13,9 @@ use crate::common::SupportedArchitectureTest; use crate::common::cli::ProcessedCli; use crate::common::compare::compare_outputs; use crate::common::gen_c::{write_main_cpp, write_mod_cpp}; -use crate::common::gen_rust::{compile_rust_programs, write_cargo_toml, write_main_rs}; +use crate::common::gen_rust::{ + compile_rust_programs, write_bin_cargo_toml, write_lib_cargo_toml, write_main_rs, +}; use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; use config::{AARCH_CONFIGURATIONS, F16_FORMATTING_DEF, build_notices}; @@ -137,19 +139,20 @@ impl SupportedArchitectureTest for ArmArchitectureTest { "aarch64" }; + // Experimentally, keeping 2 cores free is fastest for cargo. let available_parallelism = std::thread::available_parallelism().unwrap().get(); - let chunk_size = self.intrinsics.len().div_ceil(available_parallelism); + let chunk_count = Ord::min(available_parallelism, self.intrinsics.len()); + let chunk_size = self.intrinsics.len().div_ceil(chunk_count); let mut cargo = File::create("rust_programs/Cargo.toml").unwrap(); - write_cargo_toml(&mut cargo, &[]).unwrap(); + write_bin_cargo_toml(&mut cargo, chunk_count).unwrap(); let mut main_rs = File::create("rust_programs/src/main.rs").unwrap(); write_main_rs( &mut main_rs, - available_parallelism, - architecture, + chunk_count, AARCH_CONFIGURATIONS, - F16_FORMATTING_DEF, + "", self.intrinsics.iter().map(|i| i.name.as_str()), ) .unwrap(); @@ -165,19 +168,41 @@ impl SupportedArchitectureTest for ArmArchitectureTest { .map(|(i, chunk)| { use std::io::Write; - let rust_filename = format!("rust_programs/src/mod_{i}.rs"); + std::fs::create_dir_all(format!("rust_programs/mod_{i}/src"))?; + + let rust_filename = format!("rust_programs/mod_{i}/src/lib.rs"); trace!("generating `{rust_filename}`"); let mut file = File::create(rust_filename).unwrap(); write!(file, "{notice}")?; + writeln!(file, "#![feature(simd_ffi)]")?; + writeln!(file, "#![feature(f16)]")?; + writeln!(file, "#![allow(unused)]")?; + + // Cargo will spam the logs if these warnings are not silenced. + writeln!(file, "#![allow(non_upper_case_globals)]")?; + writeln!(file, "#![allow(non_camel_case_types)]")?; + writeln!(file, "#![allow(non_snake_case)]")?; + + let cfg = AARCH_CONFIGURATIONS; + writeln!(file, "{cfg}")?; + writeln!(file, "use core_arch::arch::{architecture}::*;")?; - writeln!(file, "use crate::{{debug_simd_finish, debug_f16}};")?; + + let definitions = F16_FORMATTING_DEF; + writeln!(file, "{definitions}")?; for intrinsic in chunk { crate::common::gen_rust::create_rust_test_module(&mut file, intrinsic)?; } + let toml_filename = format!("rust_programs/mod_{i}/Cargo.toml"); + trace!("generating `{toml_filename}`"); + let mut file = File::create(toml_filename).unwrap(); + + write_lib_cargo_toml(&mut file, &format!("mod_{i}"))?; + Ok(()) }) .collect::>() diff --git a/crates/intrinsic-test/src/common/compare.rs b/crates/intrinsic-test/src/common/compare.rs index 183bb23ee0..1ad00839ef 100644 --- a/crates/intrinsic-test/src/common/compare.rs +++ b/crates/intrinsic-test/src/common/compare.rs @@ -24,6 +24,7 @@ pub fn compare_outputs(intrinsic_name_list: &Vec, runner: &str, target: let rust = runner_command(runner) .arg(format!("target/{target}/release/intrinsic-test-programs")) .arg(intrinsic_name) + .current_dir("rust_programs") .output(); let (c, rust) = match (c, rust) { diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index fb88e87d7e..35b41a2239 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -9,46 +9,53 @@ use super::intrinsic_helpers::IntrinsicTypeDefinition; // The number of times each intrinsic will be called. const PASSES: u32 = 20; -pub fn write_cargo_toml(w: &mut impl std::io::Write, binaries: &[String]) -> std::io::Result<()> { +fn write_cargo_toml_header(w: &mut impl std::io::Write, name: &str) -> std::io::Result<()> { writeln!( w, concat!( "[package]\n", - "name = \"intrinsic-test-programs\"\n", + "name = \"{name}\"\n", "version = \"{version}\"\n", "authors = [{authors}]\n", "license = \"{license}\"\n", "edition = \"2018\"\n", - "[workspace]\n", - "[dependencies]\n", - "core_arch = {{ path = \"../crates/core_arch\" }}", ), + name = name, version = env!("CARGO_PKG_VERSION"), authors = env!("CARGO_PKG_AUTHORS") .split(":") .format_with(", ", |author, fmt| fmt(&format_args!("\"{author}\""))), license = env!("CARGO_PKG_LICENSE"), - )?; + ) +} - for binary in binaries { - writeln!( - w, - concat!( - "[[bin]]\n", - "name = \"{binary}\"\n", - "path = \"{binary}/main.rs\"\n", - ), - binary = binary, - )?; +pub fn write_bin_cargo_toml( + w: &mut impl std::io::Write, + module_count: usize, +) -> std::io::Result<()> { + write_cargo_toml_header(w, "intrinsic-test-programs")?; + + writeln!(w, "[dependencies]")?; + + for i in 0..module_count { + writeln!(w, "mod_{i} = {{ path = \"mod_{i}/\" }}")?; } Ok(()) } +pub fn write_lib_cargo_toml(w: &mut impl std::io::Write, name: &str) -> std::io::Result<()> { + write_cargo_toml_header(w, name)?; + + writeln!(w, "[dependencies]")?; + writeln!(w, "core_arch = {{ path = \"../../crates/core_arch\" }}")?; + + Ok(()) +} + pub fn write_main_rs<'a>( w: &mut impl std::io::Write, - available_parallelism: usize, - architecture: &str, + chunk_count: usize, cfg: &str, definitions: &str, intrinsics: impl Iterator + Clone, @@ -65,10 +72,7 @@ pub fn write_main_rs<'a>( writeln!(w, "{cfg}")?; writeln!(w, "{definitions}")?; - writeln!(w, "use core_arch::arch::{architecture}::*;")?; - - for module in 0..Ord::min(available_parallelism, intrinsics.clone().count()) { - writeln!(w, "mod mod_{module};")?; + for module in 0..chunk_count { writeln!(w, "use mod_{module}::*;")?; } @@ -100,6 +104,9 @@ pub fn compile_rust_programs(toolchain: Option<&str>, target: &str, linker: Opti let mut cargo_command = Command::new("cargo"); cargo_command.current_dir("rust_programs"); + // Do not use the target directory of the workspace please. + cargo_command.env("CARGO_TARGET_DIR", "target"); + if let Some(toolchain) = toolchain && !toolchain.is_empty() { From cbd82733ab1dcc14c2f6de85ac3513c370720535 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Fri, 11 Jul 2025 22:10:56 +0200 Subject: [PATCH 10/10] generate arrays of type-erased function pointers i.e. don't duplicate blocks with loop logic. This compiles quite a bit faster, I think mostly because it's just less code --- crates/intrinsic-test/src/arm/types.rs | 19 --- crates/intrinsic-test/src/common/argument.rs | 8 -- crates/intrinsic-test/src/common/gen_rust.rs | 126 ++++++++++-------- .../src/common/intrinsic_helpers.rs | 3 - 4 files changed, 73 insertions(+), 83 deletions(-) diff --git a/crates/intrinsic-test/src/arm/types.rs b/crates/intrinsic-test/src/arm/types.rs index 9f3d6302f4..17c395fcac 100644 --- a/crates/intrinsic-test/src/arm/types.rs +++ b/crates/intrinsic-test/src/arm/types.rs @@ -33,25 +33,6 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { } } - fn rust_type(&self) -> String { - let rust_prefix = self.0.kind.rust_prefix(); - let c_prefix = self.0.kind.c_prefix(); - if self.0.ptr_constant { - self.c_type() - } else if let (Some(bit_len), simd_len, vec_len) = - (self.0.bit_len, self.0.simd_len, self.0.vec_len) - { - match (simd_len, vec_len) { - (None, None) => format!("{rust_prefix}{bit_len}"), - (Some(simd), None) => format!("{c_prefix}{bit_len}x{simd}_t"), - (Some(simd), Some(vec)) => format!("{c_prefix}{bit_len}x{simd}x{vec}_t"), - (None, Some(_)) => todo!("{:#?}", self), // Likely an invalid case - } - } else { - todo!("{:#?}", self) - } - } - /// Determines the load function for this type. fn get_load_function(&self, language: Language) -> String { if let IntrinsicType { diff --git a/crates/intrinsic-test/src/common/argument.rs b/crates/intrinsic-test/src/common/argument.rs index b72c954f4a..bc2e1f2bec 100644 --- a/crates/intrinsic-test/src/common/argument.rs +++ b/crates/intrinsic-test/src/common/argument.rs @@ -114,14 +114,6 @@ where .join(", ") } - pub fn as_constraint_parameters_rust(&self) -> String { - self.iter() - .filter(|a| a.has_constraint()) - .map(|arg| arg.name.clone()) - .collect::>() - .join(", ") - } - /// Creates a line for each argument that initializes an array for C from which `loads` argument /// values can be loaded as a sliding window. /// e.g `const int32x2_t a_vals = {0x3effffff, 0x3effffff, 0x3f7fffff}`, if loads=2. diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index 35b41a2239..bda5cefce5 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -1,7 +1,6 @@ use itertools::Itertools; use std::process::Command; -use super::argument::Argument; use super::indentation::Indentation; use super::intrinsic::{IntrinsicDefinition, format_f16_return_value}; use super::intrinsic_helpers::IntrinsicTypeDefinition; @@ -156,66 +155,87 @@ pub fn generate_rust_test_loop( w: &mut impl std::io::Write, intrinsic: &dyn IntrinsicDefinition, indentation: Indentation, - additional: &str, + specializations: &[Vec], passes: u32, ) -> std::io::Result<()> { - let constraints = intrinsic.arguments().as_constraint_parameters_rust(); - let constraints = if !constraints.is_empty() { - format!("::<{constraints}>") - } else { - constraints - }; + let intrinsic_name = intrinsic.name(); + + // Each function (and each specialization) has its own type. Erase that type with a cast. + let mut coerce = String::from("unsafe fn("); + for _ in intrinsic.arguments().iter().filter(|a| !a.has_constraint()) { + coerce += "_, "; + } + coerce += ") -> _"; + + match specializations { + [] => { + writeln!(w, " let specializations = [(\"\", {intrinsic_name})];")?; + } + [const_args] if const_args.is_empty() => { + writeln!(w, " let specializations = [(\"\", {intrinsic_name})];")?; + } + _ => { + writeln!(w, " let specializations = [")?; + + for specialization in specializations { + let mut specialization: Vec<_> = + specialization.iter().map(|d| d.to_string()).collect(); + + let const_args = specialization.join(","); + + // The identifier is reversed. + specialization.reverse(); + let id = specialization.join("-"); + + writeln!( + w, + " (\"-{id}\", {intrinsic_name}::<{const_args}> as {coerce})," + )?; + } + + writeln!(w, " ];")?; + } + } let return_value = format_f16_return_value(intrinsic); let indentation2 = indentation.nested(); let indentation3 = indentation2.nested(); writeln!( w, - "{indentation}for i in 0..{passes} {{\n\ - {indentation2}unsafe {{\n\ - {loaded_args}\ - {indentation3}let __return_value = {intrinsic_call}{const}({args});\n\ - {indentation3}println!(\"Result {additional}-{{}}: {{:?}}\", i + 1, {return_value});\n\ - {indentation2}}}\n\ - {indentation}}}", + "\ + for (id, f) in specializations {{\n\ + for i in 0..{passes} {{\n\ + unsafe {{\n\ + {loaded_args}\ + let __return_value = f({args});\n\ + println!(\"Result {{id}}-{{}}: {{:?}}\", i + 1, {return_value});\n\ + }}\n\ + }}\n\ + }}", loaded_args = intrinsic.arguments().load_values_rust(indentation3), - intrinsic_call = intrinsic.name(), - const = constraints, args = intrinsic.arguments().as_call_param_rust(), ) } -fn generate_rust_constraint_blocks<'a, T: IntrinsicTypeDefinition + 'a>( - w: &mut impl std::io::Write, - intrinsic: &dyn IntrinsicDefinition, - indentation: Indentation, - constraints: &mut (impl Iterator> + Clone), - name: String, -) -> std::io::Result<()> { - let Some(current) = constraints.next() else { - return generate_rust_test_loop(w, intrinsic, indentation, &name, PASSES); - }; - - let body_indentation = indentation.nested(); - for i in current.constraint.iter().flat_map(|c| c.to_range()) { - let ty = current.ty.rust_type(); - - writeln!(w, "{indentation}{{")?; - - writeln!(w, "{body_indentation}const {}: {ty} = {i};", current.name)?; - - generate_rust_constraint_blocks( - w, - intrinsic, - body_indentation, - &mut constraints.clone(), - format!("{name}-{i}"), - )?; - - writeln!(w, "{indentation}}}")?; +/// Generate the specializations (unique sequences of const-generic arguments) for this intrinsic. +fn generate_rust_specializations<'a>( + constraints: &mut impl Iterator>, +) -> Vec> { + let mut specializations = vec![vec![]]; + + for constraint in constraints { + specializations = constraint + .flat_map(|right| { + specializations.iter().map(move |left| { + let mut left = left.clone(); + left.push(u8::try_from(right).unwrap()); + left + }) + }) + .collect(); } - Ok(()) + specializations } // Top-level function to create complete test program @@ -233,13 +253,13 @@ pub fn create_rust_test_module( arguments.gen_arglists_rust(w, indentation.nested(), PASSES)?; // Define any const generics as `const` items, then generate the actual test loop. - generate_rust_constraint_blocks( - w, - intrinsic, - indentation.nested(), - &mut arguments.iter().rev().filter(|i| i.has_constraint()), - Default::default(), - )?; + let specializations = generate_rust_specializations( + &mut arguments + .iter() + .filter_map(|i| i.constraint.as_ref().map(|v| v.to_range())), + ); + + generate_rust_test_loop(w, intrinsic, indentation, &specializations, PASSES)?; writeln!(w, "}}")?; diff --git a/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/crates/intrinsic-test/src/common/intrinsic_helpers.rs index 3d200b1946..fcd8b13069 100644 --- a/crates/intrinsic-test/src/common/intrinsic_helpers.rs +++ b/crates/intrinsic-test/src/common/intrinsic_helpers.rs @@ -290,7 +290,4 @@ pub trait IntrinsicTypeDefinition: Deref { /// can be directly defined in `impl` blocks fn c_single_vector_type(&self) -> String; - - /// can be defined in `impl` blocks - fn rust_type(&self) -> String; }