Skip to content

Commit 99ff3fb

Browse files
committed
Auto merge of #149202 - ZuseZ4:automate-offload-clangs, r=oli-obk
automate offload, part 2 - clang calls. This automates steps 2 and 3 (the clang invocations) of the Rust offload usage pipeline. After this step, all that remains is a `clang-linker-wrapper` invocation. r? oli-obk
2 parents 0bd13c3 + 8e1d803 commit 99ff3fb

File tree

11 files changed

+183
-44
lines changed

11 files changed

+183
-44
lines changed

compiler/rustc_codegen_llvm/messages.ftl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,12 @@ codegen_llvm_lto_bitcode_from_rlib = failed to get bitcode from object file for
1919
codegen_llvm_mismatch_data_layout =
2020
data-layout for target `{$rustc_target}`, `{$rustc_layout}`, differs from LLVM target's `{$llvm_target}` default layout, `{$llvm_layout}`
2121
22-
codegen_llvm_offload_without_enable = using the offload feature requires -Z offload=Enable
22+
codegen_llvm_offload_bundleimages_failed = call to BundleImages failed, `host.out` was not created
23+
codegen_llvm_offload_embed_failed = call to EmbedBufferInModule failed, `host.o` was not created
24+
codegen_llvm_offload_no_abs_path = using the `-Z offload=Host=/absolute/path/to/host.out` flag requires an absolute path
25+
codegen_llvm_offload_no_host_out = using the `-Z offload=Host=/absolute/path/to/host.out` flag must point to a `host.out` file
26+
codegen_llvm_offload_nonexisting = the given path/file to `host.out` does not exist. Did you forget to run the device compilation first?
27+
codegen_llvm_offload_without_enable = using the offload feature requires -Z offload=<Device or Host=/absolute/path/to/host.out>
2328
codegen_llvm_offload_without_fat_lto = using the offload feature requires -C lto=fat
2429
2530
codegen_llvm_parse_bitcode = failed to parse bitcode for LTO module

compiler/rustc_codegen_llvm/src/back/write.rs

Lines changed: 68 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -703,10 +703,9 @@ pub(crate) unsafe fn llvm_optimize(
703703
llvm::set_value_name(new_fn, &name);
704704
}
705705

706-
if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Enable) {
706+
if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Device) {
707707
let cx =
708708
SimpleCx::new(module.module_llvm.llmod(), module.module_llvm.llcx, cgcx.pointer_size);
709-
710709
for func in cx.get_functions() {
711710
let offload_kernel = "offload-kernel";
712711
if attributes::has_string_attr(func, offload_kernel) {
@@ -775,12 +774,77 @@ pub(crate) unsafe fn llvm_optimize(
775774
)
776775
};
777776

778-
if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Enable) {
777+
if cgcx.target_is_like_gpu && config.offload.contains(&config::Offload::Device) {
778+
let device_path = cgcx.output_filenames.path(OutputType::Object);
779+
let device_dir = device_path.parent().unwrap();
780+
let device_out = device_dir.join("host.out");
781+
let device_out_c = path_to_c_string(device_out.as_path());
779782
unsafe {
780-
llvm::LLVMRustBundleImages(module.module_llvm.llmod(), module.module_llvm.tm.raw());
783+
// 1) Bundle device module into offload image host.out (device TM)
784+
let ok = llvm::LLVMRustBundleImages(
785+
module.module_llvm.llmod(),
786+
module.module_llvm.tm.raw(),
787+
device_out_c.as_ptr(),
788+
);
789+
if !ok || !device_out.exists() {
790+
dcx.emit_err(crate::errors::OffloadBundleImagesFailed);
791+
}
781792
}
782793
}
783794

795+
// This assumes that we previously compiled our kernels for a gpu target, which created a
796+
// `host.out` artifact. The user is supposed to provide us with a path to this artifact, we
797+
// don't need any other artifacts from the previous run. We will embed this artifact into our
798+
// LLVM-IR host module, to create a `host.o` ObjectFile, which we will write to disk.
799+
// The last, not yet automated step uses the `clang-linker-wrapper` to process `host.o`.
800+
if !cgcx.target_is_like_gpu {
801+
if let Some(device_path) = config
802+
.offload
803+
.iter()
804+
.find_map(|o| if let config::Offload::Host(path) = o { Some(path) } else { None })
805+
{
806+
let device_pathbuf = PathBuf::from(device_path);
807+
if device_pathbuf.is_relative() {
808+
dcx.emit_err(crate::errors::OffloadWithoutAbsPath);
809+
} else if device_pathbuf
810+
.file_name()
811+
.and_then(|n| n.to_str())
812+
.is_some_and(|n| n != "host.out")
813+
{
814+
dcx.emit_err(crate::errors::OffloadWrongFileName);
815+
} else if !device_pathbuf.exists() {
816+
dcx.emit_err(crate::errors::OffloadNonexistingPath);
817+
}
818+
let host_path = cgcx.output_filenames.path(OutputType::Object);
819+
let host_dir = host_path.parent().unwrap();
820+
let out_obj = host_dir.join("host.o");
821+
let host_out_c = path_to_c_string(device_pathbuf.as_path());
822+
823+
// 2) Finalize host: lib.bc + host.out -> host.o (host TM)
824+
// We create a full clone of our LLVM host module, since we will embed the device IR
825+
// into it, and this might break caching or incremental compilation otherwise.
826+
let llmod2 = llvm::LLVMCloneModule(module.module_llvm.llmod());
827+
let ok =
828+
unsafe { llvm::LLVMRustOffloadEmbedBufferInModule(llmod2, host_out_c.as_ptr()) };
829+
if !ok {
830+
dcx.emit_err(crate::errors::OffloadEmbedFailed);
831+
}
832+
write_output_file(
833+
dcx,
834+
module.module_llvm.tm.raw(),
835+
config.no_builtins,
836+
llmod2,
837+
&out_obj,
838+
None,
839+
llvm::FileType::ObjectFile,
840+
&cgcx.prof,
841+
true,
842+
);
843+
// We ignore cgcx.save_temps here and always keep our `host.out` artifact.
844+
// Otherwise, recompiling the host code would fail since we deleted that device artifact
845+
// in the previous host compilation, which would be confusing at best.
846+
}
847+
}
784848
result.into_result().unwrap_or_else(|()| llvm_err(dcx, LlvmError::RunLlvmPasses))
785849
}
786850

compiler/rustc_codegen_llvm/src/base.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,9 @@ pub(crate) fn compile_codegen_unit(
9393
// They are necessary for correct offload execution. We do this here to simplify the
9494
// `offload` intrinsic, avoiding the need for tracking whether it's the first
9595
// intrinsic call or not.
96-
if cx.sess().opts.unstable_opts.offload.contains(&Offload::Enable)
97-
&& !cx.sess().target.is_like_gpu
98-
{
96+
let has_host_offload =
97+
cx.sess().opts.unstable_opts.offload.iter().any(|o| matches!(o, Offload::Host(_)));
98+
if has_host_offload && !cx.sess().target.is_like_gpu {
9999
cx.offload_globals.replace(Some(OffloadGlobals::declare(&cx)));
100100
}
101101

compiler/rustc_codegen_llvm/src/errors.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,26 @@ pub(crate) struct OffloadWithoutEnable;
5252
#[diag(codegen_llvm_offload_without_fat_lto)]
5353
pub(crate) struct OffloadWithoutFatLTO;
5454

55+
#[derive(Diagnostic)]
56+
#[diag(codegen_llvm_offload_no_abs_path)]
57+
pub(crate) struct OffloadWithoutAbsPath;
58+
59+
#[derive(Diagnostic)]
60+
#[diag(codegen_llvm_offload_no_host_out)]
61+
pub(crate) struct OffloadWrongFileName;
62+
63+
#[derive(Diagnostic)]
64+
#[diag(codegen_llvm_offload_nonexisting)]
65+
pub(crate) struct OffloadNonexistingPath;
66+
67+
#[derive(Diagnostic)]
68+
#[diag(codegen_llvm_offload_bundleimages_failed)]
69+
pub(crate) struct OffloadBundleImagesFailed;
70+
71+
#[derive(Diagnostic)]
72+
#[diag(codegen_llvm_offload_embed_failed)]
73+
pub(crate) struct OffloadEmbedFailed;
74+
5575
#[derive(Diagnostic)]
5676
#[diag(codegen_llvm_lto_bitcode_from_rlib)]
5777
pub(crate) struct LtoBitcodeFromRlib {

compiler/rustc_codegen_llvm/src/intrinsic.rs

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -202,13 +202,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
202202
return Ok(());
203203
}
204204
sym::offload => {
205-
if !tcx
206-
.sess
207-
.opts
208-
.unstable_opts
209-
.offload
210-
.contains(&rustc_session::config::Offload::Enable)
211-
{
205+
if tcx.sess.opts.unstable_opts.offload.is_empty() {
212206
let _ = tcx.dcx().emit_almost_fatal(OffloadWithoutEnable);
213207
}
214208

compiler/rustc_codegen_llvm/src/llvm/ffi.rs

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1666,7 +1666,15 @@ mod Offload {
16661666
use super::*;
16671667
unsafe extern "C" {
16681668
/// Processes the module and writes it in an offload compatible way into a "host.out" file.
1669-
pub(crate) fn LLVMRustBundleImages<'a>(M: &'a Module, TM: &'a TargetMachine) -> bool;
1669+
pub(crate) fn LLVMRustBundleImages<'a>(
1670+
M: &'a Module,
1671+
TM: &'a TargetMachine,
1672+
host_out: *const c_char,
1673+
) -> bool;
1674+
pub(crate) unsafe fn LLVMRustOffloadEmbedBufferInModule<'a>(
1675+
_M: &'a Module,
1676+
_host_out: *const c_char,
1677+
) -> bool;
16701678
pub(crate) fn LLVMRustOffloadMapper<'a>(OldFn: &'a Value, NewFn: &'a Value);
16711679
}
16721680
}
@@ -1680,7 +1688,17 @@ mod Offload_fallback {
16801688
/// Processes the module and writes it in an offload compatible way into a "host.out" file.
16811689
/// Marked as unsafe to match the real offload wrapper which is unsafe due to FFI.
16821690
#[allow(unused_unsafe)]
1683-
pub(crate) unsafe fn LLVMRustBundleImages<'a>(_M: &'a Module, _TM: &'a TargetMachine) -> bool {
1691+
pub(crate) unsafe fn LLVMRustBundleImages<'a>(
1692+
_M: &'a Module,
1693+
_TM: &'a TargetMachine,
1694+
_host_out: *const c_char,
1695+
) -> bool {
1696+
unimplemented!("This rustc version was not built with LLVM Offload support!");
1697+
}
1698+
pub(crate) unsafe fn LLVMRustOffloadEmbedBufferInModule<'a>(
1699+
_M: &'a Module,
1700+
_host_out: *const c_char,
1701+
) -> bool {
16841702
unimplemented!("This rustc version was not built with LLVM Offload support!");
16851703
}
16861704
#[allow(unused_unsafe)]

compiler/rustc_interface/src/tests.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -837,7 +837,7 @@ fn test_unstable_options_tracking_hash() {
837837
tracked!(no_profiler_runtime, true);
838838
tracked!(no_trait_vptr, true);
839839
tracked!(no_unique_section_names, true);
840-
tracked!(offload, vec![Offload::Enable]);
840+
tracked!(offload, vec![Offload::Device]);
841841
tracked!(on_broken_pipe, OnBrokenPipe::Kill);
842842
tracked!(osx_rpath_install_name, true);
843843
tracked!(packed_bundled_libs, true);

compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,10 @@
4343
// available. As such, we only try to build it in the first place, if
4444
// llvm.offload is enabled.
4545
#ifdef OFFLOAD
46+
#include "llvm/Bitcode/BitcodeReader.h"
4647
#include "llvm/Object/OffloadBinary.h"
4748
#include "llvm/Target/TargetMachine.h"
49+
#include "llvm/Transforms/Utils/ModuleUtils.h"
4850
#endif
4951

5052
// for raw `write` in the bad-alloc handler
@@ -174,12 +176,13 @@ static Error writeFile(StringRef Filename, StringRef Data) {
174176
// --image=file=device.bc,triple=amdgcn-amd-amdhsa,arch=gfx90a,kind=openmp
175177
// The input module is the rust code compiled for a gpu target like amdgpu.
176178
// Based on clang/tools/clang-offload-packager/ClangOffloadPackager.cpp
177-
extern "C" bool LLVMRustBundleImages(LLVMModuleRef M, TargetMachine &TM) {
179+
extern "C" bool LLVMRustBundleImages(LLVMModuleRef M, TargetMachine &TM,
180+
const char *HostOutPath) {
178181
std::string Storage;
179182
llvm::raw_string_ostream OS1(Storage);
180183
llvm::WriteBitcodeToFile(*unwrap(M), OS1);
181184
OS1.flush();
182-
auto MB = llvm::MemoryBuffer::getMemBufferCopy(Storage, "module.bc");
185+
auto MB = llvm::MemoryBuffer::getMemBufferCopy(Storage, "device.bc");
183186

184187
SmallVector<char, 1024> BinaryData;
185188
raw_svector_ostream OS2(BinaryData);
@@ -188,19 +191,38 @@ extern "C" bool LLVMRustBundleImages(LLVMModuleRef M, TargetMachine &TM) {
188191
ImageBinary.TheImageKind = object::IMG_Bitcode;
189192
ImageBinary.Image = std::move(MB);
190193
ImageBinary.TheOffloadKind = object::OFK_OpenMP;
191-
ImageBinary.StringData["triple"] = TM.getTargetTriple().str();
192-
ImageBinary.StringData["arch"] = TM.getTargetCPU();
194+
195+
std::string TripleStr = TM.getTargetTriple().str();
196+
llvm::StringRef CPURef = TM.getTargetCPU();
197+
ImageBinary.StringData["triple"] = TripleStr;
198+
ImageBinary.StringData["arch"] = CPURef;
193199
llvm::SmallString<0> Buffer = OffloadBinary::write(ImageBinary);
194200
if (Buffer.size() % OffloadBinary::getAlignment() != 0)
195201
// Offload binary has invalid size alignment
196202
return false;
197203
OS2 << Buffer;
198-
if (Error E = writeFile("host.out",
204+
if (Error E = writeFile(HostOutPath,
199205
StringRef(BinaryData.begin(), BinaryData.size())))
200206
return false;
201207
return true;
202208
}
203209

210+
extern "C" bool LLVMRustOffloadEmbedBufferInModule(LLVMModuleRef HostM,
211+
const char *HostOutPath) {
212+
auto MBOrErr = MemoryBuffer::getFile(HostOutPath);
213+
if (!MBOrErr) {
214+
auto E = MBOrErr.getError();
215+
auto _B = errorCodeToError(E);
216+
return false;
217+
}
218+
MemoryBufferRef Buf = (*MBOrErr)->getMemBufferRef();
219+
Module *M = unwrap(HostM);
220+
StringRef SectionName = ".llvm.offloading";
221+
Align Alignment = Align(8);
222+
llvm::embedBufferInModule(*M, Buf, SectionName, Alignment);
223+
return true;
224+
}
225+
204226
extern "C" void LLVMRustOffloadMapper(LLVMValueRef OldFn, LLVMValueRef NewFn) {
205227
llvm::Function *oldFn = llvm::unwrap<llvm::Function>(OldFn);
206228
llvm::Function *newFn = llvm::unwrap<llvm::Function>(NewFn);

compiler/rustc_session/src/config.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -190,10 +190,12 @@ pub enum CoverageLevel {
190190
}
191191

192192
// The different settings that the `-Z offload` flag can have.
193-
#[derive(Clone, Copy, PartialEq, Hash, Debug)]
193+
#[derive(Clone, PartialEq, Hash, Debug)]
194194
pub enum Offload {
195-
/// Enable the llvm offload pipeline
196-
Enable,
195+
/// Entry point for `std::offload`, enables kernel compilation for a gpu device
196+
Device,
197+
/// Second step in the offload pipeline, generates the host code to call kernels.
198+
Host(String),
197199
}
198200

199201
/// The different settings that the `-Z autodiff` flag can have.
@@ -2578,9 +2580,7 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M
25782580
)
25792581
}
25802582

2581-
if !nightly_options::is_unstable_enabled(matches)
2582-
&& unstable_opts.offload.contains(&Offload::Enable)
2583-
{
2583+
if !nightly_options::is_unstable_enabled(matches) && !unstable_opts.offload.is_empty() {
25842584
early_dcx.early_fatal(
25852585
"`-Zoffload=Enable` also requires `-Zunstable-options` \
25862586
and a nightly compiler",

compiler/rustc_session/src/options.rs

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1451,8 +1451,27 @@ pub mod parse {
14511451
let mut v: Vec<&str> = v.split(",").collect();
14521452
v.sort_unstable();
14531453
for &val in v.iter() {
1454-
let variant = match val {
1455-
"Enable" => Offload::Enable,
1454+
// Split each entry on '=' if it has an argument
1455+
let (key, arg) = match val.split_once('=') {
1456+
Some((k, a)) => (k, Some(a)),
1457+
None => (val, None),
1458+
};
1459+
1460+
let variant = match key {
1461+
"Host" => {
1462+
if let Some(p) = arg {
1463+
Offload::Host(p.to_string())
1464+
} else {
1465+
return false;
1466+
}
1467+
}
1468+
"Device" => {
1469+
if let Some(_) = arg {
1470+
// Device does not accept a value
1471+
return false;
1472+
}
1473+
Offload::Device
1474+
}
14561475
_ => {
14571476
// FIXME(ZuseZ4): print an error saying which value is not recognized
14581477
return false;

0 commit comments

Comments
 (0)