From 02cacc08f27634bffd1ae0f4dfafb7ba2a5bd829 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 13:47:07 +0300 Subject: [PATCH 1/5] spv/lift: elide empty targets of an `if`-`else`/`switch`, as allowed by SPIR-V. --- src/spv/lift.rs | 62 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 6 deletions(-) diff --git a/src/spv/lift.rs b/src/spv/lift.rs index 5bbb655..c0c8348 100644 --- a/src/spv/lift.rs +++ b/src/spv/lift.rs @@ -863,22 +863,72 @@ impl<'a> FuncLifting<'a> { // HACK(eddyb) this takes advantage of `blocks` being an `IndexMap`, // to iterate at the same time as mutating other entries. for block_idx in (0..blocks.len()).rev() { - let BlockLifting { terminator: original_terminator, .. } = &blocks[block_idx]; + // HACK(eddyb) elide empty cases of an `if`-`else`/`switch`, as + // SPIR-V allows their targets to just be the whole merge block + // (the same one that `OpSelectionMerge` describes). + let block = &blocks[block_idx]; + if let (cfg::ControlInstKind::SelectBranch(_), Some(Merge::Selection(merge_point))) = + (&*block.terminator.kind, block.terminator.merge) + { + for target_idx in 0..block.terminator.targets.len() { + let block = &blocks[block_idx]; + let target = block.terminator.targets[target_idx]; + if !block + .terminator + .target_phi_values + .get(&target) + .copied() + .unwrap_or_default() + .is_empty() + { + continue; + } + + let target_is_trivial_branch = { + let BlockLifting { + phis, + insts, + terminator: + Terminator { attrs, kind, inputs, targets, target_phi_values, merge }, + } = &blocks[&target]; + + (phis.is_empty() + && insts.iter().all(|insts| insts.is_empty()) + && *attrs == AttrSet::default() + && matches!(**kind, cfg::ControlInstKind::Branch) + && inputs.is_empty() + && targets.len() == 1 + && target_phi_values.is_empty() + && merge.is_none()) + .then(|| targets[0]) + }; + if let Some(target_of_target) = target_is_trivial_branch { + // FIXME(eddyb) what does it mean for this 
to not be true? + // (can it even happen?) + if target_of_target == merge_point { + blocks[block_idx].terminator.targets[target_idx] = target_of_target; + *use_counts.get_mut(&target).unwrap() -= 1; + *use_counts.get_mut(&target_of_target).unwrap() += 1; + } + } + } + } + let block = &blocks[block_idx]; let is_trivial_branch = { let Terminator { attrs, kind, inputs, targets, target_phi_values, merge } = - original_terminator; + &block.terminator; - *attrs == AttrSet::default() + (*attrs == AttrSet::default() && matches!(**kind, cfg::ControlInstKind::Branch) && inputs.is_empty() && targets.len() == 1 && target_phi_values.is_empty() - && merge.is_none() + && merge.is_none()) + .then(|| targets[0]) }; - if is_trivial_branch { - let target = original_terminator.targets[0]; + if let Some(target) = is_trivial_branch { let target_use_count = use_counts.get_mut(&target).unwrap(); if *target_use_count == 1 { From d3ae59f138f51d96c8b95665566afcb22356e91e Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:42:12 +0300 Subject: [PATCH 2/5] Split a new `mem` module (for "memory" ops/passes/etc.), out of `qptr`. 
--- examples/spv-lower-link-qptr-lift.rs | 12 +- src/lib.rs | 13 +- src/{qptr => mem}/analyze.rs | 646 +++++++++++++-------------- src/{qptr => mem}/layout.rs | 28 +- src/mem/mod.rs | 151 +++++++ src/{qptr => mem}/shapes.rs | 7 +- src/passes/qptr.rs | 10 +- src/print/mod.rs | 251 ++++++----- src/qptr/lift.rs | 122 ++--- src/qptr/lower.rs | 17 +- src/qptr/mod.rs | 113 +---- src/spv/lift.rs | 10 +- src/transform.rs | 79 ++-- src/visit.rs | 55 +-- 14 files changed, 810 insertions(+), 704 deletions(-) rename src/{qptr => mem}/analyze.rs (69%) rename src/{qptr => mem}/layout.rs (97%) create mode 100644 src/mem/mod.rs rename src/{qptr => mem}/shapes.rs (93%) diff --git a/examples/spv-lower-link-qptr-lift.rs b/examples/spv-lower-link-qptr-lift.rs index d4b788d..f71238b 100644 --- a/examples/spv-lower-link-qptr-lift.rs +++ b/examples/spv-lower-link-qptr-lift.rs @@ -80,10 +80,10 @@ fn main() -> std::io::Result<()> { after_pass("", &module)?; // HACK(eddyb) this is roughly what Rust-GPU would need. 
- let layout_config = &spirt::qptr::LayoutConfig { + let layout_config = &spirt::mem::LayoutConfig { abstract_bool_size_align: (1, 1), logical_ptr_size_align: (4, 4), - ..spirt::qptr::LayoutConfig::VULKAN_SCALAR_LAYOUT + ..spirt::mem::LayoutConfig::VULKAN_SCALAR_LAYOUT }; eprint_duration(|| { @@ -92,9 +92,11 @@ fn main() -> std::io::Result<()> { eprintln!("qptr::lower_from_spv_ptrs"); after_pass("qptr::lower_from_spv_ptrs", &module)?; - eprint_duration(|| spirt::passes::qptr::analyze_uses(&mut module, layout_config)); - eprintln!("qptr::analyze_uses"); - after_pass("qptr::analyze_uses", &module)?; + eprint_duration(|| { + spirt::passes::qptr::analyze_mem_accesses(&mut module, layout_config) + }); + eprintln!("mem::analyze_accesses"); + after_pass("mem::analyze_accesses", &module)?; eprint_duration(|| spirt::passes::qptr::lift_to_spv_ptrs(&mut module, layout_config)); eprintln!("qptr::lift_to_spv_ptrs"); diff --git a/src/lib.rs b/src/lib.rs index 4f1b084..2dc726d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -169,6 +169,7 @@ pub mod passes { pub mod link; pub mod qptr; } +pub mod mem; pub mod qptr; pub mod spv; @@ -397,6 +398,10 @@ pub enum Attr { // of `AttrSetDef::{dbg_src_loc,set_dbg_src_loc}`. DbgSrcLoc(OrdAssertEq), + /// Memory-specific attributes (see [`mem::MemAttr`]). + #[from] + Mem(mem::MemAttr), + /// `QPtr`-specific attributes (see [`qptr::QPtrAttr`]). #[from] QPtr(qptr::QPtrAttr), @@ -489,7 +494,7 @@ pub enum DiagMsgPart { Attrs(AttrSet), Type(Type), Const(Const), - QPtrUsage(qptr::QPtrUsage), + MemAccesses(mem::MemAccesses), } /// Wrapper to limit `Ord` for interned index types (e.g. [`InternedStr`]) @@ -638,7 +643,7 @@ pub struct GlobalVarDecl { /// When `type_of_ptr_to` is `QPtr`, `shape` must be used to describe the /// global variable (see `GlobalVarShape`'s documentation for more details). - pub shape: Option, + pub shape: Option, /// The address space the global variable will be allocated into. 
pub addr_space: AddrSpace, @@ -947,6 +952,10 @@ pub enum DataInstKind { // to avoid needing special handling for recursion where it's impossible. FuncCall(Func), + /// Memory-specific operations (see [`mem::MemOp`]). + #[from] + Mem(mem::MemOp), + /// `QPtr`-specific operations (see [`qptr::QPtrOp`]). #[from] QPtr(qptr::QPtrOp), diff --git a/src/qptr/analyze.rs b/src/mem/analyze.rs similarity index 69% rename from src/qptr/analyze.rs rename to src/mem/analyze.rs index f1f1e23..f9b8b28 100644 --- a/src/qptr/analyze.rs +++ b/src/mem/analyze.rs @@ -1,10 +1,10 @@ -//! [`QPtr`](crate::TypeKind::QPtr) usage analysis (for legalizing/lifting). +//! Memory access analysis (for "type recovery", i.e. untyped -> typed memory). +// +// TODO(eddyb) consider renaming this to `mem::typed`. -// HACK(eddyb) sharing layout code with other modules. -use super::{QPtrMemUsageKind, layout::*}; - -use super::{QPtrAttr, QPtrMemUsage, QPtrOp, QPtrUsage, shapes}; use crate::func_at::FuncAt; +use crate::mem::{DataHapp, DataHappKind, MemAccesses, MemAttr, MemOp, shapes}; +use crate::qptr::{QPtrAttr, QPtrOp}; use crate::visit::{InnerVisit, Visitor}; use crate::{ AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstKind, Context, DataInst, DataInstKind, @@ -19,14 +19,18 @@ use std::num::NonZeroU32; use std::ops::Bound; use std::rc::Rc; +// HACK(eddyb) sharing layout code with other modules. +// FIXME(eddyb) can this just be a non-glob import? +use crate::mem::layout::*; + #[derive(Clone)] struct AnalysisError(Diag); -struct UsageMerger<'a> { +struct AccessMerger<'a> { layout_cache: &'a LayoutCache<'a>, } -/// Result type for `UsageMerger` methods - unlike `Result`, +/// Result type for `AccessMerger` methods - unlike `Result`, /// this always keeps the `T` value, even in the case of an error. 
struct MergeResult { merged: T, @@ -53,28 +57,28 @@ impl MergeResult { } } -impl UsageMerger<'_> { - fn merge(&self, a: QPtrUsage, b: QPtrUsage) -> MergeResult { +impl AccessMerger<'_> { + fn merge(&self, a: MemAccesses, b: MemAccesses) -> MergeResult { match (a, b) { ( - QPtrUsage::Handles(shapes::Handle::Opaque(a)), - QPtrUsage::Handles(shapes::Handle::Opaque(b)), - ) if a == b => MergeResult::ok(QPtrUsage::Handles(shapes::Handle::Opaque(a))), + MemAccesses::Handles(shapes::Handle::Opaque(a)), + MemAccesses::Handles(shapes::Handle::Opaque(b)), + ) if a == b => MergeResult::ok(MemAccesses::Handles(shapes::Handle::Opaque(a))), ( - QPtrUsage::Handles(shapes::Handle::Buffer(a_as, a)), - QPtrUsage::Handles(shapes::Handle::Buffer(b_as, b)), + MemAccesses::Handles(shapes::Handle::Buffer(a_as, a)), + MemAccesses::Handles(shapes::Handle::Buffer(b_as, b)), ) => { // HACK(eddyb) the `AddrSpace` field is entirely redundant. assert!(a_as == AddrSpace::Handles && b_as == AddrSpace::Handles); - self.merge_mem(a, b).map(|usage| { - QPtrUsage::Handles(shapes::Handle::Buffer(AddrSpace::Handles, usage)) + self.merge_data(a, b).map(|happ| { + MemAccesses::Handles(shapes::Handle::Buffer(AddrSpace::Handles, happ)) }) } - (QPtrUsage::Memory(a), QPtrUsage::Memory(b)) => { - self.merge_mem(a, b).map(QPtrUsage::Memory) + (MemAccesses::Data(a), MemAccesses::Data(b)) => { + self.merge_data(a, b).map(MemAccesses::Data) } (a, b) => { @@ -94,9 +98,9 @@ impl UsageMerger<'_> { } } - fn merge_mem(&self, a: QPtrMemUsage, b: QPtrMemUsage) -> MergeResult { - // NOTE(eddyb) this is possible because it's currently impossible for - // the merged usage to be outside the bounds of *both* `a` and `b`. + fn merge_data(&self, a: DataHapp, b: DataHapp) -> MergeResult { + // NOTE(eddyb) this is doable because it's currently impossible for + // the merged HAPP to be outside the bounds of *both* `a` and `b`. 
let max_size = match (a.max_size, b.max_size) { (Some(a), Some(b)) => Some(a.max(b)), (None, _) | (_, None) => None, @@ -107,14 +111,14 @@ impl UsageMerger<'_> { // to make it easier to handle all the possible interactions below, // by skipping (or deprioritizing, if supported) the "wrong direction". let mut sorted = [a, b]; - sorted.sort_by_key(|usage| { + sorted.sort_by_key(|happ| { #[derive(PartialEq, Eq, PartialOrd, Ord)] enum MaxSize { Fixed(T), // FIXME(eddyb) this probably needs to track "min size"? Dynamic, } - let max_size = usage.max_size.map_or(MaxSize::Dynamic, MaxSize::Fixed); + let max_size = happ.max_size.map_or(MaxSize::Dynamic, MaxSize::Fixed); // When sizes are equal, pick the more restrictive side. #[derive(PartialEq, Eq, PartialOrd, Ord)] @@ -124,21 +128,21 @@ impl UsageMerger<'_> { Exact, } #[allow(clippy::match_same_arms)] - let type_strictness = match usage.kind { - QPtrMemUsageKind::Unused | QPtrMemUsageKind::OffsetBase(_) => TypeStrictness::Any, + let type_strictness = match happ.kind { + DataHappKind::Dead | DataHappKind::Disjoint(_) => TypeStrictness::Any, - QPtrMemUsageKind::DynOffsetBase { .. } => TypeStrictness::Array, + DataHappKind::Repeated { .. } => TypeStrictness::Array, // FIXME(eddyb) this should be `Any`, even if in theory it // could contain arrays or structs that need decomposition // (note that, for typed reads/write, arrays do not need to be // *indexed* to work, i.e. they *do not* require `DynOffset`s, - // `Offset`s suffice, and for them `DynOffsetBase` is at most - // a "run-length"/deduplication optimization over `OffsetBase`). - // NOTE(eddyb) this should still prefer `OpTypeVector` over `DynOffsetBase`! - QPtrMemUsageKind::DirectAccess(_) => TypeStrictness::Exact, + // `Offset`s suffice, and for them `Repeated` is at most + // a "run-length"/deduplication optimization over `Disjoint`). + // NOTE(eddyb) this should still prefer `OpTypeVector` over `Repeated`! 
+ DataHappKind::Direct(_) => TypeStrictness::Exact, - QPtrMemUsageKind::StrictlyTyped(_) => TypeStrictness::Exact, + DataHappKind::StrictlyTyped(_) => TypeStrictness::Exact, }; (max_size, type_strictness) @@ -146,33 +150,28 @@ impl UsageMerger<'_> { let [b, a] = sorted; assert_eq!(max_size, a.max_size); - self.merge_mem_at(a, 0, b) + self.merge_data_at(a, 0, b) } // FIXME(eddyb) make the name of this clarify the asymmetric effect, something // like "make `a` compatible with `offset => b`". - fn merge_mem_at( - &self, - a: QPtrMemUsage, - b_offset_in_a: u32, - b: QPtrMemUsage, - ) -> MergeResult { - // NOTE(eddyb) this is possible because it's currently impossible for - // the merged usage to be outside the bounds of *both* `a` and `b`. + fn merge_data_at(&self, a: DataHapp, b_offset_in_a: u32, b: DataHapp) -> MergeResult { + // NOTE(eddyb) this is doable because it's currently impossible for + // the merged HAPP to be outside the bounds of *both* `a` and `b`. let max_size = match (a.max_size, b.max_size) { (Some(a), Some(b)) => Some(a.max(b.checked_add(b_offset_in_a).unwrap())), (None, _) | (_, None) => None, }; - // HACK(eddyb) we require biased `a` vs `b` (see `merge_mem` method above). + // HACK(eddyb) we require biased `a` vs `b` (see `merge_data` method above). assert_eq!(max_size, a.max_size); // Decompose the "smaller" and/or "less strict" side (`b`) first. match b.kind { - // `Unused`s are always ignored. - QPtrMemUsageKind::Unused => return MergeResult::ok(a), + // `Dead`s are always ignored. 
+ DataHappKind::Dead => return MergeResult::ok(a), - QPtrMemUsageKind::OffsetBase(b_entries) + DataHappKind::Disjoint(b_entries) if { // HACK(eddyb) this check was added later, after it turned out // that *deep* flattening of arbitrary offsets in `b` would've @@ -190,11 +189,11 @@ impl UsageMerger<'_> { let mut ab = a; let mut all_errors = None; - for (b_offset, b_sub_usage) in b_entries { - let MergeResult { merged, error: new_error } = self.merge_mem_at( + for (b_offset, b_sub_happ) in b_entries { + let MergeResult { merged, error: new_error } = self.merge_data_at( ab, b_offset.checked_add(b_offset_in_a).unwrap(), - b_sub_usage, + b_sub_happ, ); ab = merged; @@ -218,7 +217,7 @@ impl UsageMerger<'_> { // FIXME(eddyb) should this mean `MergeResult` should // use `errors: Vec` instead of `Option`? error: all_errors.map(|AnalysisError(mut e)| { - e.message.insert(0, "merge_mem: conflicts:\n".into()); + e.message.insert(0, "merge_data: conflicts:\n".into()); AnalysisError(e) }), }; @@ -228,16 +227,15 @@ impl UsageMerger<'_> { } let kind = match a.kind { - // `Unused`s are always ignored. - QPtrMemUsageKind::Unused => MergeResult::ok(b.kind), + // `Dead`s are always ignored. + DataHappKind::Dead => MergeResult::ok(b.kind), - // Typed leaves must support any possible usage applied to them - // (when they match, or overtake, that usage, in size, like here), + // Typed leaves must support any possible accesses applied to them + // (when they match, or overtake, that access, in size, like here), // with their inherent hierarchy (i.e. their array/struct nesting). 
- QPtrMemUsageKind::StrictlyTyped(a_type) | QPtrMemUsageKind::DirectAccess(a_type) => { + DataHappKind::StrictlyTyped(a_type) | DataHappKind::Direct(a_type) => { let b_type_at_offset_0 = match b.kind { - QPtrMemUsageKind::StrictlyTyped(b_type) - | QPtrMemUsageKind::DirectAccess(b_type) + DataHappKind::StrictlyTyped(b_type) | DataHappKind::Direct(b_type) if b_offset_in_a == 0 => { Some(b_type) @@ -247,11 +245,11 @@ impl UsageMerger<'_> { let ty = if Some(a_type) == b_type_at_offset_0 { MergeResult::ok(a_type) } else { - // Returns `Some(MergeResult::ok(ty))` iff `usage` is valid + // Returns `Some(MergeResult::ok(ty))` iff `happ` is valid // for type `ty`, and `None` iff invalid w/o layout errors - // (see `mem_layout_supports_usage_at_offset` for more details). - let type_supporting_usage_at_offset = |ty, usage_offset, usage| { - let supports_usage = match self.layout_of(ty) { + // (see `mem_layout_supports_happ_at_offset` for more details). + let type_supporting_happ_at_offset = |ty, happ_offset, happ| { + let supports_happ = match self.layout_of(ty) { // FIXME(eddyb) should this be `unreachable!()`? also, is // it possible to end up with `ty` being an `OpTypeStruct` // decorated with `Block`, showing up as a `Buffer` handle? @@ -261,27 +259,27 @@ impl UsageMerger<'_> { // conflict with our own `Block`-annotated wrapper. 
Ok(TypeLayout::Handle(_) | TypeLayout::HandleArray(..)) => { Err(AnalysisError(Diag::bug([ - "merge_mem: impossible handle type for QPtrMemUsage".into(), + "merge_data: impossible handle type for DataHapp".into(), ]))) } Ok(TypeLayout::Concrete(concrete)) => { - Ok(concrete.supports_usage_at_offset(usage_offset, usage)) + Ok(concrete.supports_happ_at_offset(happ_offset, happ)) } Err(e) => Err(e), }; - match supports_usage { + match supports_happ { Ok(false) => None, Ok(true) | Err(_) => { - Some(MergeResult { merged: ty, error: supports_usage.err() }) + Some(MergeResult { merged: ty, error: supports_happ.err() }) } } }; - type_supporting_usage_at_offset(a_type, b_offset_in_a, &b) + type_supporting_happ_at_offset(a_type, b_offset_in_a, &b) .or_else(|| { b_type_at_offset_0.and_then(|b_type_at_offset_0| { - type_supporting_usage_at_offset(b_type_at_offset_0, 0, &a) + type_supporting_happ_at_offset(b_type_at_offset_0, 0, &a) }) }) .unwrap_or_else(|| { @@ -290,11 +288,11 @@ impl UsageMerger<'_> { // FIXME(eddyb) this should ideally embed the types in the // error somehow. error: Some(AnalysisError(Diag::bug([ - "merge_mem: type subcomponents incompatible with usage (" + "merge_data: type subcomponents incompatible with accesses (" .into(), - QPtrUsage::Memory(a.clone()).into(), + MemAccesses::Data(a.clone()).into(), " vs ".into(), - QPtrUsage::Memory(b.clone()).into(), + MemAccesses::Data(b.clone()).into(), ")".into(), ]))), } @@ -307,19 +305,19 @@ impl UsageMerger<'_> { // FIXME(eddyb) this might not enough because the // strict leaf could be *nested* inside `b`!!! 
- let is_strict = |kind| matches!(kind, &QPtrMemUsageKind::StrictlyTyped(_)); + let is_strict = |kind| matches!(kind, &DataHappKind::StrictlyTyped(_)); if is_strict(&a.kind) || is_strict(&b.kind) { - ty.map(QPtrMemUsageKind::StrictlyTyped) + ty.map(DataHappKind::StrictlyTyped) } else { - ty.map(QPtrMemUsageKind::DirectAccess) + ty.map(DataHappKind::Direct) } } - QPtrMemUsageKind::DynOffsetBase { element: mut a_element, stride: a_stride } => { + DataHappKind::Repeated { element: mut a_element, stride: a_stride } => { let b_offset_in_a_element = b_offset_in_a % a_stride; - // Array-like dynamic offsetting needs to always merge any usage that - // fits inside the stride, with its "element" usage, no matter how + // Array-like dynamic offsetting needs to always merge any HAPP that + // fits inside the stride, with its "element" HAPP, no matter how // complex it may be (notably, this is needed for nested arrays). if b.max_size .and_then(|b_max_size| b_max_size.checked_add(b_offset_in_a_element)) @@ -328,50 +326,44 @@ impl UsageMerger<'_> { // FIXME(eddyb) this in-place merging dance only needed due to `Rc`. ({ let a_element_mut = Rc::make_mut(&mut a_element); - let a_element = mem::replace(a_element_mut, QPtrMemUsage::UNUSED); - // FIXME(eddyb) remove this silliness by making `merge_mem_at` do symmetrical sorting. + let a_element = mem::replace(a_element_mut, DataHapp::DEAD); + // FIXME(eddyb) remove this silliness by making `merge_data_at` do symmetrical sorting. 
if b_offset_in_a_element == 0 { - self.merge_mem(a_element, b) + self.merge_data(a_element, b) } else { - self.merge_mem_at(a_element, b_offset_in_a_element, b) + self.merge_data_at(a_element, b_offset_in_a_element, b) } .map(|merged| *a_element_mut = merged) }) - .map(|()| QPtrMemUsageKind::DynOffsetBase { - element: a_element, - stride: a_stride, - }) + .map(|()| DataHappKind::Repeated { element: a_element, stride: a_stride }) } else { match b.kind { - QPtrMemUsageKind::DynOffsetBase { - element: b_element, - stride: b_stride, - } if b_offset_in_a_element == 0 && a_stride == b_stride => { + DataHappKind::Repeated { element: b_element, stride: b_stride } + if b_offset_in_a_element == 0 && a_stride == b_stride => + { // FIXME(eddyb) this in-place merging dance only needed due to `Rc`. ({ let a_element_mut = Rc::make_mut(&mut a_element); - let a_element = mem::replace(a_element_mut, QPtrMemUsage::UNUSED); + let a_element = mem::replace(a_element_mut, DataHapp::DEAD); let b_element = Rc::try_unwrap(b_element).unwrap_or_else(|e| (*e).clone()); - self.merge_mem(a_element, b_element) + self.merge_data(a_element, b_element) .map(|merged| *a_element_mut = merged) }) - .map(|()| { - QPtrMemUsageKind::DynOffsetBase { - element: a_element, - stride: a_stride, - } + .map(|()| DataHappKind::Repeated { + element: a_element, + stride: a_stride, }) } _ => { // FIXME(eddyb) implement somehow (by adjusting stride?). - // NOTE(eddyb) with `b` as an `DynOffsetBase`/`OffsetBase`, it could + // NOTE(eddyb) with `b` as an `Repeated`/`Disjoint`, it could // also be possible to superimpose its offset patterns onto `a`, - // though that's easier for `OffsetBase` than `DynOffsetBase`. + // though that's easier for `Disjoint` than `Repeated`. // HACK(eddyb) needed due to `a` being moved out of. 
- let a = QPtrMemUsage { + let a = DataHapp { max_size: a.max_size, - kind: QPtrMemUsageKind::DynOffsetBase { + kind: DataHappKind::Repeated { element: a_element, stride: a_stride, }, @@ -380,13 +372,13 @@ impl UsageMerger<'_> { merged: a.kind.clone(), error: Some(AnalysisError(Diag::bug([ format!( - "merge_mem: unimplemented \ + "merge_data: unimplemented \ non-intra-element merging into stride={a_stride} (" ) .into(), - QPtrUsage::Memory(a).into(), + MemAccesses::Data(a).into(), " vs ".into(), - QPtrUsage::Memory(b).into(), + MemAccesses::Data(b).into(), ")".into(), ]))), } @@ -395,7 +387,7 @@ impl UsageMerger<'_> { } } - QPtrMemUsageKind::OffsetBase(mut a_entries) => { + DataHappKind::Disjoint(mut a_entries) => { let overlapping_entries = a_entries .range(( Bound::Unbounded, @@ -404,8 +396,8 @@ impl UsageMerger<'_> { }), )) .rev() - .take_while(|(a_sub_offset, a_sub_usage)| { - a_sub_usage.max_size.is_none_or(|a_sub_max_size| { + .take_while(|(a_sub_offset, a_sub_happ)| { + a_sub_happ.max_size.is_none_or(|a_sub_max_size| { a_sub_offset.checked_add(a_sub_max_size).unwrap() > b_offset_in_a }) }); @@ -418,15 +410,15 @@ impl UsageMerger<'_> { let mut all_errors = None; let (mut b_offset_in_a, mut b) = (b_offset_in_a, b); for a_sub_offset in overlapping_offsets { - let a_sub_usage = a_entries_mut.remove(&a_sub_offset).unwrap(); + let a_sub_happ = a_entries_mut.remove(&a_sub_offset).unwrap(); // HACK(eddyb) this replicates the condition in which - // `merge_mem_at` would fail its similar assert, some of + // `merge_data_at` would fail its similar assert, some of // the cases denied here might be legal, but they're rare // enough that we can do this for now. 
let is_illegal = a_sub_offset != b_offset_in_a && { let (a_sub_total_max_size, b_total_max_size) = ( - a_sub_usage.max_size.map(|a| a.checked_add(a_sub_offset).unwrap()), + a_sub_happ.max_size.map(|a| a.checked_add(a_sub_offset).unwrap()), b.max_size.map(|b| b.checked_add(b_offset_in_a).unwrap()), ); let total_max_size_merged = match (a_sub_total_max_size, b_total_max_size) { @@ -442,24 +434,21 @@ impl UsageMerger<'_> { }; if is_illegal { // HACK(eddyb) needed due to `a` being moved out of. - let a = QPtrMemUsage { + let a = DataHapp { max_size: a.max_size, - kind: QPtrMemUsageKind::OffsetBase(a_entries.clone()), + kind: DataHappKind::Disjoint(a_entries.clone()), }; return MergeResult { - merged: QPtrMemUsage { - max_size, - kind: QPtrMemUsageKind::OffsetBase(a_entries), - }, + merged: DataHapp { max_size, kind: DataHappKind::Disjoint(a_entries) }, error: Some(AnalysisError(Diag::bug([ format!( - "merge_mem: unsupported straddling overlap \ + "merge_data: unsupported straddling overlap \ at offsets {a_sub_offset} vs {b_offset_in_a} (" ) .into(), - QPtrUsage::Memory(a).into(), + MemAccesses::Data(a).into(), " vs ".into(), - QPtrUsage::Memory(b).into(), + MemAccesses::Data(b).into(), ")".into(), ]))), }; @@ -470,16 +459,16 @@ impl UsageMerger<'_> { if a_sub_offset < b_offset_in_a { ( a_sub_offset, - self.merge_mem_at(a_sub_usage, b_offset_in_a - a_sub_offset, b), + self.merge_data_at(a_sub_happ, b_offset_in_a - a_sub_offset, b), ) } else { - // FIXME(eddyb) remove this silliness by making `merge_mem_at` do symmetrical sorting. + // FIXME(eddyb) remove this silliness by making `merge_data_at` do symmetrical sorting. 
if a_sub_offset - b_offset_in_a == 0 { - (b_offset_in_a, self.merge_mem(b, a_sub_usage)) + (b_offset_in_a, self.merge_data(b, a_sub_happ)) } else { ( b_offset_in_a, - self.merge_mem_at(b, a_sub_offset - b_offset_in_a, a_sub_usage), + self.merge_data_at(b, a_sub_offset - b_offset_in_a, a_sub_happ), ) } }; @@ -501,17 +490,17 @@ impl UsageMerger<'_> { } a_entries_mut.insert(b_offset_in_a, b); MergeResult { - merged: QPtrMemUsageKind::OffsetBase(a_entries), + merged: DataHappKind::Disjoint(a_entries), // FIXME(eddyb) should this mean `MergeResult` should // use `errors: Vec` instead of `Option`? error: all_errors.map(|AnalysisError(mut e)| { - e.message.insert(0, "merge_mem: conflicts:\n".into()); + e.message.insert(0, "merge_data: conflicts:\n".into()); AnalysisError(e) }), } } }; - kind.map(|kind| QPtrMemUsage { max_size, kind }) + kind.map(|kind| DataHapp { max_size, kind }) } /// Attempt to compute a `TypeLayout` for a given (SPIR-V) `Type`. @@ -521,85 +510,84 @@ impl UsageMerger<'_> { } impl MemTypeLayout { - /// Determine if this layout is compatible with `usage` at `usage_offset`. + /// Determine if this layout is compatible with `happ` at `happ_offset`. /// - /// That is, all typed leaves of `usage` must be found inside `self`, at - /// their respective offsets, and all [`QPtrMemUsageKind::DynOffsetBase`]s - /// must find a same-stride array inside `self` (to allow dynamic indexing). + /// That is, all typed leaves of `happ` must be found inside `self`, at + /// their respective offsets, and all [`DataHappKind::Repeated`]s must + /// find a same-stride array inside `self` (to allow dynamic indexing). // // FIXME(eddyb) consider using `Result` to make it unambiguous. 
- fn supports_usage_at_offset(&self, usage_offset: u32, usage: &QPtrMemUsage) -> bool { - if let QPtrMemUsageKind::Unused = usage.kind { + fn supports_happ_at_offset(&self, happ_offset: u32, happ: &DataHapp) -> bool { + if let DataHappKind::Dead = happ.kind { return true; } // "Fast accept" based on type alone (expected as recursion base case). - if let QPtrMemUsageKind::StrictlyTyped(usage_type) - | QPtrMemUsageKind::DirectAccess(usage_type) = usage.kind - && usage_offset == 0 - && self.original_type == usage_type + if let DataHappKind::StrictlyTyped(happ_type) | DataHappKind::Direct(happ_type) = happ.kind + && happ_offset == 0 + && self.original_type == happ_type { return true; } { - // FIXME(eddyb) should `QPtrMemUsage` track a `min_size` as well? + // FIXME(eddyb) should `DataHapp` track a `min_size` as well? // FIXME(eddyb) duplicated below. - let min_usage_offset_range = - usage_offset..usage_offset.saturating_add(usage.max_size.unwrap_or(0)); + let min_happ_offset_range = + happ_offset..happ_offset.saturating_add(happ.max_size.unwrap_or(0)); // "Fast reject" based on size alone (expected w/ multiple attempts). if self.mem_layout.dyn_unit_stride.is_none() - && (self.mem_layout.fixed_base.size < min_usage_offset_range.end - || usage.max_size.is_none()) + && (self.mem_layout.fixed_base.size < min_happ_offset_range.end + || happ.max_size.is_none()) { return false; } } - let any_component_supports = |usage_offset: u32, usage: &QPtrMemUsage| { - // FIXME(eddyb) should `QPtrMemUsage` track a `min_size` as well? + let any_component_supports = |happ_offset: u32, happ: &DataHapp| { + // FIXME(eddyb) should `DataHapp` track a `min_size` as well? // FIXME(eddyb) duplicated above. 
- let min_usage_offset_range = - usage_offset..usage_offset.saturating_add(usage.max_size.unwrap_or(0)); + let min_happ_offset_range = + happ_offset..happ_offset.saturating_add(happ.max_size.unwrap_or(0)); // FIXME(eddyb) `find_components_containing` is linear today but // could be made logarithmic (via binary search). - self.components.find_components_containing(min_usage_offset_range).any( - |idx| match &self.components { - Components::Scalar => unreachable!(), - Components::Elements { stride, elem, .. } => { - elem.supports_usage_at_offset(usage_offset % stride.get(), usage) - } - Components::Fields { offsets, layouts, .. } => { - layouts[idx].supports_usage_at_offset(usage_offset - offsets[idx], usage) - } - }, - ) + self.components.find_components_containing(min_happ_offset_range).any(|idx| match &self + .components + { + Components::Scalar => unreachable!(), + Components::Elements { stride, elem, .. } => { + elem.supports_happ_at_offset(happ_offset % stride.get(), happ) + } + Components::Fields { offsets, layouts, .. } => { + layouts[idx].supports_happ_at_offset(happ_offset - offsets[idx], happ) + } + }) }; - match &usage.kind { - _ if any_component_supports(usage_offset, usage) => true, + match &happ.kind { + _ if any_component_supports(happ_offset, happ) => true, - QPtrMemUsageKind::Unused => unreachable!(), + DataHappKind::Dead => unreachable!(), - QPtrMemUsageKind::StrictlyTyped(_) | QPtrMemUsageKind::DirectAccess(_) => false, + DataHappKind::StrictlyTyped(_) | DataHappKind::Direct(_) => false, - QPtrMemUsageKind::OffsetBase(entries) => { - entries.iter().all(|(&sub_offset, sub_usage)| { + DataHappKind::Disjoint(entries) => { + entries.iter().all(|(&sub_offset, sub_happ)| { // FIXME(eddyb) maybe this overflow should be propagated up, - // as a sign that `usage` is malformed? - usage_offset.checked_add(sub_offset).is_some_and(|combined_offset| { + // as a sign that `happ` is malformed? 
+ happ_offset.checked_add(sub_offset).is_some_and(|combined_offset| { // NOTE(eddyb) the reason this is only applicable to // offset `0` is that *in all other cases*, every - // individual `OffsetBase` requires its own type, to + // individual `Disjoint` requires its own type, to // allow performing offsets *in steps* (even if the // offsets could easily be constant-folded, they'd // *have to* be constant-folded *before* analysis, // to ensure there is no need for the intermediaries). if combined_offset == 0 { - self.supports_usage_at_offset(0, sub_usage) + self.supports_happ_at_offset(0, sub_happ) } else { - any_component_supports(combined_offset, sub_usage) + any_component_supports(combined_offset, sub_happ) } }) }) @@ -607,16 +595,16 @@ impl MemTypeLayout { // Finding an array entirely nested in a component was handled above, // so here `layout` can only be a matching array (same stride and length). - QPtrMemUsageKind::DynOffsetBase { element: usage_elem, stride: usage_stride } => { - let usage_fixed_len = usage + DataHappKind::Repeated { element: happ_elem, stride: happ_stride } => { + let happ_fixed_len = happ .max_size .map(|size| { - if !size.is_multiple_of(usage_stride.get()) { + if !size.is_multiple_of(happ_stride.get()) { // FIXME(eddyb) maybe this should be propagated up, - // as a sign that `usage` is malformed? + // as a sign that `happ` is malformed? return Err(()); } - NonZeroU32::new(size / usage_stride.get()).ok_or(()) + NonZeroU32::new(size / happ_stride.get()).ok_or(()) }) .transpose(); @@ -631,17 +619,17 @@ impl MemTypeLayout { elem: layout_elem, fixed_len: layout_fixed_len, } => { - // HACK(eddyb) extend the max length implied by `usage`, + // HACK(eddyb) extend the max length implied by `happ`, // such that the array can start at offset `0`. 
- let ext_usage_offset = usage_offset % usage_stride.get(); - let ext_usage_fixed_len = usage_fixed_len.and_then(|usage_fixed_len| { - usage_fixed_len - .map(|usage_fixed_len| { + let ext_happ_offset = happ_offset % happ_stride.get(); + let ext_happ_fixed_len = happ_fixed_len.and_then(|happ_fixed_len| { + happ_fixed_len + .map(|happ_fixed_len| { NonZeroU32::new( // FIXME(eddyb) maybe this overflow should be propagated up, - // as a sign that `usage` is malformed? - (usage_offset / usage_stride.get()) - .checked_add(usage_fixed_len.get()) + // as a sign that `happ` is malformed? + (happ_offset / happ_stride.get()) + .checked_add(happ_fixed_len.get()) .ok_or(())?, ) .ok_or(()) @@ -651,13 +639,13 @@ impl MemTypeLayout { // FIXME(eddyb) this could maybe be allowed if there is still // some kind of divisibility relation between the strides. - if ext_usage_offset != 0 { + if ext_happ_offset != 0 { return false; } - layout_stride == usage_stride - && Ok(*layout_fixed_len) == ext_usage_fixed_len - && layout_elem.supports_usage_at_offset(0, usage_elem) + layout_stride == happ_stride + && Ok(*layout_fixed_len) == ext_happ_fixed_len + && layout_elem.supports_happ_at_offset(0, happ_elem) } } } @@ -665,59 +653,59 @@ impl MemTypeLayout { } } -struct FuncInferUsageResults { - param_usages: SmallVec<[Option>; 2]>, - usage_or_err_attrs_to_attach: Vec<(Value, Result)>, +struct FuncGatherAccessesResults { + param_accesses: SmallVec<[Option>; 2]>, + accesses_or_err_attrs_to_attach: Vec<(Value, Result)>, } #[derive(Clone)] -enum FuncInferUsageState { +enum FuncGatherAccessesState { InProgress, - Complete(Rc), + Complete(Rc), } -pub struct InferUsage<'a> { +pub struct GatherAccesses<'a> { cx: Rc, layout_cache: LayoutCache<'a>, - global_var_usages: FxIndexMap>>, - func_states: FxIndexMap, + global_var_accesses: FxIndexMap>>, + func_states: FxIndexMap, } -impl<'a> InferUsage<'a> { +impl<'a> GatherAccesses<'a> { pub fn new(cx: Rc, layout_config: &'a LayoutConfig) -> Self { Self { cx: 
cx.clone(), layout_cache: LayoutCache::new(cx, layout_config), - global_var_usages: Default::default(), + global_var_accesses: Default::default(), func_states: Default::default(), } } - pub fn infer_usage_in_module(mut self, module: &mut Module) { + pub fn gather_accesses_in_module(mut self, module: &mut Module) { for (export_key, &exportee) in &module.exports { if let Exportee::Func(func) = exportee { - self.infer_usage_in_func(module, func); + self.gather_accesses_in_func(module, func); } - // Ensure even unused interface variables get their `qptr.usage`. + // Ensure even unused interface variables get their `mem.accesses`. match export_key { ExportKey::LinkName(_) => {} ExportKey::SpvEntryPoint { imms: _, interface_global_vars } => { for &gv in interface_global_vars { - self.global_var_usages.entry(gv).or_insert_with(|| { + self.global_var_accesses.entry(gv).or_insert_with(|| { Some(Ok(match module.global_vars[gv].shape { Some(shapes::GlobalVarShape::Handles { handle, .. }) => { - QPtrUsage::Handles(match handle { + MemAccesses::Handles(match handle { shapes::Handle::Opaque(ty) => shapes::Handle::Opaque(ty), shapes::Handle::Buffer(..) => shapes::Handle::Buffer( AddrSpace::Handles, - QPtrMemUsage::UNUSED, + DataHapp::DEAD, ), }) } - _ => QPtrUsage::Memory(QPtrMemUsage::UNUSED), + _ => MemAccesses::Data(DataHapp::DEAD), })) }); } @@ -726,18 +714,18 @@ impl<'a> InferUsage<'a> { } // Analysis over, write all attributes back to the module. - for (gv, usage) in self.global_var_usages { - if let Some(usage) = usage { + for (gv, accesses) in self.global_var_accesses { + if let Some(accesses) = accesses { let global_var_def = &mut module.global_vars[gv]; - match usage { - Ok(usage) => { + match accesses { + Ok(accesses) => { // FIXME(eddyb) deduplicate attribute manipulation. 
global_var_def.attrs = self.cx.intern(AttrSetDef { attrs: self.cx[global_var_def.attrs] .attrs .iter() .cloned() - .chain([Attr::QPtr(QPtrAttr::Usage(OrdAssertEq(usage)))]) + .chain([Attr::Mem(MemAttr::Accesses(OrdAssertEq(accesses)))]) .collect(), }); } @@ -749,24 +737,26 @@ impl<'a> InferUsage<'a> { } for (func, state) in self.func_states { match state { - FuncInferUsageState::InProgress => unreachable!(), - FuncInferUsageState::Complete(func_results) => { - let FuncInferUsageResults { param_usages, usage_or_err_attrs_to_attach } = - Rc::try_unwrap(func_results).ok().unwrap(); + FuncGatherAccessesState::InProgress => unreachable!(), + FuncGatherAccessesState::Complete(func_results) => { + let FuncGatherAccessesResults { + param_accesses, + accesses_or_err_attrs_to_attach, + } = Rc::try_unwrap(func_results).ok().unwrap(); let func_decl = &mut module.funcs[func]; - for (param_decl, usage) in func_decl.params.iter_mut().zip(param_usages) { - if let Some(usage) = usage { - match usage { - Ok(usage) => { + for (param_decl, accesses) in func_decl.params.iter_mut().zip(param_accesses) { + if let Some(accesses) = accesses { + match accesses { + Ok(accesses) => { // FIXME(eddyb) deduplicate attribute manipulation. param_decl.attrs = self.cx.intern(AttrSetDef { attrs: self.cx[param_decl.attrs] .attrs .iter() .cloned() - .chain([Attr::QPtr(QPtrAttr::Usage(OrdAssertEq( - usage, + .chain([Attr::Mem(MemAttr::Accesses(OrdAssertEq( + accesses, )))]) .collect(), }); @@ -783,7 +773,7 @@ impl<'a> InferUsage<'a> { DeclDef::Imported(_) => continue, }; - for (v, usage) in usage_or_err_attrs_to_attach { + for (v, accesses) in accesses_or_err_attrs_to_attach { let attrs = match v { Value::Const(_) => unreachable!(), Value::RegionInput { region, input_idx } => { @@ -798,15 +788,17 @@ impl<'a> InferUsage<'a> { &mut func_def_body.at_mut(data_inst).def().attrs } }; - match usage { - Ok(usage) => { + match accesses { + Ok(accesses) => { // FIXME(eddyb) deduplicate attribute manipulation. 
*attrs = self.cx.intern(AttrSetDef { attrs: self.cx[*attrs] .attrs .iter() .cloned() - .chain([Attr::QPtr(QPtrAttr::Usage(OrdAssertEq(usage)))]) + .chain([Attr::Mem(MemAttr::Accesses(OrdAssertEq( + accesses, + )))]) .collect(), }); } @@ -820,62 +812,66 @@ impl<'a> InferUsage<'a> { } } - // HACK(eddyb) `FuncInferUsageState` also serves to indicate recursion errors. - fn infer_usage_in_func(&mut self, module: &Module, func: Func) -> FuncInferUsageState { + // HACK(eddyb) `FuncGatherAccessesState` also serves to indicate recursion errors. + fn gather_accesses_in_func(&mut self, module: &Module, func: Func) -> FuncGatherAccessesState { if let Some(cached) = self.func_states.get(&func).cloned() { return cached; } - self.func_states.insert(func, FuncInferUsageState::InProgress); + self.func_states.insert(func, FuncGatherAccessesState::InProgress); - let completed_state = - FuncInferUsageState::Complete(Rc::new(self.infer_usage_in_func_uncached(module, func))); + let completed_state = FuncGatherAccessesState::Complete(Rc::new( + self.gather_accesses_in_func_uncached(module, func), + )); self.func_states.insert(func, completed_state.clone()); completed_state } - fn infer_usage_in_func_uncached( + fn gather_accesses_in_func_uncached( &mut self, module: &Module, func: Func, - ) -> FuncInferUsageResults { + ) -> FuncGatherAccessesResults { let cx = self.cx.clone(); let is_qptr = |ty: Type| matches!(cx[ty].kind, TypeKind::QPtr); let func_decl = &module.funcs[func]; - let mut param_usages: SmallVec<[_; 2]> = + let mut param_accesses: SmallVec<[_; 2]> = (0..func_decl.params.len()).map(|_| None).collect(); - let mut usage_or_err_attrs_to_attach = vec![]; + let mut accesses_or_err_attrs_to_attach = vec![]; let func_def_body = match &module.funcs[func].def { DeclDef::Present(func_def_body) => func_def_body, DeclDef::Imported(_) => { - for (param, param_usage) in func_decl.params.iter().zip(&mut param_usages) { + for (param, param_accesses) in func_decl.params.iter().zip(&mut 
param_accesses) { if is_qptr(param.ty) { - *param_usage = Some(Err(AnalysisError(Diag::bug([ + *param_accesses = Some(Err(AnalysisError(Diag::bug([ "pointer param of imported func".into(), ])))); } } - return FuncInferUsageResults { param_usages, usage_or_err_attrs_to_attach }; + return FuncGatherAccessesResults { + param_accesses, + accesses_or_err_attrs_to_attach, + }; } }; let mut all_data_insts = CollectAllDataInsts::default(); func_def_body.inner_visit_with(&mut all_data_insts); - let mut data_inst_output_usages = FxHashMap::default(); + let mut data_inst_output_accesses = FxHashMap::default(); for insts in all_data_insts.0.into_iter().rev() { for func_at_inst in func_def_body.at(insts).into_iter().rev() { let data_inst = func_at_inst.position; let data_inst_def = func_at_inst.def(); - let output_usage = data_inst_output_usages.remove(&data_inst).flatten(); + let output_accesses = data_inst_output_accesses.remove(&data_inst).flatten(); - let mut generate_usage = |this: &mut Self, ptr: Value, new_usage| { + let mut generate_accesses = |this: &mut Self, ptr: Value, new_accesses| { let slot = match ptr { Value::Const(ct) => match cx[ct].kind { ConstKind::PtrToGlobalVar(gv) => { - this.global_var_usages.entry(gv).or_default() + this.global_var_accesses.entry(gv).or_default() } // FIXME(eddyb) may be relevant? _ => unreachable!(), @@ -883,43 +879,43 @@ impl<'a> InferUsage<'a> { Value::RegionInput { region, input_idx } if region == func_def_body.body => { - &mut param_usages[input_idx as usize] + &mut param_accesses[input_idx as usize] } // FIXME(eddyb) implement Value::RegionInput { .. } | Value::NodeOutput { .. 
} => { - usage_or_err_attrs_to_attach.push(( + accesses_or_err_attrs_to_attach.push(( ptr, Err(AnalysisError(Diag::bug(["unsupported φ".into()]))), )); return; } Value::DataInstOutput(ptr_inst) => { - data_inst_output_usages.entry(ptr_inst).or_default() + data_inst_output_accesses.entry(ptr_inst).or_default() } }; *slot = Some(match slot.take() { Some(old) => old.and_then(|old| { - UsageMerger { layout_cache: &this.layout_cache } - .merge(old, new_usage?) + AccessMerger { layout_cache: &this.layout_cache } + .merge(old, new_accesses?) .into_result() }), - None => new_usage, + None => new_accesses, }); }; match &data_inst_def.kind { &DataInstKind::FuncCall(callee) => { - match self.infer_usage_in_func(module, callee) { - FuncInferUsageState::Complete(callee_results) => { - for (&arg, param_usage) in - data_inst_def.inputs.iter().zip(&callee_results.param_usages) + match self.gather_accesses_in_func(module, callee) { + FuncGatherAccessesState::Complete(callee_results) => { + for (&arg, param_accesses) in + data_inst_def.inputs.iter().zip(&callee_results.param_accesses) { - if let Some(param_usage) = param_usage { - generate_usage(self, arg, param_usage.clone()); + if let Some(param_accesses) = param_accesses { + generate_accesses(self, arg, param_accesses.clone()); } } } - FuncInferUsageState::InProgress => { - usage_or_err_attrs_to_attach.push(( + FuncGatherAccessesState::InProgress => { + accesses_or_err_attrs_to_attach.push(( Value::DataInstOutput(data_inst), Err(AnalysisError(Diag::bug([ "unsupported recursive call".into() @@ -928,55 +924,57 @@ impl<'a> InferUsage<'a> { } }; if data_inst_def.output_type.is_some_and(is_qptr) - && let Some(usage) = output_usage + && let Some(accesses) = output_accesses { - usage_or_err_attrs_to_attach - .push((Value::DataInstOutput(data_inst), usage)); + accesses_or_err_attrs_to_attach + .push((Value::DataInstOutput(data_inst), accesses)); } } - DataInstKind::QPtr(QPtrOp::FuncLocalVar(_)) => { - if let Some(usage) = output_usage { 
- usage_or_err_attrs_to_attach - .push((Value::DataInstOutput(data_inst), usage)); + DataInstKind::Mem(MemOp::FuncLocalVar(_)) => { + if let Some(accesses) = output_accesses { + accesses_or_err_attrs_to_attach + .push((Value::DataInstOutput(data_inst), accesses)); } } DataInstKind::QPtr(QPtrOp::HandleArrayIndex) => { - generate_usage( + generate_accesses( self, data_inst_def.inputs[0], - output_usage + output_accesses .unwrap_or_else(|| { Err(AnalysisError(Diag::bug([ "HandleArrayIndex: unknown element".into(), ]))) }) - .and_then(|usage| match usage { - QPtrUsage::Handles(handle) => Ok(QPtrUsage::Handles(handle)), - QPtrUsage::Memory(_) => Err(AnalysisError(Diag::bug([ - "HandleArrayIndex: cannot be used as Memory".into(), + .and_then(|accesses| match accesses { + MemAccesses::Handles(handle) => { + Ok(MemAccesses::Handles(handle)) + } + MemAccesses::Data(_) => Err(AnalysisError(Diag::bug([ + "HandleArrayIndex: cannot be accessed as data".into(), ]))), }), ); } DataInstKind::QPtr(QPtrOp::BufferData) => { - generate_usage( + generate_accesses( self, data_inst_def.inputs[0], - output_usage - .unwrap_or(Ok(QPtrUsage::Memory(QPtrMemUsage::UNUSED))) - .and_then(|usage| { - let usage = match usage { - QPtrUsage::Handles(_) => { + output_accesses + .unwrap_or(Ok(MemAccesses::Data(DataHapp::DEAD))) + .and_then(|accesses| { + let happ = match accesses { + MemAccesses::Handles(_) => { return Err(AnalysisError(Diag::bug([ - "BufferData: cannot be used as Handles".into(), + "BufferData: cannot be accessed as handles".into(), ]))); } - QPtrUsage::Memory(usage) => usage, + MemAccesses::Data(happ) => happ, }; - Ok(QPtrUsage::Handles(shapes::Handle::Buffer( + Ok(MemAccesses::Handles(shapes::Handle::Buffer( AddrSpace::Handles, - usage, + happ, ))) }), ); @@ -985,47 +983,47 @@ impl<'a> InferUsage<'a> { fixed_base_size, dyn_unit_stride, }) => { - let array_usage = QPtrMemUsage { + let array_happ = DataHapp { max_size: None, - kind: QPtrMemUsageKind::DynOffsetBase { - element: 
Rc::new(QPtrMemUsage::UNUSED), + kind: DataHappKind::Repeated { + element: Rc::new(DataHapp::DEAD), stride: dyn_unit_stride, }, }; - let buf_data_usage = if fixed_base_size == 0 { - array_usage + let buf_data_happ = if fixed_base_size == 0 { + array_happ } else { - QPtrMemUsage { + DataHapp { max_size: None, - kind: QPtrMemUsageKind::OffsetBase(Rc::new( - [(fixed_base_size, array_usage)].into(), + kind: DataHappKind::Disjoint(Rc::new( + [(fixed_base_size, array_happ)].into(), )), } }; - generate_usage( + generate_accesses( self, data_inst_def.inputs[0], - Ok(QPtrUsage::Handles(shapes::Handle::Buffer( + Ok(MemAccesses::Handles(shapes::Handle::Buffer( AddrSpace::Handles, - buf_data_usage, + buf_data_happ, ))), ); } &DataInstKind::QPtr(QPtrOp::Offset(offset)) => { - generate_usage( + generate_accesses( self, data_inst_def.inputs[0], - output_usage - .unwrap_or(Ok(QPtrUsage::Memory(QPtrMemUsage::UNUSED))) - .and_then(|usage| { - let usage = match usage { - QPtrUsage::Handles(_) => { + output_accesses + .unwrap_or(Ok(MemAccesses::Data(DataHapp::DEAD))) + .and_then(|accesses| { + let happ = match accesses { + MemAccesses::Handles(_) => { return Err(AnalysisError(Diag::bug([format!( - "Offset({offset}): cannot offset Handles" + "Offset({offset}): cannot offset in handle memory" ) .into()]))); } - QPtrUsage::Memory(usage) => usage, + MemAccesses::Data(happ) => happ, }; let offset = u32::try_from(offset).ok().ok_or_else(|| { AnalysisError(Diag::bug([format!( @@ -1038,11 +1036,11 @@ impl<'a> InferUsage<'a> { // (e.g. constant-folded) out of existence, // but while they exist, they should be noops. 
if offset == 0 { - return Ok(QPtrUsage::Memory(usage)); + return Ok(MemAccesses::Data(happ)); } - Ok(QPtrUsage::Memory(QPtrMemUsage { - max_size: usage + Ok(MemAccesses::Data(DataHapp { + max_size: happ .max_size .map(|max_size| { offset.checked_add(max_size).ok_or_else(|| { @@ -1057,36 +1055,36 @@ impl<'a> InferUsage<'a> { // FIXME(eddyb) allocating `Rc>` // to represent the one-element case, seems // quite wasteful when it's likely consumed. - kind: QPtrMemUsageKind::OffsetBase(Rc::new( - [(offset, usage)].into(), + kind: DataHappKind::Disjoint(Rc::new( + [(offset, happ)].into(), )), })) }), ); } DataInstKind::QPtr(QPtrOp::DynOffset { stride, index_bounds }) => { - generate_usage( + generate_accesses( self, data_inst_def.inputs[0], - output_usage - .unwrap_or(Ok(QPtrUsage::Memory(QPtrMemUsage::UNUSED))) - .and_then(|usage| { - let usage = match usage { - QPtrUsage::Handles(_) => { + output_accesses + .unwrap_or(Ok(MemAccesses::Data(DataHapp::DEAD))) + .and_then(|accesses| { + let happ = match accesses { + MemAccesses::Handles(_) => { return Err(AnalysisError(Diag::bug([ - "DynOffset: cannot offset Handles".into(), + "DynOffset: cannot offset in handle memory".into(), ]))); } - QPtrUsage::Memory(usage) => usage, + MemAccesses::Data(happ) => happ, }; - match usage.max_size { + match happ.max_size { None => { return Err(AnalysisError(Diag::bug([ "DynOffset: unsized element".into(), ]))); } // FIXME(eddyb) support this by "folding" - // the usage onto itself (i.e. applying + // the HAPP onto itself (i.e. applying // `%= stride` on all offsets inside). Some(max_size) if max_size > stride.get() => { return Err(AnalysisError(Diag::bug([ @@ -1095,7 +1093,7 @@ impl<'a> InferUsage<'a> { } Some(_) => {} } - Ok(QPtrUsage::Memory(QPtrMemUsage { + Ok(MemAccesses::Data(DataHapp { // FIXME(eddyb) does the `None` case allow // for negative offsets? 
max_size: index_bounds @@ -1120,23 +1118,25 @@ impl<'a> InferUsage<'a> { }) }) .transpose()?, - kind: QPtrMemUsageKind::DynOffsetBase { - element: Rc::new(usage), + kind: DataHappKind::Repeated { + element: Rc::new(happ), stride: *stride, }, })) }), ); } - DataInstKind::QPtr(op @ (QPtrOp::Load | QPtrOp::Store)) => { + DataInstKind::Mem(op @ (MemOp::Load | MemOp::Store)) => { + // HACK(eddyb) `_` will match multiple variants soon. + #[allow(clippy::match_wildcard_for_single_variants)] let (op_name, access_type) = match op { - QPtrOp::Load => ("Load", data_inst_def.output_type.unwrap()), - QPtrOp::Store => { + MemOp::Load => ("Load", data_inst_def.output_type.unwrap()), + MemOp::Store => { ("Store", func_at_inst.at(data_inst_def.inputs[1]).type_of(&cx)) } _ => unreachable!(), }; - generate_usage( + generate_accesses( self, data_inst_def.inputs[0], self.layout_cache @@ -1144,7 +1144,7 @@ impl<'a> InferUsage<'a> { .map_err(|LayoutError(e)| AnalysisError(e)) .and_then(|layout| match layout { TypeLayout::Handle(shapes::Handle::Opaque(ty)) => { - Ok(QPtrUsage::Handles(shapes::Handle::Opaque(ty))) + Ok(MemAccesses::Handles(shapes::Handle::Opaque(ty))) } TypeLayout::Handle(shapes::Handle::Buffer(..)) => { Err(AnalysisError(Diag::bug([format!( @@ -1167,9 +1167,9 @@ impl<'a> InferUsage<'a> { .into()]))) } TypeLayout::Concrete(concrete) => { - Ok(QPtrUsage::Memory(QPtrMemUsage { + Ok(MemAccesses::Data(DataHapp { max_size: Some(concrete.mem_layout.fixed_base.size), - kind: QPtrMemUsageKind::DirectAccess(access_type), + kind: DataHappKind::Direct(access_type), })) } }), @@ -1182,7 +1182,7 @@ impl<'a> InferUsage<'a> { match *attr { Attr::QPtr(QPtrAttr::ToSpvPtrInput { input_idx, pointee }) => { let ty = pointee.0; - generate_usage( + generate_accesses( self, data_inst_def.inputs[input_idx as usize], self.layout_cache @@ -1210,11 +1210,11 @@ impl<'a> InferUsage<'a> { )); } }; - Ok(QPtrUsage::Handles(handle)) + Ok(MemAccesses::Handles(handle)) } // NOTE(eddyb) because we can't 
represent // the original type, in the same way we - // use `QPtrMemUsageKind::StrictlyTyped` + // use `DataHappKind::StrictlyTyped` // for non-handles, we can't guarantee // a generated type that matches the // desired `pointee` type. @@ -1227,7 +1227,7 @@ impl<'a> InferUsage<'a> { ]))) } TypeLayout::Concrete(concrete) => { - Ok(QPtrUsage::Memory(QPtrMemUsage { + Ok(MemAccesses::Data(DataHapp { max_size: if concrete .mem_layout .dyn_unit_stride @@ -1242,9 +1242,7 @@ impl<'a> InferUsage<'a> { .size, ) }, - kind: QPtrMemUsageKind::StrictlyTyped( - ty, - ), + kind: DataHappKind::StrictlyTyped(ty), })) } } @@ -1263,9 +1261,9 @@ impl<'a> InferUsage<'a> { if has_from_spv_ptr_output_attr { // FIXME(eddyb) merge with `FromSpvPtrOutput`'s `pointee`. - if let Some(usage) = output_usage { - usage_or_err_attrs_to_attach - .push((Value::DataInstOutput(data_inst), usage)); + if let Some(accesses) = output_accesses { + accesses_or_err_attrs_to_attach + .push((Value::DataInstOutput(data_inst), accesses)); } } } @@ -1273,7 +1271,7 @@ impl<'a> InferUsage<'a> { } } - FuncInferUsageResults { param_usages, usage_or_err_attrs_to_attach } + FuncGatherAccessesResults { param_accesses, accesses_or_err_attrs_to_attach } } } diff --git a/src/qptr/layout.rs b/src/mem/layout.rs similarity index 97% rename from src/qptr/layout.rs rename to src/mem/layout.rs index cbc407b..7b0656a 100644 --- a/src/qptr/layout.rs +++ b/src/mem/layout.rs @@ -1,6 +1,6 @@ // FIXME(eddyb) layouts are a bit tricky: this recomputes them from several passes. 
-use crate::qptr::shapes; +use crate::mem::shapes; use crate::{ AddrSpace, Attr, Const, ConstKind, Context, Diag, FxIndexMap, Type, TypeKind, TypeOrConst, spv, }; @@ -61,10 +61,10 @@ impl LayoutConfig { Self { min_aggregate_legacy_align: 16, ..Self::VULKAN_STANDARD_LAYOUT }; } -pub(super) struct LayoutError(pub(super) Diag); +pub(crate) struct LayoutError(pub(crate) Diag); #[derive(Clone)] -pub(super) enum TypeLayout { +pub(crate) enum TypeLayout { Handle(HandleLayout), HandleArray(HandleLayout, Option), @@ -73,16 +73,16 @@ pub(super) enum TypeLayout { } // NOTE(eddyb) `Handle` is parameterized over the `Buffer` layout. -pub(super) type HandleLayout = shapes::Handle>; +pub(crate) type HandleLayout = shapes::Handle>; -pub(super) struct MemTypeLayout { - pub(super) original_type: Type, - pub(super) mem_layout: shapes::MaybeDynMemLayout, - pub(super) components: Components, +pub(crate) struct MemTypeLayout { + pub(crate) original_type: Type, + pub(crate) mem_layout: shapes::MaybeDynMemLayout, + pub(crate) components: Components, } // FIXME(eddyb) use proper newtypes for byte sizes. -pub(super) enum Components { +pub(crate) enum Components { Scalar, /// Vector and array elements (all of them having the same `elem` layout). @@ -106,7 +106,7 @@ impl Components { /// this can return multiple components, with at most one ever being non-ZST. // // FIXME(eddyb) be more aggressive in pruning ZSTs so this can be simpler. - pub(super) fn find_components_containing( + pub(crate) fn find_components_containing( &self, // FIXME(eddyb) consider renaming such offset ranges to "extent". offset_range: Range, @@ -168,7 +168,7 @@ impl Components { } /// Context for computing `TypeLayout`s from `Type`s (with caching). 
-pub(super) struct LayoutCache<'a> { +pub(crate) struct LayoutCache<'a> { cx: Rc, wk: &'static spv::spec::WellKnown, @@ -178,7 +178,7 @@ pub(super) struct LayoutCache<'a> { } impl<'a> LayoutCache<'a> { - pub(super) fn new(cx: Rc, config: &'a LayoutConfig) -> Self { + pub(crate) fn new(cx: Rc, config: &'a LayoutConfig) -> Self { Self { cx, wk: &spv::spec::Spec::get().well_known, config, cache: Default::default() } } @@ -197,7 +197,7 @@ impl<'a> LayoutCache<'a> { } /// Attempt to compute a `TypeLayout` for a given (SPIR-V) `Type`. - pub(super) fn layout_of(&self, ty: Type) -> Result { + pub(crate) fn layout_of(&self, ty: Type) -> Result { if let Some(cached) = self.cache.borrow().get(&ty).cloned() { return Ok(cached); } @@ -348,7 +348,7 @@ impl<'a> LayoutCache<'a> { // FIXME(eddyb) !!! what if... types had a min/max size and then... // that would allow surrounding offsets to limit their size... but... ugh... // ugh this doesn't make any sense. maybe if the front-end specifies - // offsets with "abstract types", it must configure `qptr::layout`? + // offsets with "abstract types", it must configure `mem::layout`? let layout = if spv_inst.opcode == wk.OpTypeBool { // FIXME(eddyb) make this properly abstract instead of only configurable. scalar_with_size_and_align(self.config.abstract_bool_size_align) diff --git a/src/mem/mod.rs b/src/mem/mod.rs new file mode 100644 index 0000000..f52208a --- /dev/null +++ b/src/mem/mod.rs @@ -0,0 +1,151 @@ +//! Memory operations, analyses and transformations. +// +// FIXME(eddyb) document at least these aspects: +// - "memory" = indirect storage of data and/or resources +// - (untyped) "data" = mix of plain bytes and pointers (as per RalfJ blog post) +// (does "non-data memory" actually make sense? could be considered typed?) +// +// FIXME(eddyb) dig into past notes (e.g. `qptr::legalize`, Rust-GPU release notes, +// https://github.com/EmbarkStudios/spirt/pull/24, etc.) for useful docs. 
+//
+// FIXME(eddyb) consider taking this into a more (R)VSDG "state type" direction.
+
+use crate::{OrdAssertEq, Type};
+use std::collections::BTreeMap;
+use std::num::NonZeroU32;
+use std::rc::Rc;
+
+// NOTE(eddyb) all the modules are declared here, but they're documented "inside"
+// (i.e. using inner doc comments).
+pub mod analyze;
+// FIXME(eddyb) make this public?
+pub(crate) mod layout;
+pub mod shapes;
+
+pub use layout::LayoutConfig;
+
+/// Memory-specific attributes ([`Attr::Mem`]).
+#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum MemAttr {
+    /// When applied to a `GlobalVar` or `FuncLocalVar`, this tracks all possible
+    /// access patterns its memory may be subjected to (see [`MemAccesses`]).
+    Accesses(OrdAssertEq<MemAccesses>),
+}
+
+#[derive(Clone, PartialEq, Eq, Hash)]
+pub enum MemAccesses {
+    /// Accesses to one or more handles (i.e. optionally indexed by
+    /// [`crate::qptr::QPtrOp::HandleArrayIndex`]), which can be:
+    /// - `Handle::Opaque(handle_type)`: all accesses involve [`MemOp::Load`] or
+    ///   [`crate::qptr::QPtrAttr::ToSpvPtrInput`], with the common type `handle_type`
+    /// - `Handle::Buffer(data_happ)`: carries with it `data_happ`,
+    ///   i.e. the access patterns for the memory that is reached through
+    ///   [`crate::qptr::QPtrOp::BufferData`]
+    Handles(shapes::Handle<DataHapp>),
+
+    Data(DataHapp),
+}
+
+/// Data HAPP ("Hierarchical Access Pattern Partitioning"): all access patterns
+/// for some memory, structured by disjoint offset ranges ("partitions").
+///
+/// This is the core of "type recovery" (inferring typed memory from untyped),
+/// with "struct"/"array" equivalents (i.e. as `DataHappKind` variants), but
+/// while it can be mapped to an explicitly laid out data type, it also tracks
+/// distinctions only needed during merging (e.g. [`DataHappKind::StrictlyTyped`]).
+///
+/// **Note**: the only reason for the recursive/"hierarchical" aspect is that
+/// (array-like) dynamic indexing allows for compact representation of repeated
+/// patterns, which can non-trivially nest for 3+ levels without losing the need
+/// for efficient representation - in fact, one can construct a worst-case like:
+/// ```ignore
+/// [(A, [(B, [(C, [(D, [T; N], X); 2], Y); 2], Z); 2], W); 2]
+/// ```
+/// (with only `N * 2**4` leaf `T`s because of the 4 `[(_, ..., _); 2]` levels,
+/// and the potential for dozens of such levels while remaining a plausible size)
+//
+// FIXME(eddyb) reconsider the name (acronym was picked to avoid harder decisions).
+#[derive(Clone, PartialEq, Eq, Hash)]
+pub struct DataHapp {
+    /// If present, this is a worst-case upper bound on the offsets at which
+    /// accesses may be performed.
+    //
+    // FIXME(eddyb) use proper newtypes for byte amounts.
+    //
+    // FIXME(eddyb) suboptimal naming choice, but other options are too verbose,
+    // including maybe using `RangeTo<_>` to explicitly indicate "exclusive".
+    //
+    // FIXME(eddyb) consider renaming such information to "extent", but that might
+    // be ambiguous with an offset range (as opposed to using the maximum of all
+    // *possible* `offset_range.end` values to describe a "maximum size").
+    pub max_size: Option<u32>,
+
+    pub kind: DataHappKind,
+}
+
+impl DataHapp {
+    pub const DEAD: Self = Self { max_size: Some(0), kind: DataHappKind::Dead };
+}
+
+#[derive(Clone, PartialEq, Eq, Hash)]
+pub enum DataHappKind {
+    /// Not actually accessed (only an intermediary state, during access analysis).
+    //
+    // FIXME(eddyb) use `Option` instead? or an empty `Disjoint`?
+    Dead,
+
+    // FIXME(eddyb) replace the two leaves with e.g. `Leaf(Type, LeafAccessKind)`.
+    //
+    //
+    /// Accesses through typed pointers (e.g. via unknown SPIR-V instructions),
+    /// requiring a specific choice of pointee type which cannot be modified,
+    /// and has to be reused as-is, when lifting to typed memory.
+ /// + /// Other overlapping accesses can be merged into this one as long as they + /// can be fully expressed using the (transitive) components of this type. + StrictlyTyped(Type), + + /// Direct accesses (e.g. [`MemOp::Load`], [`MemOp::Store`]), which can be + /// decomposed as necessary (down to individual scalar leaves), to allow + /// maximal merging opportunities. + // + // FIXME(eddyb) track whether accesses are `Load`s and/or `Store`s, to allow + // inferring `NonWritable`/`NonReadable` annotations, as well. + Direct(Type), + + /// Partitioning into disjoint offset ranges (the map is keyed by the start + /// of the offset range, while the end is implied by its corresponding value), + /// requiring a "struct" type, when lifting to typed memory. + // + // FIXME(eddyb) make this non-nestable and the fundamental basis of "HAPP". + Disjoint(Rc>), + + /// `Disjoint` counterpart for dynamic offsetting, requiring an "array" type, + /// when lifting to typed memory, with one single element type being repeated + /// across the entire size, at all offsets that are a multiple of `stride`. + Repeated { + // FIXME(eddyb) this feels inefficient. + element: Rc, + stride: NonZeroU32, + }, +} + +/// Memory-specific operations ([`DataInstKind::Mem`]). +#[derive(Clone, PartialEq, Eq, Hash)] +pub enum MemOp { + // HACK(eddyb) `OpVariable` replacement, which itself should not be kept as + // a `SpvInst` - once fn-local variables are lowered, this should go there. + FuncLocalVar(shapes::MemLayout), + + /// Read a single value from a pointer (`inputs[0]`). + // + // FIXME(eddyb) limit this to data - and scalars, maybe vectors at most. + Load, + + /// Write a single value (`inputs[1]`) to a pointer (`inputs[0]`). + // + // FIXME(eddyb) limit this to data - and scalars, maybe vectors at most. + Store, + // + // FIXME(eddyb) implement more ops (e.g. copies, atomics). 
+}
diff --git a/src/qptr/shapes.rs b/src/mem/shapes.rs
similarity index 93%
rename from src/qptr/shapes.rs
rename to src/mem/shapes.rs
index ac53d43..dc9f3e2 100644
--- a/src/qptr/shapes.rs
+++ b/src/mem/shapes.rs
@@ -1,6 +1,7 @@
 //! Variable shapes (untyped memory layouts vs abstract resources).
 //
 // FIXME(eddyb) does this need its own module still?
+// TODO(eddyb) strongly consider moving these to `mem/mod.rs`.
 
 use crate::{AddrSpace, Type};
 use std::num::NonZeroU32;
@@ -22,6 +23,7 @@ pub enum GlobalVarShape {
     },
 
     // FIXME(eddyb) unify terminology around "concrete"/"memory"/"untyped (data)".
+    // TODO(eddyb) strongly consider renaming this to just `Data`.
     UntypedData(MemLayout),
 
     /// Non-memory pipeline interface, which must keep the exact original type,
@@ -73,7 +75,7 @@ pub enum Handle {
     // instead of being treated like a buffer?
     //
     // FIXME(eddyb) should this be a `Type` of its own, that can be loaded from
-    // a handle `QPtr`, and then has data pointer / length ops *on that*?
+    // a handle pointer, and then has data pointer / length ops *on that*?
     Buffer(AddrSpace, BL),
 }
 
@@ -83,11 +85,12 @@ pub enum Handle {
 /// and are both kept track of to detect ambiguity in implicit layouts, e.g.
 /// field offsets when the `Offset` decoration isn't being used.
 /// Note, however, that `legacy_align` can be raised to "extended" alignment,
-/// or completeley ignored, using [`LayoutConfig`](crate::qptr::LayoutConfig).
+/// or completely ignored, using [`LayoutConfig`](crate::mem::LayoutConfig).
 ///
 /// Only `align` is *required*, that is `size % align == 0` must be always enforced.
 //
 // FIXME(eddyb) consider supporting specialization-constant-length arrays.
+// TODO(eddyb) strongly consider renaming this to `DataLayout`.
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
 pub struct MemLayout {
     // FIXME(eddyb) use proper newtypes (and log2 for align!).
diff --git a/src/passes/qptr.rs b/src/passes/qptr.rs index 0cf95ef..59a6ac7 100644 --- a/src/passes/qptr.rs +++ b/src/passes/qptr.rs @@ -4,7 +4,7 @@ use crate::qptr; use crate::visit::{InnerVisit, Visitor}; use crate::{AttrSet, Const, Context, Func, FxIndexSet, GlobalVar, Module, Type}; -pub fn lower_from_spv_ptrs(module: &mut Module, layout_config: &qptr::LayoutConfig) { +pub fn lower_from_spv_ptrs(module: &mut Module, layout_config: &crate::mem::LayoutConfig) { let cx = &module.cx(); let (seen_global_vars, seen_funcs) = { @@ -34,11 +34,13 @@ pub fn lower_from_spv_ptrs(module: &mut Module, layout_config: &qptr::LayoutConf } } -pub fn analyze_uses(module: &mut Module, layout_config: &qptr::LayoutConfig) { - qptr::analyze::InferUsage::new(module.cx(), layout_config).infer_usage_in_module(module); +// FIXME(eddyb) this doesn't really belong in `qptr`. +pub fn analyze_mem_accesses(module: &mut Module, layout_config: &crate::mem::LayoutConfig) { + crate::mem::analyze::GatherAccesses::new(module.cx(), layout_config) + .gather_accesses_in_module(module); } -pub fn lift_to_spv_ptrs(module: &mut Module, layout_config: &qptr::LayoutConfig) { +pub fn lift_to_spv_ptrs(module: &mut Module, layout_config: &crate::mem::LayoutConfig) { let cx = &module.cx(); let (seen_global_vars, seen_funcs) = { diff --git a/src/print/mod.rs b/src/print/mod.rs index 8532ab9..bafc5aa 100644 --- a/src/print/mod.rs +++ b/src/print/mod.rs @@ -20,8 +20,9 @@ use itertools::Itertools as _; use crate::func_at::FuncAt; +use crate::mem::{DataHapp, DataHappKind, MemAccesses, MemAttr, MemOp}; use crate::print::multiversion::Versions; -use crate::qptr::{self, QPtrAttr, QPtrMemUsage, QPtrMemUsageKind, QPtrOp, QPtrUsage}; +use crate::qptr::{QPtrAttr, QPtrOp}; use crate::visit::{InnerVisit, Visit, Visitor}; use crate::{ AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstDef, ConstKind, Context, DataInst, @@ -2957,6 +2958,22 @@ impl Print for Attr { ); } + Attr::Mem(attr) => { + let (name, params_inputs) = match 
attr { + MemAttr::Accesses(accesses) => { + ("accesses", pretty::join_comma_sep("(", [accesses.0.print(printer)], ")")) + } + }; + pretty::Fragment::new([ + printer + .demote_style_for_namespace_prefix(printer.attr_style()) + .apply("mem.") + .into(), + printer.attr_style().apply(name).into(), + params_inputs, + ]) + } + Attr::QPtr(attr) => { let (name, params_inputs) = match attr { QPtrAttr::ToSpvPtrInput { input_idx, pointee } => ( @@ -2985,10 +3002,6 @@ impl Print for Attr { ")", ), ), - - QPtrAttr::Usage(usage) => { - ("usage", pretty::join_comma_sep("(", [usage.0.print(printer)], ")")) - } }; pretty::Fragment::new([ printer @@ -3036,54 +3049,52 @@ impl Print for Vec { DiagMsgPart::Attrs(attrs) => attrs.print(printer), DiagMsgPart::Type(ty) => ty.print(printer), DiagMsgPart::Const(ct) => ct.print(printer), - DiagMsgPart::QPtrUsage(usage) => usage.print(printer), + DiagMsgPart::MemAccesses(accesses) => accesses.print(printer), })) } } -impl Print for QPtrUsage { +impl Print for MemAccesses { type Output = pretty::Fragment; fn print(&self, printer: &Printer<'_>) -> pretty::Fragment { match self { - QPtrUsage::Handles(qptr::shapes::Handle::Opaque(ty)) => ty.print(printer), - QPtrUsage::Handles(qptr::shapes::Handle::Buffer(_, data_usage)) => { + MemAccesses::Handles(crate::mem::shapes::Handle::Opaque(ty)) => ty.print(printer), + MemAccesses::Handles(crate::mem::shapes::Handle::Buffer(_, data_happ)) => { pretty::Fragment::new([ printer.declarative_keyword_style().apply("buffer_data").into(), - pretty::join_comma_sep("(", [data_usage.print(printer)], ")"), + pretty::join_comma_sep("(", [data_happ.print(printer)], ")"), ]) } - QPtrUsage::Memory(usage) => usage.print(printer), + MemAccesses::Data(happ) => happ.print(printer), } } } -impl Print for QPtrMemUsage { +impl Print for DataHapp { type Output = pretty::Fragment; fn print(&self, printer: &Printer<'_>) -> pretty::Fragment { // FIXME(eddyb) should this be a helper on `Printer`? 
let num_lit = |x: u32| printer.numeric_literal_style().apply(format!("{x}")).into(); match &self.kind { - QPtrMemUsageKind::Unused => "_".into(), + DataHappKind::Dead => "_".into(), // FIXME(eddyb) should the distinction be noted? - &QPtrMemUsageKind::StrictlyTyped(ty) | &QPtrMemUsageKind::DirectAccess(ty) => { - ty.print(printer) - } - QPtrMemUsageKind::OffsetBase(entries) => pretty::join_comma_sep( + &DataHappKind::StrictlyTyped(ty) | &DataHappKind::Direct(ty) => ty.print(printer), + DataHappKind::Disjoint(entries) => pretty::join_comma_sep( "{", entries .iter() - .map(|(&offset, sub_usage)| { + .map(|(&offset, sub_happ)| { pretty::Fragment::new([ num_lit(offset), "..".into(), - sub_usage + sub_happ .max_size .and_then(|max_size| offset.checked_add(max_size)) .map(num_lit) .unwrap_or_default(), " => ".into(), - sub_usage.print(printer), + sub_happ.print(printer), ]) }) .map(|entry| { @@ -3091,7 +3102,7 @@ impl Print for QPtrMemUsage { }), "}", ), - QPtrMemUsageKind::DynOffsetBase { element, stride } => pretty::Fragment::new([ + DataHappKind::Repeated { element, stride } => pretty::Fragment::new([ "(".into(), num_lit(0), "..".into(), @@ -3384,51 +3395,54 @@ impl Print for GlobalVarDecl { // ideally the `GlobalVarDecl` would hold that type itself. 
let type_ascription_suffix = match &printer.cx[*type_of_ptr_to].kind { TypeKind::QPtr if shape.is_some() => match shape.unwrap() { - qptr::shapes::GlobalVarShape::Handles { handle, fixed_count } => { + crate::mem::shapes::GlobalVarShape::Handles { handle, fixed_count } => { let handle = match handle { - qptr::shapes::Handle::Opaque(ty) => ty.print(printer), - qptr::shapes::Handle::Buffer(addr_space, buf) => pretty::Fragment::new([ - printer.declarative_keyword_style().apply("buffer").into(), - pretty::join_comma_sep( - "(", - [ - addr_space.print(printer), - pretty::Fragment::new([ - printer.pretty_named_argument_prefix("size"), - pretty::Fragment::new( - Some(buf.fixed_base.size) - .filter(|&base_size| { - base_size > 0 || buf.dyn_unit_stride.is_none() - }) - .map(|base_size| { - printer - .numeric_literal_style() - .apply(base_size.to_string()) - .into() - }) - .into_iter() - .chain(buf.dyn_unit_stride.map(|stride| { - pretty::Fragment::new([ - "N × ".into(), + crate::mem::shapes::Handle::Opaque(ty) => ty.print(printer), + crate::mem::shapes::Handle::Buffer(addr_space, buf) => { + pretty::Fragment::new([ + printer.declarative_keyword_style().apply("buffer").into(), + pretty::join_comma_sep( + "(", + [ + addr_space.print(printer), + pretty::Fragment::new([ + printer.pretty_named_argument_prefix("size"), + pretty::Fragment::new( + Some(buf.fixed_base.size) + .filter(|&base_size| { + base_size > 0 + || buf.dyn_unit_stride.is_none() + }) + .map(|base_size| { printer .numeric_literal_style() - .apply(stride.to_string()), - ]) - })) - .intersperse_with(|| " + ".into()), - ), - ]), - pretty::Fragment::new([ - printer.pretty_named_argument_prefix("align"), - printer - .numeric_literal_style() - .apply(buf.fixed_base.align.to_string()) - .into(), - ]), - ], - ")", - ), - ]), + .apply(base_size.to_string()) + .into() + }) + .into_iter() + .chain(buf.dyn_unit_stride.map(|stride| { + pretty::Fragment::new([ + "N × ".into(), + printer + .numeric_literal_style() + 
.apply(stride.to_string()), + ]) + })) + .intersperse_with(|| " + ".into()), + ), + ]), + pretty::Fragment::new([ + printer.pretty_named_argument_prefix("align"), + printer + .numeric_literal_style() + .apply(buf.fixed_base.align.to_string()) + .into(), + ]), + ], + ")", + ), + ]) + } }; let handles = if fixed_count.map_or(0, |c| c.get()) == 1 { @@ -3450,31 +3464,33 @@ impl Print for GlobalVarDecl { }; pretty::join_space(":", [handles]) } - qptr::shapes::GlobalVarShape::UntypedData(mem_layout) => pretty::Fragment::new([ - " ".into(), - printer.declarative_keyword_style().apply("layout").into(), - pretty::join_comma_sep( - "(", - [ - pretty::Fragment::new([ - printer.pretty_named_argument_prefix("size"), - printer - .numeric_literal_style() - .apply(mem_layout.size.to_string()) - .into(), - ]), - pretty::Fragment::new([ - printer.pretty_named_argument_prefix("align"), - printer - .numeric_literal_style() - .apply(mem_layout.align.to_string()) - .into(), - ]), - ], - ")", - ), - ]), - qptr::shapes::GlobalVarShape::TypedInterface(ty) => { + crate::mem::shapes::GlobalVarShape::UntypedData(mem_layout) => { + pretty::Fragment::new([ + " ".into(), + printer.declarative_keyword_style().apply("layout").into(), + pretty::join_comma_sep( + "(", + [ + pretty::Fragment::new([ + printer.pretty_named_argument_prefix("size"), + printer + .numeric_literal_style() + .apply(mem_layout.size.to_string()) + .into(), + ]), + pretty::Fragment::new([ + printer.pretty_named_argument_prefix("align"), + printer + .numeric_literal_style() + .apply(mem_layout.align.to_string()) + .into(), + ]), + ], + ")", + ), + ]) + } + crate::mem::shapes::GlobalVarShape::TypedInterface(ty) => { printer.pretty_type_ascription_suffix(ty) } }, @@ -3903,15 +3919,10 @@ impl Print for FuncAt<'_, DataInst> { pretty::join_comma_sep("(", inputs.iter().map(|v| v.print(printer)), ")"), ]), - DataInstKind::QPtr(op) => { - let (qptr_input, extra_inputs) = match op { - // HACK(eddyb) `FuncLocalVar` should probably not 
even be in `QPtrOp`. - QPtrOp::FuncLocalVar(_) => (None, &inputs[..]), - _ => (Some(inputs[0]), &inputs[1..]), - }; - let (name, extra_inputs): (_, SmallVec<[_; 1]>) = match op { - QPtrOp::FuncLocalVar(mem_layout) => { - assert!(extra_inputs.len() <= 1); + DataInstKind::Mem(op) => { + let (name, inputs): (_, SmallVec<[_; 1]>) = match op { + MemOp::FuncLocalVar(mem_layout) => { + assert!(inputs.len() <= 1); ( "func_local_var", [ @@ -3931,7 +3942,7 @@ impl Print for FuncAt<'_, DataInst> { ]), ] .into_iter() - .chain(extra_inputs.first().map(|&init| { + .chain(inputs.first().map(|&init| { pretty::Fragment::new([ printer.pretty_named_argument_prefix("initializer"), init.print(printer), @@ -3941,6 +3952,35 @@ impl Print for FuncAt<'_, DataInst> { ) } + MemOp::Load => { + assert_eq!(inputs.len(), 1); + ("load", [inputs[0].print(printer)].into_iter().collect()) + } + MemOp::Store => { + assert_eq!(inputs.len(), 2); + ( + "store", + [inputs[0].print(printer), inputs[1].print(printer)] + .into_iter() + .collect(), + ) + } + }; + + pretty::Fragment::new([ + printer + .demote_style_for_namespace_prefix(printer.declarative_keyword_style()) + .apply("mem.") + .into(), + printer.declarative_keyword_style().apply(name).into(), + pretty::join_comma_sep("(", inputs, ")"), + ]) + } + + DataInstKind::QPtr(op) => { + let qptr_input = inputs[0].print(printer); + let extra_inputs = &inputs[1..]; + let (name, extra_inputs): (_, SmallVec<[_; 1]>) = match op { QPtrOp::HandleArrayIndex => { assert_eq!(extra_inputs.len(), 1); ( @@ -4002,15 +4042,6 @@ impl Print for FuncAt<'_, DataInst> { .collect(), ) } - - QPtrOp::Load => { - assert_eq!(extra_inputs.len(), 0); - ("load", [].into_iter().collect()) - } - QPtrOp::Store => { - assert_eq!(extra_inputs.len(), 1); - ("store", [extra_inputs[0].print(printer)].into_iter().collect()) - } }; pretty::Fragment::new([ @@ -4019,11 +4050,7 @@ impl Print for FuncAt<'_, DataInst> { .apply("qptr.") .into(), 
printer.declarative_keyword_style().apply(name).into(), - pretty::join_comma_sep( - "(", - qptr_input.map(|v| v.print(printer)).into_iter().chain(extra_inputs), - ")", - ), + pretty::join_comma_sep("(", [qptr_input].into_iter().chain(extra_inputs), ")"), ]) } diff --git a/src/qptr/lift.rs b/src/qptr/lift.rs index c4af4e2..7aa5234 100644 --- a/src/qptr/lift.rs +++ b/src/qptr/lift.rs @@ -1,10 +1,8 @@ //! [`QPtr`](crate::TypeKind::QPtr) lifting (e.g. to SPIR-V). -// HACK(eddyb) sharing layout code with other modules. -use super::layout::*; - use crate::func_at::FuncAtMut; -use crate::qptr::{QPtrAttr, QPtrMemUsage, QPtrMemUsageKind, QPtrOp, QPtrUsage, shapes}; +use crate::mem::{DataHapp, DataHappKind, MemAccesses, MemAttr, MemOp, shapes}; +use crate::qptr::{QPtrAttr, QPtrOp}; use crate::transform::{InnerInPlaceTransform, InnerTransform, Transformed, Transformer}; use crate::{ AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstDef, ConstKind, Context, DataInst, @@ -18,6 +16,10 @@ use std::mem; use std::num::NonZeroU32; use std::rc::Rc; +// HACK(eddyb) sharing layout code with other modules. +// FIXME(eddyb) can this just be a non-glob import? +use crate::mem::layout::*; + struct LiftError(Diag); /// Context for lifting `QPtr`s to SPIR-V `OpTypePointer`s. 
@@ -44,7 +46,7 @@ impl<'a> LiftToSpvPtrs<'a> { pub fn lift_global_var(&self, global_var_decl: &mut GlobalVarDecl) { match self.spv_ptr_type_and_addr_space_for_global_var(global_var_decl) { Ok((spv_ptr_type, addr_space)) => { - global_var_decl.attrs = self.strip_qptr_usage_attr(global_var_decl.attrs); + global_var_decl.attrs = self.strip_mem_accesses_attr(global_var_decl.attrs); global_var_decl.type_of_ptr_to = spv_ptr_type; global_var_decl.addr_space = addr_space; global_var_decl.shape = None; @@ -67,29 +69,29 @@ impl<'a> LiftToSpvPtrs<'a> { deferred_ptr_noops: Default::default(), data_inst_use_counts: Default::default(), - func_has_qptr_analysis_bug_diags: false, + func_has_mem_analysis_bug_diags: false, } .in_place_transform_func_decl(&mut module.funcs[func]); } } - fn find_qptr_usage_attr(&self, attrs: AttrSet) -> Result<&QPtrUsage, LiftError> { + fn find_mem_accesses_attr(&self, attrs: AttrSet) -> Result<&MemAccesses, LiftError> { self.cx[attrs] .attrs .iter() .find_map(|attr| match attr { - Attr::QPtr(QPtrAttr::Usage(usage)) => Some(&usage.0), + Attr::Mem(MemAttr::Accesses(accesses)) => Some(&accesses.0), _ => None, }) - .ok_or_else(|| LiftError(Diag::bug(["missing `qptr.usage` attribute".into()]))) + .ok_or_else(|| LiftError(Diag::bug(["missing `mem.accesses` attribute".into()]))) } - fn strip_qptr_usage_attr(&self, attrs: AttrSet) -> AttrSet { + fn strip_mem_accesses_attr(&self, attrs: AttrSet) -> AttrSet { self.cx.intern(AttrSetDef { attrs: self.cx[attrs] .attrs .iter() - .filter(|attr| !matches!(attr, Attr::QPtr(QPtrAttr::Usage(_)))) + .filter(|attr| !matches!(attr, Attr::Mem(MemAttr::Accesses(_)))) .cloned() .collect(), }) @@ -101,7 +103,7 @@ impl<'a> LiftToSpvPtrs<'a> { ) -> Result<(Type, AddrSpace), LiftError> { let wk = self.wk; - let qptr_usage = self.find_qptr_usage_attr(global_var_decl.attrs)?; + let mem_accesses = self.find_mem_accesses_attr(global_var_decl.attrs)?; let shape = global_var_decl.shape.ok_or_else(|| LiftError(Diag::bug(["missing 
shape".into()])))?; @@ -109,17 +111,18 @@ impl<'a> LiftToSpvPtrs<'a> { (AddrSpace::Handles, shapes::GlobalVarShape::Handles { handle, fixed_count }) => { let (storage_class, handle_type) = match handle { shapes::Handle::Opaque(ty) => { - if self.pointee_type_for_usage(qptr_usage)? != ty { + if self.pointee_type_for_accesses(mem_accesses)? != ty { return Err(LiftError(Diag::bug([ - "mismatched opaque handle types in `qptr.usage` vs `shape`".into(), + "mismatched opaque handle types in `mem.accesses` vs `shape`" + .into(), ]))); } (wk.UniformConstant, ty) } - // FIXME(eddyb) validate usage against `buf` and/or expand + // FIXME(eddyb) validate accesses against `buf` and/or expand // the type to make sure it has the right size. shapes::Handle::Buffer(AddrSpace::SpvStorageClass(storage_class), _buf) => { - (storage_class, self.pointee_type_for_usage(qptr_usage)?) + (storage_class, self.pointee_type_for_accesses(mem_accesses)?) } shapes::Handle::Buffer(AddrSpace::Handles, _) => { return Err(LiftError(Diag::bug([ @@ -136,12 +139,12 @@ impl<'a> LiftToSpvPtrs<'a> { }, ) } - // FIXME(eddyb) validate usage against `layout` and/or expand + // FIXME(eddyb) validate accesses against `layout` and/or expand // the type to make sure it has the right size. 
( AddrSpace::SpvStorageClass(storage_class), shapes::GlobalVarShape::UntypedData(_layout), - ) => (storage_class, self.pointee_type_for_usage(qptr_usage)?), + ) => (storage_class, self.pointee_type_for_accesses(mem_accesses)?), ( AddrSpace::SpvStorageClass(storage_class), shapes::GlobalVarShape::TypedInterface(ty), @@ -197,57 +200,55 @@ impl<'a> LiftToSpvPtrs<'a> { }) } - fn pointee_type_for_usage(&self, usage: &QPtrUsage) -> Result { + fn pointee_type_for_accesses(&self, accesses: &MemAccesses) -> Result { let wk = self.wk; - match usage { - &QPtrUsage::Handles(shapes::Handle::Opaque(ty)) => Ok(ty), - QPtrUsage::Handles(shapes::Handle::Buffer(_, data_usage)) => { + match accesses { + &MemAccesses::Handles(shapes::Handle::Opaque(ty)) => Ok(ty), + MemAccesses::Handles(shapes::Handle::Buffer(_, data_happ)) => { let attr_spv_decorate_block = Attr::SpvAnnotation(spv::Inst { opcode: wk.OpDecorate, imms: [spv::Imm::Short(wk.Decoration, wk.Block)].into_iter().collect(), }); - match &data_usage.kind { - QPtrMemUsageKind::Unused => { - self.spv_op_type_struct([], [attr_spv_decorate_block]) - } - QPtrMemUsageKind::OffsetBase(fields) => self.spv_op_type_struct( - fields.iter().map(|(&field_offset, field_usage)| { - Ok((field_offset, self.pointee_type_for_mem_usage(field_usage)?)) + match &data_happ.kind { + DataHappKind::Dead => self.spv_op_type_struct([], [attr_spv_decorate_block]), + DataHappKind::Disjoint(fields) => self.spv_op_type_struct( + fields.iter().map(|(&field_offset, field_happ)| { + Ok((field_offset, self.pointee_type_for_data_happ(field_happ)?)) }), [attr_spv_decorate_block], ), - QPtrMemUsageKind::StrictlyTyped(_) - | QPtrMemUsageKind::DirectAccess(_) - | QPtrMemUsageKind::DynOffsetBase { .. } => self.spv_op_type_struct( - [Ok((0, self.pointee_type_for_mem_usage(data_usage)?))], + DataHappKind::StrictlyTyped(_) + | DataHappKind::Direct(_) + | DataHappKind::Repeated { .. 
} => self.spv_op_type_struct( + [Ok((0, self.pointee_type_for_data_happ(data_happ)?))], [attr_spv_decorate_block], ), } } - QPtrUsage::Memory(usage) => self.pointee_type_for_mem_usage(usage), + MemAccesses::Data(happ) => self.pointee_type_for_data_happ(happ), } } - fn pointee_type_for_mem_usage(&self, usage: &QPtrMemUsage) -> Result { - match &usage.kind { - QPtrMemUsageKind::Unused => self.spv_op_type_struct([], []), - &QPtrMemUsageKind::StrictlyTyped(ty) | &QPtrMemUsageKind::DirectAccess(ty) => Ok(ty), - QPtrMemUsageKind::OffsetBase(fields) => self.spv_op_type_struct( - fields.iter().map(|(&field_offset, field_usage)| { - Ok((field_offset, self.pointee_type_for_mem_usage(field_usage)?)) + fn pointee_type_for_data_happ(&self, happ: &DataHapp) -> Result { + match &happ.kind { + DataHappKind::Dead => self.spv_op_type_struct([], []), + &DataHappKind::StrictlyTyped(ty) | &DataHappKind::Direct(ty) => Ok(ty), + DataHappKind::Disjoint(fields) => self.spv_op_type_struct( + fields.iter().map(|(&field_offset, field_happ)| { + Ok((field_offset, self.pointee_type_for_data_happ(field_happ)?)) }), [], ), - QPtrMemUsageKind::DynOffsetBase { element, stride } => { - let element_type = self.pointee_type_for_mem_usage(element)?; + DataHappKind::Repeated { element, stride } => { + let element_type = self.pointee_type_for_data_happ(element)?; - let fixed_len = usage + let fixed_len = happ .max_size .map(|size| { if !size.is_multiple_of(stride.get()) { return Err(LiftError(Diag::bug([format!( - "DynOffsetBase: size ({size}) not a multiple of stride ({stride})" + "Repeated: size ({size}) not a multiple of stride ({stride})" ) .into()]))); } @@ -397,8 +398,8 @@ struct LiftToSpvPtrInstsInFunc<'a> { // FIXME(eddyb) consider removing this and just do a full second traversal. data_inst_use_counts: EntityOrientedDenseMap, - // HACK(eddyb) this is used to avoid noise when `qptr::analyze` failed. 
- func_has_qptr_analysis_bug_diags: bool, + // HACK(eddyb) this is used to avoid noise when `mem::analyze` failed. + func_has_mem_analysis_bug_diags: bool, } struct DeferredPtrNoop { @@ -458,13 +459,13 @@ impl LiftToSpvPtrInstsInFunc<'_> { return Ok(Transformed::Unchanged); } - DataInstKind::QPtr(QPtrOp::FuncLocalVar(_mem_layout)) => { - let qptr_usage = self.lifter.find_qptr_usage_attr(data_inst_def.attrs)?; + DataInstKind::Mem(MemOp::FuncLocalVar(_mem_layout)) => { + let mem_accesses = self.lifter.find_mem_accesses_attr(data_inst_def.attrs)?; // FIXME(eddyb) validate against `mem_layout`! - let pointee_type = self.lifter.pointee_type_for_usage(qptr_usage)?; + let pointee_type = self.lifter.pointee_type_for_accesses(mem_accesses)?; DataInstDef { - attrs: self.lifter.strip_qptr_usage_attr(data_inst_def.attrs), + attrs: self.lifter.strip_mem_accesses_attr(data_inst_def.attrs), kind: DataInstKind::SpvInst(spv::Inst { opcode: wk.OpVariable, imms: [spv::Imm::Short(wk.StorageClass, wk.Function)].into_iter().collect(), @@ -751,10 +752,12 @@ impl LiftToSpvPtrInstsInFunc<'_> { output_type: Some(self.lifter.spv_ptr_type(addr_space, layout.original_type)), } } - DataInstKind::QPtr(op @ (QPtrOp::Load | QPtrOp::Store)) => { + DataInstKind::Mem(op @ (MemOp::Load | MemOp::Store)) => { + // HACK(eddyb) `_` will match multiple variants soon. + #[allow(clippy::match_wildcard_for_single_variants)] let (spv_opcode, access_type) = match op { - QPtrOp::Load => (wk.OpLoad, data_inst_def.output_type.unwrap()), - QPtrOp::Store => (wk.OpStore, type_of_val(data_inst_def.inputs[1])), + MemOp::Load => (wk.OpLoad, data_inst_def.output_type.unwrap()), + MemOp::Store => (wk.OpStore, type_of_val(data_inst_def.inputs[1])), _ => unreachable!(), }; @@ -1116,16 +1119,15 @@ impl Transformer for LiftToSpvPtrInstsInFunc<'_> { Err(LiftError(e)) => { let data_inst_def = func_at_inst.def(); - // HACK(eddyb) do not add redundant errors to `qptr::analyze` bugs. 
- self.func_has_qptr_analysis_bug_diags = self - .func_has_qptr_analysis_bug_diags + // HACK(eddyb) do not add redundant errors to `mem::analyze` bugs. + self.func_has_mem_analysis_bug_diags = self.func_has_mem_analysis_bug_diags || self.lifter.cx[data_inst_def.attrs].attrs.iter().any(|attr| { match attr { Attr::Diagnostics(diags) => { diags.0.iter().any(|diag| match diag.level { DiagLevel::Bug(loc) => { - loc.file().ends_with("qptr/analyze.rs") - || loc.file().ends_with("qptr\\analyze.rs") + loc.file().ends_with("mem/analyze.rs") + || loc.file().ends_with("mem\\analyze.rs") } _ => false, }) @@ -1134,7 +1136,7 @@ impl Transformer for LiftToSpvPtrInstsInFunc<'_> { } }); - if !self.func_has_qptr_analysis_bug_diags { + if !self.func_has_mem_analysis_bug_diags { data_inst_def.attrs.push_diag(&self.lifter.cx, e); } } diff --git a/src/qptr/lower.rs b/src/qptr/lower.rs index 409ed07..776d356 100644 --- a/src/qptr/lower.rs +++ b/src/qptr/lower.rs @@ -1,10 +1,8 @@ //! [`QPtr`](crate::TypeKind::QPtr) lowering (e.g. from SPIR-V). -// HACK(eddyb) layout code used to be in this module. -use super::layout::*; - use crate::func_at::FuncAtMut; -use crate::qptr::{QPtrAttr, QPtrOp, shapes}; +use crate::mem::{MemOp, shapes}; +use crate::qptr::{QPtrAttr, QPtrOp}; use crate::transform::{InnerInPlaceTransform, Transformed, Transformer}; use crate::{ AddrSpace, AttrSet, AttrSetDef, Const, ConstDef, ConstKind, Context, DataInst, DataInstDef, @@ -16,6 +14,9 @@ use std::cell::Cell; use std::num::NonZeroU32; use std::rc::Rc; +// HACK(eddyb) sharing layout code with other modules. +use crate::mem::layout::*; + struct LowerError(Diag); /// Context for lowering SPIR-V `OpTypePointer`s to `QPtr`s. @@ -412,7 +413,7 @@ impl LowerFromSpvPtrInstsInFunc<'_> { match self.lowerer.layout_of(var_data_type)? 
{ TypeLayout::Concrete(concrete) if concrete.mem_layout.dyn_unit_stride.is_none() => { ( - QPtrOp::FuncLocalVar(concrete.mem_layout.fixed_base).into(), + MemOp::FuncLocalVar(concrete.mem_layout.fixed_base).into(), data_inst_def.inputs.clone(), ) } @@ -424,14 +425,14 @@ impl LowerFromSpvPtrInstsInFunc<'_> { return Ok(Transformed::Unchanged); } assert_eq!(data_inst_def.inputs.len(), 1); - (QPtrOp::Load.into(), data_inst_def.inputs.clone()) + (MemOp::Load.into(), data_inst_def.inputs.clone()) } else if spv_inst.opcode == wk.OpStore { // FIXME(eddyb) support memory operands somehow. if !spv_inst.imms.is_empty() { return Ok(Transformed::Unchanged); } assert_eq!(data_inst_def.inputs.len(), 2); - (QPtrOp::Store.into(), data_inst_def.inputs.clone()) + (MemOp::Store.into(), data_inst_def.inputs.clone()) } else if spv_inst.opcode == wk.OpArrayLength { let field_idx = match spv_inst.imms[..] { [spv::Imm::Short(_, field_idx)] => field_idx, @@ -606,7 +607,7 @@ impl LowerFromSpvPtrInstsInFunc<'_> { match data_inst_def.kind { // Known semantics, no need to preserve SPIR-V pointer information. - DataInstKind::FuncCall(_) | DataInstKind::QPtr(_) => return, + DataInstKind::FuncCall(_) | DataInstKind::Mem(_) | DataInstKind::QPtr(_) => return, DataInstKind::SpvInst(_) | DataInstKind::SpvExtInst { .. } => {} } diff --git a/src/qptr/mod.rs b/src/qptr/mod.rs index da2091e..ce675eb 100644 --- a/src/qptr/mod.rs +++ b/src/qptr/mod.rs @@ -3,22 +3,17 @@ // FIXME(eddyb) consider `#[cfg(doc)] use crate::TypeKind::QPtr;` for doc comments. // FIXME(eddyb) PR description of https://github.com/EmbarkStudios/spirt/pull/24 // has more useful docs that could be copied here. +// +// FIXME(eddyb) fully update post-`mem`-split. use crate::{AddrSpace, OrdAssertEq, Type}; -use std::collections::BTreeMap; use std::num::NonZeroU32; use std::ops::Range; -use std::rc::Rc; // NOTE(eddyb) all the modules are declared here, but they're documented "inside" // (i.e. using inner doc comments). 
-pub mod analyze; -mod layout; pub mod lift; pub mod lower; -pub mod shapes; - -pub use layout::LayoutConfig; /// `QPtr`-specific attributes ([`Attr::QPtr`]). #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -52,97 +47,11 @@ pub enum QPtrAttr { addr_space: OrdAssertEq, pointee: OrdAssertEq, }, - - /// When applied to a `QPtr`-typed `GlobalVar`, `DataInst`, - /// `RegionInputDecl` or `NodeOutputDecl`, this tracks all the - /// ways in which the pointer may be used (see `QPtrUsage`). - Usage(OrdAssertEq), -} - -#[derive(Clone, PartialEq, Eq, Hash)] -pub enum QPtrUsage { - /// Used to access one or more handles (i.e. optionally indexed by - /// [`QPtrOp::HandleArrayIndex`]), which can be: - /// - `Handle::Opaque(handle_type)`: all uses involve [`QPtrOp::Load`] or - /// [`QPtrAttr::ToSpvPtrInput`], with the common type `handle_type` - /// - `Handle::Buffer(data_usage)`: carries with it `data_usage`, i.e. the - /// usage of the memory that can be accessed through [`QPtrOp::BufferData`] - Handles(shapes::Handle), - - // FIXME(eddyb) unify terminology around "concrete"/"memory"/"untyped (data)". - Memory(QPtrMemUsage), -} - -#[derive(Clone, PartialEq, Eq, Hash)] -pub struct QPtrMemUsage { - /// If present, this is a worst-case upper bound on memory accesses that may - /// be performed through this pointer. - // - // FIXME(eddyb) use proper newtypes for byte amounts. - // - // FIXME(eddyb) suboptimal naming choice, but other options are too verbose, - // including maybe using `RangeTo<_>` to explicitly indicate "exclusive". - // - // FIXME(eddyb) consider renaming such information to "extent", but that might - // be ambiguous with an offset range (as opposed to min/max of *possible* - // `offset_range.end`, i.e. "size"). 
- pub max_size: Option, - - pub kind: QPtrMemUsageKind, -} - -impl QPtrMemUsage { - pub const UNUSED: Self = Self { max_size: Some(0), kind: QPtrMemUsageKind::Unused }; -} - -#[derive(Clone, PartialEq, Eq, Hash)] -pub enum QPtrMemUsageKind { - /// Not actually used, which could be caused by pointer offsetting operations - /// with unused results, or as an intermediary state during analyses. - Unused, - - // FIXME(eddyb) replace the two leaves with e.g. `Leaf(Type, QPtrMemLeafUsage)`. - // - // - // - /// Used as a typed pointer (e.g. via unknown SPIR-V instructions), requiring - /// a specific choice of pointee type which cannot be modified, and has to be - /// reused as-is when lifting `QPtr`s back to typed pointers. - /// - /// Other overlapping uses can be merged into this one as long as they can - /// be fully expressed using the (transitive) components of this type. - StrictlyTyped(Type), - - /// Used directly to access memory (e.g. [`QPtrOp::Load`], [`QPtrOp::Store`]), - /// which can be decomposed as necessary (down to individual scalar leaves), - /// to allow maximal merging opportunities. - // - // FIXME(eddyb) track whether `Load`s and/or `Store`s are used, so that we - // can infer `NonWritable`/`NonReadable` annotations as well. - DirectAccess(Type), - - /// Used as a common base for (constant) offsetting, which requires it to have - /// its own (aggregate) type, when lifting `QPtr`s back to typed pointers. - OffsetBase(Rc>), - - /// Used as a common base for (dynamic) offsetting, which requires it to have - /// its own (array) type, when lifting `QPtr`s back to typed pointers, with - /// one single element type being repeated across the entire size. - DynOffsetBase { - // FIXME(eddyb) this feels inefficient. - element: Rc, - stride: NonZeroU32, - }, - // FIXME(eddyb) consider adding an `Union` case for driving legalization. } /// `QPtr`-specific operations ([`DataInstKind::QPtr`]). 
#[derive(Clone, PartialEq, Eq, Hash)] pub enum QPtrOp { - // HACK(eddyb) `OpVariable` replacement, which itself should not be kept as - // a `SpvInst` - once fn-local variables are lowered, this should go there. - FuncLocalVar(shapes::MemLayout), - /// Adjust a **handle array** `QPtr` (`inputs[0]`), by selecting the handle /// at the index (`inputs[1]`) from the handle array (i.e. the resulting /// `QPtr` is limited to that one handle and can't be further "moved around"). @@ -169,10 +78,7 @@ pub enum QPtrOp { // the redundant `(x * a + b - b) / a` to just `x`? // // FIXME(eddyb) actually lower `OpArrayLength` to this! - BufferDynLen { - fixed_base_size: u32, - dyn_unit_stride: NonZeroU32, - }, + BufferDynLen { fixed_base_size: u32, dyn_unit_stride: NonZeroU32 }, /// Adjust a **memory** `QPtr` (`inputs[0]`), by adding a (signed) immediate /// amount of bytes to its "address" (whether physical or conceptual). @@ -192,17 +98,4 @@ pub enum QPtrOp { // FIXME(eddyb) should this be an attribute/refinement? index_bounds: Option>, }, - - /// Read a single value from a `QPtr` (`inputs[0]`). - // - // FIXME(eddyb) limit this to memory, and scalars, maybe vectors at most. - Load, - - /// Write a single value (`inputs[1]`) to a `QPtr` (`inputs[0]`). - // - // FIXME(eddyb) limit this to memory, and scalars, maybe vectors at most. - Store, - // - // FIXME(eddyb) implement more ops! at the very least copying! - // (and lowering could ignore pointercasts, I guess?) } diff --git a/src/spv/lift.rs b/src/spv/lift.rs index c0c8348..1e7b238 100644 --- a/src/spv/lift.rs +++ b/src/spv/lift.rs @@ -198,6 +198,7 @@ impl Visitor<'_> for NeedsIdsCollector<'_> { fn visit_attr(&mut self, attr: &Attr) { match *attr { Attr::Diagnostics(_) + | Attr::Mem(_) | Attr::QPtr(_) | Attr::SpvAnnotation { .. 
} | Attr::SpvBitflagsOperand(_) => {} @@ -211,6 +212,12 @@ impl Visitor<'_> for NeedsIdsCollector<'_> { fn visit_data_inst_def(&mut self, data_inst_def: &DataInstDef) { #[allow(clippy::match_same_arms)] match data_inst_def.kind { + // FIXME(eddyb) this should be a proper `Result`-based error instead, + // and/or `spv::lift` should mutate the module for legalization. + DataInstKind::Mem(_) => { + unreachable!("`DataInstKind::Mem` should be legalized away before lifting"); + } + // FIXME(eddyb) this should be a proper `Result`-based error instead, // and/or `spv::lift` should mutate the module for legalization. DataInstKind::QPtr(_) => { @@ -1275,7 +1282,7 @@ impl LazyInst<'_, '_> { Self::DataInst { parent_func, result_id: _, data_inst_def } => { let (inst, extra_initial_id_operand) = match &data_inst_def.kind { // Disallowed while visiting. - DataInstKind::QPtr(_) => unreachable!(), + DataInstKind::Mem(_) | DataInstKind::QPtr(_) => unreachable!(), &DataInstKind::FuncCall(callee) => { (wk.OpFunctionCall.into(), Some(ids.funcs[&callee].func_id)) @@ -1554,6 +1561,7 @@ impl Module { match attr { Attr::DbgSrcLoc(_) | Attr::Diagnostics(_) + | Attr::Mem(_) | Attr::QPtr(_) | Attr::SpvBitflagsOperand(_) => {} Attr::SpvAnnotation(inst @ spv::Inst { opcode, .. }) => { diff --git a/src/transform.rs b/src/transform.rs index 74670d3..ad19033 100644 --- a/src/transform.rs +++ b/src/transform.rs @@ -1,7 +1,8 @@ //! Mutable IR traversal. 
use crate::func_at::FuncAtMut; -use crate::qptr::{self, QPtrAttr, QPtrMemUsage, QPtrMemUsageKind, QPtrOp, QPtrUsage}; +use crate::mem::{DataHapp, DataHappKind, MemAccesses, MemAttr, MemOp}; +use crate::qptr::{QPtrAttr, QPtrOp}; use crate::{ AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstDef, ConstKind, DataInst, DataInstDef, DataInstKind, DbgSrcLoc, DeclDef, EntityListIter, ExportKey, Exportee, Func, FuncDecl, @@ -343,13 +344,31 @@ impl InnerTransform for Attr { }))) } - Attr::QPtr(attr) => transform!({ + Attr::Mem(attr) => transform!({ attr -> match attr { - &QPtrAttr::ToSpvPtrInput { input_idx, pointee } => transform!({ + MemAttr::Accesses(OrdAssertEq(accesses)) => transform!({ + accesses -> match accesses { + &MemAccesses::Handles(crate::mem::shapes::Handle::Opaque(ty)) => transform!({ + ty -> transformer.transform_type_use(ty), + } => MemAccesses::Handles(crate::mem::shapes::Handle::Opaque(ty))), + MemAccesses::Handles(crate::mem::shapes::Handle::Buffer(addr_space, data_happ)) => transform!({ + data_happ -> data_happ.inner_transform_with(transformer), + } => MemAccesses::Handles(crate::mem::shapes::Handle::Buffer(*addr_space, data_happ))), + MemAccesses::Data(happ) => transform!({ + happ -> happ.inner_transform_with(transformer), + } => MemAccesses::Data(happ)), + } + } => MemAttr::Accesses(OrdAssertEq(accesses))), + } + } => Attr::Mem(attr)), + + Attr::QPtr(attr) => transform!({ + attr -> match *attr { + QPtrAttr::ToSpvPtrInput { input_idx, pointee } => transform!({ pointee -> transformer.transform_type_use(pointee.0).map(OrdAssertEq), } => QPtrAttr::ToSpvPtrInput { input_idx, pointee }), - &QPtrAttr::FromSpvPtrOutput { + QPtrAttr::FromSpvPtrOutput { addr_space, pointee, } => transform!({ @@ -358,20 +377,6 @@ impl InnerTransform for Attr { addr_space, pointee, }), - - QPtrAttr::Usage(OrdAssertEq(usage)) => transform!({ - usage -> match usage { - &QPtrUsage::Handles(qptr::shapes::Handle::Opaque(ty)) => transform!({ - ty -> transformer.transform_type_use(ty), 
- } => QPtrUsage::Handles(qptr::shapes::Handle::Opaque(ty))), - QPtrUsage::Handles(qptr::shapes::Handle::Buffer(addr_space, data_usage)) => transform!({ - data_usage -> data_usage.inner_transform_with(transformer), - } => QPtrUsage::Handles(qptr::shapes::Handle::Buffer(*addr_space, data_usage))), - QPtrUsage::Memory(usage) => transform!({ - usage -> usage.inner_transform_with(transformer), - } => QPtrUsage::Memory(usage)), - } - } => QPtrAttr::Usage(OrdAssertEq(usage))), } } => Attr::QPtr(attr)), } @@ -385,7 +390,7 @@ impl InnerTransform for Rc { } } -impl InnerTransform for QPtrMemUsage { +impl InnerTransform for DataHapp { fn inner_transform_with(&self, transformer: &mut impl Transformer) -> Transformed { let Self { max_size, kind } = self; @@ -398,28 +403,28 @@ impl InnerTransform for QPtrMemUsage { } } -impl InnerTransform for QPtrMemUsageKind { +impl InnerTransform for DataHappKind { fn inner_transform_with(&self, transformer: &mut impl Transformer) -> Transformed { match self { - Self::Unused => Transformed::Unchanged, + Self::Dead => Transformed::Unchanged, &Self::StrictlyTyped(ty) => transform!({ ty -> transformer.transform_type_use(ty), } => Self::StrictlyTyped(ty)), - &Self::DirectAccess(ty) => transform!({ + &Self::Direct(ty) => transform!({ ty -> transformer.transform_type_use(ty), - } => Self::DirectAccess(ty)), - Self::OffsetBase(entries) => transform!({ - entries -> Transformed::map_iter(entries.values(), |sub_usage| { - sub_usage.inner_transform_with(transformer) + } => Self::Direct(ty)), + Self::Disjoint(entries) => transform!({ + entries -> Transformed::map_iter(entries.values(), |sub_happ| { + sub_happ.inner_transform_with(transformer) }).map(|new_iter| { // HACK(eddyb) this is a bit inefficient but `Transformed::map_iter` // limits us here in how it handles the whole `Clone` thing. 
entries.keys().copied().zip(new_iter).collect() }).map(Rc::new) - } => Self::OffsetBase(entries)), - Self::DynOffsetBase { element, stride } => transform!({ + } => Self::Disjoint(entries)), + Self::Repeated { element, stride } => transform!({ element -> element.inner_transform_with(transformer), - } => Self::DynOffsetBase { element, stride: *stride }), + } => Self::Repeated { element, stride: *stride }), } } } @@ -506,11 +511,11 @@ impl InnerInPlaceTransform for GlobalVarDecl { transformer.transform_type_use(*type_of_ptr_to).apply_to(type_of_ptr_to); if let Some(shape) = shape { match shape { - qptr::shapes::GlobalVarShape::TypedInterface(ty) => { + crate::mem::shapes::GlobalVarShape::TypedInterface(ty) => { transformer.transform_type_use(*ty).apply_to(ty); } - qptr::shapes::GlobalVarShape::Handles { .. } - | qptr::shapes::GlobalVarShape::UntypedData(_) => {} + crate::mem::shapes::GlobalVarShape::Handles { .. } + | crate::mem::shapes::GlobalVarShape::UntypedData(_) => {} } } match addr_space { @@ -716,15 +721,15 @@ impl InnerInPlaceTransform for DataInstKind { fn inner_in_place_transform_with(&mut self, transformer: &mut impl Transformer) { match self { DataInstKind::FuncCall(func) => transformer.transform_func_use(*func).apply_to(func), + DataInstKind::Mem(op) => match op { + MemOp::FuncLocalVar(_) | MemOp::Load | MemOp::Store => {} + }, DataInstKind::QPtr(op) => match op { - QPtrOp::FuncLocalVar(_) - | QPtrOp::HandleArrayIndex + QPtrOp::HandleArrayIndex | QPtrOp::BufferData | QPtrOp::BufferDynLen { .. } | QPtrOp::Offset(_) - | QPtrOp::DynOffset { .. } - | QPtrOp::Load - | QPtrOp::Store => {} + | QPtrOp::DynOffset { .. } => {} }, DataInstKind::SpvInst(_) | DataInstKind::SpvExtInst { .. } => {} } diff --git a/src/visit.rs b/src/visit.rs index 1829c46..91694e0 100644 --- a/src/visit.rs +++ b/src/visit.rs @@ -1,7 +1,8 @@ //! Immutable IR traversal. 
use crate::func_at::FuncAt; -use crate::qptr::{self, QPtrAttr, QPtrMemUsage, QPtrMemUsageKind, QPtrOp, QPtrUsage}; +use crate::mem::{DataHapp, DataHappKind, MemAccesses, MemAttr, MemOp}; +use crate::qptr::{QPtrAttr, QPtrOp}; use crate::{ AddrSpace, Attr, AttrSet, AttrSetDef, Const, ConstDef, ConstKind, DataInstDef, DataInstKind, DbgSrcLoc, DeclDef, DiagMsgPart, EntityListIter, ExportKey, Exportee, Func, FuncDecl, @@ -245,13 +246,15 @@ impl InnerVisit for Attr { } } + Attr::Mem(attr) => match attr { + MemAttr::Accesses(accesses) => accesses.0.inner_visit_with(visitor), + }, + Attr::QPtr(attr) => match attr { QPtrAttr::ToSpvPtrInput { input_idx: _, pointee } | QPtrAttr::FromSpvPtrOutput { addr_space: _, pointee } => { visitor.visit_type_use(pointee.0); } - - QPtrAttr::Usage(usage) => usage.0.inner_visit_with(visitor), }, } } @@ -267,46 +270,46 @@ impl InnerVisit for Vec { &DiagMsgPart::Attrs(attrs) => visitor.visit_attr_set_use(attrs), &DiagMsgPart::Type(ty) => visitor.visit_type_use(ty), &DiagMsgPart::Const(ct) => visitor.visit_const_use(ct), - DiagMsgPart::QPtrUsage(usage) => usage.inner_visit_with(visitor), + DiagMsgPart::MemAccesses(accesses) => accesses.inner_visit_with(visitor), } } } } -impl InnerVisit for QPtrUsage { +impl InnerVisit for MemAccesses { fn inner_visit_with<'a>(&'a self, visitor: &mut impl Visitor<'a>) { match self { - &QPtrUsage::Handles(qptr::shapes::Handle::Opaque(ty)) => { + &MemAccesses::Handles(crate::mem::shapes::Handle::Opaque(ty)) => { visitor.visit_type_use(ty); } - QPtrUsage::Handles(qptr::shapes::Handle::Buffer(_, data_usage)) => { - data_usage.inner_visit_with(visitor); + MemAccesses::Handles(crate::mem::shapes::Handle::Buffer(_, data_happ)) => { + data_happ.inner_visit_with(visitor); } - QPtrUsage::Memory(usage) => usage.inner_visit_with(visitor), + MemAccesses::Data(happ) => happ.inner_visit_with(visitor), } } } -impl InnerVisit for QPtrMemUsage { +impl InnerVisit for DataHapp { fn inner_visit_with<'a>(&'a self, visitor: &mut impl 
Visitor<'a>) { let Self { max_size: _, kind } = self; kind.inner_visit_with(visitor); } } -impl InnerVisit for QPtrMemUsageKind { +impl InnerVisit for DataHappKind { fn inner_visit_with<'a>(&'a self, visitor: &mut impl Visitor<'a>) { match self { - Self::Unused => {} - &Self::StrictlyTyped(ty) | &Self::DirectAccess(ty) => { + Self::Dead => {} + &Self::StrictlyTyped(ty) | &Self::Direct(ty) => { visitor.visit_type_use(ty); } - Self::OffsetBase(entries) => { - for sub_usage in entries.values() { - sub_usage.inner_visit_with(visitor); + Self::Disjoint(entries) => { + for sub_happ in entries.values() { + sub_happ.inner_visit_with(visitor); } } - Self::DynOffsetBase { element, stride: _ } => { + Self::Repeated { element, stride: _ } => { element.inner_visit_with(visitor); } } @@ -369,9 +372,11 @@ impl InnerVisit for GlobalVarDecl { visitor.visit_type_use(*type_of_ptr_to); if let Some(shape) = shape { match shape { - qptr::shapes::GlobalVarShape::TypedInterface(ty) => visitor.visit_type_use(*ty), - qptr::shapes::GlobalVarShape::Handles { .. } - | qptr::shapes::GlobalVarShape::UntypedData(_) => {} + crate::mem::shapes::GlobalVarShape::TypedInterface(ty) => { + visitor.visit_type_use(*ty); + } + crate::mem::shapes::GlobalVarShape::Handles { .. } + | crate::mem::shapes::GlobalVarShape::UntypedData(_) => {} } } match addr_space { @@ -534,15 +539,15 @@ impl InnerVisit for DataInstKind { fn inner_visit_with<'a>(&'a self, visitor: &mut impl Visitor<'a>) { match self { &DataInstKind::FuncCall(func) => visitor.visit_func_use(func), + DataInstKind::Mem(op) => match *op { + MemOp::FuncLocalVar(_) | MemOp::Load | MemOp::Store => {} + }, DataInstKind::QPtr(op) => match *op { - QPtrOp::FuncLocalVar(_) - | QPtrOp::HandleArrayIndex + QPtrOp::HandleArrayIndex | QPtrOp::BufferData | QPtrOp::BufferDynLen { .. } | QPtrOp::Offset(_) - | QPtrOp::DynOffset { .. } - | QPtrOp::Load - | QPtrOp::Store => {} + | QPtrOp::DynOffset { .. 
} => {} }, DataInstKind::SpvInst(_) | DataInstKind::SpvExtInst { .. } => {} } From 219f924941a82c79a02c91088cb51b98431d6760 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:42:12 +0300 Subject: [PATCH 3/5] Add `cf` ("control-flow") module and split `cfg` into `cf::unstructured` vs `cf::structurize`. --- src/{ => cf}/cfgssa.rs | 0 src/cf/mod.rs | 25 ++ src/{cfg.rs => cf/structurize.rs} | 434 +----------------------------- src/cf/unstructured.rs | 429 +++++++++++++++++++++++++++++ src/lib.rs | 20 +- src/passes/legalize.rs | 4 +- src/print/mod.rs | 23 +- src/spv/lift.rs | 74 ++--- src/spv/lower.rs | 35 ++- src/transform.rs | 23 +- src/visit.rs | 20 +- 11 files changed, 572 insertions(+), 515 deletions(-) rename src/{ => cf}/cfgssa.rs (100%) create mode 100644 src/cf/mod.rs rename src/{cfg.rs => cf/structurize.rs} (80%) create mode 100644 src/cf/unstructured.rs diff --git a/src/cfgssa.rs b/src/cf/cfgssa.rs similarity index 100% rename from src/cfgssa.rs rename to src/cf/cfgssa.rs diff --git a/src/cf/mod.rs b/src/cf/mod.rs new file mode 100644 index 0000000..f86d118 --- /dev/null +++ b/src/cf/mod.rs @@ -0,0 +1,25 @@ +//! Control-flow abstractions and passes. +// +// FIXME(eddyb) consider moving more definitions into this module. + +use crate::spv; + +// NOTE(eddyb) all the modules are declared here, but they're documented "inside" +// (i.e. using inner doc comments). +pub mod cfgssa; +pub mod structurize; +pub mod unstructured; + +#[derive(Clone)] +pub enum SelectionKind { + /// Two-case selection based on boolean condition, i.e. `if`-`else`, with + /// the two cases being "then" and "else" (in that order). + BoolCond, + + SpvInst(spv::Inst), +} + +#[derive(Clone)] +pub enum ExitInvocationKind { + SpvInst(spv::Inst), +} diff --git a/src/cfg.rs b/src/cf/structurize.rs similarity index 80% rename from src/cfg.rs rename to src/cf/structurize.rs index 2e87a55..e1d51c3 100644 --- a/src/cfg.rs +++ b/src/cf/structurize.rs @@ -1,431 +1,22 @@ -//! 
Control-flow graph (CFG) abstractions and utilities. +//! Control-flow structurization (unstructured CFG -> structured regions). +// +// FIXME(eddyb) consider moving docs to the module level? +use crate::cf::SelectionKind; +use crate::cf::unstructured::{ + ControlFlowGraph, ControlInst, ControlInstKind, IncomingEdgeCount, LoopFinder, TraversalState, +}; use crate::transform::{InnerInPlaceTransform as _, Transformer}; use crate::{ AttrSet, Const, ConstDef, ConstKind, Context, EntityOrientedDenseMap, FuncDefBody, FxIndexMap, - FxIndexSet, Node, NodeDef, NodeKind, NodeOutputDecl, Region, RegionDef, SelectionKind, Type, - TypeKind, Value, spv, + FxIndexSet, Node, NodeDef, NodeKind, NodeOutputDecl, Region, RegionDef, Type, TypeKind, Value, + spv, }; use itertools::{Either, Itertools}; use smallvec::SmallVec; use std::mem; use std::rc::Rc; -/// The control-flow graph (CFG) of a function, as control-flow instructions -/// ([`ControlInst`]s) attached to [`Region`]s, as an "action on exit", i.e. -/// "terminator" (while intra-region control-flow is strictly structured). -#[derive(Clone, Default)] -pub struct ControlFlowGraph { - pub control_inst_on_exit_from: EntityOrientedDenseMap, - - // HACK(eddyb) this currently only comes from `OpLoopMerge`, and cannot be - // inferred (because implies too strong of an ownership/uniqueness notion). - pub loop_merge_to_loop_header: FxIndexMap, -} - -#[derive(Clone)] -pub struct ControlInst { - pub attrs: AttrSet, - - pub kind: ControlInstKind, - - pub inputs: SmallVec<[Value; 2]>, - - // FIXME(eddyb) change the inline size of this to fit most instructions. 
- pub targets: SmallVec<[Region; 4]>, - - /// `target_inputs[region][input_idx]` is the [`Value`] that - /// `Value::RegionInput { region, input_idx }` will get on entry, - /// where `region` must be appear at least once in `targets` - this is a - /// separate map instead of being part of `targets` because it reflects the - /// limitations of φ ("phi") nodes, which (unlike "basic block arguments") - /// cannot tell apart multiple edges with the same source and destination. - pub target_inputs: FxIndexMap>, -} - -#[derive(Clone)] -pub enum ControlInstKind { - /// Reaching this point in the control-flow is undefined behavior, e.g.: - /// * a `SelectBranch` case that's known to be impossible - /// * after a function call, where the function never returns - /// - /// Optimizations can take advantage of this information, to assume that any - /// necessary preconditions for reaching this point, are never met. - Unreachable, - - /// Leave the current function, optionally returning a value. - Return, - - /// Leave the current invocation, similar to returning from every function - /// call in the stack (up to and including the entry-point), but potentially - /// indicating a fatal error as well. - ExitInvocation(ExitInvocationKind), - - /// Unconditional branch to a single target. - Branch, - - /// Branch to one of several targets, chosen by a single value input. - SelectBranch(SelectionKind), -} - -#[derive(Clone)] -pub enum ExitInvocationKind { - SpvInst(spv::Inst), -} - -impl ControlFlowGraph { - /// Iterate over all [`Region`]s making up `func_def_body`'s CFG, in - /// reverse post-order (RPO). - /// - /// RPO iteration over a CFG provides certain guarantees, most importantly - /// that dominators are visited before the entire subgraph they dominate. 
- pub fn rev_post_order( - &self, - func_def_body: &FuncDefBody, - ) -> impl DoubleEndedIterator + use<> { - let mut post_order = SmallVec::<[_; 8]>::new(); - self.traverse_whole_func( - func_def_body, - &mut TraversalState { - incoming_edge_counts: EntityOrientedDenseMap::new(), - - pre_order_visit: |_| {}, - post_order_visit: |region| post_order.push(region), - - // NOTE(eddyb) this doesn't impact semantics, but combined with - // the final reversal, it should keep targets in the original - // order in the cases when they didn't get deduplicated. - reverse_targets: true, - }, - ); - post_order.into_iter().rev() - } -} - -// HACK(eddyb) this only serves to disallow accessing `private_count` field of -// `IncomingEdgeCount`. -mod sealed { - /// Opaque newtype for the count of incoming edges (into a [`Region`](crate::Region)). - /// - /// The private field prevents direct mutation or construction, forcing the - /// use of [`IncomingEdgeCount::ONE`] and addition operations to produce some - /// specific count (which would require explicit workarounds for misuse). - #[derive(Copy, Clone, Debug, Default, PartialEq, Eq)] - pub(super) struct IncomingEdgeCount(usize); - - impl IncomingEdgeCount { - pub(super) const ONE: Self = Self(1); - } - - impl std::ops::Add for IncomingEdgeCount { - type Output = Self; - fn add(self, other: Self) -> Self { - Self(self.0 + other.0) - } - } - - impl std::ops::AddAssign for IncomingEdgeCount { - fn add_assign(&mut self, other: Self) { - *self = *self + other; - } - } -} -use sealed::IncomingEdgeCount; - -struct TraversalState { - incoming_edge_counts: EntityOrientedDenseMap, - pre_order_visit: PreVisit, - post_order_visit: PostVisit, - - // FIXME(eddyb) should this be a generic parameter for "targets iterator"? 
- reverse_targets: bool, -} - -impl ControlFlowGraph { - fn traverse_whole_func( - &self, - func_def_body: &FuncDefBody, - state: &mut TraversalState, - ) { - let func_at_body = func_def_body.at_body(); - - // Quick sanity check that this is the right CFG for `func_def_body`. - assert!(std::ptr::eq(func_def_body.unstructured_cfg.as_ref().unwrap(), self)); - assert!(func_at_body.def().outputs.is_empty()); - - self.traverse(func_def_body.body, state); - } - - fn traverse( - &self, - region: Region, - state: &mut TraversalState, - ) { - // FIXME(eddyb) `EntityOrientedDenseMap` should have an `entry` API. - if let Some(existing_count) = state.incoming_edge_counts.get_mut(region) { - *existing_count += IncomingEdgeCount::ONE; - return; - } - state.incoming_edge_counts.insert(region, IncomingEdgeCount::ONE); - - (state.pre_order_visit)(region); - - let control_inst = self - .control_inst_on_exit_from - .get(region) - .expect("cfg: missing `ControlInst`, despite having left structured control-flow"); - - let targets = control_inst.targets.iter().copied(); - let targets = if state.reverse_targets { - Either::Left(targets.rev()) - } else { - Either::Right(targets) - }; - for target in targets { - self.traverse(target, state); - } - - (state.post_order_visit)(region); - } -} - -/// Minimal loop analysis, based on Tarjan's SCC (strongly connected components) -/// algorithm, applied recursively (for every level of loop nesting). -/// -/// Here "minimal" means that each loops is the smallest CFG subgraph possible -/// (excluding any control-flow paths that cannot reach a backedge and cycle), -/// i.e. each loop is a CFG SCC (strongly connected component). 
-/// -/// These "minimal loops" contrast with the "maximal loops" that the greedy -/// architecture of the structurizer would naively produce, with the main impact -/// of the difference being where loop exits (`break`s) "merge" (or "reconverge"), -/// which SPIR-V encodes via `OpLoopMerge`, and is significant for almost anything -/// where shared memory and/or subgroup ops can allow observing when invocations -/// "wait for others in the subgroup to exit the loop" (or when they fail to wait). -/// -/// This analysis was added to because of two observations wrt "reconvergence": -/// 1. syntactic loops (from some high-level language), when truly structured -/// (i.e. only using `while`/`do`-`while` exit conditions, not `break` etc.), -/// *always* map to "minimal loops" on a CFG, as the only loop exit edge is -/// built-in, and no part of the syntactic "loop body" can be its successor -/// 2. more pragmatically, compiling shader languages to SPIR-V seems to (almost?) -/// always *either* fully preserve syntactic loops (via SPIR-V `OpLoopMerge`), -/// *or* structurize CFGs in a way that produces "minimal loops", which can -/// be misleading with explicit `break`s (moving user code from just before -/// the `break` to after the loop), but is less impactful than "maximal loops" -struct LoopFinder<'a> { - cfg: &'a ControlFlowGraph, - - // FIXME(eddyb) this feels a bit inefficient (are many-exit loops rare?). - loop_header_to_exit_targets: FxIndexMap>, - - /// SCC accumulation stack, where CFG nodes collect during the depth-first - /// traversal, and are only popped when their "SCC root" (loop header) is - /// (note that multiple SCCs on the stack does *not* indicate SCC nesting, - /// but rather a path between two SCCs, i.e. a loop *following* another). - scc_stack: Vec, - /// Per-CFG-node traversal state (often just pointing to a `scc_stack` slot). 
- scc_state: EntityOrientedDenseMap, -} - -#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] -struct SccStackIdx(u32); - -#[derive(PartialEq, Eq)] -enum SccState { - /// CFG node has been reached and ended up somewhere on the `scc_stack`, - /// where it will remain until the SCC it's part of will be completed. - Pending(SccStackIdx), - - /// CFG node had been reached once, but is no longer on the `scc_stack`, its - /// parent SCC having been completed (or it wasn't in an SCC to begin with). - Complete(EventualCfgExits), -} - -/// Summary of all the ways in which a CFG node may eventually leave the CFG. -/// -// HACK(eddyb) a loop can reach a CFG subgraph that happens to always "diverge" -// (e.g. ending in `unreachable`, `ExitInvocation`, or even infinite loops, -// though those have other issues) and strictly speaking that would always be -// an edge leaving the SCC of the loop (as it can't reach a backedge), but it -// still shouldn't be treated as an exit because it doesn't reconverge to the -// rest of the function, i.e. it can't reach any `return`s, which is what this -// tracks in order to later make a more accurate decision wrt loop exits. -// -// NOTE(eddyb) only in the case where a loop *also* has non-"diverging" exits, -// do the "diverging" ones not get treated as exits, as the presence of both -// disambiguates `break`s from naturally "diverging" sections of the loop body -// (at least for CFGs built from languages without labelled `break` or `goto`, -// but even then it would be pretty convoluted to set up `break` to diverge, -// while `break some_outer_label` to reconverge to the rest of the function). -#[derive(Copy, Clone, Default, PartialEq, Eq)] -struct EventualCfgExits { - // FIXME(eddyb) do the other situations need their own flags here? 
- may_return_from_func: bool, -} - -impl std::ops::BitOr for EventualCfgExits { - type Output = Self; - fn bitor(self, other: Self) -> Self { - Self { may_return_from_func: self.may_return_from_func | other.may_return_from_func } - } -} -impl std::ops::BitOrAssign for EventualCfgExits { - fn bitor_assign(&mut self, other: Self) { - *self = *self | other; - } -} - -impl<'a> LoopFinder<'a> { - fn new(cfg: &'a ControlFlowGraph) -> Self { - Self { - cfg, - loop_header_to_exit_targets: FxIndexMap::default(), - scc_stack: vec![], - scc_state: EntityOrientedDenseMap::new(), - } - } - - /// Tarjan's SCC algorithm works by computing the "earliest" reachable node, - /// from every node (often using the name `lowlink`), which will be equal - /// to the origin node itself iff that node is an "SCC root" (loop header), - /// and always point to an "earlier" node if a cycle (via loop backedge) was - /// found from somewhere else in the SCC (i.e. from inside the loop body). - /// - /// Here we track stack indices (as the stack order is the traversal order), - /// and distinguish the acyclic case to avoid treating most nodes as self-loops. - // - // FIXME(eddyb) name of the function is a bit clunky wrt its return type. - fn find_earliest_scc_root_of( - &mut self, - node: Region, - ) -> (Option, EventualCfgExits) { - let state_entry = self.scc_state.entry(node); - if let Some(state) = &state_entry { - return match *state { - SccState::Pending(scc_stack_idx) => { - // HACK(eddyb) this means that `EventualCfgExits`s will be - // inconsistently observed across the `Pending` nodes of a - // loop body, but that is sound as it cannot feed into any - // `Complete` state until the loop header itself is complete, - // and the monotonic nature of `EventualCfgExits` means that - // the loop header will still get to see the complete picture. 
- (Some(scc_stack_idx), EventualCfgExits::default()) - } - SccState::Complete(eventual_cfg_exits) => (None, eventual_cfg_exits), - }; - } - let scc_stack_idx = SccStackIdx(self.scc_stack.len().try_into().unwrap()); - self.scc_stack.push(node); - *state_entry = Some(SccState::Pending(scc_stack_idx)); - - let control_inst = self - .cfg - .control_inst_on_exit_from - .get(node) - .expect("cfg: missing `ControlInst`, despite having left structured control-flow"); - - let mut eventual_cfg_exits = EventualCfgExits::default(); - - if let ControlInstKind::Return = control_inst.kind { - eventual_cfg_exits.may_return_from_func = true; - } - - let earliest_scc_root = control_inst - .targets - .iter() - .flat_map(|&target| { - let (earliest_scc_root_of_target, eventual_cfg_exits_of_target) = - self.find_earliest_scc_root_of(target); - eventual_cfg_exits |= eventual_cfg_exits_of_target; - - // HACK(eddyb) if one of the edges is already known to be a loop exit - // (from `OpLoopMerge` specifically), treat it almost like a backedge, - // but with the additional requirement that the loop header is already - // on the stack (i.e. this `node` is reachable from that loop header). - let root_candidate_from_loop_merge = - self.cfg.loop_merge_to_loop_header.get(&target).and_then(|&loop_header| { - match self.scc_state.get(loop_header) { - Some(&SccState::Pending(scc_stack_idx)) => Some(scc_stack_idx), - _ => None, - } - }); - - earliest_scc_root_of_target.into_iter().chain(root_candidate_from_loop_merge) - }) - .min(); - - // If this node has been chosen as the root of an SCC, complete that SCC. - if earliest_scc_root == Some(scc_stack_idx) { - let scc_start = scc_stack_idx.0 as usize; - - // It's now possible to find all the loop exits: they're all the - // edges from nodes of this SCC (loop) to nodes not in the SCC. 
- let target_is_exit = |target| { - match self.scc_state[target] { - SccState::Pending(i) => { - assert!(i >= scc_stack_idx); - false - } - SccState::Complete(eventual_cfg_exits_of_target) => { - let EventualCfgExits { may_return_from_func: loop_may_reconverge } = - eventual_cfg_exits; - let EventualCfgExits { may_return_from_func: target_may_reconverge } = - eventual_cfg_exits_of_target; - - // HACK(eddyb) see comment on `EventualCfgExits` for why - // edges leaving the SCC aren't treated as loop exits - // when they're "more divergent" than the loop itself, - // i.e. if any edges leaving the SCC can reconverge, - // (and therefore the loop as a whole can reconverge) - // only those edges are kept as loop exits. - target_may_reconverge == loop_may_reconverge - } - } - }; - self.loop_header_to_exit_targets.insert( - node, - self.scc_stack[scc_start..] - .iter() - .flat_map(|&scc_node| { - self.cfg.control_inst_on_exit_from[scc_node].targets.iter().copied() - }) - .filter(|&target| target_is_exit(target)) - .collect(), - ); - - // Find nested loops by marking *only* the loop header as complete, - // clearing loop body nodes' state, and recursing on them: all the - // nodes outside the loop (otherwise reachable from within), and the - // loop header itself, are already marked as complete, meaning that - // all exits and backedges will be ignored, and the recursion will - // only find more SCCs within the loop body (i.e. nested loops). - self.scc_state[node] = SccState::Complete(eventual_cfg_exits); - let loop_body_range = scc_start + 1..self.scc_stack.len(); - for &scc_node in &self.scc_stack[loop_body_range.clone()] { - self.scc_state.remove(scc_node); - } - for i in loop_body_range.clone() { - self.find_earliest_scc_root_of(self.scc_stack[i]); - } - assert_eq!(self.scc_stack.len(), loop_body_range.end); - - // Remove the entire SCC from the accumulation stack all at once. 
- self.scc_stack.truncate(scc_start); - - return (None, eventual_cfg_exits); - } - - // Not actually in an SCC at all, just some node outside any CFG cycles. - if earliest_scc_root.is_none() { - assert!(self.scc_stack.pop() == Some(node)); - self.scc_state[node] = SccState::Complete(eventual_cfg_exits); - } - - (earliest_scc_root, eventual_cfg_exits) - } -} - #[allow(rustdoc::private_intra_doc_links)] /// Control-flow "structurizer", which attempts to convert as much of the CFG /// as possible into structural control-flow (regions). @@ -1026,11 +617,8 @@ impl<'a> Structurizer<'a> { .unstructured_cfg .as_ref() .map(|cfg| { - let loop_header_to_exit_targets = { - let mut loop_finder = LoopFinder::new(cfg); - loop_finder.find_earliest_scc_root_of(func_def_body.body); - loop_finder.loop_header_to_exit_targets - }; + let loop_header_to_exit_targets = + LoopFinder::new(cfg).find_all_loops_starting_at(func_def_body.body); let mut state = TraversalState { incoming_edge_counts: EntityOrientedDenseMap::new(), diff --git a/src/cf/unstructured.rs b/src/cf/unstructured.rs new file mode 100644 index 0000000..de67e27 --- /dev/null +++ b/src/cf/unstructured.rs @@ -0,0 +1,429 @@ +//! Unstructured control-flow graph (CFG) abstractions and utilities. + +use crate::{ + AttrSet, EntityOrientedDenseMap, FuncDefBody, FxIndexMap, FxIndexSet, Region, Value, cf, +}; +use itertools::Either; +use smallvec::SmallVec; + +/// The control-flow graph (CFG) of a function, as control-flow instructions +/// ([`ControlInst`]s) attached to [`Region`]s, as an "action on exit", i.e. +/// "terminator" (while intra-region control-flow is strictly structured). +#[derive(Clone, Default)] +pub struct ControlFlowGraph { + pub control_inst_on_exit_from: EntityOrientedDenseMap, + + // HACK(eddyb) this currently only comes from `OpLoopMerge`, and cannot be + // inferred (because implies too strong of an ownership/uniqueness notion). 
+ pub loop_merge_to_loop_header: FxIndexMap, +} + +#[derive(Clone)] +pub struct ControlInst { + pub attrs: AttrSet, + + pub kind: ControlInstKind, + + pub inputs: SmallVec<[Value; 2]>, + + // FIXME(eddyb) change the inline size of this to fit most instructions. + pub targets: SmallVec<[Region; 4]>, + + /// `target_inputs[region][input_idx]` is the [`Value`] that + /// `Value::RegionInput { region, input_idx }` will get on entry, + /// where `region` must be appear at least once in `targets` - this is a + /// separate map instead of being part of `targets` because it reflects the + /// limitations of φ ("phi") nodes, which (unlike "basic block arguments") + /// cannot tell apart multiple edges with the same source and destination. + pub target_inputs: FxIndexMap>, +} + +#[derive(Clone)] +pub enum ControlInstKind { + /// Reaching this point in the control-flow is undefined behavior, e.g.: + /// * a `SelectBranch` case that's known to be impossible + /// * after a function call, where the function never returns + /// + /// Optimizations can take advantage of this information, to assume that any + /// necessary preconditions for reaching this point, are never met. + Unreachable, + + /// Leave the current function, optionally returning a value. + Return, + + /// Leave the current invocation, similar to returning from every function + /// call in the stack (up to and including the entry-point), but potentially + /// indicating a fatal error as well. + ExitInvocation(cf::ExitInvocationKind), + + /// Unconditional branch to a single target. + Branch, + + /// Branch to one of several targets, chosen by a single value input. + SelectBranch(cf::SelectionKind), +} + +impl ControlFlowGraph { + /// Iterate over all [`Region`]s making up `func_def_body`'s CFG, in + /// reverse post-order (RPO). + /// + /// RPO iteration over a CFG provides certain guarantees, most importantly + /// that dominators are visited before the entire subgraph they dominate. 
+ pub fn rev_post_order( + &self, + func_def_body: &FuncDefBody, + ) -> impl DoubleEndedIterator + use<> { + let mut post_order = SmallVec::<[_; 8]>::new(); + self.traverse_whole_func( + func_def_body, + &mut TraversalState { + incoming_edge_counts: EntityOrientedDenseMap::new(), + + pre_order_visit: |_| {}, + post_order_visit: |region| post_order.push(region), + + // NOTE(eddyb) this doesn't impact semantics, but combined with + // the final reversal, it should keep targets in the original + // order in the cases when they didn't get deduplicated. + reverse_targets: true, + }, + ); + post_order.into_iter().rev() + } +} + +// HACK(eddyb) this only serves to disallow accessing `IncomingEdgeCount`'s private field. +mod sealed { + /// Opaque newtype for the count of incoming edges (into a [`Region`](crate::Region)). + /// + /// The private field prevents direct mutation or construction, forcing the + /// use of [`IncomingEdgeCount::ONE`] and addition operations to produce some + /// specific count (which would require explicit workarounds for misuse). + #[derive(Copy, Clone, Debug, Default, PartialEq, Eq)] + pub struct IncomingEdgeCount(usize); + + impl IncomingEdgeCount { + pub const ONE: Self = Self(1); + } + + impl std::ops::Add for IncomingEdgeCount { + type Output = Self; + fn add(self, other: Self) -> Self { + Self(self.0 + other.0) + } + } + + impl std::ops::AddAssign for IncomingEdgeCount { + fn add_assign(&mut self, other: Self) { + *self = *self + other; + } + } +} +pub use sealed::IncomingEdgeCount; + +pub struct TraversalState { + pub incoming_edge_counts: EntityOrientedDenseMap, + pub pre_order_visit: PreVisit, + pub post_order_visit: PostVisit, + + // FIXME(eddyb) should this be a generic parameter for "targets iterator"? 
+ pub reverse_targets: bool, +} + +impl ControlFlowGraph { + pub fn traverse_whole_func( + &self, + func_def_body: &FuncDefBody, + state: &mut TraversalState, + ) { + let func_at_body = func_def_body.at_body(); + + // Quick sanity check that this is the right CFG for `func_def_body`. + assert!(std::ptr::eq(func_def_body.unstructured_cfg.as_ref().unwrap(), self)); + assert!(func_at_body.def().outputs.is_empty()); + + self.traverse(func_def_body.body, state); + } + + fn traverse( + &self, + region: Region, + state: &mut TraversalState, + ) { + // FIXME(eddyb) `EntityOrientedDenseMap` should have an `entry` API. + if let Some(existing_count) = state.incoming_edge_counts.get_mut(region) { + *existing_count += IncomingEdgeCount::ONE; + return; + } + state.incoming_edge_counts.insert(region, IncomingEdgeCount::ONE); + + (state.pre_order_visit)(region); + + let control_inst = self + .control_inst_on_exit_from + .get(region) + .expect("cfg: missing `ControlInst`, despite having left structured control-flow"); + + let targets = control_inst.targets.iter().copied(); + let targets = if state.reverse_targets { + Either::Left(targets.rev()) + } else { + Either::Right(targets) + }; + for target in targets { + self.traverse(target, state); + } + + (state.post_order_visit)(region); + } +} + +/// Minimal loop analysis, based on Tarjan's SCC (strongly connected components) +/// algorithm, applied recursively (for every level of loop nesting). +/// +/// Here "minimal" means that each loops is the smallest CFG subgraph possible +/// (excluding any control-flow paths that cannot reach a backedge and cycle), +/// i.e. each loop is a CFG SCC (strongly connected component). 
+/// +/// These "minimal loops" contrast with the "maximal loops" that the greedy +/// architecture of the structurizer would naively produce, with the main impact +/// of the difference being where loop exits (`break`s) "merge" (or "reconverge"), +/// which SPIR-V encodes via `OpLoopMerge`, and is significant for almost anything +/// where shared memory and/or subgroup ops can allow observing when invocations +/// "wait for others in the subgroup to exit the loop" (or when they fail to wait). +/// +/// This analysis was added to because of two observations wrt "reconvergence": +/// 1. syntactic loops (from some high-level language), when truly structured +/// (i.e. only using `while`/`do`-`while` exit conditions, not `break` etc.), +/// *always* map to "minimal loops" on a CFG, as the only loop exit edge is +/// built-in, and no part of the syntactic "loop body" can be its successor +/// 2. more pragmatically, compiling shader languages to SPIR-V seems to (almost?) +/// always *either* fully preserve syntactic loops (via SPIR-V `OpLoopMerge`), +/// *or* structurize CFGs in a way that produces "minimal loops", which can +/// be misleading with explicit `break`s (moving user code from just before +/// the `break` to after the loop), but is less impactful than "maximal loops" +pub struct LoopFinder<'a> { + cfg: &'a ControlFlowGraph, + + // FIXME(eddyb) this feels a bit inefficient (are many-exit loops rare?). + // FIXME(eddyb) rename this and/or wrap the value type in a newtype. + loop_header_to_exit_targets: FxIndexMap>, + + /// SCC accumulation stack, where CFG nodes collect during the depth-first + /// traversal, and are only popped when their "SCC root" (loop header) is + /// (note that multiple SCCs on the stack does *not* indicate SCC nesting, + /// but rather a path between two SCCs, i.e. a loop *following* another). + scc_stack: Vec, + /// Per-CFG-node traversal state (often just pointing to a `scc_stack` slot). 
+ scc_state: EntityOrientedDenseMap, +} + +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +struct SccStackIdx(u32); + +#[derive(PartialEq, Eq)] +enum SccState { + /// CFG node has been reached and ended up somewhere on the `scc_stack`, + /// where it will remain until the SCC it's part of will be completed. + Pending(SccStackIdx), + + /// CFG node had been reached once, but is no longer on the `scc_stack`, its + /// parent SCC having been completed (or it wasn't in an SCC to begin with). + Complete(EventualCfgExits), +} + +/// Summary of all the ways in which a CFG node may eventually leave the CFG. +/// +// HACK(eddyb) a loop can reach a CFG subgraph that happens to always "diverge" +// (e.g. ending in `unreachable`, `ExitInvocation`, or even infinite loops, +// though those have other issues) and strictly speaking that would always be +// an edge leaving the SCC of the loop (as it can't reach a backedge), but it +// still shouldn't be treated as an exit because it doesn't reconverge to the +// rest of the function, i.e. it can't reach any `return`s, which is what this +// tracks in order to later make a more accurate decision wrt loop exits. +// +// NOTE(eddyb) only in the case where a loop *also* has non-"diverging" exits, +// do the "diverging" ones not get treated as exits, as the presence of both +// disambiguates `break`s from naturally "diverging" sections of the loop body +// (at least for CFGs built from languages without labelled `break` or `goto`, +// but even then it would be pretty convoluted to set up `break` to diverge, +// while `break some_outer_label` to reconverge to the rest of the function). +#[derive(Copy, Clone, Default, PartialEq, Eq)] +struct EventualCfgExits { + // FIXME(eddyb) do the other situations need their own flags here? 
+ may_return_from_func: bool, +} + +impl std::ops::BitOr for EventualCfgExits { + type Output = Self; + fn bitor(self, other: Self) -> Self { + Self { may_return_from_func: self.may_return_from_func | other.may_return_from_func } + } +} +impl std::ops::BitOrAssign for EventualCfgExits { + fn bitor_assign(&mut self, other: Self) { + *self = *self | other; + } +} + +impl<'a> LoopFinder<'a> { + pub fn new(cfg: &'a ControlFlowGraph) -> Self { + Self { + cfg, + loop_header_to_exit_targets: FxIndexMap::default(), + scc_stack: vec![], + scc_state: EntityOrientedDenseMap::new(), + } + } + + /// Returns a map from every loop header to its set of exit targets, in the + /// control-flow (sub)graph starting at `entry`. + // + // FIXME(eddyb) reconsider this entire API (it used to be all private). + pub fn find_all_loops_starting_at( + mut self, + entry: Region, + ) -> FxIndexMap> { + self.find_earliest_scc_root_of(entry); + self.loop_header_to_exit_targets + } + + /// Tarjan's SCC algorithm works by computing the "earliest" reachable node, + /// from every node (often using the name `lowlink`), which will be equal + /// to the origin node itself iff that node is an "SCC root" (loop header), + /// and always point to an "earlier" node if a cycle (via loop backedge) was + /// found from somewhere else in the SCC (i.e. from inside the loop body). + /// + /// Here we track stack indices (as the stack order is the traversal order), + /// and distinguish the acyclic case to avoid treating most nodes as self-loops. + // + // FIXME(eddyb) name of the function is a bit clunky wrt its return type. 
+ fn find_earliest_scc_root_of( + &mut self, + node: Region, + ) -> (Option, EventualCfgExits) { + let state_entry = self.scc_state.entry(node); + if let Some(state) = &state_entry { + return match *state { + SccState::Pending(scc_stack_idx) => { + // HACK(eddyb) this means that `EventualCfgExits`s will be + // inconsistently observed across the `Pending` nodes of a + // loop body, but that is sound as it cannot feed into any + // `Complete` state until the loop header itself is complete, + // and the monotonic nature of `EventualCfgExits` means that + // the loop header will still get to see the complete picture. + (Some(scc_stack_idx), EventualCfgExits::default()) + } + SccState::Complete(eventual_cfg_exits) => (None, eventual_cfg_exits), + }; + } + let scc_stack_idx = SccStackIdx(self.scc_stack.len().try_into().unwrap()); + self.scc_stack.push(node); + *state_entry = Some(SccState::Pending(scc_stack_idx)); + + let control_inst = self + .cfg + .control_inst_on_exit_from + .get(node) + .expect("cfg: missing `ControlInst`, despite having left structured control-flow"); + + let mut eventual_cfg_exits = EventualCfgExits::default(); + + if let ControlInstKind::Return = control_inst.kind { + eventual_cfg_exits.may_return_from_func = true; + } + + let earliest_scc_root = control_inst + .targets + .iter() + .flat_map(|&target| { + let (earliest_scc_root_of_target, eventual_cfg_exits_of_target) = + self.find_earliest_scc_root_of(target); + eventual_cfg_exits |= eventual_cfg_exits_of_target; + + // HACK(eddyb) if one of the edges is already known to be a loop exit + // (from `OpLoopMerge` specifically), treat it almost like a backedge, + // but with the additional requirement that the loop header is already + // on the stack (i.e. this `node` is reachable from that loop header). 
+ let root_candidate_from_loop_merge = + self.cfg.loop_merge_to_loop_header.get(&target).and_then(|&loop_header| { + match self.scc_state.get(loop_header) { + Some(&SccState::Pending(scc_stack_idx)) => Some(scc_stack_idx), + _ => None, + } + }); + + earliest_scc_root_of_target.into_iter().chain(root_candidate_from_loop_merge) + }) + .min(); + + // If this node has been chosen as the root of an SCC, complete that SCC. + if earliest_scc_root == Some(scc_stack_idx) { + let scc_start = scc_stack_idx.0 as usize; + + // It's now possible to find all the loop exits: they're all the + // edges from nodes of this SCC (loop) to nodes not in the SCC. + let target_is_exit = |target| { + match self.scc_state[target] { + SccState::Pending(i) => { + assert!(i >= scc_stack_idx); + false + } + SccState::Complete(eventual_cfg_exits_of_target) => { + let EventualCfgExits { may_return_from_func: loop_may_reconverge } = + eventual_cfg_exits; + let EventualCfgExits { may_return_from_func: target_may_reconverge } = + eventual_cfg_exits_of_target; + + // HACK(eddyb) see comment on `EventualCfgExits` for why + // edges leaving the SCC aren't treated as loop exits + // when they're "more divergent" than the loop itself, + // i.e. if any edges leaving the SCC can reconverge, + // (and therefore the loop as a whole can reconverge) + // only those edges are kept as loop exits. + target_may_reconverge == loop_may_reconverge + } + } + }; + self.loop_header_to_exit_targets.insert( + node, + self.scc_stack[scc_start..] 
+ .iter() + .flat_map(|&scc_node| { + self.cfg.control_inst_on_exit_from[scc_node].targets.iter().copied() + }) + .filter(|&target| target_is_exit(target)) + .collect(), + ); + + // Find nested loops by marking *only* the loop header as complete, + // clearing loop body nodes' state, and recursing on them: all the + // nodes outside the loop (otherwise reachable from within), and the + // loop header itself, are already marked as complete, meaning that + // all exits and backedges will be ignored, and the recursion will + // only find more SCCs within the loop body (i.e. nested loops). + self.scc_state[node] = SccState::Complete(eventual_cfg_exits); + let loop_body_range = scc_start + 1..self.scc_stack.len(); + for &scc_node in &self.scc_stack[loop_body_range.clone()] { + self.scc_state.remove(scc_node); + } + for i in loop_body_range.clone() { + self.find_earliest_scc_root_of(self.scc_stack[i]); + } + assert_eq!(self.scc_stack.len(), loop_body_range.end); + + // Remove the entire SCC from the accumulation stack all at once. + self.scc_stack.truncate(scc_start); + + return (None, eventual_cfg_exits); + } + + // Not actually in an SCC at all, just some node outside any CFG cycles. + if earliest_scc_root.is_none() { + assert!(self.scc_stack.pop() == Some(node)); + self.scc_state[node] = SccState::Complete(eventual_cfg_exits); + } + + (earliest_scc_root, eventual_cfg_exits) + } +} diff --git a/src/lib.rs b/src/lib.rs index 2dc726d..029e188 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -153,8 +153,7 @@ // NOTE(eddyb) all the modules are declared here, but they're documented "inside" // (i.e. using inner doc comments). -pub mod cfg; -pub mod cfgssa; +pub mod cf; mod context; pub mod func_at; pub mod print; @@ -717,7 +716,9 @@ pub struct FuncDefBody { /// When present, it starts at `body` (more specifically, its exit), /// effectively replacing the structured return `body` otherwise implies, /// with `body` (or rather, its `children`) always being fully structured. 
- pub unstructured_cfg: Option, + // + // FIXME(eddyb) replace this with a new `NodeKind` variant. + pub unstructured_cfg: Option, } /// Entity handle for a [`RegionDef`](crate::RegionDef) @@ -881,7 +882,7 @@ pub enum NodeKind { /// /// This corresponds to "gamma" (`γ`) nodes in (R)VSDG, though those are /// sometimes limited only to a two-way selection on a boolean condition. - Select { kind: SelectionKind, scrutinee: Value, cases: SmallVec<[Region; 2]> }, + Select { kind: cf::SelectionKind, scrutinee: Value, cases: SmallVec<[Region; 2]> }, /// Execute `body` repeatedly, until `repeat_condition` evaluates to `false`. /// @@ -910,7 +911,7 @@ pub enum NodeKind { // // FIXME(eddyb) make this less shader-controlflow-centric. ExitInvocation { - kind: cfg::ExitInvocationKind, + kind: cf::ExitInvocationKind, // FIXME(eddyb) centralize `Value` inputs across `Node`s, // and only use stricter types for building/traversing the IR. @@ -918,15 +919,6 @@ pub enum NodeKind { }, } -#[derive(Clone)] -pub enum SelectionKind { - /// Two-case selection based on boolean condition, i.e. `if`-`else`, with - /// the two cases being "then" and "else" (in that order). - BoolCond, - - SpvInst(spv::Inst), -} - /// Entity handle for a [`DataInstDef`](crate::DataInstDef) (a leaf instruction). pub use context::DataInst; diff --git a/src/passes/legalize.rs b/src/passes/legalize.rs index 79d9bcf..3f4cf5b 100644 --- a/src/passes/legalize.rs +++ b/src/passes/legalize.rs @@ -1,5 +1,5 @@ use crate::visit::{InnerVisit, Visitor}; -use crate::{AttrSet, Const, Context, DeclDef, Func, FxIndexSet, GlobalVar, Module, Type, cfg}; +use crate::{AttrSet, Const, Context, DeclDef, Func, FxIndexSet, GlobalVar, Module, Type, cf}; /// Apply the [`cfg::Structurizer`] algorithm to all function definitions in `module`. 
pub fn structurize_func_cfgs(module: &mut Module) { @@ -22,7 +22,7 @@ pub fn structurize_func_cfgs(module: &mut Module) { for &func in &collector.seen_funcs { if let DeclDef::Present(func_def_body) = &mut module.funcs[func].def { - cfg::Structurizer::new(cx, func_def_body).structurize_func(); + cf::structurize::Structurizer::new(cx, func_def_body).structurize_func(); } } } diff --git a/src/print/mod.rs b/src/print/mod.rs index bafc5aa..e20ed76 100644 --- a/src/print/mod.rs +++ b/src/print/mod.rs @@ -19,6 +19,7 @@ #![allow(unstable_name_collisions)] use itertools::Itertools as _; +use crate::cf::{self, SelectionKind}; use crate::func_at::FuncAt; use crate::mem::{DataHapp, DataHappKind, MemAccesses, MemAttr, MemOp}; use crate::print::multiversion::Versions; @@ -30,8 +31,7 @@ use crate::{ EntityOrientedDenseMap, ExportKey, Exportee, Func, FuncDecl, FuncDefBody, FuncParam, FxIndexMap, FxIndexSet, GlobalVar, GlobalVarDecl, GlobalVarDefBody, Import, InternedStr, Module, ModuleDebugInfo, ModuleDialect, Node, NodeDef, NodeKind, NodeOutputDecl, OrdAssertEq, - Region, RegionDef, RegionInputDecl, SelectionKind, Type, TypeDef, TypeKind, TypeOrConst, Value, - cfg, spv, + Region, RegionDef, RegionInputDecl, Type, TypeDef, TypeKind, TypeOrConst, Value, spv, }; use arrayvec::ArrayVec; use itertools::Either; @@ -3858,7 +3858,7 @@ impl Print for FuncAt<'_, Node> { ]) } NodeKind::ExitInvocation { - kind: cfg::ExitInvocationKind::SpvInst(spv::Inst { opcode, imms }), + kind: cf::ExitInvocationKind::SpvInst(spv::Inst { opcode, imms }), inputs, } => printer.pretty_spv_inst( kw_style, @@ -4274,7 +4274,7 @@ impl Print for FuncAt<'_, DataInst> { } } -impl Print for cfg::ControlInst { +impl Print for cf::unstructured::ControlInst { type Output = pretty::Fragment; fn print(&self, printer: &Printer<'_>) -> pretty::Fragment { let Self { attrs, kind, inputs, targets, target_inputs } = self; @@ -4300,12 +4300,12 @@ impl Print for cfg::ControlInst { }); let def = match kind { - 
cfg::ControlInstKind::Unreachable => { + cf::unstructured::ControlInstKind::Unreachable => { // FIXME(eddyb) use `targets.is_empty()` when that is stabilized. assert!(targets.len() == 0 && inputs.is_empty()); kw("unreachable") } - cfg::ControlInstKind::Return => { + cf::unstructured::ControlInstKind::Return => { // FIXME(eddyb) use `targets.is_empty()` when that is stabilized. assert!(targets.len() == 0); match inputs[..] { @@ -4314,10 +4314,9 @@ impl Print for cfg::ControlInst { _ => unreachable!(), } } - cfg::ControlInstKind::ExitInvocation(cfg::ExitInvocationKind::SpvInst(spv::Inst { - opcode, - imms, - })) => { + cf::unstructured::ControlInstKind::ExitInvocation(cf::ExitInvocationKind::SpvInst( + spv::Inst { opcode, imms }, + )) => { // FIXME(eddyb) use `targets.is_empty()` when that is stabilized. assert!(targets.len() == 0); printer.pretty_spv_inst( @@ -4328,12 +4327,12 @@ impl Print for cfg::ControlInst { ) } - cfg::ControlInstKind::Branch => { + cf::unstructured::ControlInstKind::Branch => { assert_eq!((targets.len(), inputs.len()), (1, 0)); targets.next().unwrap() } - cfg::ControlInstKind::SelectBranch(kind) => { + cf::unstructured::ControlInstKind::SelectBranch(kind) => { assert_eq!(inputs.len(), 1); kind.print_with_scrutinee_and_cases(printer, kw_style, inputs[0], targets) } diff --git a/src/spv/lift.rs b/src/spv/lift.rs index 1e7b238..7628aa2 100644 --- a/src/spv/lift.rs +++ b/src/spv/lift.rs @@ -1,5 +1,6 @@ //! SPIR-T to SPIR-V lifting. 
+use crate::cf::{self, SelectionKind}; use crate::func_at::FuncAt; use crate::spv::{self, spec}; use crate::visit::{InnerVisit, Visitor}; @@ -7,8 +8,8 @@ use crate::{ AddrSpace, Attr, AttrSet, Const, ConstDef, ConstKind, Context, DataInst, DataInstDef, DataInstKind, DbgSrcLoc, DeclDef, EntityList, ExportKey, Exportee, Func, FuncDecl, FuncParam, FxIndexMap, FxIndexSet, GlobalVar, GlobalVarDefBody, Import, Module, ModuleDebugInfo, - ModuleDialect, Node, NodeKind, NodeOutputDecl, OrdAssertEq, Region, RegionInputDecl, - SelectionKind, Type, TypeDef, TypeKind, TypeOrConst, Value, cfg, + ModuleDialect, Node, NodeKind, NodeOutputDecl, OrdAssertEq, Region, RegionInputDecl, Type, + TypeDef, TypeKind, TypeOrConst, Value, }; use rustc_hash::FxHashMap; use smallvec::SmallVec; @@ -262,7 +263,7 @@ struct FuncLifting<'a> { /// What determines the values for [`Value::RegionInput`]s, for a specific /// region (effectively the subset of "region parents" that support inputs). /// -/// Note that this is not used when a [`cfg::ControlInst`] has `target_inputs`, +/// Note that this is not used when a [`cf::unstructured::ControlInst`] has `target_inputs`, /// and the target [`Region`] itself has phis for its `inputs`. enum RegionInputsSource { FuncParams, @@ -299,7 +300,7 @@ struct Phi { default_value: Option, } -/// Similar to [`cfg::ControlInst`], except: +/// Similar to [`cf::unstructured::ControlInst`], except: /// * `targets` use [`CfgPoint`]s instead of [`Region`]s, to be able to /// reach any of the SPIR-V blocks being created during lifting /// * φ ("phi") values can be provided for targets regardless of "which side" of @@ -310,7 +311,7 @@ struct Phi { struct Terminator<'a> { attrs: AttrSet, - kind: Cow<'a, cfg::ControlInstKind>, + kind: Cow<'a, cf::unstructured::ControlInstKind>, // FIXME(eddyb) use `Cow` or something, but ideally the "owned" case always // has at most one input, so allocating a whole `Vec` for that seems unwise. 
@@ -640,8 +641,13 @@ impl<'a> FuncLifting<'a> { .as_ref() .and_then(|cfg| cfg.control_inst_on_exit_from.get(region)); if let Some(terminator) = unstructured_terminator { - let cfg::ControlInst { attrs, kind, inputs, targets, target_inputs } = - terminator; + let cf::unstructured::ControlInst { + attrs, + kind, + inputs, + targets, + target_inputs, + } = terminator; Terminator { attrs: *attrs, kind: Cow::Borrowed(kind), @@ -664,7 +670,7 @@ impl<'a> FuncLifting<'a> { assert!(region == func_def_body.body); Terminator { attrs: AttrSet::default(), - kind: Cow::Owned(cfg::ControlInstKind::Return), + kind: Cow::Owned(cf::unstructured::ControlInstKind::Return), inputs: func_def_body.at_body().def().outputs.clone(), targets: [].into_iter().collect(), target_phi_values: FxIndexMap::default(), @@ -683,7 +689,9 @@ impl<'a> FuncLifting<'a> { NodeKind::Select { kind, scrutinee, cases } => Terminator { attrs: AttrSet::default(), - kind: Cow::Owned(cfg::ControlInstKind::SelectBranch(kind.clone())), + kind: Cow::Owned(cf::unstructured::ControlInstKind::SelectBranch( + kind.clone(), + )), inputs: [*scrutinee].into_iter().collect(), targets: cases .iter() @@ -696,7 +704,7 @@ impl<'a> FuncLifting<'a> { NodeKind::Loop { initial_inputs: _, body, repeat_condition: _ } => { Terminator { attrs: AttrSet::default(), - kind: Cow::Owned(cfg::ControlInstKind::Branch), + kind: Cow::Owned(cf::unstructured::ControlInstKind::Branch), inputs: [].into_iter().collect(), targets: [CfgPoint::RegionEntry(*body)].into_iter().collect(), target_phi_values: FxIndexMap::default(), @@ -716,7 +724,9 @@ impl<'a> FuncLifting<'a> { NodeKind::ExitInvocation { kind, inputs } => Terminator { attrs: AttrSet::default(), - kind: Cow::Owned(cfg::ControlInstKind::ExitInvocation(kind.clone())), + kind: Cow::Owned(cf::unstructured::ControlInstKind::ExitInvocation( + kind.clone(), + )), inputs: inputs.clone(), targets: [].into_iter().collect(), target_phi_values: FxIndexMap::default(), @@ -743,7 +753,7 @@ impl<'a> 
FuncLifting<'a> { NodeKind::Select { .. } => Terminator { attrs: AttrSet::default(), - kind: Cow::Owned(cfg::ControlInstKind::Branch), + kind: Cow::Owned(cf::unstructured::ControlInstKind::Branch), inputs: [].into_iter().collect(), targets: [parent_exit].into_iter().collect(), target_phi_values: region_outputs @@ -775,7 +785,7 @@ impl<'a> FuncLifting<'a> { if is_infinite_loop { Terminator { attrs: AttrSet::default(), - kind: Cow::Owned(cfg::ControlInstKind::Branch), + kind: Cow::Owned(cf::unstructured::ControlInstKind::Branch), inputs: [].into_iter().collect(), targets: [backedge].into_iter().collect(), target_phi_values, @@ -784,9 +794,11 @@ impl<'a> FuncLifting<'a> { } else { Terminator { attrs: AttrSet::default(), - kind: Cow::Owned(cfg::ControlInstKind::SelectBranch( - SelectionKind::BoolCond, - )), + kind: Cow::Owned( + cf::unstructured::ControlInstKind::SelectBranch( + SelectionKind::BoolCond, + ), + ), inputs: [repeat_condition].into_iter().collect(), targets: [backedge, parent_exit].into_iter().collect(), target_phi_values, @@ -801,7 +813,7 @@ impl<'a> FuncLifting<'a> { // implied edge from a `Block`'s `Entry` to its `Exit`). (_, Some(succ_cursor)) => Terminator { attrs: AttrSet::default(), - kind: Cow::Owned(cfg::ControlInstKind::Branch), + kind: Cow::Owned(cf::unstructured::ControlInstKind::Branch), inputs: [].into_iter().collect(), targets: [succ_cursor.point].into_iter().collect(), target_phi_values: FxIndexMap::default(), @@ -874,8 +886,10 @@ impl<'a> FuncLifting<'a> { // SPIR-V allows their targets to just be the whole merge block // (the same one that `OpSelectionMerge` describes). 
let block = &blocks[block_idx]; - if let (cfg::ControlInstKind::SelectBranch(_), Some(Merge::Selection(merge_point))) = - (&*block.terminator.kind, block.terminator.merge) + if let ( + cf::unstructured::ControlInstKind::SelectBranch(_), + Some(Merge::Selection(merge_point)), + ) = (&*block.terminator.kind, block.terminator.merge) { for target_idx in 0..block.terminator.targets.len() { let block = &blocks[block_idx]; @@ -902,7 +916,7 @@ impl<'a> FuncLifting<'a> { (phis.is_empty() && insts.iter().all(|insts| insts.is_empty()) && *attrs == AttrSet::default() - && matches!(**kind, cfg::ControlInstKind::Branch) + && matches!(**kind, cf::unstructured::ControlInstKind::Branch) && inputs.is_empty() && targets.len() == 1 && target_phi_values.is_empty() @@ -927,7 +941,7 @@ impl<'a> FuncLifting<'a> { &block.terminator; (*attrs == AttrSet::default() - && matches!(**kind, cfg::ControlInstKind::Branch) + && matches!(**kind, cf::unstructured::ControlInstKind::Branch) && inputs.is_empty() && targets.len() == 1 && target_phi_values.is_empty() @@ -953,7 +967,7 @@ impl<'a> FuncLifting<'a> { new_terminator, Terminator { attrs: Default::default(), - kind: Cow::Owned(cfg::ControlInstKind::Unreachable), + kind: Cow::Owned(cf::unstructured::ControlInstKind::Unreachable), inputs: Default::default(), targets: Default::default(), target_phi_values: Default::default(), @@ -1332,26 +1346,26 @@ impl LazyInst<'_, '_> { }, Self::Terminator { parent_func, terminator } => { let inst = match &*terminator.kind { - cfg::ControlInstKind::Unreachable => wk.OpUnreachable.into(), - cfg::ControlInstKind::Return => { + cf::unstructured::ControlInstKind::Unreachable => wk.OpUnreachable.into(), + cf::unstructured::ControlInstKind::Return => { if terminator.inputs.is_empty() { wk.OpReturn.into() } else { wk.OpReturnValue.into() } } - cfg::ControlInstKind::ExitInvocation(cfg::ExitInvocationKind::SpvInst( + cf::unstructured::ControlInstKind::ExitInvocation( + cf::ExitInvocationKind::SpvInst(inst), + ) + | 
cf::unstructured::ControlInstKind::SelectBranch(SelectionKind::SpvInst( inst, )) => inst.clone(), - cfg::ControlInstKind::Branch => wk.OpBranch.into(), + cf::unstructured::ControlInstKind::Branch => wk.OpBranch.into(), - cfg::ControlInstKind::SelectBranch(SelectionKind::BoolCond) => { + cf::unstructured::ControlInstKind::SelectBranch(SelectionKind::BoolCond) => { wk.OpBranchConditional.into() } - cfg::ControlInstKind::SelectBranch(SelectionKind::SpvInst(inst)) => { - inst.clone() - } }; spv::InstWithIds { without_ids: inst, diff --git a/src/spv/lower.rs b/src/spv/lower.rs index a2902c4..3f46297 100644 --- a/src/spv/lower.rs +++ b/src/spv/lower.rs @@ -1,13 +1,14 @@ //! SPIR-V to SPIR-T lowering. +use crate::cf::{self, SelectionKind}; use crate::spv::{self, spec}; // FIXME(eddyb) import more to avoid `crate::` everywhere. use crate::{ AddrSpace, Attr, AttrSet, Const, ConstDef, ConstKind, Context, DataInstDef, DataInstKind, DbgSrcLoc, DeclDef, Diag, EntityDefs, EntityList, ExportKey, Exportee, Func, FuncDecl, FuncDefBody, FuncParam, FxIndexMap, GlobalVarDecl, GlobalVarDefBody, Import, InternedStr, - Module, NodeDef, NodeKind, Region, RegionDef, RegionInputDecl, SelectionKind, Type, TypeDef, - TypeKind, TypeOrConst, Value, cfg, print, + Module, NodeDef, NodeKind, Region, RegionDef, RegionInputDecl, Type, TypeDef, TypeKind, + TypeOrConst, Value, print, }; use rustc_hash::FxHashMap; use smallvec::SmallVec; @@ -750,7 +751,7 @@ impl Module { nodes: Default::default(), data_insts: Default::default(), body, - unstructured_cfg: Some(cfg::ControlFlowGraph::default()), + unstructured_cfg: Some(cf::unstructured::ControlFlowGraph::default()), }) } }; @@ -867,7 +868,7 @@ impl Module { const SPIRT_CFGSSA_UNDOMINATE: bool = true; SPIRT_CFGSSA_UNDOMINATE.then(|| { - let mut def_map = crate::cfgssa::DefMap::new(); + let mut def_map = cf::cfgssa::DefMap::new(); // HACK(eddyb) allow e.g. `OpFunctionParameter` to // be treated like `OpPhi`s of the entry block. 
@@ -1036,7 +1037,7 @@ impl Module { let mut cfgssa_use_accumulator = cfgssa_def_map .as_ref() .filter(|_| func_def_body.is_some()) - .map(crate::cfgssa::UseAccumulator::new); + .map(cf::cfgssa::UseAccumulator::new); if let Some(use_acc) = &mut cfgssa_use_accumulator { // HACK(eddyb) ensure e.g. `OpFunctionParameter` // are treated like `OpPhi`s of the entry block. @@ -1380,22 +1381,22 @@ impl Module { let kind = if opcode == wk.OpUnreachable { assert!(targets.is_empty() && inputs.is_empty()); - cfg::ControlInstKind::Unreachable + cf::unstructured::ControlInstKind::Unreachable } else if [wk.OpReturn, wk.OpReturnValue].contains(&opcode) { assert!(targets.is_empty() && inputs.len() <= 1); - cfg::ControlInstKind::Return + cf::unstructured::ControlInstKind::Return } else if targets.is_empty() { - cfg::ControlInstKind::ExitInvocation(cfg::ExitInvocationKind::SpvInst( - raw_inst.without_ids.clone(), - )) + cf::unstructured::ControlInstKind::ExitInvocation( + cf::ExitInvocationKind::SpvInst(raw_inst.without_ids.clone()), + ) } else if opcode == wk.OpBranch { assert_eq!((targets.len(), inputs.len()), (1, 0)); - cfg::ControlInstKind::Branch + cf::unstructured::ControlInstKind::Branch } else if opcode == wk.OpBranchConditional { assert_eq!((targets.len(), inputs.len()), (2, 1)); - cfg::ControlInstKind::SelectBranch(SelectionKind::BoolCond) + cf::unstructured::ControlInstKind::SelectBranch(SelectionKind::BoolCond) } else if opcode == wk.OpSwitch { - cfg::ControlInstKind::SelectBranch(SelectionKind::SpvInst( + cf::unstructured::ControlInstKind::SelectBranch(SelectionKind::SpvInst( raw_inst.without_ids.clone(), )) } else { @@ -1409,7 +1410,13 @@ impl Module { .control_inst_on_exit_from .insert( current_block.region, - cfg::ControlInst { attrs, kind, inputs, targets, target_inputs }, + cf::unstructured::ControlInst { + attrs, + kind, + inputs, + targets, + target_inputs, + }, ); } else if opcode == wk.OpPhi { if !current_block_region_def.children.is_empty() { diff --git 
a/src/transform.rs b/src/transform.rs index ad19033..e7c9492 100644 --- a/src/transform.rs +++ b/src/transform.rs @@ -1,5 +1,6 @@ //! Mutable IR traversal. +use crate::cf::{self, SelectionKind}; use crate::func_at::FuncAtMut; use crate::mem::{DataHapp, DataHappKind, MemAccesses, MemAttr, MemOp}; use crate::qptr::{QPtrAttr, QPtrOp}; @@ -8,8 +9,7 @@ use crate::{ DataInstKind, DbgSrcLoc, DeclDef, EntityListIter, ExportKey, Exportee, Func, FuncDecl, FuncDefBody, FuncParam, GlobalVar, GlobalVarDecl, GlobalVarDefBody, Import, Module, ModuleDebugInfo, ModuleDialect, Node, NodeDef, NodeKind, NodeOutputDecl, OrdAssertEq, Region, - RegionDef, RegionInputDecl, SelectionKind, Type, TypeDef, TypeKind, TypeOrConst, Value, cfg, - spv, + RegionDef, RegionInputDecl, Type, TypeDef, TypeKind, TypeOrConst, Value, spv, }; use std::cmp::Ordering; use std::rc::Rc; @@ -654,7 +654,7 @@ impl InnerInPlaceTransform for FuncAtMut<'_, Node> { transformer.transform_value_use(scrutinee).apply_to(scrutinee); } NodeKind::Loop { initial_inputs: inputs, body: _, repeat_condition: _ } - | NodeKind::ExitInvocation { kind: cfg::ExitInvocationKind::SpvInst(_), inputs } => { + | NodeKind::ExitInvocation { kind: cf::ExitInvocationKind::SpvInst(_), inputs } => { for v in inputs { transformer.transform_value_use(v).apply_to(v); } @@ -674,8 +674,7 @@ impl InnerInPlaceTransform for FuncAtMut<'_, Node> { // Fully handled above, before recursing into any child regions. 
NodeKind::Block { insts: _ } | NodeKind::Select { kind: _, scrutinee: _, cases: _ } - | NodeKind::ExitInvocation { kind: cfg::ExitInvocationKind::SpvInst(_), inputs: _ } => { - } + | NodeKind::ExitInvocation { kind: cf::ExitInvocationKind::SpvInst(_), inputs: _ } => {} NodeKind::Loop { initial_inputs: _, body: _, repeat_condition } => { transformer.transform_value_use(repeat_condition).apply_to(repeat_condition); @@ -736,17 +735,19 @@ impl InnerInPlaceTransform for DataInstKind { } } -impl InnerInPlaceTransform for cfg::ControlInst { +impl InnerInPlaceTransform for cf::unstructured::ControlInst { fn inner_in_place_transform_with(&mut self, transformer: &mut impl Transformer) { let Self { attrs, kind, inputs, targets: _, target_inputs } = self; transformer.transform_attr_set_use(*attrs).apply_to(attrs); match kind { - cfg::ControlInstKind::Unreachable - | cfg::ControlInstKind::Return - | cfg::ControlInstKind::ExitInvocation(cfg::ExitInvocationKind::SpvInst(_)) - | cfg::ControlInstKind::Branch - | cfg::ControlInstKind::SelectBranch( + cf::unstructured::ControlInstKind::Unreachable + | cf::unstructured::ControlInstKind::Return + | cf::unstructured::ControlInstKind::ExitInvocation(cf::ExitInvocationKind::SpvInst( + _, + )) + | cf::unstructured::ControlInstKind::Branch + | cf::unstructured::ControlInstKind::SelectBranch( SelectionKind::BoolCond | SelectionKind::SpvInst(_), ) => {} } diff --git a/src/visit.rs b/src/visit.rs index 91694e0..9ebb666 100644 --- a/src/visit.rs +++ b/src/visit.rs @@ -1,5 +1,6 @@ //! Immutable IR traversal. 
+use crate::cf::{self, SelectionKind}; use crate::func_at::FuncAt; use crate::mem::{DataHapp, DataHappKind, MemAccesses, MemAttr, MemOp}; use crate::qptr::{QPtrAttr, QPtrOp}; @@ -8,8 +9,7 @@ use crate::{ DbgSrcLoc, DeclDef, DiagMsgPart, EntityListIter, ExportKey, Exportee, Func, FuncDecl, FuncDefBody, FuncParam, GlobalVar, GlobalVarDecl, GlobalVarDefBody, Import, Module, ModuleDebugInfo, ModuleDialect, Node, NodeDef, NodeKind, NodeOutputDecl, OrdAssertEq, Region, - RegionDef, RegionInputDecl, SelectionKind, Type, TypeDef, TypeKind, TypeOrConst, Value, cfg, - spv, + RegionDef, RegionInputDecl, Type, TypeDef, TypeKind, TypeOrConst, Value, spv, }; // FIXME(eddyb) `Sized` bound shouldn't be needed but removing it requires @@ -499,7 +499,7 @@ impl<'a> FuncAt<'a, Node> { visitor.visit_region_def(self.at(*body)); visitor.visit_value_use(repeat_condition); } - NodeKind::ExitInvocation { kind: cfg::ExitInvocationKind::SpvInst(_), inputs } => { + NodeKind::ExitInvocation { kind: cf::ExitInvocationKind::SpvInst(_), inputs } => { for v in inputs { visitor.visit_value_use(v); } @@ -554,17 +554,19 @@ impl InnerVisit for DataInstKind { } } -impl InnerVisit for cfg::ControlInst { +impl InnerVisit for cf::unstructured::ControlInst { fn inner_visit_with<'a>(&'a self, visitor: &mut impl Visitor<'a>) { let Self { attrs, kind, inputs, targets: _, target_inputs } = self; visitor.visit_attr_set_use(*attrs); match kind { - cfg::ControlInstKind::Unreachable - | cfg::ControlInstKind::Return - | cfg::ControlInstKind::ExitInvocation(cfg::ExitInvocationKind::SpvInst(_)) - | cfg::ControlInstKind::Branch - | cfg::ControlInstKind::SelectBranch( + cf::unstructured::ControlInstKind::Unreachable + | cf::unstructured::ControlInstKind::Return + | cf::unstructured::ControlInstKind::ExitInvocation(cf::ExitInvocationKind::SpvInst( + _, + )) + | cf::unstructured::ControlInstKind::Branch + | cf::unstructured::ControlInstKind::SelectBranch( SelectionKind::BoolCond | SelectionKind::SpvInst(_), ) => {} } From 
335571abf92c0b6001fbb6a8461d8bf9961d86cd Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:42:12 +0300 Subject: [PATCH 4/5] spv: minimal `OpSpecConstantOp` support. --- src/print/mod.rs | 2 +- src/spv/print.rs | 41 ++++++++++++++++++++++++++++++++++++++++- src/spv/read.rs | 26 ++++++++++++++++++++++++++ src/spv/spec.rs | 1 + src/spv/write.rs | 27 +++++++++++++++++++++++++++ 5 files changed, 95 insertions(+), 2 deletions(-) diff --git a/src/print/mod.rs b/src/print/mod.rs index e20ed76..0cf2e34 100644 --- a/src/print/mod.rs +++ b/src/print/mod.rs @@ -1803,7 +1803,7 @@ impl Printer<'_> { fn sanitize_spv_operand_name<'b>(&self, name: &'b str) -> Option> { Some(name).and_then(|name| { // HACK(eddyb) some operand names are useless. - if name == "Type" + if matches!(name, "Opcode" | "Operand" | "Type") || name .strip_prefix("Operand ") .is_some_and(|s| s.chars().all(|c| c.is_ascii_digit())) diff --git a/src/spv/print.rs b/src/spv/print.rs index 1fc99dd..714b064 100644 --- a/src/spv/print.rs +++ b/src/spv/print.rs @@ -77,6 +77,9 @@ impl TokensForOperand { // FIXME(eddyb) keep a `&'static spec::Spec` if that can even speed up anything. struct OperandPrinter, ID, IDS: Iterator> { + // FIXME(eddyb) use a field like this to interpret `Opcode`/`OperandKind`, too. + wk: &'static spv::spec::WellKnown, + /// Input immediate operands to print from (may be grouped e.g. into literals). imms: iter::Peekable, @@ -123,7 +126,41 @@ impl, ID, IDS: Iterator> OperandPrint let def = kind.def(); assert!(matches!(def, spec::OperandKindDef::Literal { .. 
})); - let literal_token = if kind == spec::Spec::get().well_known.LiteralString { + let literal_token = if kind == self.wk.LiteralSpecConstantOpInteger { + assert_eq!(words.len(), 1); + let (_, inner_name, inner_def) = match u16::try_from(first_word) + .ok() + .and_then(spec::Opcode::try_from_u16_with_name_and_def) + { + Some(opcode_name_and_def) => opcode_name_and_def, + None => { + self.out.tokens.push(Token::Error(format!( + "/* {first_word} not a valid `OpSpecConstantOp` opcode */" + ))); + return; + } + }; + + // FIXME(eddyb) deduplicate this with `enumerant_params`. + self.out.tokens.push(Token::EnumerandName(inner_name)); + + let mut first = true; + for (inner_mode, inner_name_and_kind) in inner_def.all_operands_with_names() { + if inner_mode == spec::OperandMode::Optional && self.is_exhausted() { + break; + } + + self.out.tokens.push(Token::Punctuation(if first { "(" } else { ", " })); + first = false; + + let (inner_name, inner_kind) = inner_name_and_kind.name_and_kind(); + self.operand(inner_name, inner_kind); + } + if !first { + self.out.tokens.push(Token::Punctuation(")")); + } + return; + } else if kind == self.wk.LiteralString { // FIXME(eddyb) deduplicate with `spv::extract_literal_string`. let bytes: SmallVec<[u8; 64]> = words .into_iter() @@ -260,6 +297,7 @@ impl, ID, IDS: Iterator> OperandPrint /// an enumerand with parameters (which consumes more immediates). 
pub fn operand_from_imms(imms: impl IntoIterator) -> TokensForOperand { let mut printer = OperandPrinter { + wk: &spec::Spec::get().well_known, imms: imms.into_iter().peekable(), ids: iter::empty().peekable(), out: TokensForOperand::default(), @@ -282,6 +320,7 @@ pub fn inst_operands( ids: impl IntoIterator, ) -> impl Iterator> { OperandPrinter { + wk: &spec::Spec::get().well_known, imms: imms.into_iter().peekable(), ids: ids.into_iter().peekable(), out: TokensForOperand::default(), diff --git a/src/spv/read.rs b/src/spv/read.rs index 87c2773..eee77bd 100644 --- a/src/spv/read.rs +++ b/src/spv/read.rs @@ -28,6 +28,9 @@ impl KnownIdDef { // FIXME(eddyb) keep a `&'static spec::Spec` if that can even speed up anything. struct InstParser<'a> { + // FIXME(eddyb) use a field like this to interpret `Opcode`/`OperandKind`, too. + wk: &'static spv::spec::WellKnown, + /// IDs defined so far in the module. known_ids: &'a FxHashMap, @@ -60,6 +63,9 @@ enum InstParseError { /// The type of a `LiteralContextDependentNumber` was not a supported type /// (one of either `OpTypeInt` or `OpTypeFloat`). UnsupportedContextSensitiveLiteralType { type_opcode: spec::Opcode }, + + /// Unsupported `OpSpecConstantOp` (`LiteralSpecConstantOpInteger`) opcode. + UnsupportedSpecConstantOpOpcode(u32), } impl InstParseError { @@ -94,6 +100,9 @@ impl InstParseError { Self::UnsupportedContextSensitiveLiteralType { type_opcode } => { format!("{} is not a supported literal type", type_opcode.name()).into() } + Self::UnsupportedSpecConstantOpOpcode(opcode) => { + format!("{opcode} is not a supported opcode (for `OpSpecConstantOp`)").into() + } } } } @@ -198,6 +207,22 @@ impl InstParser<'_> { } } + // HACK(eddyb) this isn't cleanly uniform because it's an odd special case. + if kind == self.wk.LiteralSpecConstantOpInteger { + // FIXME(eddyb) this partially duplicates the main instruction parsing. 
+ let (_, _, inner_def) = u16::try_from(word) + .ok() + .and_then(spec::Opcode::try_from_u16_with_name_and_def) + .ok_or(Error::UnsupportedSpecConstantOpOpcode(word))?; + + for (inner_mode, inner_kind) in inner_def.all_operands() { + if inner_mode == spec::OperandMode::Optional && self.is_exhausted() { + break; + } + self.operand(inner_kind)?; + } + } + Ok(()) } @@ -324,6 +349,7 @@ impl Iterator for ModuleParser { } let parser = InstParser { + wk: &spec::Spec::get().well_known, known_ids: &self.known_ids, words: words[1..inst_len].iter().copied(), inst: spv::InstWithIds { diff --git a/src/spv/spec.rs b/src/spv/spec.rs index ef59ee6..af34662 100644 --- a/src/spv/spec.rs +++ b/src/spv/spec.rs @@ -183,6 +183,7 @@ def_well_known! { LiteralExtInstInteger, LiteralString, LiteralContextDependentNumber, + LiteralSpecConstantOpInteger, ], // FIXME(eddyb) find a way to namespace these to avoid conflicts. addressing_model: u32 = [ diff --git a/src/spv/write.rs b/src/spv/write.rs index 38c96d7..55f8bf6 100644 --- a/src/spv/write.rs +++ b/src/spv/write.rs @@ -7,6 +7,9 @@ use std::{fs, io, iter, slice}; // FIXME(eddyb) keep a `&'static spec::Spec` if that can even speed up anything. struct OperandEmitter<'a> { + // FIXME(eddyb) use a field like this to interpret `Opcode`/`OperandKind`, too. + wk: &'static spv::spec::WellKnown, + /// Input immediate operands of an instruction. imms: iter::Copied>, @@ -32,6 +35,9 @@ enum OperandEmitError { /// Unsupported enumerand value. UnsupportedEnumerand(spec::OperandKind, u32), + + /// Unsupported `OpSpecConstantOp` (`LiteralSpecConstantOpInteger`) opcode. 
+ UnsupportedSpecConstantOpOpcode(u32), } impl OperandEmitError { @@ -60,6 +66,9 @@ impl OperandEmitError { _ => unreachable!(), } } + Self::UnsupportedSpecConstantOpOpcode(opcode) => { + format!("{opcode} is not a supported opcode (for `OpSpecConstantOp`)").into() + } } } } @@ -140,6 +149,23 @@ impl OperandEmitter<'_> { } } + // HACK(eddyb) this isn't cleanly uniform because it's an odd special case. + if kind == self.wk.LiteralSpecConstantOpInteger { + // FIXME(eddyb) this partially duplicates the main instruction emission. + let &word = self.out.last().unwrap(); + let (_, _, inner_def) = u16::try_from(word) + .ok() + .and_then(spec::Opcode::try_from_u16_with_name_and_def) + .ok_or(Error::UnsupportedSpecConstantOpOpcode(word))?; + + for (inner_mode, inner_kind) in inner_def.all_operands() { + if inner_mode == spec::OperandMode::Optional && self.is_exhausted() { + break; + } + self.operand(inner_kind)?; + } + } + Ok(()) } @@ -221,6 +247,7 @@ impl ModuleEmitter { ); OperandEmitter { + wk: &spec::Spec::get().well_known, imms: inst.imms.iter().copied(), ids: inst.ids.iter().copied(), out: &mut self.words, From 83071b4b1c45c8226ab03215b68044929d664059 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Tue, 9 Sep 2025 11:42:12 +0300 Subject: [PATCH 5/5] spv: minimal `OpConstantFunctionPointerINTEL` support. --- src/lib.rs | 8 +++- src/print/mod.rs | 5 +- src/qptr/lower.rs | 8 ++++ src/spv/lift.rs | 13 +++++- src/spv/lower.rs | 117 ++++++++++++++++++++++++++++++++++++++++------ src/spv/spec.rs | 3 ++ src/transform.rs | 4 ++ src/visit.rs | 1 + 8 files changed, 141 insertions(+), 18 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 029e188..ebf3388 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -594,7 +594,13 @@ pub struct ConstDef { #[derive(Clone, PartialEq, Eq, Hash)] pub enum ConstKind { + // FIXME(eddyb) maybe merge these? 
however, their connection is somewhat + // tenuous (being one of the LLVM-isms SPIR-V inherited, among other things), + // there's still the need to rename "global variable" post-`Var`-refactor, + // and last but not least, `PtrToFunc` needs `SPV_INTEL_function_pointers`, + // an OpenCL-only extension Intel came up with for their own SPIR-V tooling. PtrToGlobalVar(GlobalVar), + PtrToFunc(Func), // HACK(eddyb) this is a fallback case that should become increasingly rare // (especially wrt recursive consts), `Rc` means it can't bloat `ConstDef`. @@ -683,7 +689,7 @@ pub struct FuncDecl { pub def: DeclDef, } -#[derive(Copy, Clone)] +#[derive(Copy, Clone, PartialEq, Eq)] pub struct FuncParam { pub attrs: AttrSet, diff --git a/src/print/mod.rs b/src/print/mod.rs index 0cf2e34..afe81f3 100644 --- a/src/print/mod.rs +++ b/src/print/mod.rs @@ -3348,6 +3348,9 @@ impl Print for ConstDef { &ConstKind::PtrToGlobalVar(gv) => { pretty::Fragment::new(["&".into(), gv.print(printer)]) } + &ConstKind::PtrToFunc(func) => { + pretty::Fragment::new(["&".into(), func.print(printer)]) + } ConstKind::SpvInst { spv_inst_and_const_inputs } => { let (spv_inst, const_inputs) = &**spv_inst_and_const_inputs; pretty::Fragment::new([ @@ -4130,7 +4133,7 @@ impl Print for FuncAt<'_, DataInst> { } } } - ConstKind::PtrToGlobalVar(_) => {} + ConstKind::PtrToGlobalVar(_) | ConstKind::PtrToFunc(_) => {} } } None diff --git a/src/qptr/lower.rs b/src/qptr/lower.rs index 776d356..e452bed 100644 --- a/src/qptr/lower.rs +++ b/src/qptr/lower.rs @@ -162,6 +162,14 @@ impl<'a> LowerFromSpvPtrs<'a> { [spv::Imm::Short(_, sc)] => sc, _ => unreachable!(), }; + + // HACK(eddyb) keep function pointers separate, perhaps eventually + // adding an `OpTypeUntypedPointerKHR CodeSectionINTEL` equivalent + // to SPIR-T itself (after `SPV_KHR_untyped_pointers` support). + if sc == self.wk.CodeSectionINTEL { + return None; + } + let pointee = match type_and_const_inputs[..] 
{ [TypeOrConst::Type(elem_type)] => elem_type, _ => unreachable!(), diff --git a/src/spv/lift.rs b/src/spv/lift.rs index 7628aa2..95c3fac 100644 --- a/src/spv/lift.rs +++ b/src/spv/lift.rs @@ -145,7 +145,7 @@ impl Visitor<'_> for NeedsIdsCollector<'_> { } let ct_def = &self.cx[ct]; match ct_def.kind { - ConstKind::PtrToGlobalVar(_) | ConstKind::SpvInst { .. } => { + ConstKind::PtrToGlobalVar(_) | ConstKind::PtrToFunc(_) | ConstKind::SpvInst { .. } => { self.visit_const_def(ct_def); self.globals.insert(global); } @@ -1103,7 +1103,9 @@ impl LazyInst<'_, '_> { }; (gv_decl.attrs, import) } - ConstKind::SpvInst { .. } => (ct_def.attrs, None), + ConstKind::PtrToFunc(_) | ConstKind::SpvInst { .. } => { + (ct_def.attrs, None) + } // Not inserted into `globals` while visiting. ConstKind::SpvStringLiteralForExtInst(_) => unreachable!(), @@ -1224,6 +1226,13 @@ impl LazyInst<'_, '_> { } } + &ConstKind::PtrToFunc(func) => spv::InstWithIds { + without_ids: wk.OpConstantFunctionPointerINTEL.into(), + result_type_id: Some(ids.globals[&Global::Type(ct_def.ty)]), + result_id, + ids: [ids.funcs[&func].func_id].into_iter().collect(), + }, + ConstKind::SpvInst { spv_inst_and_const_inputs } => { let (spv_inst, const_inputs) = &**spv_inst_and_const_inputs; spv::InstWithIds { diff --git a/src/spv/lower.rs b/src/spv/lower.rs index 3f46297..7e76e0c 100644 --- a/src/spv/lower.rs +++ b/src/spv/lower.rs @@ -25,6 +25,11 @@ enum IdDef { Func(Func), + // HACK(eddyb) despite `FuncBody` deferring ID resolution to allow forward + // references *between* functions, function pointer *constants* need a `Func` + // long before any `OpFunction`s, so they're pre-defined as dummy imports. 
+ FuncForwardRef(Func), + SpvExtInstImport(InternedStr), SpvDebugString(InternedStr), } @@ -37,7 +42,7 @@ impl IdDef { IdDef::Type(_) => "a type".into(), IdDef::Const(_) => "a constant".into(), - IdDef::Func(_) => "a function".into(), + IdDef::Func(_) | IdDef::FuncForwardRef(_) => "a function".into(), IdDef::SpvExtInstImport(name) => { format!("`OpExtInstImport {:?}`", &cx[name]) @@ -114,6 +119,37 @@ impl Module { // HACK(eddyb) used to quickly check whether an `OpVariable` is global. let storage_class_function_imm = spv::Imm::Short(wk.StorageClass, wk.Function); + // HACK(eddyb) used as the `FuncDecl` for an `IdDef::FuncForwardRef`. + let dummy_decl_for_func_forward_ref = FuncDecl { + attrs: { + let mut attrs = AttrSet::default(); + attrs.push_diag( + &cx, + Diag::err(["function ID used as forward reference but never defined".into()]), + ); + attrs + }, + // FIXME(eddyb) this gets simpler w/ disaggregation. + ret_type: cx.intern(TypeKind::SpvInst { + spv_inst: wk.OpTypeVoid.into(), + type_and_const_inputs: [].into_iter().collect(), + }), + params: [].into_iter().collect(), + def: DeclDef::Imported(Import::LinkName(cx.intern(""))), + }; + // HACK(eddyb) no `PartialEq` on `FuncDecl`. 
+ let assert_is_dummy_decl_for_func_forward_ref = |decl: &FuncDecl| { + let [expected, found] = [&dummy_decl_for_func_forward_ref, decl].map( + |FuncDecl { attrs, ret_type, params, def }| { + let DeclDef::Imported(import) = def else { + unreachable!(); + }; + (attrs, ret_type, params, import) + }, + ); + assert!(expected == found); + }; + let mut module = { let [magic, version, generator_magic, id_bound, reserved_inst_schema] = parser.header; @@ -583,6 +619,38 @@ impl Module { }); id_defs.insert(id, IdDef::Type(ty)); + Seq::TypeConstOrGlobalVar + } else if opcode == wk.OpConstantFunctionPointerINTEL { + use std::collections::hash_map::Entry; + + let id = inst.result_id.unwrap(); + + let func_id = inst.ids[0]; + let func = match id_defs.entry(func_id) { + Entry::Occupied(entry) => match entry.get() { + &IdDef::FuncForwardRef(func) => Ok(func), + id_def => Err(id_def.descr(&cx)), + }, + Entry::Vacant(entry) => { + let func = + module.funcs.define(&cx, dummy_decl_for_func_forward_ref.clone()); + entry.insert(IdDef::FuncForwardRef(func)); + Ok(func) + } + } + .map_err(|descr| { + invalid(&format!( + "unsupported use of {descr} as the `OpConstantFunctionPointerINTEL` operand" + )) + })?; + + let ct = cx.intern(ConstDef { + attrs: mem::take(&mut attrs), + ty: result_type.unwrap(), + kind: ConstKind::PtrToFunc(func), + }); + id_defs.insert(id, IdDef::Const(ct)); + Seq::TypeConstOrGlobalVar } else if inst_category == spec::InstructionCategory::Const || opcode == wk.OpUndef { let id = inst.result_id.unwrap(); @@ -755,19 +823,40 @@ impl Module { }) } }; + let decl = FuncDecl { + attrs: mem::take(&mut attrs), + ret_type: func_ret_type, + params: func_type_param_types + .map(|ty| FuncParam { attrs: AttrSet::default(), ty }) + .collect(), + def, + }; - let func = module.funcs.define( - &cx, - FuncDecl { - attrs: mem::take(&mut attrs), - ret_type: func_ret_type, - params: func_type_param_types - .map(|ty| FuncParam { attrs: AttrSet::default(), ty }) - .collect(), - def, - }, - 
); - id_defs.insert(func_id, IdDef::Func(func)); + let func = { + use std::collections::hash_map::Entry; + + match id_defs.entry(func_id) { + Entry::Occupied(mut entry) => match entry.get() { + &IdDef::FuncForwardRef(func) => { + let decl_slot = &mut module.funcs[func]; + assert_is_dummy_decl_for_func_forward_ref(decl_slot); + *decl_slot = decl; + + entry.insert(IdDef::Func(func)); + Ok(func) + } + id_def => Err(id_def.descr(&cx)), + }, + Entry::Vacant(entry) => { + let func = module.funcs.define(&cx, decl); + entry.insert(IdDef::Func(func)); + Ok(func) + } + } + .map_err(|descr| { + invalid(&format!("invalid redefinition of {descr} as a new function")) + })? + }; current_func_body = Some(FuncBody { func_id, func, insts: vec![] }); @@ -1171,7 +1260,7 @@ impl Module { "unsupported use of {} outside `OpExtInst`", id_def.descr(&cx), ))), - None => local_id_defs + None | Some(IdDef::FuncForwardRef(_)) => local_id_defs .get(&id) .copied() .ok_or_else(|| invalid(&format!("undefined ID %{id}",))), diff --git a/src/spv/spec.rs b/src/spv/spec.rs index af34662..b7f08eb 100644 --- a/src/spv/spec.rs +++ b/src/spv/spec.rs @@ -137,6 +137,7 @@ def_well_known! { OpConstantTrue, OpConstant, OpUndef, + OpConstantFunctionPointerINTEL, OpVariable, @@ -201,6 +202,8 @@ def_well_known! 
{ HitAttributeKHR, RayPayloadKHR, CallableDataKHR, + + CodeSectionINTEL, ], decoration: u32 = [ LinkageAttributes, diff --git a/src/transform.rs b/src/transform.rs index e7c9492..77d7da9 100644 --- a/src/transform.rs +++ b/src/transform.rs @@ -473,6 +473,10 @@ impl InnerTransform for ConstDef { gv -> transformer.transform_global_var_use(*gv), } => ConstKind::PtrToGlobalVar(gv)), + ConstKind::PtrToFunc(func) => transform!({ + func -> transformer.transform_func_use(*func), + } => ConstKind::PtrToFunc(func)), + ConstKind::SpvInst { spv_inst_and_const_inputs } => { let (spv_inst, const_inputs) = &**spv_inst_and_const_inputs; Transformed::map_iter( diff --git a/src/visit.rs b/src/visit.rs index 9ebb666..63372f3 100644 --- a/src/visit.rs +++ b/src/visit.rs @@ -344,6 +344,7 @@ impl InnerVisit for ConstDef { visitor.visit_type_use(*ty); match kind { &ConstKind::PtrToGlobalVar(gv) => visitor.visit_global_var_use(gv), + &ConstKind::PtrToFunc(func) => visitor.visit_func_use(func), ConstKind::SpvInst { spv_inst_and_const_inputs } => { let (_spv_inst, const_inputs) = &**spv_inst_and_const_inputs; for &ct in const_inputs {