@@ -2,7 +2,7 @@ use std::collections::hash_map::Entry;
22use std:: marker:: PhantomData ;
33use std:: ops:: Range ;
44
5- use rustc_abi:: { BackendRepr , FieldIdx , FieldsShape , Size , VariantIdx } ;
5+ use rustc_abi:: { BackendRepr , FieldIdx , FieldsShape , ScalableElt , Size , VariantIdx } ;
66use rustc_data_structures:: fx:: FxHashMap ;
77use rustc_index:: IndexVec ;
88use rustc_middle:: middle:: codegen_fn_attrs:: CodegenFnAttrFlags ;
@@ -408,6 +408,49 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
408408 return ;
409409 }
410410
411+ // Don't spill `<vscale x N x i1>` for `N != 16`:
412+ //
413+ // SVE predicates are only one bit for each byte in an SVE vector (which makes
414+ // sense, the predicate only needs to keep track of whether a lane is
415+ // enabled/disabled). i.e. a `<vscale x 16 x i8>` vector has a `<vscale x 16 x i1>`
416+ // predicate type. `<vscale x 16 x i1>` corresponds to two bytes of storage,
417+ // multiplied by the `vscale`, with one bit for each of the sixteen lanes.
418+ //
419+ // For a vector with fewer elements, such as `svint32_t`/`<vscale x 4 x i32>`,
420+ // while only a `<vscale x 4 x i1>` predicate type would be strictly necessary,
421+ // relevant intrinsics still take a `svbool_t`/`<vscale x 16 x i1>` - this is
422+ // because a `<vscale x 4 x i1>` is only half of a byte (for `vscale=1`), and with
423+ // memory being byte-addressable, it's unclear how to store that.
424+ //
425+ // Due to this, LLVM ultimately decided not to support stores of `<vscale x N x i1>`
426+ // for `N != 16`. As for `vscale=1` and `N` fewer than sixteen, partial bytes would
427+ // need to be stored (except for `N=8`, but that also isn't supported). `N` can
428+ // never be greater than sixteen as that ends up larger than the 128-bit increment
429+ // size.
430+ //
431+ // Internally, with an intrinsic operating on a `svint32_t`/`<vscale x 4 x i32>`
432+ // (for example), the intrinsic takes the `svbool_t`/`<vscale x 16 x i1>` predicate
433+ // and casts it to a `svbool4_t`/`<vscale x 4 x i1>`. Therefore, it's important that
434+ // the `<vscale x 4 x i1>` never spills because that'll cause errors during
435+ // instruction selection. Spilling to the stack to create debuginfo for these
436+ // intermediate values must be avoided and won't degrade the debugging experience
437+ // anyway.
438+ if operand. layout . ty . is_scalable_vector ( )
439+ && bx. sess ( ) . target . arch == rustc_target:: spec:: Arch :: AArch64
440+ && let ty:: Adt ( adt, args) = & operand. layout . ty . kind ( )
441+ && let Some ( marker_type_field) =
442+ adt. non_enum_variant ( ) . fields . get ( FieldIdx :: from_u32 ( 0 ) )
443+ {
444+ let marker_type = marker_type_field. ty ( bx. tcx ( ) , args) ;
445+ // i.e. `<vscale x N x i1>` when `N != 16`
446+ if let ty:: Slice ( element_ty) = marker_type. kind ( )
447+ && element_ty. is_bool ( )
448+ && adt. repr ( ) . scalable != Some ( ScalableElt :: ElementCount ( 16 ) )
449+ {
450+ return ;
451+ }
452+ }
453+
411454 Self :: spill_operand_to_stack ( * operand, name, bx)
412455 }
413456
0 commit comments