@@ -6,7 +6,7 @@ use libc::{c_char, c_uint};
 use rustc_abi as abi;
 use rustc_abi::{AddressSpace, Align, HasDataLayout, Size, TargetDataLayout, WrappingRange};
 use rustc_codegen_ssa::MemFlags;
-use rustc_codegen_ssa::common::{IntPredicate, RealPredicate, TypeKind};
+use rustc_codegen_ssa::common::{AtomicRmwBinOp, IntPredicate, RealPredicate, TypeKind};
 use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
 use rustc_codegen_ssa::mir::place::PlaceRef;
 use rustc_codegen_ssa::traits::*;
@@ -546,30 +546,13 @@ impl<'ll, 'tcx, 'a> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {

     fn atomic_load(
         &mut self,
-        _ty: &'ll Type,
+        ty: &'ll Type,
         ptr: &'ll Value,
-        _order: AtomicOrdering,
-        _size: Size,
+        order: AtomicOrdering,
+        size: Size,
     ) -> &'ll Value {
-        // core seems to think that nvptx has atomic loads, which is not true for NVVM IR,
-        // therefore our only option is to print that this is not supported then trap.
-        // i have heard of cursed things such as emulating this with __threadfence and volatile loads
-        // but that needs to be experimented with in terms of safety and behavior.
-        // NVVM has explicit intrinsics for adding and subtracting floats which we expose elsewhere
-
-        // TODO(RDambrosio016): is there a way we can just generate a panic with a message instead
-        // of doing this ourselves? since all panics will be aborts, it should be equivalent
-        // let message = "Atomic Loads are not supported in CUDA.\0";
-
-        // let vprintf = self.get_intrinsic("vprintf");
-        // let formatlist = self.const_str(Symbol::intern(message)).0;
-        // let valist = self.const_null(self.type_void());
-
-        // self.call(vprintf, &[formatlist, valist], None);
-
-        let (ty, f) = self.get_intrinsic("llvm.trap");
-        self.call(ty, None, None, f, &[], None, None);
-        unsafe { llvm::LLVMBuildLoad(self.llbuilder, ptr, unnamed()) }
+        // Since A | 0 == A for any A, and performing atomics on constant memory is UB in Rust, we can abuse `or` with zero to perform an atomic load.
+        self.atomic_rmw(AtomicRmwBinOp::AtomicOr, ptr, self.const_int(ty, 0), order)
     }

     fn load_operand(&mut self, place: PlaceRef<'tcx, &'ll Value>) -> OperandRef<'tcx, &'ll Value> {
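The `or`-with-zero trick in `atomic_load` is the same identity visible with std's atomics: `fetch_or(0, order)` is an atomic read-modify-write that leaves the value unchanged and returns it, i.e. a load. A minimal sketch for illustration only (not part of this patch):

```rust
use std::sync::atomic::{AtomicU32, Ordering};

// Emulate an atomic load with an atomic `or`:
// x | 0 == x, and `fetch_or` returns the previous value.
fn emulated_load(x: &AtomicU32, order: Ordering) -> u32 {
    x.fetch_or(0, order)
}

fn main() {
    let v = AtomicU32::new(42);
    assert_eq!(emulated_load(&v, Ordering::SeqCst), 42);
}
```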
@@ -796,24 +779,13 @@ impl<'ll, 'tcx, 'a> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {

     fn atomic_store(
         &mut self,
-        _val: &'ll Value,
+        val: &'ll Value,
         ptr: &'ll Value,
-        _order: AtomicOrdering,
-        _size: Size,
+        order: AtomicOrdering,
+        size: Size,
     ) {
-        // see comment in atomic_load
-
-        // let message = "Atomic Stores are not supported in CUDA.\0";
-
-        // let vprintf = self.get_intrinsic("vprintf");
-        // let formatlist = self.const_str(Symbol::intern(message)).0;
-        // let valist = self.const_null(self.type_void());
-
-        // self.call(vprintf, &[formatlist, valist], None);
-        self.abort();
-        unsafe {
-            llvm::LLVMBuildLoad(self.llbuilder, ptr, UNNAMED);
-        }
+        // We can exchange *ptr with val, and then discard the result.
+        self.atomic_rmw(AtomicRmwBinOp::AtomicXchg, ptr, val, order);
     }

     fn gep(&mut self, ty: &'ll Type, ptr: &'ll Value, indices: &[&'ll Value]) -> &'ll Value {
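`atomic_store` uses the dual trick: an exchange writes the new value atomically and returns the old one, which is simply dropped. Again a library-level sketch for illustration only (not part of this patch):

```rust
use std::sync::atomic::{AtomicU32, Ordering};

// Emulate an atomic store with an exchange whose result is ignored:
// `swap` writes `val` and returns the previous value, which we discard.
fn emulated_store(x: &AtomicU32, val: u32, order: Ordering) {
    let _ = x.swap(val, order);
}

fn main() {
    let v = AtomicU32::new(0);
    emulated_store(&v, 7, Ordering::SeqCst);
    assert_eq!(v.load(Ordering::SeqCst), 7);
}
```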
@@ -1195,13 +1167,65 @@ impl<'ll, 'tcx, 'a> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
     }
     fn atomic_rmw(
         &mut self,
-        _op: rustc_codegen_ssa::common::AtomicRmwBinOp,
-        _dst: &'ll Value,
-        _src: &'ll Value,
-        _order: AtomicOrdering,
+        op: AtomicRmwBinOp,
+        dst: &'ll Value,
+        src: &'ll Value,
+        order: AtomicOrdering,
     ) -> &'ll Value {
-        // see cmpxchg comment
-        self.fatal("atomic rmw is not supported")
+        if matches!(op, AtomicRmwBinOp::AtomicNand) {
+            self.fatal("atomic NAND is not supported yet")
+        }
+        self.atomic_op(
+            dst,
+            |builder, dst| {
+                // We are in a supported address space - just use ordinary atomics.
+                unsafe {
+                    llvm::LLVMBuildAtomicRMW(
+                        builder.llbuilder,
+                        op,
+                        dst,
+                        src,
+                        crate::llvm::AtomicOrdering::from_generic(order),
+                        0,
+                    )
+                }
+            },
+            |builder, dst| {
+                // The local address space is only accessible to the current thread, so there are
+                // no synchronization issues and we can emulate the atomic with a plain load / modify / store.
+                let load: &'ll Value = unsafe { llvm::LLVMBuildLoad(builder.llbuilder, dst, UNNAMED) };
+                let next_val = match op {
+                    AtomicRmwBinOp::AtomicXchg => src,
+                    AtomicRmwBinOp::AtomicAdd => builder.add(load, src),
+                    AtomicRmwBinOp::AtomicSub => builder.sub(load, src),
+                    AtomicRmwBinOp::AtomicAnd => builder.and(load, src),
+                    AtomicRmwBinOp::AtomicNand => {
+                        let and = builder.and(load, src);
+                        builder.not(and)
+                    }
+                    AtomicRmwBinOp::AtomicOr => builder.or(load, src),
+                    AtomicRmwBinOp::AtomicXor => builder.xor(load, src),
+                    AtomicRmwBinOp::AtomicMax => {
+                        let is_src_bigger = builder.icmp(IntPredicate::IntSGT, src, load);
+                        builder.select(is_src_bigger, src, load)
+                    }
+                    AtomicRmwBinOp::AtomicMin => {
+                        let is_src_smaller = builder.icmp(IntPredicate::IntSLT, src, load);
+                        builder.select(is_src_smaller, src, load)
+                    }
+                    AtomicRmwBinOp::AtomicUMax => {
+                        let is_src_bigger = builder.icmp(IntPredicate::IntUGT, src, load);
+                        builder.select(is_src_bigger, src, load)
+                    }
+                    AtomicRmwBinOp::AtomicUMin => {
+                        let is_src_smaller = builder.icmp(IntPredicate::IntULT, src, load);
+                        builder.select(is_src_smaller, src, load)
+                    }
+                };
+                unsafe { llvm::LLVMBuildStore(builder.llbuilder, next_val, dst) };
+                load
+            },
+        )
     }

     fn atomic_fence(
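The second closure passed to `atomic_op` follows the usual RMW contract: load the old value, compute and store the new one, and return the old value. On plain integers the local-space fallback computes roughly the following; `RmwOp` and `emulate_rmw` are hypothetical names for illustration, and skipping synchronization is sound only because NVPTX local memory is private to a single thread:

```rust
// Hypothetical scalar model of the local-space fallback (signed ops only;
// UMax/UMin would use unsigned comparisons on the same bit pattern).
#[derive(Clone, Copy)]
enum RmwOp { Xchg, Add, Sub, And, Nand, Or, Xor, Max, Min }

fn emulate_rmw(dst: &mut i32, src: i32, op: RmwOp) -> i32 {
    let old = *dst; // load
    *dst = match op { // modify + store
        RmwOp::Xchg => src,
        RmwOp::Add => old.wrapping_add(src),
        RmwOp::Sub => old.wrapping_sub(src),
        RmwOp::And => old & src,
        RmwOp::Nand => !(old & src),
        RmwOp::Or => old | src,
        RmwOp::Xor => old ^ src,
        RmwOp::Max => old.max(src),
        RmwOp::Min => old.min(src),
    };
    old // atomic RMW operations return the previous value
}

fn main() {
    let mut x = 5;
    assert_eq!(emulate_rmw(&mut x, 3, RmwOp::Add), 5);
    assert_eq!(x, 8);
}
```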