diff --git a/enzyme/benchmarks/ReverseMode/adbench/Makefile.config b/enzyme/benchmarks/ReverseMode/adbench/Makefile.config new file mode 100644 index 000000000000..c620d4a3b710 --- /dev/null +++ b/enzyme/benchmarks/ReverseMode/adbench/Makefile.config @@ -0,0 +1,9 @@ +CLANG := /home/manuel/prog/rust-middle/build/x86_64-unknown-linux-gnu/llvm/build/bin/clang++ +OPT := /home/manuel/prog/rust-middle/build/x86_64-unknown-linux-gnu/llvm/build/bin/opt + +PASSES1 := verify,annotation2metadata,forceattrs,inferattrs,coro-early,function(ee-instrument<>,lower-expect,simplifycfg,sroa,early-cse<>,callsite-splitting),openmp-opt,ipsccp,called-value-propagation,globalopt,function(mem2reg,instcombine,simplifycfg),always-inline,require,function(invalidate),require,cgscc(devirt<4>(inline,function-attrs,argpromotion,openmp-opt-cgscc,function(sroa,early-cse,speculative-execution,jump-threading,correlated-propagation,simplifycfg,instcombine,aggressive-instcombine,libcalls-shrinkwrap,tailcallelim,simplifycfg,reassociate,constraint-elimination,loop-mssa(loop-instsimplify,loop-simplifycfg,licm,loop-rotate,licm,simple-loop-unswitch),simplifycfg,instcombine,loop(loop-idiom,indvars,extra-simple-loop-unswitch-passes,loop-deletion,loop-unroll-full),sroa,vector-combine,mldst-motion,gvn<>,sccp,bdce,instcombine,jump-threading,correlated-propagation,adce,memcpyopt,dse,move-auto-init,loop-mssa(licm),coro-elide,simplifycfg,instcombine),function-attrs,function(require),coro-split,coro-annotation-elide)),deadargelim,coro-cleanup,globalopt,globaldce,rpo-function-attrs,recompute-globalsaa,function(float2int,lower-constant-intrinsics,chr,loop(loop-rotate,loop-deletion),loop-distribute,inject-tli-mappings,loop-vectorize,infer-alignment,loop-load-elim,instcombine,simplifycfg,vector-combine,instcombine,loop-unroll,transform-warning,sroa,infer-alignment,instcombine,loop-mssa(licm),alignment-from-assumptions,loop-sink,instsimplify,div-rem-pairs,tailcallelim,simplifycfg),globaldce,constmerge,function(annotation-r
emarks),canonicalize-aliases,name-anon-globals,verify + +PASSES2 := cross-dso-cfi,openmp-opt,globaldce,inferattrs,function(callsite-splitting),pgo-icall-prom,cgscc(function-attrs,argpromotion,function(sroa)),ipsccp,called-value-propagation,rpo-function-attrs,globalsplit,wholeprogramdevirt,globalopt,function(mem2reg),constmerge,deadargelim,function(instcombine,aggressive-instcombine),expand-variadics,cgscc(inline,inline),globalopt,openmp-opt,globaldce,cgscc(argpromotion),function(instcombine,constraint-elimination,jump-threading,sroa,tailcallelim),cgscc(function-attrs),require,function(invalidate),cgscc(openmp-opt-cgscc),function(loop-mssa(licm),gvn<>,memcpyopt,dse,move-auto-init,mldst-motion,loop(indvars,loop-deletion,loop-unroll-full),loop-distribute,loop-vectorize,infer-alignment,loop-unroll,transform-warning,sroa,instcombine,simplifycfg,sccp,instcombine,bdce,vector-combine,infer-alignment,instcombine,loop-mssa(licm),alignment-from-assumptions,jump-threading),lowertypetests,lowertypetests,function(loop-sink,div-rem-pairs,simplifycfg),elim-avail-extern,globaldce,rel-lookup-table-converter,cg-profile,function(annotation-remarks),canonicalize-aliases,name-anon-globals +#PASSES2 := 
cross-dso-cfi,openmp-opt,globaldce,inferattrs,function(callsite-splitting),pgo-icall-prom,cgscc(function-attrs,argpromotion,function(sroa)),ipsccp,called-value-propagation,rpo-function-attrs,globalsplit,wholeprogramdevirt,globalopt,function(mem2reg),constmerge,deadargelim,function(instcombine,aggressive-instcombine),expand-variadics,cgscc(inline,inline),globalopt,openmp-opt,globaldce,cgscc(argpromotion),function(instcombine,constraint-elimination,jump-threading,sroa,tailcallelim),cgscc(function-attrs),require,function(invalidate),cgscc(openmp-opt-cgscc),function(loop-mssa(licm),gvn<>,memcpyopt,dse,move-auto-init,mldst-motion,loop(indvars,loop-deletion,loop-unroll-full),loop-distribute,loop-vectorize,infer-alignment,loop-unroll,transform-warning,sroa,instcombine,simplifycfg,sccp,instcombine,bdce,vector-combine,infer-alignment,instcombine,loop-mssa(licm),alignment-from-assumptions,jump-threading),lowertypetests,lowertypetests,function(loop-sink,div-rem-pairs,simplifycfg),elim-avail-extern,globaldce,rel-lookup-table-converter,cg-profile,function(annotation-remarks),canonicalize-aliases,name-anon-globals,EnzymeNewPM + +PASSES3 := 
cross-dso-cfi,openmp-opt,globaldce,inferattrs,function(callsite-splitting),pgo-icall-prom,cgscc(function-attrs,argpromotion,function(sroa)),ipsccp,called-value-propagation,rpo-function-attrs,globalsplit,wholeprogramdevirt,globalopt,function(mem2reg),constmerge,deadargelim,function(instcombine,aggressive-instcombine),expand-variadics,cgscc(inline,inline),globalopt,openmp-opt,globaldce,cgscc(argpromotion),function(instcombine,constraint-elimination,jump-threading,sroa,tailcallelim),cgscc(function-attrs),require,function(invalidate),cgscc(openmp-opt-cgscc),function(loop-mssa(licm),gvn<>,memcpyopt,dse,move-auto-init,mldst-motion,loop(indvars,loop-deletion,loop-unroll-full),loop-distribute,loop-vectorize,infer-alignment,loop-unroll,transform-warning,sroa,instcombine,simplifycfg,sccp,instcombine,bdce,slp-vectorizer,vector-combine,infer-alignment,instcombine,loop-mssa(licm),alignment-from-assumptions,jump-threading),lowertypetests,lowertypetests,function(loop-sink,div-rem-pairs,simplifycfg),elim-avail-extern,globaldce,mergefunc,rel-lookup-table-converter,cg-profile,function(annotation-remarks),canonicalize-aliases,name-anon-globals diff --git a/enzyme/benchmarks/ReverseMode/adbench/ba.h b/enzyme/benchmarks/ReverseMode/adbench/ba.h index 6a3f97737985..131a5f8ae4d2 100644 --- a/enzyme/benchmarks/ReverseMode/adbench/ba.h +++ b/enzyme/benchmarks/ReverseMode/adbench/ba.h @@ -427,7 +427,7 @@ int main(const int argc, const char* argv[]) { } } - { + for (int j=0;j<5;j++) { struct BAInput input; read_ba_instance("data/" + path, input.n, input.m, input.p, input.cams, @@ -659,7 +659,7 @@ int main(const int argc, const char* argv[]) { } } - { + for(int j=0;j<5;j++){ struct BAInput input; read_ba_instance("data/" + path, input.n, input.m, input.p, input.cams, diff --git a/enzyme/benchmarks/ReverseMode/adbench/gmm.h b/enzyme/benchmarks/ReverseMode/adbench/gmm.h index c5ec727e58e8..35f4423d9e19 100644 --- a/enzyme/benchmarks/ReverseMode/adbench/gmm.h +++ 
b/enzyme/benchmarks/ReverseMode/adbench/gmm.h @@ -213,17 +213,11 @@ int main(const int argc, const char* argv[]) { std::vector paths = { "10k/gmm_d10_K200.txt" }; - //getTests(paths, "data/1k", "1k/"); - if (std::getenv("BENCH_LARGE")) { - getTests(paths, "data/2.5k", "2.5k/"); - getTests(paths, "data/10k", "10k/"); - } - getTests(paths, "data/1k", "1k/"); - if (std::getenv("BENCH_LARGE")) { + //if (std::getenv("BENCH_LARGE")) { getTests(paths, "data/2.5k", "2.5k/"); getTests(paths, "data/10k", "10k/"); - } + //} std::ofstream jsonfile("results.json", std::ofstream::trunc); json test_results; @@ -274,7 +268,7 @@ int main(const int argc, const char* argv[]) { struct GMMOutput result = { 0, std::vector(Jcols) }; - //if (0) { + if (0) { try { struct timeval start, end; gettimeofday(&start, NULL); @@ -294,7 +288,7 @@ int main(const int argc, const char* argv[]) { } catch (std::bad_alloc) { printf("Adept combined 88888888 ooms\n"); } - //} + } } for (size_t i = 0; i < 5; i++) diff --git a/enzyme/benchmarks/ReverseMode/adbench/lstm.h b/enzyme/benchmarks/ReverseMode/adbench/lstm.h index 4f998418a938..80452b416504 100644 --- a/enzyme/benchmarks/ReverseMode/adbench/lstm.h +++ b/enzyme/benchmarks/ReverseMode/adbench/lstm.h @@ -243,8 +243,8 @@ double calculate_safe_primal(struct LSTMInput &input) { int main(const int argc, const char* argv[]) { printf("starting main\n"); - //std::vector paths = { "lstm_l2_c1024.txt", "lstm_l4_c1024.txt", "lstm_l2_c4096.txt", "lstm_l4_c4096.txt" }; - std::vector paths = { "lstm_l4_c4096.txt" }; + std::vector paths = { "lstm_l2_c1024.txt", "lstm_l4_c1024.txt", "lstm_l2_c4096.txt", "lstm_l4_c4096.txt" }; + //std::vector paths = { "lstm_l4_c4096.txt" }; std::ofstream jsonfile("results.json", std::ofstream::trunc); json test_results; @@ -289,7 +289,7 @@ int main(const int argc, const char* argv[]) { } - { + if (0){ struct LSTMInput input = {}; @@ -323,7 +323,7 @@ int main(const int argc, const char* argv[]) { } - { + for (int j=0; j<5; j++){ 
struct LSTMInput input = {}; @@ -390,7 +390,7 @@ int main(const int argc, const char* argv[]) { } } - { + for (int j=0; j<5; j++){ struct LSTMInput input = {}; diff --git a/enzyme/benchmarks/ReverseMode/ba/Makefile.make b/enzyme/benchmarks/ReverseMode/ba/Makefile.make index 50ab0cf9ef2d..cec8d4b5a795 100644 --- a/enzyme/benchmarks/ReverseMode/ba/Makefile.make +++ b/enzyme/benchmarks/ReverseMode/ba/Makefile.make @@ -4,6 +4,28 @@ dir := $(abspath $(lastword $(MAKEFILE_LIST))/../../../..) +include $(dir)/benchmarks/ReverseMode/adbench/Makefile.config + +ifeq ($(strip $(CLANG)),) +$(error CLANG is not set) +endif + +ifeq ($(strip $(PASSES1)),) +$(error PASSES1 is not set) +endif + +ifeq ($(strip $(PASSES2)),) +$(error PASSES2 is not set) +endif + +ifeq ($(strip $(PASSES3)),) +$(error PASSES3 is not set) +endif + +ifneq ($(strip $(PASSES4)),) +$(error PASSES4 is set) +endif + clean: rm -f *.ll *.o results.txt results.json cargo +enzyme clean @@ -12,16 +34,13 @@ $(dir)/benchmarks/ReverseMode/ba/target/release/libbars.a: src/lib.rs Cargo.toml RUSTFLAGS="-Z autodiff=Enable" cargo +enzyme rustc --release --lib --crate-type=staticlib --features=libm %-unopt.ll: %.cpp - clang++ $(BENCH) $(PTR) $^ -pthread -O2 -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm - -%-raw.ll: %-unopt.ll - opt $^ $(LOAD) $(ENZYME) -o $@ -S + $(CLANG) $(BENCH) $^ -pthread -O3 -fno-vectorize -fno-slp-vectorize -fno-unroll-loops -o $@ -S -emit-llvm -%-opt.ll: %-raw.ll - opt $^ -o $@ -S +%-opt.ll: %-unopt.ll + $(OPT) $^ $(LOAD) -passes="$(PASSES2),enzyme" -o $@ -S ba.o: ba-opt.ll $(dir)/benchmarks/ReverseMode/ba/target/release/libbars.a - clang++ $(BENCH) -pthread -O2 $^ -I /usr/include/c++/11 -I/usr/include/x86_64-linux-gnu/c++/11 -O2 -o $@ $(BENCHLINK) -lpthread -lm -L /usr/lib/gcc/x86_64-linux-gnu/11 + $(CLANG) -pthread -O3 -fno-math-errno $^ -o $@ $(BENCHLINK) -lm results.json: ba.o numactl -C 1 ./$^ diff --git a/enzyme/benchmarks/ReverseMode/ba/ba.cpp 
b/enzyme/benchmarks/ReverseMode/ba/ba.cpp index 602af73d8d5f..c9b29ec4cf78 100644 --- a/enzyme/benchmarks/ReverseMode/ba/ba.cpp +++ b/enzyme/benchmarks/ReverseMode/ba/ba.cpp @@ -115,6 +115,15 @@ void radial_distort(double const* rad_params, double *proj) proj[1] = proj[1] * L; } +void radial_distort_restrict(double const *__restrict rad_params, double *__restrict proj) +{ + double rsq, L; + rsq = sqsum(2, proj); + L = 1. + rad_params[0] * rsq + rad_params[1] * rsq * rsq; + proj[0] = proj[0] * L; + proj[1] = proj[1] * L; +} + void project_restrict(double const *__restrict cam, double const *__restrict X, double *__restrict proj) { double const* C = &cam[3]; @@ -129,7 +138,7 @@ void project_restrict(double const *__restrict cam, double const *__restrict X, proj[0] = Xcam[0] / Xcam[2]; proj[1] = Xcam[1] / Xcam[2]; - radial_distort(&cam[9], proj); + radial_distort_restrict(&cam[9], proj); proj[0] = proj[0] * cam[6] + cam[7]; proj[1] = proj[1] * cam[6] + cam[8]; diff --git a/enzyme/benchmarks/ReverseMode/ba/src/safe.rs b/enzyme/benchmarks/ReverseMode/ba/src/safe.rs index 3530c79e5a8e..dd8bf88b9265 100644 --- a/enzyme/benchmarks/ReverseMode/ba/src/safe.rs +++ b/enzyme/benchmarks/ReverseMode/ba/src/safe.rs @@ -182,9 +182,9 @@ fn rust_ba_objective( #[no_mangle] extern "C" fn rust2_ba_objective( - n: usize, - m: usize, - p: usize, + n: i32, + m: i32, + p: i32, cams: *const f64, x: *const f64, w: *const f64, @@ -193,6 +193,9 @@ extern "C" fn rust2_ba_objective( reproj_err: *mut f64, w_err: *mut f64, ) { + let n = n as usize; + let m = m as usize; + let p = p as usize; let cams = unsafe { std::slice::from_raw_parts(cams, n * 11) }; let x = unsafe { std::slice::from_raw_parts(x, m * 3) }; let w = unsafe { std::slice::from_raw_parts(w, p) }; diff --git a/enzyme/benchmarks/ReverseMode/ba/src/unsafe.rs b/enzyme/benchmarks/ReverseMode/ba/src/unsafe.rs index 09f74be9b6f8..467a7cb27d7d 100644 --- a/enzyme/benchmarks/ReverseMode/ba/src/unsafe.rs +++ 
b/enzyme/benchmarks/ReverseMode/ba/src/unsafe.rs @@ -110,9 +110,9 @@ pub unsafe fn compute_reproj_error( #[no_mangle] unsafe extern "C" fn rust2_unsafe_ba_objective( - n: usize, - m: usize, - p: usize, + n: i32, + m: i32, + p: i32, cams: *const f64, x: *const f64, w: *const f64, @@ -121,6 +121,9 @@ unsafe extern "C" fn rust2_unsafe_ba_objective( reproj_err: *mut f64, w_err: *mut f64, ) { + let n = n as usize; + let m = m as usize; + let p = p as usize; for i in 0..p { let cam_idx = *obs.add(i * 2 + 0) as usize; let pt_idx = *obs.add(i * 2 + 1) as usize; diff --git a/enzyme/benchmarks/ReverseMode/fft/Makefile.make b/enzyme/benchmarks/ReverseMode/fft/Makefile.make index b9385cd0d734..9ed3daaa26b6 100644 --- a/enzyme/benchmarks/ReverseMode/fft/Makefile.make +++ b/enzyme/benchmarks/ReverseMode/fft/Makefile.make @@ -4,6 +4,28 @@ dir := $(abspath $(lastword $(MAKEFILE_LIST))/../../../..) +include $(dir)/benchmarks/ReverseMode/adbench/Makefile.config + +ifeq ($(strip $(CLANG)),) +$(error CLANG is not set) +endif + +ifeq ($(strip $(PASSES1)),) +$(error PASSES1 is not set) +endif + +ifeq ($(strip $(PASSES2)),) +$(error PASSES2 is not set) +endif + +ifeq ($(strip $(PASSES3)),) +$(error PASSES3 is not set) +endif + +ifneq ($(strip $(PASSES4)),) +$(error PASSES4 is set) +endif + clean: rm -f *.ll *.o results.txt results.json @@ -11,17 +33,21 @@ $(dir)/benchmarks/ReverseMode/fft/target/release/libfft.a: src/lib.rs Cargo.toml RUSTFLAGS="-Z autodiff=Enable" cargo +enzyme rustc --release --lib --crate-type=staticlib %-unopt.ll: %.cpp - clang++ $(BENCH) $(PTR) $^ -pthread -O2 -fno-use-cxa-atexit -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm + $(CLANG) $(BENCH) $^ -DCPP=1 -fno-math-errno -fno-plt -pthread -O3 -fno-vectorize -fno-slp-vectorize -fno-unroll-loops -o $@ -S -emit-llvm #-fno-use-cxa-atexit +%-unoptr.ll: %.cpp + $(CLANG) $(BENCH) $^ -fno-math-errno -fno-plt -pthread -O3 -fno-vectorize -fno-slp-vectorize -fno-unroll-loops -o $@ -S 
-emit-llvm #-fno-use-cxa-atexit -%-raw.ll: %-unopt.ll - opt $^ $(LOAD) $(ENZYME) -o $@ -S -%-opt.ll: %-raw.ll - opt $^ -o $@ -S +%-opt.ll: %-unopt.ll + $(OPT) $^ $(LOAD) -passes="$(PASSES2),enzyme" -o $@ -S +%-optr.ll: %-unoptr.ll + $(OPT) $^ $(LOAD) -passes="$(PASSES2),enzyme" -o $@ -S fft.o: fft-opt.ll $(dir)/benchmarks/ReverseMode/fft/target/release/libfft.a - clang++ $(BENCH) -pthread -O2 $^ -o $@ $(BENCHLINK) -lpthread -lm -L /usr/lib/gcc/x86_64-linux-gnu/11 - #clang++ $(LOAD) $(BENCH) fft.cpp -I /usr/include/c++/11 -I/usr/include/x86_64-linux-gnu/c++/11 -O2 -o fft.o -lpthread $(BENCHLINK) -lm -L /usr/lib/gcc/x86_64-linux-gnu/11 + $(CLANG) -DCPP=1 -pthread -O3 -fno-math-errno -fno-plt -lpthread -lm $^ -o $@ $(BENCHLINK) -lm +fftr.o: fft-optr.ll $(dir)/benchmarks/ReverseMode/fft/target/release/libfft.a + $(CLANG) -pthread -O3 -fno-math-errno -fno-plt -lpthread -lm $^ -o $@ $(BENCHLINK) -lm -results.json: fft.o - ./$^ 1048576 | tee $@ +results.json: fftr.o fft.o + numactl -C 1 ./fft.o 1048576 | tee results.json + numactl -C 1 ./fftr.o 1048576 | tee resultsr.json diff --git a/enzyme/benchmarks/ReverseMode/gmm/Makefile.make b/enzyme/benchmarks/ReverseMode/gmm/Makefile.make index 17e22dde861a..f5f6de4fb06a 100644 --- a/enzyme/benchmarks/ReverseMode/gmm/Makefile.make +++ b/enzyme/benchmarks/ReverseMode/gmm/Makefile.make @@ -1,28 +1,46 @@ -# RUN: cd %S && LD_LIBRARY_PATH="%bldpath:$LD_LIBRARY_PATH" PTR="%ptr" BENCH="%bench" BENCHLINK="%blink" LOAD="%loadEnzyme" LOADCLANG="%loadClangEnzyme" ENZYME="%enzyme" make -B gmm-raw.ll results.json -f %s +# RUN: cd %S && LD_LIBRARY_PATH="%bldpath:$LD_LIBRARY_PATH" BENCH="%bench" BENCHLINK="%blink" LOAD="%loadEnzyme" LOADCLANG="%loadClangEnzyme" ENZYME="%enzyme" make -B gmm-opt.ll results.json -f %s .PHONY: clean dir := $(abspath $(lastword $(MAKEFILE_LIST))/../../../..) 
+include $(dir)/benchmarks/ReverseMode/adbench/Makefile.config + +ifeq ($(strip $(CLANG)),) +$(error CLANG is not set) +endif + +ifeq ($(strip $(PASSES1)),) +$(error PASSES1 is not set) +endif + +ifeq ($(strip $(PASSES2)),) +$(error PASSES2 is not set) +endif + +ifeq ($(strip $(PASSES3)),) +$(error PASSES3 is not set) +endif + +ifneq ($(strip $(PASSES4)),) +$(error PASSES4 is set) +endif + clean: rm -f *.ll *.o results.txt results.json cargo +enzyme clean $(dir)/benchmarks/ReverseMode/gmm/target/release/libgmmrs.a: src/lib.rs Cargo.toml - RUSTFLAGS="-Z autodiff=Enable,LooseTypes" cargo +enzyme rustc --release --lib --crate-type=staticlib --features=libm + RUSTFLAGS="-Z autodiff=Enable,PrintPasses,LooseTypes" cargo +enzyme rustc --release --lib --crate-type=staticlib %-unopt.ll: %.cpp - clang++ $(BENCH) $(PTR) $^ -pthread -O2 -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm - -%-raw.ll: %-unopt.ll - opt $^ $(LOAD) $(ENZYME) -o $@ -S + $(CLANG) $(BENCH) $^ -pthread -O3 -fno-vectorize -fno-slp-vectorize -fno-unroll-loops -o $@ -S -emit-llvm -%-opt.ll: %-raw.ll - opt $^ -o $@ -S +%-opt.ll: %-unopt.ll + $(OPT) $^ $(LOAD) -passes="$(PASSES2),enzyme" -o $@ -S gmm.o: gmm-opt.ll $(dir)/benchmarks/ReverseMode/gmm/target/release/libgmmrs.a - clang++ -pthread -O2 $^ -o $@ $(BENCHLINK) -lm - #clang++ $(LOADCLANG) $(BENCH) gmm.cpp -I /usr/include/c++/11 -I/usr/include/x86_64-linux-gnu/c++/11 -O2 -o gmm.o -lpthread $(BENCHLINK) -lm -L /usr/lib/gcc/x86_64-linux-gnu/11 + $(CLANG) -pthread -O3 -fno-math-errno $^ -o $@ $(BENCHLINK) -lm results.json: gmm.o numactl -C 1 ./$^ diff --git a/enzyme/benchmarks/ReverseMode/lstm/Makefile.make b/enzyme/benchmarks/ReverseMode/lstm/Makefile.make index 1388a5440ae2..71c6f5b14a46 100644 --- a/enzyme/benchmarks/ReverseMode/lstm/Makefile.make +++ b/enzyme/benchmarks/ReverseMode/lstm/Makefile.make @@ -4,24 +4,44 @@ dir := $(abspath $(lastword $(MAKEFILE_LIST))/../../../..) 
+include $(dir)/benchmarks/ReverseMode/adbench/Makefile.config + +ifeq ($(strip $(CLANG)),) +$(error CLANG is not set) +endif + +ifeq ($(strip $(PASSES1)),) +$(error PASSES1 is not set) +endif + +ifeq ($(strip $(PASSES2)),) +$(error PASSES2 is not set) +endif + +ifeq ($(strip $(PASSES3)),) +$(error PASSES3 is not set) +endif + +ifneq ($(strip $(PASSES4)),) +$(error PASSES4 is set) +endif + clean: rm -f *.ll *.o results.txt results.json cargo +enzyme clean $(dir)/benchmarks/ReverseMode/lstm/target/release/liblstm.a: src/lib.rs Cargo.toml - RUSTFLAGS="-Z autodiff=Enable,LooseTypes" cargo +enzyme rustc --release --lib --crate-type=staticlib + RUSTFLAGS="-Z autodiff=Enable,PrintPasses" cargo +enzyme rustc --release --lib --crate-type=staticlib %-unopt.ll: %.cpp - clang++ $(BENCH) $(PTR) $^ -pthread -O2 -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm - -%-raw.ll: %-unopt.ll - opt $^ $(LOAD) $(ENZYME) -o $@ -S + $(CLANG) $(BENCH) $^ -pthread -O3 -fno-vectorize -fno-slp-vectorize -fno-unroll-loops -o $@ -S -emit-llvm -%-opt.ll: %-raw.ll - opt $^ -o $@ -S +%-opt.ll: %-unopt.ll + $(OPT) $^ $(LOAD) -passes="$(PASSES2),enzyme" -o $@ -S lstm.o: lstm-opt.ll $(dir)/benchmarks/ReverseMode/lstm/target/release/liblstm.a - clang++ -pthread -O2 $^ -o $@ $(BENCHLINK) -lm + $(CLANG) -pthread -O3 $^ -o $@ $(BENCHLINK) -lm + #$(CLANG) -pthread -O3 -fno-math-errno $^ -o $@ $(BENCHLINK) -lm results.json: lstm.o numactl -C 1 ./$^ diff --git a/enzyme/benchmarks/ReverseMode/lstm/src/safe.rs b/enzyme/benchmarks/ReverseMode/lstm/src/safe.rs index d6847a4d5d72..3329ebb2c6ae 100644 --- a/enzyme/benchmarks/ReverseMode/lstm/src/safe.rs +++ b/enzyme/benchmarks/ReverseMode/lstm/src/safe.rs @@ -1,5 +1,6 @@ use std::slice; use std::autodiff::autodiff; +use std::hint::assert_unchecked; // Sigmoid on scalar fn sigmoid(x: f64) -> f64 { @@ -32,11 +33,11 @@ fn lstm_model( let (a, b) = gates.split_at_mut(2 * hsize); let ((forget, ingate), (outgate, change)) = 
(a.split_at_mut(hsize), b.split_at_mut(hsize)); - //debug_assert_eq!(weight.len(), 4 * hsize); - //debug_assert_eq!(bias.len(), 4 * hsize); - //debug_assert_eq!(hidden.len(), hsize); - //debug_assert!(cell.len() >= hsize); - //debug_assert!(input.len() >= hsize); + // unsafe {assert_unchecked(weight.len()== 4 * hsize)}; + // unsafe {assert_unchecked(bias.len()== 4 * hsize)}; + // unsafe {assert_unchecked(hidden.len()== hsize)}; + // unsafe {assert_unchecked(cell.len() >= hsize)}; + // unsafe {assert_unchecked(input.len() >= hsize)}; // caching input for i in 0..hsize { forget[i] = sigmoid(input[i] * weight[i] + bias[i]); @@ -131,7 +132,7 @@ pub(crate) fn lstm_objective( let mut ypred = vec![0.0; b]; let mut ynorm = vec![0.0; b]; - //debug_assert!(b > 0); + // unsafe{assert_unchecked(b > 0)}; let limit = (c - 1) * b; for j in 0..(c - 1) { @@ -156,15 +157,18 @@ pub(crate) fn lstm_objective( #[no_mangle] pub extern "C" fn rust_lstm_objective( - l: usize, - c: usize, - b: usize, + l: i32, + c: i32, + b: i32, main_params: *const f64, extra_params: *const f64, state: *mut f64, sequence: *const f64, loss: *mut f64, ) { + let l = l as usize; + let c = c as usize; + let b = b as usize; let (main_params, extra_params, state, sequence) = unsafe { ( slice::from_raw_parts(main_params, 2 * l * 4 * b), @@ -190,9 +194,9 @@ pub extern "C" fn rust_lstm_objective( #[no_mangle] pub extern "C" fn rust_dlstm_objective( - l: usize, - c: usize, - b: usize, + l: i32, + c: i32, + b: i32, main_params: *const f64, d_main_params: *mut f64, extra_params: *const f64, @@ -202,6 +206,9 @@ pub extern "C" fn rust_dlstm_objective( res: *mut f64, d_res: *mut f64, ) { + let l = l as usize; + let c = c as usize; + let b = b as usize; let (main_params, d_main_params, extra_params, d_extra_params, state, sequence) = unsafe { ( slice::from_raw_parts(main_params, 2 * l * 4 * b), diff --git a/enzyme/benchmarks/ReverseMode/ode-real/Makefile.make b/enzyme/benchmarks/ReverseMode/ode-real/Makefile.make index 
87af95fede78..582ba796458f 100644 --- a/enzyme/benchmarks/ReverseMode/ode-real/Makefile.make +++ b/enzyme/benchmarks/ReverseMode/ode-real/Makefile.make @@ -4,6 +4,28 @@ dir := $(abspath $(lastword $(MAKEFILE_LIST))/../../../..) +include $(dir)/benchmarks/ReverseMode/adbench/Makefile.config + +ifeq ($(strip $(CLANG)),) +$(error CLANG is not set) +endif + +ifeq ($(strip $(PASSES1)),) +$(error PASSES1 is not set) +endif + +ifeq ($(strip $(PASSES2)),) +$(error PASSES2 is not set) +endif + +ifeq ($(strip $(PASSES3)),) +$(error PASSES3 is not set) +endif + +ifneq ($(strip $(PASSES4)),) +$(error PASSES4 is set) +endif + clean: rm -f *.ll *.o results.txt results.json cargo +enzyme clean @@ -12,16 +34,13 @@ $(dir)/benchmarks/ReverseMode/ode-real/target/release/libode.a: src/lib.rs Cargo RUSTFLAGS="-Z autodiff=Enable,LooseTypes" cargo +enzyme rustc --release --lib --crate-type=staticlib %-unopt.ll: %.cpp - clang++ $(BENCH) $(PTR) $^ -O2 -fno-use-cxa-atexit -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm - -%-raw.ll: %-unopt.ll - opt $^ $(LOAD) $(ENZYME) -o $@ -S + $(CLANG) $(BENCH) $^ -pthread -O3 -fno-use-cxa-atexit -fno-vectorize -fno-slp-vectorize -fno-unroll-loops -o $@ -S -emit-llvm -%-opt.ll: %-raw.ll - opt $^ -o $@ -S +%-opt.ll: %-unopt.ll + $(OPT) $^ $(LOAD) -passes="$(PASSES2),enzyme" -o $@ -S ode.o: ode-opt.ll $(dir)/benchmarks/ReverseMode/ode-real/target/release/libode.a - clang++ $(BENCH) -O2 $^ -o $@ $(BENCHLINK) + $(CLANG) -pthread -O3 -fno-math-errno $^ -o $@ $(BENCHLINK) results.json: ode.o numactl -C 1 ./$^ 1000 | tee $@