Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions enzyme/benchmarks/ReverseMode/adbench/Makefile.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Directory that holds the LLVM tools (clang++, opt) used by the benchmark
# builds. The default is a developer-local LLVM build tree; override it per
# machine, e.g. `make LLVM_BIN_DIR=/path/to/llvm/bin`.
LLVM_BIN_DIR ?= /home/manuel/prog/rust-middle/build/x86_64-unknown-linux-gnu/llvm/build/bin

# C++ compiler driver and LLVM optimizer, both taken from LLVM_BIN_DIR.
# Either one can still be overridden individually on the make command line.
CLANG := $(LLVM_BIN_DIR)/clang++
OPT := $(LLVM_BIN_DIR)/opt

PASSES1 := verify,annotation2metadata,forceattrs,inferattrs,coro-early,function<eager-inv>(ee-instrument<>,lower-expect,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,sroa<modify-cfg>,early-cse<>,callsite-splitting),openmp-opt,ipsccp,called-value-propagation,globalopt,function<eager-inv>(mem2reg,instcombine<max-iterations=1;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>),always-inline,require<globals-aa>,function(invalidate<aa>),require<profile-summary>,cgscc(devirt<4>(inline,function-attrs<skip-non-recursive-function-attrs>,argpromotion,openmp-opt-cgscc,function<eager-inv;no-rerun>(sroa<modify-cfg>,early-cse<memssa>,speculative-execution<only-if-divergent-target>,jump-threading,correlated-propagation,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine,libcalls-shrinkwrap,tailcallelim,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,reassociate,constraint-elimination,loop-mssa(loop-instsimplify,loop-simplifycfg,licm<no-allowspeculation>,loop-rotate<header-duplication;prepare-for-lto>,licm<allowspeculation>,simple-loop-unswitch<nontrivial;tr
ivial>),simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>,loop(loop-idiom,indvars,extra-simple-loop-unswitch-passes,loop-deletion,loop-unroll-full),sroa<modify-cfg>,vector-combine,mldst-motion<no-split-footer-bb>,gvn<>,sccp,bdce,instcombine<max-iterations=1;no-verify-fixpoint>,jump-threading,correlated-propagation,adce,memcpyopt,dse,move-auto-init,loop-mssa(licm<allowspeculation>),coro-elide,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,instcombine<max-iterations=1;no-verify-fixpoint>),function-attrs,function(require<should-not-run-function-passes>),coro-split,coro-annotation-elide)),deadargelim,coro-cleanup,globalopt,globaldce,rpo-function-attrs,recompute-globalsaa,function<eager-inv>(float2int,lower-constant-intrinsics,chr,loop(loop-rotate<header-duplication;prepare-for-lto>,loop-deletion),loop-distribute,inject-tli-mappings,loop-vectorize<interleave-forced-only;vectorize-forced-only;>,infer-alignment,loop-load-elim,instcombine<max-iterations=1;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,vector-combine,instcombine<max-iterations=1;no-verify-fixpoint>,loop-unroll<O3>,transform-warning,sroa<preserve-cfg>,infer-alignment,instcombine<max-iterations=1;no-verify-fixpoint>,loop-mssa(licm<allowspeculation>),alignment-from-assumptions,loop-sink,instsimplify,div-rem-pairs,tailcallelim,simpl
ifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;no-hoist-common-insts;hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>),globaldce,constmerge,function(annotation-remarks),canonicalize-aliases,name-anon-globals,verify

PASSES2 := cross-dso-cfi,openmp-opt,globaldce<vfe-linkage-unit-visibility>,inferattrs,function<eager-inv>(callsite-splitting),pgo-icall-prom,cgscc(function-attrs,argpromotion,function(sroa<modify-cfg>)),ipsccp,called-value-propagation,rpo-function-attrs,globalsplit,wholeprogramdevirt,globalopt,function(mem2reg),constmerge,deadargelim,function<eager-inv>(instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine),expand-variadics,cgscc(inline<only-mandatory>,inline),globalopt,openmp-opt,globaldce<vfe-linkage-unit-visibility>,cgscc(argpromotion),function<eager-inv>(instcombine<max-iterations=1;no-verify-fixpoint>,constraint-elimination,jump-threading,sroa<modify-cfg>,tailcallelim),cgscc(function-attrs),require<globals-aa>,function(invalidate<aa>),cgscc(openmp-opt-cgscc),function<eager-inv>(loop-mssa(licm<allowspeculation>),gvn<>,memcpyopt,dse,move-auto-init,mldst-motion<no-split-footer-bb>,loop(indvars,loop-deletion,loop-unroll-full),loop-distribute,loop-vectorize<interleave-forced-only;vectorize-forced-only;>,infer-alignment,loop-unroll<O3>,transform-warning,sroa<preserve-cfg>,instcombine<max-iterations=1;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,sccp,instcombine<max-iterations=1;no-verify-fixpoint>,bdce,vector-combine,infer-alignment,instcombine<max-iterations=1;no-verify-fixpoint>,loop-mssa(licm<allowspeculation>),alignment-from-assumptions,jump-threading),lowertypetests,lowertypetests,function(loop-sink,div-rem-pairs,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>),elim-avail-extern,globaldce<vfe-linkage-unit-visibility>,r
el-lookup-table-converter,cg-profile,function(annotation-remarks),canonicalize-aliases,name-anon-globals
#PASSES2 := cross-dso-cfi,openmp-opt,globaldce<vfe-linkage-unit-visibility>,inferattrs,function<eager-inv>(callsite-splitting),pgo-icall-prom,cgscc(function-attrs,argpromotion,function(sroa<modify-cfg>)),ipsccp,called-value-propagation,rpo-function-attrs,globalsplit,wholeprogramdevirt,globalopt,function(mem2reg),constmerge,deadargelim,function<eager-inv>(instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine),expand-variadics,cgscc(inline<only-mandatory>,inline),globalopt,openmp-opt,globaldce<vfe-linkage-unit-visibility>,cgscc(argpromotion),function<eager-inv>(instcombine<max-iterations=1;no-verify-fixpoint>,constraint-elimination,jump-threading,sroa<modify-cfg>,tailcallelim),cgscc(function-attrs),require<globals-aa>,function(invalidate<aa>),cgscc(openmp-opt-cgscc),function<eager-inv>(loop-mssa(licm<allowspeculation>),gvn<>,memcpyopt,dse,move-auto-init,mldst-motion<no-split-footer-bb>,loop(indvars,loop-deletion,loop-unroll-full),loop-distribute,loop-vectorize<interleave-forced-only;vectorize-forced-only;>,infer-alignment,loop-unroll<O3>,transform-warning,sroa<preserve-cfg>,instcombine<max-iterations=1;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,sccp,instcombine<max-iterations=1;no-verify-fixpoint>,bdce,vector-combine,infer-alignment,instcombine<max-iterations=1;no-verify-fixpoint>,loop-mssa(licm<allowspeculation>),alignment-from-assumptions,jump-threading),lowertypetests,lowertypetests,function(loop-sink,div-rem-pairs,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>),elim-avail-extern,globaldce<vfe-linkage-unit-visibility>,
rel-lookup-table-converter,cg-profile,function(annotation-remarks),canonicalize-aliases,name-anon-globals,EnzymeNewPM

PASSES3 := cross-dso-cfi,openmp-opt,globaldce<vfe-linkage-unit-visibility>,inferattrs,function<eager-inv>(callsite-splitting),pgo-icall-prom,cgscc(function-attrs,argpromotion,function(sroa<modify-cfg>)),ipsccp,called-value-propagation,rpo-function-attrs,globalsplit,wholeprogramdevirt,globalopt,function(mem2reg),constmerge,deadargelim,function<eager-inv>(instcombine<max-iterations=1;no-verify-fixpoint>,aggressive-instcombine),expand-variadics,cgscc(inline<only-mandatory>,inline),globalopt,openmp-opt,globaldce<vfe-linkage-unit-visibility>,cgscc(argpromotion),function<eager-inv>(instcombine<max-iterations=1;no-verify-fixpoint>,constraint-elimination,jump-threading,sroa<modify-cfg>,tailcallelim),cgscc(function-attrs),require<globals-aa>,function(invalidate<aa>),cgscc(openmp-opt-cgscc),function<eager-inv>(loop-mssa(licm<allowspeculation>),gvn<>,memcpyopt,dse,move-auto-init,mldst-motion<no-split-footer-bb>,loop(indvars,loop-deletion,loop-unroll-full),loop-distribute,loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,infer-alignment,loop-unroll<O3>,transform-warning,sroa<preserve-cfg>,instcombine<max-iterations=1;no-verify-fixpoint>,simplifycfg<bonus-inst-threshold=1;forward-switch-cond;switch-range-to-icmp;switch-to-lookup;no-keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;no-speculate-unpredictables>,sccp,instcombine<max-iterations=1;no-verify-fixpoint>,bdce,slp-vectorizer,vector-combine,infer-alignment,instcombine<max-iterations=1;no-verify-fixpoint>,loop-mssa(licm<allowspeculation>),alignment-from-assumptions,jump-threading),lowertypetests,lowertypetests,function(loop-sink,div-rem-pairs,simplifycfg<bonus-inst-threshold=1;no-forward-switch-cond;switch-range-to-icmp;no-switch-to-lookup;keep-loops;hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>),elim-avail-extern,globaldce<vfe-linka
ge-unit-visibility>,mergefunc,rel-lookup-table-converter,cg-profile,function(annotation-remarks),canonicalize-aliases,name-anon-globals
4 changes: 2 additions & 2 deletions enzyme/benchmarks/ReverseMode/adbench/ba.h
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ int main(const int argc, const char* argv[]) {
}
}

{
for (int j=0;j<5;j++) {

struct BAInput input;
read_ba_instance("data/" + path, input.n, input.m, input.p, input.cams,
Expand Down Expand Up @@ -659,7 +659,7 @@ int main(const int argc, const char* argv[]) {
}
}

{
for(int j=0;j<5;j++){

struct BAInput input;
read_ba_instance("data/" + path, input.n, input.m, input.p, input.cams,
Expand Down
14 changes: 4 additions & 10 deletions enzyme/benchmarks/ReverseMode/adbench/gmm.h
Original file line number Diff line number Diff line change
Expand Up @@ -213,17 +213,11 @@ int main(const int argc, const char* argv[]) {

std::vector<std::string> paths = { "10k/gmm_d10_K200.txt" };

//getTests(paths, "data/1k", "1k/");
if (std::getenv("BENCH_LARGE")) {
getTests(paths, "data/2.5k", "2.5k/");
getTests(paths, "data/10k", "10k/");
}

getTests(paths, "data/1k", "1k/");
if (std::getenv("BENCH_LARGE")) {
//if (std::getenv("BENCH_LARGE")) {
getTests(paths, "data/2.5k", "2.5k/");
getTests(paths, "data/10k", "10k/");
}
//}

std::ofstream jsonfile("results.json", std::ofstream::trunc);
json test_results;
Expand Down Expand Up @@ -274,7 +268,7 @@ int main(const int argc, const char* argv[]) {

struct GMMOutput result = { 0, std::vector<double>(Jcols) };

//if (0) {
if (0) {
try {
struct timeval start, end;
gettimeofday(&start, NULL);
Expand All @@ -294,7 +288,7 @@ int main(const int argc, const char* argv[]) {
} catch (std::bad_alloc) {
printf("Adept combined 88888888 ooms\n");
}
//}
}
}

for (size_t i = 0; i < 5; i++)
Expand Down
10 changes: 5 additions & 5 deletions enzyme/benchmarks/ReverseMode/adbench/lstm.h
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,8 @@ double calculate_safe_primal(struct LSTMInput &input) {
int main(const int argc, const char* argv[]) {
printf("starting main\n");

//std::vector<std::string> paths = { "lstm_l2_c1024.txt", "lstm_l4_c1024.txt", "lstm_l2_c4096.txt", "lstm_l4_c4096.txt" };
std::vector<std::string> paths = { "lstm_l4_c4096.txt" };
std::vector<std::string> paths = { "lstm_l2_c1024.txt", "lstm_l4_c1024.txt", "lstm_l2_c4096.txt", "lstm_l4_c4096.txt" };
//std::vector<std::string> paths = { "lstm_l4_c4096.txt" };

std::ofstream jsonfile("results.json", std::ofstream::trunc);
json test_results;
Expand Down Expand Up @@ -289,7 +289,7 @@ int main(const int argc, const char* argv[]) {

}

{
if (0){

struct LSTMInput input = {};

Expand Down Expand Up @@ -323,7 +323,7 @@ int main(const int argc, const char* argv[]) {

}

{
for (int j=0; j<5; j++){

struct LSTMInput input = {};

Expand Down Expand Up @@ -390,7 +390,7 @@ int main(const int argc, const char* argv[]) {
}
}

{
for (int j=0; j<5; j++){

struct LSTMInput input = {};

Expand Down
33 changes: 26 additions & 7 deletions enzyme/benchmarks/ReverseMode/ba/Makefile.make
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,28 @@

dir := $(abspath $(lastword $(MAKEFILE_LIST))/../../../..)

include $(dir)/benchmarks/ReverseMode/adbench/Makefile.config

# Validate the settings expected from adbench/Makefile.config at parse time,
# so a missing value stops the build with a message naming the right variable.
# CLANG and OPT are the tools invoked by the recipes; PASSES1..PASSES3 are the
# opt pass pipelines. A single loop keeps each message tied to the variable
# that was actually tested.
$(foreach v,CLANG OPT PASSES1 PASSES2 PASSES3,$(if $(strip $($(v))),,$(error $(v) is not set)))

# PASSES4 must stay undefined: a non-empty value aborts the build.
# NOTE(review): presumably a guard against a stale or mismatched config file - confirm.
ifneq ($(strip $(PASSES4)),)
$(error PASSES4 is set)
endif

clean:
rm -f *.ll *.o results.txt results.json
cargo +enzyme clean
Expand All @@ -12,16 +34,13 @@ $(dir)/benchmarks/ReverseMode/ba/target/release/libbars.a: src/lib.rs Cargo.toml
RUSTFLAGS="-Z autodiff=Enable" cargo +enzyme rustc --release --lib --crate-type=staticlib --features=libm

%-unopt.ll: %.cpp
clang++ $(BENCH) $(PTR) $^ -pthread -O2 -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm

%-raw.ll: %-unopt.ll
opt $^ $(LOAD) $(ENZYME) -o $@ -S
$(CLANG) $(BENCH) $^ -pthread -O3 -fno-vectorize -fno-slp-vectorize -fno-unroll-loops -o $@ -S -emit-llvm

%-opt.ll: %-raw.ll
opt $^ -o $@ -S
%-opt.ll: %-unopt.ll
$(OPT) $^ $(LOAD) -passes="$(PASSES2),enzyme" -o $@ -S

ba.o: ba-opt.ll $(dir)/benchmarks/ReverseMode/ba/target/release/libbars.a
clang++ $(BENCH) -pthread -O2 $^ -I /usr/include/c++/11 -I/usr/include/x86_64-linux-gnu/c++/11 -O2 -o $@ $(BENCHLINK) -lpthread -lm -L /usr/lib/gcc/x86_64-linux-gnu/11
$(CLANG) -pthread -O3 -fno-math-errno $^ -o $@ $(BENCHLINK) -lm

results.json: ba.o
numactl -C 1 ./$^
11 changes: 10 additions & 1 deletion enzyme/benchmarks/ReverseMode/ba/ba.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,15 @@ void radial_distort(double const* rad_params, double *proj)
proj[1] = proj[1] * L;
}

// Scales the 2-D point `proj` in place by the radial factor
//   1 + rad_params[0] * r2 + rad_params[1] * r2^2,
// where r2 = sqsum(2, proj) (presumably the squared norm of proj[0..1];
// sqsum is defined elsewhere in this file).
// Both pointers are declared __restrict, i.e. the caller promises that
// rad_params and proj do not alias.
void radial_distort_restrict(double const *__restrict rad_params, double *__restrict proj)
{
    const double r2 = sqsum(2, proj);
    const double scale = 1. + rad_params[0] * r2 + rad_params[1] * r2 * r2;

    proj[0] *= scale;
    proj[1] *= scale;
}

void project_restrict(double const *__restrict cam, double const *__restrict X,
double *__restrict proj) {
double const* C = &cam[3];
Expand All @@ -129,7 +138,7 @@ void project_restrict(double const *__restrict cam, double const *__restrict X,
proj[0] = Xcam[0] / Xcam[2];
proj[1] = Xcam[1] / Xcam[2];

radial_distort(&cam[9], proj);
radial_distort_restrict(&cam[9], proj);

proj[0] = proj[0] * cam[6] + cam[7];
proj[1] = proj[1] * cam[6] + cam[8];
Expand Down
9 changes: 6 additions & 3 deletions enzyme/benchmarks/ReverseMode/ba/src/safe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,9 @@ fn rust_ba_objective(

#[no_mangle]
extern "C" fn rust2_ba_objective(
n: usize,
m: usize,
p: usize,
n: i32,
m: i32,
p: i32,
cams: *const f64,
x: *const f64,
w: *const f64,
Expand All @@ -193,6 +193,9 @@ extern "C" fn rust2_ba_objective(
reproj_err: *mut f64,
w_err: *mut f64,
) {
let n = n as usize;
let m = m as usize;
let p = p as usize;
let cams = unsafe { std::slice::from_raw_parts(cams, n * 11) };
let x = unsafe { std::slice::from_raw_parts(x, m * 3) };
let w = unsafe { std::slice::from_raw_parts(w, p) };
Expand Down
9 changes: 6 additions & 3 deletions enzyme/benchmarks/ReverseMode/ba/src/unsafe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,9 @@ pub unsafe fn compute_reproj_error(

#[no_mangle]
unsafe extern "C" fn rust2_unsafe_ba_objective(
n: usize,
m: usize,
p: usize,
n: i32,
m: i32,
p: i32,
cams: *const f64,
x: *const f64,
w: *const f64,
Expand All @@ -121,6 +121,9 @@ unsafe extern "C" fn rust2_unsafe_ba_objective(
reproj_err: *mut f64,
w_err: *mut f64,
) {
let n = n as usize;
let m = m as usize;
let p = p as usize;
for i in 0..p {
let cam_idx = *obs.add(i * 2 + 0) as usize;
let pt_idx = *obs.add(i * 2 + 1) as usize;
Expand Down
44 changes: 35 additions & 9 deletions enzyme/benchmarks/ReverseMode/fft/Makefile.make
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,50 @@

dir := $(abspath $(lastword $(MAKEFILE_LIST))/../../../..)

include $(dir)/benchmarks/ReverseMode/adbench/Makefile.config

# Validate the settings expected from adbench/Makefile.config at parse time,
# so a missing value stops the build with a message naming the right variable.
# CLANG and OPT are the tools invoked by the recipes; PASSES1..PASSES3 are the
# opt pass pipelines. A single loop keeps each message tied to the variable
# that was actually tested.
$(foreach v,CLANG OPT PASSES1 PASSES2 PASSES3,$(if $(strip $($(v))),,$(error $(v) is not set)))

# PASSES4 must stay undefined: a non-empty value aborts the build.
# NOTE(review): presumably a guard against a stale or mismatched config file - confirm.
ifneq ($(strip $(PASSES4)),)
$(error PASSES4 is set)
endif

clean:
rm -f *.ll *.o results.txt results.json

$(dir)/benchmarks/ReverseMode/fft/target/release/libfft.a: src/lib.rs Cargo.toml
RUSTFLAGS="-Z autodiff=Enable" cargo +enzyme rustc --release --lib --crate-type=staticlib

%-unopt.ll: %.cpp
clang++ $(BENCH) $(PTR) $^ -pthread -O2 -fno-use-cxa-atexit -fno-vectorize -fno-slp-vectorize -ffast-math -fno-unroll-loops -o $@ -S -emit-llvm
$(CLANG) $(BENCH) $^ -DCPP=1 -fno-math-errno -fno-plt -pthread -O3 -fno-vectorize -fno-slp-vectorize -fno-unroll-loops -o $@ -S -emit-llvm #-fno-use-cxa-atexit
%-unoptr.ll: %.cpp
$(CLANG) $(BENCH) $^ -fno-math-errno -fno-plt -pthread -O3 -fno-vectorize -fno-slp-vectorize -fno-unroll-loops -o $@ -S -emit-llvm #-fno-use-cxa-atexit

%-raw.ll: %-unopt.ll
opt $^ $(LOAD) $(ENZYME) -o $@ -S

%-opt.ll: %-raw.ll
opt $^ -o $@ -S
%-opt.ll: %-unopt.ll
$(OPT) $^ $(LOAD) -passes="$(PASSES2),enzyme" -o $@ -S
%-optr.ll: %-unoptr.ll
$(OPT) $^ $(LOAD) -passes="$(PASSES2),enzyme" -o $@ -S

fft.o: fft-opt.ll $(dir)/benchmarks/ReverseMode/fft/target/release/libfft.a
clang++ $(BENCH) -pthread -O2 $^ -o $@ $(BENCHLINK) -lpthread -lm -L /usr/lib/gcc/x86_64-linux-gnu/11
#clang++ $(LOAD) $(BENCH) fft.cpp -I /usr/include/c++/11 -I/usr/include/x86_64-linux-gnu/c++/11 -O2 -o fft.o -lpthread $(BENCHLINK) -lm -L /usr/lib/gcc/x86_64-linux-gnu/11
$(CLANG) -DCPP=1 -pthread -O3 -fno-math-errno -fno-plt -lpthread -lm $^ -o $@ $(BENCHLINK) -lm
fftr.o: fft-optr.ll $(dir)/benchmarks/ReverseMode/fft/target/release/libfft.a
$(CLANG) -pthread -O3 -fno-math-errno -fno-plt -lpthread -lm $^ -o $@ $(BENCHLINK) -lm

results.json: fft.o
./$^ 1048576 | tee $@
results.json: fftr.o fft.o
numactl -C 1 ./fft.o 1048576 | tee results.json
numactl -C 1 ./fftr.o 1048576 | tee resultsr.json
Loading
Loading