diff --git a/.github/workflows/check-all.yml b/.github/workflows/check-all.yml index ad68010c..7dae7547 100644 --- a/.github/workflows/check-all.yml +++ b/.github/workflows/check-all.yml @@ -26,7 +26,7 @@ jobs: run: | wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add - sudo apt-add-repository "deb http://apt.llvm.org/`lsb_release -c | cut -f2`/ llvm-toolchain-`lsb_release -c | cut -f2`-${{ matrix.llvm }} main" || true - sudo apt-get install -y cmake gcc g++ llvm-${{ matrix.llvm }}-dev libclang-${{ matrix.llvm }}-dev clang-${{ matrix.llvm }} lld-${{ matrix.llvm }} mlir-${{ matrix.llvm }}-tools libmlir-${{ matrix.llvm }} libmlir-${{ matrix.llvm }}-dev libflang-${{ matrix.llvm }}-dev flang-${{ matrix.llvm }} libzstd-dev libmpfr-dev + sudo apt-get install -y cmake gcc g++ llvm-${{ matrix.llvm }}-dev libclang-${{ matrix.llvm }}-dev clang-${{ matrix.llvm }} lld-${{ matrix.llvm }} mlir-${{ matrix.llvm }}-tools libmlir-${{ matrix.llvm }} libmlir-${{ matrix.llvm }}-dev libflang-${{ matrix.llvm }}-dev flang-${{ matrix.llvm }} libzstd-dev libmpfr-dev libomp-${{ matrix.llvm }}-dev sudo python3 -m pip install --upgrade pip lit - uses: actions/checkout@v4 - name: mkdir diff --git a/Readme.md b/Readme.md index 9881f3e0..ce2f614f 100644 --- a/Readme.md +++ b/Readme.md @@ -15,6 +15,11 @@ To install the dependencies on Debian and Ubuntu, [this repository](https://apt. sudo apt-get install -y cmake gcc g++ llvm-20-dev libclang-20-dev clang-20 lld-20 mlir-20-tools libmlir-20 libmlir-20-dev libflang-20-dev flang-20 libmpfr-dev ``` +LLVM can also be installed using spack as such: +``` +spack install llvm+clang+flang+lld+mlir@20 +``` + ## Building ``` shell diff --git a/pass/Raptor.cpp b/pass/Raptor.cpp index d22bbb2a..9174051b 100644 --- a/pass/Raptor.cpp +++ b/pass/Raptor.cpp @@ -564,7 +564,8 @@ class RaptorBase { auto [Truncation, NumArgsParsed] = parseTruncation(CI, Mode, 1); RequestContext context(CI, &Builder); - llvm::Value *res = Logic.CreateTruncateFunc(context, F, Truncation, Mode); + llvm::Value *res = Logic.CreateTruncateFunc( + context, F, TruncationConfiguration::getInitial(Truncation, Mode)); if (!res) return false; res = Builder.CreatePointerCast(res, CI->getType()); @@ -696,8 +697,10 @@ class RaptorBase { for (auto Truncation : FullModuleTruncs) { IRBuilder<> Builder(F.getContext()); RequestContext context(&*F.getEntryBlock().begin(), &Builder); - Function *TruncatedFunc = Logic.CreateTruncateFunc( - context, &F, Truncation, TruncOpFullModuleMode); + Function *TruncatedFunc = + Logic.CreateTruncateFunc(context, &F, + TruncationConfiguration::getInitial( + Truncation, TruncOpFullModuleMode)); ValueToValueMapTy Mapping; for (auto &&[Arg, TArg] : llvm::zip(F.args(), TruncatedFunc->args())) diff --git a/pass/RaptorLogic.cpp b/pass/RaptorLogic.cpp index 649c61c1..6b56612e 100644 --- a/pass/RaptorLogic.cpp +++ b/pass/RaptorLogic.cpp @@ -25,6 +25,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/Utils/Instrumentation.h" +#include #include #include @@ -419,11 +420,11 @@ class TruncateGenerator : public llvm::InstVisitor, LLVMContext &Ctx; public: - TruncateGenerator(ValueToValueMapTy &originalToNewFn, - FloatTruncation Truncation, Function *oldFunc, - Function *newFunc, RaptorLogic &Logic, bool Root) - : TruncateUtils(Truncation, newFunc->getParent(), Logic), - OriginalToNewFn(originalToNewFn), Truncation(Truncation), + TruncateGenerator(ValueToValueMapTy &originalToNewFn, Function *oldFunc, + Function *newFunc, RaptorLogic &Logic, + TruncationConfiguration TC) + : TruncateUtils(TC.Truncation, newFunc->getParent(), Logic), + OriginalToNewFn(originalToNewFn), Truncation(TC.Truncation), Mode(Truncation.getMode()), Logic(Logic), Ctx(newFunc->getContext()) { auto AllocScratch = [&]() { @@ -440,28 +441,36 @@ class TruncateGenerator : public llvm::InstVisitor, // TODO should be the callsite or the function location itself Value *Loc = getUniquedLocStr( &*newFunc->getEntryBlock().getFirstNonPHIOrDbgOrAlloca()); - createFPRTGeneric(B, TruncChangeName, changePushArgs, B.getVoidTy(), Loc); - scratch = createFPRTGeneric(B, GetName, scratchArgs, B.getPtrTy(), Loc); + if (TC.NeedTruncChange) + createFPRTGeneric(B, TruncChangeName, changePushArgs, B.getVoidTy(), + Loc); + if (TC.NeedNewScratch) + scratch = createFPRTGeneric(B, GetName, scratchArgs, B.getPtrTy(), Loc); for (auto &BB : *newFunc) { if (ReturnInst *ret = dyn_cast(BB.getTerminator())) { B.SetInsertPoint(ret); - createFPRTGeneric(B, FreeName, scratchArgs, B.getPtrTy(), Loc); - createFPRTGeneric(B, "trunc_change", changePopArgs, B.getVoidTy(), - Loc); + if (TC.NeedNewScratch) + createFPRTGeneric(B, FreeName, scratchArgs, B.getPtrTy(), Loc); + if (TC.NeedTruncChange) + createFPRTGeneric(B, "trunc_change", changePopArgs, B.getVoidTy(), + Loc); } } }; if (Truncation.isToFPRT()) { if (Mode == TruncOpMode) { - if (Root) { + if (TC.NeedTruncChange || TC.NeedNewScratch) AllocScratch(); - } else { + if (!TC.NeedNewScratch) { // make sure we passed in `void *scratch` as the final parameter assert(newFunc->arg_size() == oldFunc->arg_size() + 1); scratch = newFunc->getArg(newFunc->arg_size() - 1); assert(scratch->getType()->isPointerTy()); } } else if (Mode == TruncOpFullModuleMode) { + assert(TC.NeedNewScratch); + assert(!TC.NeedTruncChange); + // TODO we need to do a call to trunc_change in the module constructor AllocScratch(); } } @@ -833,94 +842,44 @@ class TruncateGenerator : public llvm::InstVisitor, return cast(getNewFromOriginal((llvm::Value *)v)); } - bool handleKnownCalls(llvm::CallBase &call, llvm::Function *called, - llvm::StringRef funcName, - llvm::CallBase *const newCall) { - return false; - } - - Value *GetShadow(RequestContext &ctx, Value *v, bool root) { + Value *GetShadow(RequestContext &ctx, Value *v, bool WillPassScratch) { if (auto F = dyn_cast(v)) - return Logic.CreateTruncateFunc(ctx, F, Truncation, Mode, root); + return Logic.CreateTruncateFunc( + ctx, F, + TruncationConfiguration{Truncation, Mode, !WillPassScratch, false, + WillPassScratch}); llvm::errs() << " unknown get truncated func: " << *v << "\n"; llvm_unreachable("unknown get truncated func"); return v; } - // void visitInvokeInst(llvm::InvokeInst &CI) { - // // fprintf(stderr, "Won't handle invoke instruction.\n"); - // EmitWarning("FPNoInvoke", CI, - // "Will not handle invoke instruction.", CI); - // } - - // Return - void visitCallBase(llvm::CallBase &CI) { - Intrinsic::ID ID; - StringRef funcName = getFuncNameFromCall(const_cast(&CI)); - if (isMemFreeLibMFunction(funcName, &ID)) - if (handleIntrinsic(CI, ID)) - return; - using namespace llvm; - - CallBase *const newCall = cast(getNewFromOriginal(&CI)); - IRBuilder<> BuilderZ(newCall); - - if (auto called = CI.getCalledFunction()) - if (handleKnownCalls(CI, called, getFuncNameFromCall(&CI), newCall)) - return; - - // if (!newCall->getDebugLoc()) { - // Function *ContainingF = newCall->getFunction(); - // newCall->setDebugLoc(DILocation::get(ContainingF->getContext(), 0, 0, - // ContainingF->getSubprogram())); - // } + struct FunctionToTrunc { + Function *Func; + bool IsCallback; + unsigned ArgNo; + unsigned getCallbackArgNo() { + assert(isCallbackFunc()); + return ArgNo; + } + bool isCallbackFunc() { return IsCallback; } + }; - if (Mode == TruncOpMode || Mode == TruncMemMode) { - RequestContext ctx(&CI, &BuilderZ); - Function *Func = CI.getCalledFunction(); - if (Func && !Func->empty()) { - bool truncOpIgnore = Func->getName().contains("raptor_trunc_op_ignore"); - bool truncMemIgnore = - Func->getName().contains("raptor_trunc_mem_ignore"); - bool truncIgnore = Func->getName().contains("raptor_trunc_ignore"); - truncIgnore |= truncOpIgnore && Mode == TruncOpMode; - truncIgnore |= truncMemIgnore && Mode == TruncMemMode; - if (!truncIgnore) { - if (scratch && Mode == TruncOpMode && isa(&CI)) { - auto val = GetShadow(ctx, getNewFromOriginal(CI.getCalledOperand()), - false); - Function *F = cast(val); - IRBuilder<> B(newCall); - SmallVector args(newCall->args()); - args.push_back(scratch); - CallInst *newNewCall = B.CreateCall(F, args); - newNewCall->copyMetadata(*newCall); - newNewCall->copyIRFlags(newCall); - newNewCall->setAttributes(newCall->getAttributes()); - newNewCall->setCallingConv(newCall->getCallingConv()); - // newNewCall->setTailCallKind(newCall->getTailCallKind()); - newNewCall->setDebugLoc(newCall->getDebugLoc()); - newCall->replaceAllUsesWith(newNewCall); - newCall->eraseFromParent(); - // TODO not sure if we need to change the originalToNewFn mapping. - } else { - auto val = - GetShadow(ctx, getNewFromOriginal(CI.getCalledOperand()), true); - newCall->setCalledOperand(val); - } - } - } else if (!Func) { + SmallVector getFunctionToTruncate(llvm::CallBase &CI) { + SmallVector ToTrunc; + auto MaybeInsert = [&](Function *F, bool IsCallback, unsigned ArgNo = 0) { + if (!F) { switch (Mode) { case TruncMemMode: case TruncOpMode: - // fprintf(stderr, "Won't follow indirect call.\n"); EmitWarning("FPNoFollow", CI, "Will not follow FP through this indirect call.", CI); break; default: llvm_unreachable("Unknown trunc mode"); } - } else { + return; + } + if (F->isDeclaration()) { switch (Mode) { case TruncMemMode: EmitWarning("FPNoFollow", CI, @@ -937,9 +896,87 @@ class TruncateGenerator : public llvm::InstVisitor, default: llvm_unreachable("Unknown trunc mode"); } + return; + } + ToTrunc.push_back(FunctionToTrunc{F, IsCallback, ArgNo}); + }; + + Function *Callee = CI.getCalledFunction(); + MaybeInsert(Callee, false); + + if (!Callee) + return ToTrunc; + if (!Callee->isDeclaration()) + return ToTrunc; + + MDNode *CallbackMD = Callee->getMetadata(LLVMContext::MD_callback); + if (CallbackMD) { + for (const MDOperand &Op : CallbackMD->operands()) { + MDNode *OpMD = cast(Op.get()); + auto *CBCalleeIdxAsCM = cast(OpMD->getOperand(0)); + uint64_t CBCalleeIdx = + cast(CBCalleeIdxAsCM->getValue())->getZExtValue(); + MaybeInsert(dyn_cast(CI.getArgOperand(CBCalleeIdx)), true, + CBCalleeIdx); + } + } + + return ToTrunc; + } + + // Return + void visitCallBase(llvm::CallBase &CI) { + Intrinsic::ID ID; + StringRef funcName = getFuncNameFromCall(const_cast(&CI)); + if (isMemFreeLibMFunction(funcName, &ID)) + if (handleIntrinsic(CI, ID)) + return; + + using namespace llvm; + + CallBase *const newCall = cast(getNewFromOriginal(&CI)); + IRBuilder<> BuilderZ(newCall); + + if (Mode != TruncOpMode && Mode != TruncMemMode) + return; + + RequestContext ctx(&CI, &BuilderZ); + auto FTTs = getFunctionToTruncate(CI); + auto NeedDirectCall = [&](auto FTT) { + return scratch && Mode == TruncOpMode && isa(&CI) && + !FTT.isCallbackFunc(); + }; + for (auto &FTT : FTTs) { + assert(FTT.Func && !FTT.Func->empty()); + if (!NeedDirectCall(FTT)) { + auto val = GetShadow(ctx, getNewFromOriginal(FTT.Func), false); + if (FTT.isCallbackFunc()) { + newCall->setArgOperand(FTT.getCallbackArgNo(), val); + } else { + newCall->setCalledOperand(val); + } + } + } + for (auto &FTT : FTTs) { + assert(FTT.Func && !FTT.Func->empty()); + if (NeedDirectCall(FTT)) { + auto val = GetShadow(ctx, getNewFromOriginal(FTT.Func), true); + Function *F = cast(val); + IRBuilder<> B(newCall); + SmallVector args(newCall->args()); + args.push_back(scratch); + CallInst *newNewCall = B.CreateCall(F, args); + newNewCall->copyMetadata(*newCall); + newNewCall->copyIRFlags(newCall); + newNewCall->setAttributes(newCall->getAttributes()); + newNewCall->setCallingConv(newCall->getCallingConv()); + // newNewCall->setTailCallKind(newCall->getTailCallKind()); + newNewCall->setDebugLoc(newCall->getDebugLoc()); + newCall->replaceAllUsesWith(newNewCall); + newCall->eraseFromParent(); + // TODO not sure if we need to change the originalToNewFn mapping. } } - return; } void visitPHINode(llvm::PHINode &PN) { switch (Mode) { @@ -1011,9 +1048,8 @@ bool RaptorLogic::CountInFunc(llvm::Function *F, FloatRepresentation FR) { llvm::Function *RaptorLogic::CreateTruncateFunc(RequestContext Context, llvm::Function *ToTrunc, - FloatTruncation Truncation, - TruncateMode Mode, bool Root) { - TruncateCacheKey tup(ToTrunc, Truncation, Mode, Root); + TruncationConfiguration TC) { + TruncateCacheKey tup(ToTrunc, TC); if (TruncateCachedFunctions.find(tup) != TruncateCachedFunctions.end()) { return TruncateCachedFunctions.find(tup)->second; } @@ -1027,7 +1063,7 @@ llvm::Function *RaptorLogic::CreateTruncateFunc(RequestContext Context, Params.push_back(OrigFTy->getParamType(i)); } - if (Mode == TruncOpMode && !Root) { + if (TC.ScratchFromArgs) { // void *scratch Params.push_back(B.getPtrTy()); } @@ -1035,13 +1071,12 @@ llvm::Function *RaptorLogic::CreateTruncateFunc(RequestContext Context, Type *NewTy = ToTrunc->getReturnType(); FunctionType *FTy = FunctionType::get(NewTy, Params, ToTrunc->isVarArg()); - std::string truncName = - std::string("__raptor_done_truncate_") + truncateModeStr(Mode) + - "_func_" + Truncation.mangleTruncation() + "_" + ToTrunc->getName().str(); + std::string truncName = std::string("__raptor_done_truncate_") + TC.mangle() + + "_" + ToTrunc->getName().str(); Function *NewF = Function::Create(FTy, ToTrunc->getLinkage(), truncName, ToTrunc->getParent()); - if (Mode != TruncOpFullModuleMode) + if (TC.Mode != TruncOpFullModuleMode) NewF->setLinkage(Function::LinkageTypes::InternalLinkage); TruncateCachedFunctions[tup] = NewF; @@ -1090,8 +1125,7 @@ llvm::Function *RaptorLogic::CreateTruncateFunc(RequestContext Context, NewF->setLinkage(Function::LinkageTypes::InternalLinkage); - TruncateGenerator Handle(originalToNewFn, Truncation, ToTrunc, NewF, *this, - Root); + TruncateGenerator Handle(originalToNewFn, ToTrunc, NewF, *this, TC); for (auto &BB : *ToTrunc) for (auto &I : BB) Handle.visit(&I); diff --git a/pass/RaptorLogic.h b/pass/RaptorLogic.h index 7236c55b..b0b2c367 100644 --- a/pass/RaptorLogic.h +++ b/pass/RaptorLogic.h @@ -288,6 +288,44 @@ struct FloatTruncation { std::string mangleFrom() const { return From.getMangling(); } }; +class TruncationConfiguration { +public: + FloatTruncation Truncation; + TruncateMode Mode; + bool NeedNewScratch; + bool NeedTruncChange; + bool ScratchFromArgs; + std::string mangle() { + return std::string(truncateModeStr(Mode)) + "_func_" + + Truncation.mangleTruncation() + "_" + + std::to_string(NeedTruncChange) + "_" + + std::to_string(NeedNewScratch) + "_" + + std::to_string(ScratchFromArgs); + } + static auto toTuple(const TruncationConfiguration &TC) { + return std::tuple(TC.Truncation, TC.Mode, TC.NeedNewScratch, + TC.NeedTruncChange, TC.ScratchFromArgs); + } + bool operator==(const TruncationConfiguration &Other) const { + return toTuple(*this) == toTuple(Other); + } + bool operator<(const TruncationConfiguration &Other) const { + return toTuple(*this) < toTuple(Other); + } + + static TruncationConfiguration getInitial(FloatTruncation Truncation, + TruncateMode Mode) { + if (Mode == TruncOpMode) + return TruncationConfiguration{Truncation, Mode, true, true, false}; + else if (Mode == TruncMemMode) + return TruncationConfiguration{Truncation, Mode, false, false, false}; + else if (Mode == TruncOpFullModuleMode) + return TruncationConfiguration{Truncation, Mode, true, false, false}; + else + llvm_unreachable(""); + } +}; + typedef std::map, llvm::GlobalValue *> UniqDebugLocStrsTy; @@ -303,12 +341,11 @@ class RaptorLogic { RaptorLogic(bool PostOpt) : PostOpt(PostOpt) {} using TruncateCacheKey = - std::tuple; + std::tuple; std::map TruncateCachedFunctions; - llvm::Function *CreateTruncateFunc(RequestContext context, - llvm::Function *tobatch, - FloatTruncation truncation, - TruncateMode mode, bool root = true); + llvm::Function *CreateTruncateFunc(RequestContext Context, + llvm::Function *ToTrunc, + TruncationConfiguration TC); bool CreateTruncateValue(RequestContext context, llvm::Value *addr, FloatTruncation Truncation, bool isTruncate); bool CountInFunc(llvm::Function *F, FloatRepresentation FR); diff --git a/test/Integration/Truncate/Cpp/openmp-cpu.cpp b/test/Integration/Truncate/Cpp/openmp-cpu.cpp new file mode 100644 index 00000000..448b0bbf --- /dev/null +++ b/test/Integration/Truncate/Cpp/openmp-cpu.cpp @@ -0,0 +1,92 @@ +// clang-format off + +// RUN: %clang -O3 %s -o %t.a.out %loadClangRaptor %linkRaptorRT -lm -lmpfr && %t.a.out +// RUN: %clang -O3 -fopenmp %s -o %t.a.out %loadClangRaptor %linkRaptorRT -lm -lmpfr && %t.a.out + +// CHECK: 1.000000 + 1000.000000 = 1000.000000 + +// clang-format on + +#include "../../test_utils.h" +#include + +#define FROM 64 +#define TO 1, 10, 8 + +template +fty *__raptor_truncate_op_func(fty *, int, int, int, int); + +double par_for(double a, double b) { + double c = 0; +#pragma omp parallel for + for (int i = 0; i < 1; i++) { + c = a + b; + } + return c; +} + +double teams(double a, double b) { + double c = 0; +#pragma omp teams + { + c = a + b; + } + return c; +} + +double teams_par(double a, double b) { + double c = 0; +#pragma omp teams parallel + { + c = a + b; + } + return c; +} + +double teams__par(double a, double b) { + double c = 0; +#pragma omp teams + { +#pragma omp parallel + { + c = a + b; + } + } + return c; +} + +double par(double a, double b) { + double c = 0; +#pragma omp parallel + { + c = a + b; + } + return c; +} + +int main() { + double a = 1; + double b = 1000; + double c; + c = __raptor_truncate_op_func(par, FROM, TO)(a, b); + printf("%f + %f = %f\n", a, b, c); + APPROX_EQ(c, 1000, 1e-5); + + c = __raptor_truncate_op_func(par_for, FROM, TO)(a, b); + printf("%f + %f = %f\n", a, b, c); + APPROX_EQ(c, 1000, 1e-5); + + c = __raptor_truncate_op_func(teams, FROM, TO)(a, b); + printf("%f + %f = %f\n", a, b, c); + APPROX_EQ(c, 1000, 1e-5); + + c = __raptor_truncate_op_func(teams_par, FROM, TO)(a, b); + printf("%f + %f = %f\n", a, b, c); + APPROX_EQ(c, 1000, 1e-5); + + c = __raptor_truncate_op_func(teams__par, FROM, TO)(a, b); + printf("%f + %f = %f\n", a, b, c); + APPROX_EQ(c, 1000, 1e-5); + + return 0; +} diff --git a/test/Integration/Truncate/Fortran/simple.f90 b/test/Integration/Truncate/Fortran/simple.f90 index 82cae39d..3befb534 100644 --- a/test/Integration/Truncate/Fortran/simple.f90 +++ b/test/Integration/Truncate/Fortran/simple.f90 @@ -1,3 +1,4 @@ +! RUN: %flang -O0 %s -o %t.a.out %loadFlangRaptor %linkRaptorRT -lm -lmpfr && %t.a.out 100000 2 | FileCheck %s ! RUN: %flang -O1 %s -o %t.a.out %loadFlangRaptor %linkRaptorRT -lm -lmpfr && %t.a.out 100000 2 | FileCheck %s ! RUN: %flang -O2 %s -o %t.a.out %loadFlangRaptor %linkRaptorRT -lm -lmpfr && %t.a.out 100000 2 | FileCheck %s ! RUN: %flang -O3 %s -o %t.a.out %loadFlangRaptor %linkRaptorRT -lm -lmpfr && %t.a.out 100000 2 | FileCheck %s diff --git a/test/Unit/Truncate/cmp.ll b/test/Unit/Truncate/cmp.ll index 35fa7755..f251318d 100644 --- a/test/Unit/Truncate/cmp.ll +++ b/test/Unit/Truncate/cmp.ll @@ -28,11 +28,11 @@ entry: ret i1 %res } -; CHECK: define internal i1 @__raptor_done_truncate_mem_func_ieee_64_to_mpfr_8_23_f( +; CHECK: define internal i1 @__raptor_done_truncate_mem_func_ieee_64_to_mpfr_8_23_0_0_0_f( ; CHECK: call i1 @__raptor_fprt_ieee_64_fcmp_olt -; CHECK: define internal i1 @__raptor_done_truncate_op_func_ieee_64_to_ieee_32_f( +; CHECK: define internal i1 @__raptor_done_truncate_op_func_ieee_64_to_ieee_32_1_1_0_f( ; CHECK: fcmp olt double -; CHECK: define internal i1 @__raptor_done_truncate_op_func_ieee_64_to_mpfr_3_7_f( +; CHECK: define internal i1 @__raptor_done_truncate_op_func_ieee_64_to_mpfr_3_7_1_1_0_f( ; CHECK: fcmp olt double diff --git a/test/Unit/Truncate/const.ll b/test/Unit/Truncate/const.ll index c2b48ae8..fa7c9c12 100644 --- a/test/Unit/Truncate/const.ll +++ b/test/Unit/Truncate/const.ll @@ -22,9 +22,9 @@ entry: ret double %res } -; CHECK: define internal double @__raptor_done_truncate_mem_func_ieee_64_to_mpfr_8_23_f(double %x) { +; CHECK: define internal double @__raptor_done_truncate_mem_func_ieee_64_to_mpfr_8_23_0_0_0_f(double %x) { ; CHECK: call double @__raptor_fprt_ieee_64_const(double 1.000000e+00, i64 8, i64 23, i64 1, {{.*}} ; CHECK: call double @__raptor_fprt_ieee_64_binop_fadd(double {{.*}}, double %1, i64 8, i64 23, i64 1, {{.*}} -; CHECK: define internal double @__raptor_done_truncate_op_func_ieee_64_to_mpfr_3_7_f(double %x) { +; CHECK: define internal double @__raptor_done_truncate_op_func_ieee_64_to_mpfr_3_7_1_1_0_f(double %x) { ; CHECK: call double @__raptor_fprt_ieee_64_binop_fadd(double {{.*}}, double 1.000000e+00, i64 3, i64 7, i64 2 diff --git a/test/Unit/Truncate/intrinsic.ll b/test/Unit/Truncate/intrinsic.ll index be809974..2a66d17f 100644 --- a/test/Unit/Truncate/intrinsic.ll +++ b/test/Unit/Truncate/intrinsic.ll @@ -40,20 +40,20 @@ entry: ret double %res } -; CHECK: define internal double @__raptor_done_truncate_mem_func_ieee_64_to_mpfr_8_23_f( +; CHECK: define internal double @__raptor_done_truncate_mem_func_ieee_64_to_mpfr_8_23_0_0_0_f( ; CHECK-DAG: call double @__raptor_fprt_ieee_64_func_pow( ; CHECK-DAG: call double @__raptor_fprt_ieee_64_intr_llvm_pow_f64( ; CHECK-DAG: call double @__raptor_fprt_ieee_64_intr_llvm_powi_f64_i16( ; CHECK-DAG: call double @__raptor_fprt_ieee_64_binop_fadd( ; CHECK-DAG: call void @llvm.nvvm.barrier0() -; CHECK: define internal double @__raptor_done_truncate_op_func_ieee_64_to_ieee_32_f( +; CHECK: define internal double @__raptor_done_truncate_op_func_ieee_64_to_ieee_32_1_1_0_f( ; CHECK-DAG: fptrunc ; CHECK-DAG: call float @llvm.pow.f32( ; CHECK-DAG: fpext float ; CHECK-DAG: call float @llvm.powi.f32.i16( -; CHECK: define internal double @__raptor_done_truncate_op_func_ieee_64_to_mpfr_3_7_f( +; CHECK: define internal double @__raptor_done_truncate_op_func_ieee_64_to_mpfr_3_7_1_1_0_f( ; CHECK-DAG: call double @__raptor_fprt_ieee_64_func_pow( ; CHECK-DAG: call double @__raptor_fprt_ieee_64_intr_llvm_pow_f64( ; CHECK-DAG: call double @__raptor_fprt_ieee_64_intr_llvm_powi_f64_i16( diff --git a/test/Unit/Truncate/openmp.ll b/test/Unit/Truncate/openmp.ll new file mode 100644 index 00000000..3ed34dbd --- /dev/null +++ b/test/Unit/Truncate/openmp.ll @@ -0,0 +1,68 @@ +; RUN: %opt %s %newLoadRaptor -passes="raptor" -S | FileCheck %s + +; we managed to truncate the parallel region +; CHECK: define {{.*}} double @__raptor_done_truncate_op_func_ieee_64_to_mpfr_10_8_1_1_0__Z10teams__pardd +; CHECK: call {{.*}} @__kmpc_fork_teams({{.*}}@__raptor_done_truncate_op_func_ieee_64_to_mpfr_10_8_0_1_0__Z10teams__pardd.omp_outlined +; CHECK: define {{.*}} @__raptor_done_truncate_op_func_ieee_64_to_mpfr_10_8_0_1_0__Z10teams__pardd.omp_outlined +; CHECK: call {{.*}} @__kmpc_fork_call({{.*}}@__raptor_done_truncate_op_func_ieee_64_to_mpfr_10_8_0_1_0__Z10teams__pardd.omp_outlined.omp_outlined +; CHECK: define {{.*}} @__raptor_done_truncate_op_func_ieee_64_to_mpfr_10_8_0_1_0__Z10teams__pardd.omp_outlined.omp_outlined +; CHECK: call double @__raptor_fprt_ieee_64_binop_fadd + + +%struct.ident_t = type { i32, i32, i32, i32, ptr } + +@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 +@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 + +; Function Attrs: mustprogress nounwind uwtable +define dso_local noundef double @_Z10teams__pardd(double noundef %a, double noundef %b) { +entry: + %a.addr = alloca double, align 8 + %b.addr = alloca double, align 8 + %c = alloca double, align 8 + store double %a, ptr %a.addr, align 8 + store double %b, ptr %b.addr, align 8 + store double 0.000000e+00, ptr %c, align 8 + call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr nonnull @1, i32 3, ptr nonnull @_Z10teams__pardd.omp_outlined, ptr nonnull %c, ptr nonnull %a.addr, ptr nonnull %b.addr) + %0 = load double, ptr %c, align 8 + ret double %0 +} + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) + +; Function Attrs: alwaysinline norecurse nounwind uwtable +define internal void @_Z10teams__pardd.omp_outlined(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr noundef nonnull align 8 dereferenceable(8) %c, ptr noundef nonnull align 8 dereferenceable(8) %a, ptr noundef nonnull align 8 dereferenceable(8) %b) { +entry: + tail call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 3, ptr nonnull @_Z10teams__pardd.omp_outlined.omp_outlined, ptr nonnull %c, ptr nonnull %a, ptr nonnull %b) + ret void +} + +; Function Attrs: alwaysinline mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +define internal void @_Z10teams__pardd.omp_outlined.omp_outlined(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture noundef nonnull writeonly align 8 dereferenceable(8) initializes((0, 8)) %c, ptr nocapture noundef nonnull readonly align 8 dereferenceable(8) %a, ptr nocapture noundef nonnull readonly align 8 dereferenceable(8) %b) { +entry: + %0 = load double, ptr %a, align 8 + %1 = load double, ptr %b, align 8 + %add = fadd double %0, %1 + store double %add, ptr %c, align 8 + ret void +} + +; Function Attrs: nounwind +declare !callback !10 void @__kmpc_fork_call(ptr, i32, ptr, ...) local_unnamed_addr + +; Function Attrs: nounwind +declare !callback !10 void @__kmpc_fork_teams(ptr, i32, ptr, ...) local_unnamed_addr + +; Function Attrs: mustprogress norecurse uwtable +define dso_local noundef i32 @main() local_unnamed_addr { +entry: + %call = tail call noundef ptr @_Z25__raptor_truncate_op_funcIFdddEEPT_S2_iiii(ptr noundef nonnull @_Z10teams__pardd, i32 noundef 64, i32 noundef 1, i32 noundef 10, i32 noundef 8) + %call1 = tail call noundef double %call(double noundef 1.000000e+00, double noundef 1.000000e+03) + ret i32 0 +} + +declare noundef ptr @_Z25__raptor_truncate_op_funcIFdddEEPT_S2_iiii(ptr noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef) local_unnamed_addr + + +!10 = !{!11} +!11 = !{i64 2, i64 -1, i64 -1, i1 true} diff --git a/test/Unit/Truncate/select.ll b/test/Unit/Truncate/select.ll index 620d277a..f76a8fa8 100644 --- a/test/Unit/Truncate/select.ll +++ b/test/Unit/Truncate/select.ll @@ -1,5 +1,4 @@ -; RUN: if [ %llvmver -lt 16 ]; then %opt < %s %loadRaptor -raptor -S | FileCheck %s; fi -; RUN: %opt < %s %newLoadRaptor -passes="raptor" -S | FileCheck %s +; RUN: %opt %s %newLoadRaptor -passes="raptor" -S | FileCheck %s define double @f(double %x, double %y, i1 %cond) { %res = select i1 %cond, double %x, double %y @@ -23,8 +22,5 @@ entry: ret double %res } -; CHECK: define internal double @__raptor_done_truncate_mem_func_ieee_64_to_mpfr_8_23_f( - -; CHECK: define internal double @__raptor_done_truncate_op_func_ieee_64_to_ieee_32_f( - -; TODO forgot what the intention of this test is +; CHECK: define internal double @__raptor_done_truncate +; TODO diff --git a/test/Unit/Truncate/simple.ll b/test/Unit/Truncate/simple.ll index 717f8dd4..4cd822cf 100644 --- a/test/Unit/Truncate/simple.ll +++ b/test/Unit/Truncate/simple.ll @@ -42,23 +42,23 @@ entry: ; CHECK: define void @tester(ptr %data) { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @__raptor_done_truncate_mem_func_ieee_64_to_mpfr_8_23_f(ptr %data) +; CHECK-NEXT: call void @__raptor_done_truncate_mem_func_ieee_64_to_mpfr_8_23_0_0_0_f(ptr %data) ; CHECK-NEXT: ret void ; CHECK-NEXT: } ; CHECK: define void @tester_op(ptr %data) { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @__raptor_done_truncate_op_func_ieee_64_to_ieee_32_f(ptr %data) +; CHECK-NEXT: call void @__raptor_done_truncate_op_func_ieee_64_to_ieee_32_1_1_0_f(ptr %data) ; CHECK-NEXT: ret void ; CHECK-NEXT: } ; CHECK: define void @tester_op_mpfr(ptr %data) { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @__raptor_done_truncate_op_func_ieee_64_to_mpfr_8_23_f(ptr %data) +; CHECK-NEXT: call void @__raptor_done_truncate_op_func_ieee_64_to_mpfr_8_23_1_1_0_f(ptr %data) ; CHECK-NEXT: ret void ; CHECK-NEXT: } -; CHECK: define internal void @__raptor_done_truncate_mem_func_ieee_64_to_mpfr_8_23_f(ptr %x) { +; CHECK: define internal void @__raptor_done_truncate_mem_func_ieee_64_to_mpfr_8_23_0_0_0_f(ptr %x) { ; CHECK-NEXT: %y = load double, ptr %x, align 8 ; CHECK-NEXT: %m = call double @__raptor_fprt_ieee_64_binop_fmul(double %y, double %y, i64 8, i64 23, i64 1, ptr @0, ptr null) ; CHECK-NEXT: store double %m, ptr %x, align 8 @@ -71,7 +71,7 @@ entry: ; CHECK-NEXT: ret double %2 ; CHECK-NEXT: } -; CHECK: define internal void @__raptor_done_truncate_op_func_ieee_64_to_ieee_32_f(ptr %x) { +; CHECK: define internal void @__raptor_done_truncate_op_func_ieee_64_to_ieee_32_1_1_0_f(ptr %x) { ; CHECK-NEXT: %y = load double, ptr %x, align 8 ; CHECK-NEXT: %raptor_trunc = fptrunc double %y to float ; CHECK-NEXT: %raptor_trunc1 = fptrunc double %y to float @@ -81,7 +81,7 @@ entry: ; CHECK-NEXT: ret void ; CHECK-NEXT: } -; CHECK: define internal void @__raptor_done_truncate_op_func_ieee_64_to_mpfr_8_23_f(ptr %x) { +; CHECK: define internal void @__raptor_done_truncate_op_func_ieee_64_to_mpfr_8_23_1_1_0_f(ptr %x) { ; CHECK-NEXT: call void @__raptor_fprt_ieee_64_trunc_change(i64 1, i64 8, i64 23, i64 2, ptr @0, ptr null) ; CHECK-NEXT: %1 = call ptr @__raptor_fprt_ieee_64_get_scratch(i64 8, i64 23, i64 2, ptr @0, ptr null) ; CHECK-NEXT: %y = load double, ptr %x, align 8