diff --git a/build_cmake/CMakeLists.txt b/build_cmake/CMakeLists.txt index 2cf245d5..7ee7e752 100644 --- a/build_cmake/CMakeLists.txt +++ b/build_cmake/CMakeLists.txt @@ -124,6 +124,8 @@ set(CodeGenTest_SRC ../tests/ExternJumpTest.h ../tests/FpIntMixTest.cpp ../tests/FpuTest.cpp + ../tests/FloatFusedMultiAddTest.cpp + ../tests/DoubleFusedMultiAddTest.cpp ../tests/HugeJumpTest.cpp ../tests/HugeJumpTestLiteral.cpp ../tests/HugeJumpTestLiteral.h diff --git a/include/Jitter.h b/include/Jitter.h index 7a1dd06d..6b2bc657 100644 --- a/include/Jitter.h +++ b/include/Jitter.h @@ -152,6 +152,8 @@ namespace Jitter void FP_Max(); void FP_Min(); void FP_Mul(); + void FP_MulAdd(); + void FP_MulSub(); void FP_Div(); void FP_Cmp(CONDITION); void FP_Neg(); @@ -228,6 +230,8 @@ namespace Jitter void MD_UnpackUpperHW(); void MD_UnpackUpperWD(); void MD_Xor(); + void MD_MulAdd(); + void MD_MulSub(); CCodeGen* GetCodeGen(); @@ -285,6 +289,7 @@ namespace Jitter void InsertUnaryStatement(Jitter::OPERATION); void InsertBinaryStatement(Jitter::OPERATION); + void InsertTernaryStatement(Jitter::OPERATION); void InsertUnaryMdStatement(Jitter::OPERATION); void InsertBinaryMdStatement(Jitter::OPERATION); diff --git a/include/Jitter_CodeGen_x86.h b/include/Jitter_CodeGen_x86.h index 803632fa..a633b6dc 100644 --- a/include/Jitter_CodeGen_x86.h +++ b/include/Jitter_CodeGen_x86.h @@ -107,6 +107,16 @@ namespace Jitter static OpEdAvxType OpEdAvx() { return &CX86Assembler::VmulssEd; } }; + struct FPUOP_MULADD213 : public FPUOP_BASE + { + static OpEdAvxType OpEdAvx() { return &CX86Assembler::Vfmadd213ssVo; } + }; + + struct FPUOP_MULSUB213 : public FPUOP_BASE + { + static OpEdAvxType OpEdAvx() { return &CX86Assembler::Vfnmadd213ssVo; } + }; + struct FPUOP_DIV : public FPUOP_BASE { static OpEdType OpEd() { return &CX86Assembler::DivssEd; } @@ -390,6 +400,16 @@ namespace Jitter static OpVoType OpVoAvx() { return &CX86Assembler::Vcvtdq2psVo; } }; + struct MDOP_MULADD213 : public MDOP_BASE + { + static OpVoAvxType OpVoAvx() { return &CX86Assembler::Vfmadd213psVo; } + }; + + struct MDOP_MULSUB213 : public MDOP_BASE + { + static OpVoAvxType OpVoAvx() { return &CX86Assembler::Vfnmadd213psVo; } + }; + //MDOP SHIFT ----------------------------------------------------- struct MDOP_SHIFT_BASE { @@ -722,6 +742,7 @@ namespace Jitter //FPUOP AVX template void Emit_Fpu_Avx_MemMem(const STATEMENT&); template void Emit_Fpu_Avx_MemMemMem(const STATEMENT&); + template void Emit_Fpu_Avx_MemMemMemMem(const STATEMENT&); void Emit_Fp_Avx_Cmp_VarMemMem(const STATEMENT&); void Emit_Fp_Avx_Rsqrt_MemMem(const STATEMENT&); @@ -733,6 +754,7 @@ namespace Jitter template void Emit_Md_Avx_VarVar(const STATEMENT&); template void Emit_Md_Avx_VarVarVar(const STATEMENT&); template void Emit_Md_Avx_VarVarVarRev(const STATEMENT&); + template void Emit_Md_Avx_VarVarVarVar(const STATEMENT&); template void Emit_Md_Avx_Shift_VarVarCst(const STATEMENT&); void Emit_Md_Avx_Mov_RegVar(const STATEMENT&); diff --git a/include/Jitter_Statement.h b/include/Jitter_Statement.h index ddba03d9..d069ebbe 100644 --- a/include/Jitter_Statement.h +++ b/include/Jitter_Statement.h @@ -136,9 +136,14 @@ namespace Jitter OP_MD_CMPLT_S, OP_MD_CMPGT_S, + OP_MD_MULADD, + OP_MD_MULSUB, + OP_FP_ADD, OP_FP_SUB, OP_FP_MUL, + OP_FP_MULADD, + OP_FP_MULSUB, OP_FP_DIV, OP_FP_SQRT, OP_FP_RSQRT, diff --git a/include/X86Assembler.h b/include/X86Assembler.h index 7023372a..f2870c57 100644 --- a/include/X86Assembler.h +++ b/include/X86Assembler.h @@ -463,6 +463,11 @@ class CX86Assembler void VcmppsVo(XMMREGISTER, XMMREGISTER, const CAddress&, SSE_CMP_TYPE); void VblendpsVo(XMMREGISTER, XMMREGISTER, const CAddress&, uint8); + void Vfmadd213ssVo(XMMREGISTER, XMMREGISTER, const CAddress&); + void Vfnmadd213ssVo(XMMREGISTER, XMMREGISTER, const CAddress&); + void Vfmadd213psVo(XMMREGISTER, XMMREGISTER, const CAddress&); + void Vfnmadd213psVo(XMMREGISTER, XMMREGISTER, const CAddress&); + void VshufpsVo(XMMREGISTER, XMMREGISTER, const CAddress&, uint8); private: diff --git a/src/Jitter.cpp b/src/Jitter.cpp index fac3e7b3..b8a9b46d 100644 --- a/src/Jitter.cpp +++ b/src/Jitter.cpp @@ -987,6 +987,36 @@ void CJitter::FP_Mul() m_shadow.Push(tempSym); } +void CJitter::FP_MulAdd() +{ + SymbolPtr tempSym = MakeSymbol(SYM_FP_TMP_SINGLE, m_nextTemporary++); + + STATEMENT statement; + statement.op = OP_FP_MULADD; + statement.src1 = MakeSymbolRef(m_shadow.Pull()); + statement.src2 = MakeSymbolRef(m_shadow.Pull()); + statement.src3 = MakeSymbolRef(m_shadow.Pull()); + statement.dst = MakeSymbolRef(tempSym); + InsertStatement(statement); + + m_shadow.Push(tempSym); +} + +void CJitter::FP_MulSub() +{ + SymbolPtr tempSym = MakeSymbol(SYM_FP_TMP_SINGLE, m_nextTemporary++); + + STATEMENT statement; + statement.op = OP_FP_MULSUB; + statement.src1 = MakeSymbolRef(m_shadow.Pull()); + statement.src2 = MakeSymbolRef(m_shadow.Pull()); + statement.src3 = MakeSymbolRef(m_shadow.Pull()); + statement.dst = MakeSymbolRef(tempSym); + InsertStatement(statement); + + m_shadow.Push(tempSym); +} + void CJitter::FP_Div() { SymbolPtr tempSym = MakeSymbol(SYM_FP_TMP_SINGLE, m_nextTemporary++); @@ -1585,6 +1615,36 @@ void CJitter::MD_ToSingle() InsertUnaryMdStatement(OP_MD_TOSINGLE); } +void CJitter::MD_MulAdd() +{ + SymbolPtr tempSym = MakeSymbol(SYM_TEMPORARY128, m_nextTemporary++); + + STATEMENT statement; + statement.op = OP_MD_MULADD; + statement.src1 = MakeSymbolRef(m_shadow.Pull()); + statement.src2 = MakeSymbolRef(m_shadow.Pull()); + statement.src3 = MakeSymbolRef(m_shadow.Pull()); + statement.dst = MakeSymbolRef(tempSym); + InsertStatement(statement); + + m_shadow.Push(tempSym); +} + +void CJitter::MD_MulSub() +{ + SymbolPtr tempSym = MakeSymbol(SYM_TEMPORARY128, m_nextTemporary++); + + STATEMENT statement; + statement.op = OP_MD_MULSUB; + statement.src1 = MakeSymbolRef(m_shadow.Pull()); + statement.src2 = MakeSymbolRef(m_shadow.Pull()); + statement.src3 = MakeSymbolRef(m_shadow.Pull()); + statement.dst = MakeSymbolRef(tempSym); + InsertStatement(statement); + + m_shadow.Push(tempSym); +} + //Generic Statement Inserters //------------------------------------------------ @@ -1615,6 +1675,21 @@ void CJitter::InsertBinaryStatement(Jitter::OPERATION operation) m_shadow.Push(tempSym); } +void CJitter::InsertTernaryStatement(Jitter::OPERATION operation) +{ + auto tempSym = MakeSymbol(SYM_TEMPORARY, m_nextTemporary++); + + STATEMENT statement; + statement.op = operation; + statement.src3 = MakeSymbolRef(m_shadow.Pull()); + statement.src2 = MakeSymbolRef(m_shadow.Pull()); + statement.src1 = MakeSymbolRef(m_shadow.Pull()); + statement.dst = MakeSymbolRef(tempSym); + InsertStatement(statement); + + m_shadow.Push(tempSym); +} + void CJitter::InsertUnaryMdStatement(Jitter::OPERATION operation) { auto tempSym = MakeSymbol(SYM_TEMPORARY128, m_nextTemporary++); diff --git a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp index 318e172b..316f3541 100644 --- a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp +++ b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp @@ -27,6 +27,23 @@ void CCodeGen_x86::Emit_Fpu_Avx_MemMemMem(const STATEMENT& statement) m_assembler.VmovssEd(MakeMemoryFpSingleSymbolAddress(dst), dstRegister); } +template +void CCodeGen_x86::Emit_Fpu_Avx_MemMemMemMem(const STATEMENT& statement) +{ + auto dst = statement.dst->GetSymbol().get(); + auto src1 = statement.src1->GetSymbol().get(); + auto src2 = statement.src2->GetSymbol().get(); + auto src3 = statement.src3->GetSymbol().get(); + + auto dstRegister = CX86Assembler::xMM0; + auto src2Register = CX86Assembler::xMM1; + + m_assembler.VmovssEd(dstRegister, MakeMemoryFpSingleSymbolAddress(src1)); + m_assembler.VmovssEd(src2Register, MakeMemoryFpSingleSymbolAddress(src2)); + ((m_assembler).*(FPUOP::OpEdAvx()))(dstRegister, src2Register, MakeMemoryFpSingleSymbolAddress(src3)); + m_assembler.VmovssEd(MakeMemoryFpSingleSymbolAddress(dst), dstRegister); +} + void CCodeGen_x86::Emit_Fp_Avx_Cmp_VarMemMem(const STATEMENT& statement) { auto dst = statement.dst->GetSymbol().get(); @@ -101,6 +118,8 @@ void CCodeGen_x86::Emit_Fp_Avx_ToIntTrunc_RelRel(const STATEMENT& statement) CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_fpuAvxConstMatchers[] = { + { OP_FP_MULADD, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMemMem }, + { OP_FP_MULSUB, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMemMem }, { OP_FP_ADD, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, { OP_FP_SUB, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, { OP_FP_MUL, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, diff --git a/src/Jitter_CodeGen_x86_Md_Avx.cpp b/src/Jitter_CodeGen_x86_Md_Avx.cpp index 316af96f..6cc2add0 100644 --- a/src/Jitter_CodeGen_x86_Md_Avx.cpp +++ b/src/Jitter_CodeGen_x86_Md_Avx.cpp @@ -45,6 +45,27 @@ void CCodeGen_x86::Emit_Md_Avx_VarVarVarRev(const STATEMENT& statement) CommitSymbolRegisterMdAvx(dst, dstRegister); } +template +void CCodeGen_x86::Emit_Md_Avx_VarVarVarVar(const STATEMENT& statement) +{ + auto dst = statement.dst->GetSymbol().get(); + auto src1 = statement.src1->GetSymbol().get(); + auto src2 = statement.src2->GetSymbol().get(); + auto src3 = statement.src3->GetSymbol().get(); + + auto dstRegister = PrepareSymbolRegisterDefMd(dst, CX86Assembler::xMM0); + auto src1Register = PrepareSymbolRegisterUseMdAvx(src1, CX86Assembler::xMM1); + auto src2Register = PrepareSymbolRegisterUseMdAvx(src2, CX86Assembler::xMM2); + + if(dstRegister != src1Register) + { + m_assembler.VmovapsVo(dstRegister, CX86Assembler::MakeXmmRegisterAddress(src1Register)); + } + + ((m_assembler).*(MDOP::OpVoAvx()))(dstRegister, src2Register, MakeVariable128SymbolAddress(src3)); + CommitSymbolRegisterMdAvx(dst, dstRegister); +} + template void CCodeGen_x86::Emit_Md_Avx_Shift_VarVarCst(const STATEMENT& statement) { @@ -620,6 +641,9 @@ CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_mdAvxConstMatchers[] = { OP_MD_TOWORD_TRUNCATE, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_Avx_VarVar }, { OP_MD_TOSINGLE, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_Avx_VarVar }, + {OP_MD_MULADD, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, &CCodeGen_x86::Emit_Md_Avx_VarVarVarVar}, + {OP_MD_MULSUB, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, &CCodeGen_x86::Emit_Md_Avx_VarVarVarVar}, + { OP_MD_EXPAND, MATCH_VARIABLE128, MATCH_VARIABLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_Avx_Expand_VarVar }, { OP_MD_EXPAND, MATCH_VARIABLE128, MATCH_CONSTANT, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_Avx_Expand_VarCst }, diff --git a/src/X86Assembler_Avx.cpp b/src/X86Assembler_Avx.cpp index abee2ba0..9046c78c 100644 --- a/src/X86Assembler_Avx.cpp +++ b/src/X86Assembler_Avx.cpp @@ -448,6 +448,26 @@ void CX86Assembler::VblendpsVo(XMMREGISTER dst, XMMREGISTER src1, const CAddress WriteByte(mask); } +void CX86Assembler::Vfmadd213ssVo(XMMREGISTER dst, const XMMREGISTER src1, const CAddress& src2) +{ + WriteVexVoOp(VEX_OPCODE_MAP_66_38, 0xA9, dst, src1, src2); +} + +void CX86Assembler::Vfnmadd213ssVo(XMMREGISTER dst, const XMMREGISTER src1, const CAddress& src2) +{ + WriteVexVoOp(VEX_OPCODE_MAP_66_38, 0xAD, dst, src1, src2); +} + +void CX86Assembler::Vfmadd213psVo(XMMREGISTER dst, const XMMREGISTER src1, const CAddress& src2) +{ + WriteVexVoOp(VEX_OPCODE_MAP_66_38, 0xA8, dst, src1, src2); +} + +void CX86Assembler::Vfnmadd213psVo(XMMREGISTER dst, const XMMREGISTER src1, const CAddress& src2) +{ + WriteVexVoOp(VEX_OPCODE_MAP_66_38, 0xAC, dst, src1, src2); +} + void CX86Assembler::VshufpsVo(XMMREGISTER dst, XMMREGISTER src1, const CAddress& src2, uint8 shuffleByte) { WriteVexVoOp(VEX_OPCODE_MAP_NONE, 0xC6, dst, src1, src2); diff --git a/tests/DoubleFusedMultiAddTest.cpp b/tests/DoubleFusedMultiAddTest.cpp new file mode 100644 index 00000000..bf1adbb2 --- /dev/null +++ b/tests/DoubleFusedMultiAddTest.cpp @@ -0,0 +1,81 @@ +#include "DoubleFusedMultiAddTest.h" +#include "MemStream.h" + +CDoubleFusedMultiAddTest::CDoubleFusedMultiAddTest() +{ +} + +CDoubleFusedMultiAddTest::~CDoubleFusedMultiAddTest() +{ +} + +void CDoubleFusedMultiAddTest::Compile(Jitter::CJitter& jitter) +{ + Framework::CMemStream codeStream; + jitter.SetStream(&codeStream); + + jitter.Begin(); + { + jitter.MD_PushRel(offsetof(CONTEXT, number3)); + jitter.MD_PushRel(offsetof(CONTEXT, number2)); + jitter.MD_PushRel(offsetof(CONTEXT, number1)); + jitter.MD_MulAdd(); + jitter.MD_PullRel(offsetof(CONTEXT, res1)); + + jitter.MD_PushRel(offsetof(CONTEXT, number4)); + jitter.MD_PushRel(offsetof(CONTEXT, number2)); + jitter.MD_PushRel(offsetof(CONTEXT, number2)); + jitter.MD_MulAdd(); + jitter.MD_PullRel(offsetof(CONTEXT, res2)); + + jitter.MD_PushRel(offsetof(CONTEXT, number3)); + jitter.MD_PushRel(offsetof(CONTEXT, number4)); + jitter.MD_PushRel(offsetof(CONTEXT, number2)); + jitter.MD_MulAdd(); + jitter.MD_PullRel(offsetof(CONTEXT, res3)); + + jitter.MD_PushRel(offsetof(CONTEXT, number3)); + jitter.MD_PushRel(offsetof(CONTEXT, number2)); + jitter.MD_PushRel(offsetof(CONTEXT, number1)); + jitter.MD_MulS(); + jitter.MD_AddS(); + jitter.MD_PullRel(offsetof(CONTEXT, res4)); + + jitter.MD_PushRel(offsetof(CONTEXT, number4)); + jitter.MD_PushRel(offsetof(CONTEXT, number2)); + jitter.MD_PushRel(offsetof(CONTEXT, number2)); + jitter.MD_MulS(); + jitter.MD_AddS(); + jitter.MD_PullRel(offsetof(CONTEXT, res5)); + + jitter.MD_PushRel(offsetof(CONTEXT, number3)); + jitter.MD_PushRel(offsetof(CONTEXT, number4)); + jitter.MD_PushRel(offsetof(CONTEXT, number2)); + jitter.MD_MulS(); + jitter.MD_AddS(); + jitter.MD_PullRel(offsetof(CONTEXT, res6)); + } + jitter.End(); + + m_function = CMemoryFunction(codeStream.GetBuffer(), codeStream.GetSize()); +} + +void CDoubleFusedMultiAddTest::Run() +{ + memset(&m_context, 0, sizeof(CONTEXT)); + m_context.number1[1] = 1 ; + m_context.number2[1] = 2 ; + m_context.number3[1] = 4 ; + m_context.number4[1] = 16; + m_context.number1[2] = 1 ; + m_context.number2[2] = 2 ; + m_context.number3[2] = 4 ; + m_context.number4[2] = 16; + m_function(&m_context); + for(int i = 0; i < 4; ++i) + { + TEST_VERIFY(m_context.res1[i] == m_context.res4[i]); + TEST_VERIFY(m_context.res2[i] == m_context.res5[i]); + TEST_VERIFY(m_context.res3[i] == m_context.res6[i]); + } +} diff --git a/tests/DoubleFusedMultiAddTest.h b/tests/DoubleFusedMultiAddTest.h new file mode 100644 index 00000000..d2ea4d48 --- /dev/null +++ b/tests/DoubleFusedMultiAddTest.h @@ -0,0 +1,37 @@ +#pragma once + +#include "Test.h" +#include "MemoryFunction.h" +#include "Align16.h" + +class CDoubleFusedMultiAddTest : public CTest +{ +public: + CDoubleFusedMultiAddTest(); + virtual ~CDoubleFusedMultiAddTest(); + + void Compile(Jitter::CJitter&) override; + void Run() override; + +private: + struct CONTEXT + { + ALIGN16 + + uint8 number1[16]; + uint8 number2[16]; + uint8 number3[16]; + uint8 number4[16]; + + + uint8 res1[16]; + uint8 res2[16]; + uint8 res3[16]; + uint8 res4[16]; + uint8 res5[16]; + uint8 res6[16]; + }; + + CONTEXT m_context; + CMemoryFunction m_function; +}; diff --git a/tests/FloatFusedMultiAddTest.cpp b/tests/FloatFusedMultiAddTest.cpp new file mode 100644 index 00000000..5d925f10 --- /dev/null +++ b/tests/FloatFusedMultiAddTest.cpp @@ -0,0 +1,117 @@ +#include "FloatFusedMultiAddTest.h" +#include "MemStream.h" + +CFloatFusedMultiAddTest::CFloatFusedMultiAddTest() +{ +} + +CFloatFusedMultiAddTest::~CFloatFusedMultiAddTest() +{ +} + +void CFloatFusedMultiAddTest::Compile(Jitter::CJitter& jitter) +{ + Framework::CMemStream codeStream; + jitter.SetStream(&codeStream); + + jitter.Begin(); + { + jitter.FP_PushSingle(offsetof(CONTEXT, number3)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_PushSingle(offsetof(CONTEXT, number1)); + jitter.FP_MulAdd(); + jitter.FP_PullSingle(offsetof(CONTEXT, res1)); + + jitter.FP_PushSingle(offsetof(CONTEXT, number4)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_MulAdd(); + jitter.FP_PullSingle(offsetof(CONTEXT, res2)); + + jitter.FP_PushSingle(offsetof(CONTEXT, number3)); + jitter.FP_PushSingle(offsetof(CONTEXT, number4)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_MulAdd(); + jitter.FP_PullSingle(offsetof(CONTEXT, res3)); + + jitter.FP_PushSingle(offsetof(CONTEXT, number3)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_PushSingle(offsetof(CONTEXT, number1)); + jitter.FP_MulSub(); + jitter.FP_PullSingle(offsetof(CONTEXT, res4)); + + jitter.FP_PushSingle(offsetof(CONTEXT, number4)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_MulSub(); + jitter.FP_PullSingle(offsetof(CONTEXT, res5)); + + jitter.FP_PushSingle(offsetof(CONTEXT, number3)); + jitter.FP_PushSingle(offsetof(CONTEXT, number4)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_MulSub(); + jitter.FP_PullSingle(offsetof(CONTEXT, res6)); + + jitter.FP_PushSingle(offsetof(CONTEXT, number3)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_PushSingle(offsetof(CONTEXT, number1)); + jitter.FP_Mul(); + jitter.FP_Add(); + jitter.FP_PullSingle(offsetof(CONTEXT, res1_org)); + + jitter.FP_PushSingle(offsetof(CONTEXT, number4)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_Mul(); + jitter.FP_Add(); + jitter.FP_PullSingle(offsetof(CONTEXT, res2_org)); + + jitter.FP_PushSingle(offsetof(CONTEXT, number3)); + jitter.FP_PushSingle(offsetof(CONTEXT, number4)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_Mul(); + jitter.FP_Add(); + jitter.FP_PullSingle(offsetof(CONTEXT, res3_org)); + + jitter.FP_PushSingle(offsetof(CONTEXT, number3)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_PushSingle(offsetof(CONTEXT, number1)); + jitter.FP_Mul(); + jitter.FP_Sub(); + jitter.FP_PullSingle(offsetof(CONTEXT, res4_org)); + + jitter.FP_PushSingle(offsetof(CONTEXT, number4)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_Mul(); + jitter.FP_Sub(); + jitter.FP_PullSingle(offsetof(CONTEXT, res5_org)); + + jitter.FP_PushSingle(offsetof(CONTEXT, number3)); + jitter.FP_PushSingle(offsetof(CONTEXT, number4)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_Mul(); + jitter.FP_Sub(); + jitter.FP_PullSingle(offsetof(CONTEXT, res6_org)); + } + jitter.End(); + + m_function = CMemoryFunction(codeStream.GetBuffer(), codeStream.GetSize()); +} + +void CFloatFusedMultiAddTest::Run() +{ + memset(&m_context, 0, sizeof(CONTEXT)); + m_context.number1 = 1.0f; + m_context.number2 = 2.0f; + m_context.number3 = -4.0f; + m_context.number4 = 16.0f; + m_function(&m_context); + TEST_VERIFY(m_context.res1 == m_context.res1_org); + TEST_VERIFY(m_context.res2 == m_context.res2_org); + TEST_VERIFY(m_context.res3 == m_context.res3_org); + + TEST_VERIFY(m_context.res4 == m_context.res4_org); + TEST_VERIFY(m_context.res5 == m_context.res5_org); + TEST_VERIFY(m_context.res6 == m_context.res6_org); +} diff --git a/tests/FloatFusedMultiAddTest.h b/tests/FloatFusedMultiAddTest.h new file mode 100644 index 00000000..b5cb93d9 --- /dev/null +++ b/tests/FloatFusedMultiAddTest.h @@ -0,0 +1,45 @@ +#pragma once + +#include "Test.h" +#include "MemoryFunction.h" +#include "Align16.h" + +class CFloatFusedMultiAddTest : public CTest +{ +public: + CFloatFusedMultiAddTest(); + virtual ~CFloatFusedMultiAddTest(); + + void Compile(Jitter::CJitter&) override; + void Run() override; + +private: + struct CONTEXT + { + ALIGN16 + + float number1; + float number2; + float number3; + float number4; + + + float res1; + float res2; + float res3; + float res4; + float res5; + float res6; + + + float res1_org; + float res2_org; + float res3_org; + float res4_org; + float res5_org; + float res6_org; + }; + + CONTEXT m_context; + CMemoryFunction m_function; +}; diff --git a/tests/Main.cpp b/tests/Main.cpp index 1a38743d..14c873e8 100644 --- a/tests/Main.cpp +++ b/tests/Main.cpp @@ -47,122 +47,125 @@ #include "LzcTest.h" #include "NestedIfTest.h" #include "ExternJumpTest.h" +#include "FloatFusedMultiAddTest.h" +#include "DoubleFusedMultiAddTest.h" -typedef std::function TestFactoryFunction; +typedef std::function TestFactoryFunction; static const TestFactoryFunction s_factories[] = -{ - [] () { return new CCompareTest(); }, - [] () { return new CRegAllocTest(); }, - [] () { return new CRandomAluTest(true); }, - [] () { return new CRandomAluTest(false); }, - [] () { return new CRandomAluTest2(true); }, - [] () { return new CRandomAluTest2(false); }, - [] () { return new CRandomAluTest3(true); }, - [] () { return new CRandomAluTest3(false); }, - [] () { return new CShiftTest(0); }, - [] () { return new CShiftTest(12); }, - [] () { return new CShiftTest(31); }, - [] () { return new CShiftTest(32); }, - [] () { return new CShiftTest(44); }, - [] () { return new CCrc32Test("Hello World!", 0x67FCDACC); }, - [] () { return new CCursorTest(); }, - [] () { return new CLogicTest(0, false, ~0, false); }, - [] () { return new CLogicTest(0, false, ~0, true); }, - [] () { return new CLogicTest(0, true, ~0, false); }, - [] () { return new CLogicTest(0, true, ~0, true); }, - [] () { return new CLogicTest(0x01234567, false, 0x8000, true); }, - [] () { return new CLogicTest(0x01234567, false, ~0x8000, true); }, - [] () { return new CLogicTest(0x89ABCDEF, false, 0x01234567, true); }, - [] () { return new CMultTest(true); }, - [] () { return new CMultTest(false); }, - [] () { return new CDivTest(true); }, - [] () { return new CDivTest(false); }, - [] () { return new CMemAccessTest(); }, - [] () { return new CMemAccessIdxTest(); }, - [] () { return new CMemAccess8Test(); }, - [] () { return new CMemAccess16Test(); }, - [] () { return new CMemAccessRefTest(); }, - [] () { return new CHugeJumpTest(); }, - [] () { return new CHugeJumpTestLiteral(); }, - [] () { return new CNestedIfTest(); }, - [] () { return new CLzcTest(); }, - [] () { return new CAliasTest(); }, - [] () { return new CAliasTest2(); }, - [] () { return new CFpuTest(); }, - [] () { return new CFpIntMixTest(); }, - [] () { return new CSimpleMdTest(); }, - [] () { return new CMdTest(); }, - [] () { return new CMdLogicTest(); }, - [] () { return new CMdAddTest(); }, - [] () { return new CMdSubTest(); }, - [] () { return new CMdUnpackTest(); }, - [] () { return new CMdCmpTest(); }, - [] () { return new CMdMinMaxTest(); }, - [] () { return new CMdFpTest(); }, - [] () { return new CMdFpFlagTest(); }, - [] () { return new CMdCallTest(); }, - [] () { return new CMdMemAccessTest(); }, - [] () { return new CMdManipTest(); }, - [] () { return new CMdShiftTest(0); }, - [] () { return new CMdShiftTest(15); }, - [] () { return new CMdShiftTest(16); }, - [] () { return new CMdShiftTest(31); }, - [] () { return new CMdShiftTest(32); }, - [] () { return new CMdShiftTest(38); }, - [] () { return new CAlu64Test(); }, - //negative / positive - [] () { return new CConditionTest(false, 0xFFFFFFFE, 0xFFFFFFFE); }, - [] () { return new CConditionTest(false, 0x00000002, 0xFFFFFFFE); }, - [] () { return new CConditionTest(false, 0xFFFFFFFE, 0x00000002); }, - [] () { return new CConditionTest(false, 0x00000002, 0x00000002); }, - [] () { return new CConditionTest(true, 0xFFFFFFFE, 0xFFFFFFFE); }, - [] () { return new CConditionTest(true, 0x00000002, 0xFFFFFFFE); }, - [] () { return new CConditionTest(true, 0xFFFFFFFE, 0x00000002); }, - [] () { return new CConditionTest(true, 0x00000002, 0x00000002); }, - //negative / negative - []() { return new CConditionTest(false, 0xFFFFFFF0, 0xFFFFFFF0); }, - []() { return new CConditionTest(false, 0xFFFFFF00, 0xFFFFFFF0); }, - []() { return new CConditionTest(false, 0xFFFFFFF0, 0xFFFFFF00); }, - []() { return new CConditionTest(false, 0xFFFFFF00, 0xFFFFFF00); }, - []() { return new CConditionTest(true, 0xFFFFFFF0, 0xFFFFFFF0); }, - []() { return new CConditionTest(true, 0xFFFFFF00, 0xFFFFFFF0); }, - []() { return new CConditionTest(true, 0xFFFFFFF0, 0xFFFFFF00); }, - []() { return new CConditionTest(true, 0xFFFFFF00, 0xFFFFFF00); }, - //positive / positive - []() { return new CConditionTest(false, 0x0000000F, 0x0000000F); }, - []() { return new CConditionTest(false, 0x000000FF, 0x0000000F); }, - []() { return new CConditionTest(false, 0x0000000F, 0x000000FF); }, - []() { return new CConditionTest(false, 0x000000FF, 0x000000FF); }, - []() { return new CConditionTest(true, 0x0000000F, 0x0000000F); }, - []() { return new CConditionTest(true, 0x000000FF, 0x0000000F); }, - []() { return new CConditionTest(true, 0x0000000F, 0x000000FF); }, - []() { return new CConditionTest(true, 0x000000FF, 0x000000FF); }, - [] () { return new CCmp64Test(false, false, 0xFEDCBA9876543210ULL, 0x012389AB4567CDEFULL); }, - [] () { return new CCmp64Test(false, true, 0xFEDCBA9876543210ULL, 0x012389AB4567CDEFULL); }, - [] () { return new CCmp64Test(true, true, 0xFEDCBA9876543210ULL, 0x012389AB4567CDEFULL); }, - [] () { return new CCmp64Test(false, false, 0xFFFFFFFFF6543210ULL, 0xFFFFFFFFF567CDEFULL); }, - [] () { return new CCmp64Test(false, true, 0xFFFFFFFFF6543210ULL, 0xFFFFFFFFF567CDEFULL); }, - [] () { return new CCmp64Test(true, true, 0xFFFFFFFFF6543210ULL, 0xFFFFFFFFF567CDEFULL); }, - [] () { return new CCmp64Test(false, false, 0x100000000, 0x100000000); }, - [] () { return new CCmp64Test(false, true, 0x100000000, 0x100000000); }, - [] () { return new CCmp64Test(true , true, 0x100000000, 0x100000000); }, - [] () { return new CCmp64Test(false, true, 0, 0x80ULL); }, - [] () { return new CCmp64Test(false, true, 0, 0xFFFFFFFFFFFFFF80ULL); }, - [] () { return new CCmp64Test(true, true, 0, 0xFFFFFFFFFFFFFF80ULL); }, - [] () { return new CLogic64Test(); }, - [] () { return new CShift64Test(0); }, - [] () { return new CShift64Test(12); }, - [] () { return new CShift64Test(32); }, - [] () { return new CShift64Test(52); }, - [] () { return new CShift64Test(63); }, - [] () { return new CShift64Test(64); }, - [] () { return new CShift64Test(76); }, - [] () { return new CMerge64Test(); }, - [] () { return new CMemAccess64Test(); }, - [] () { return new CCall64Test(); }, - [] () { return new CExternJumpTest(); } -}; + { + []() { return new CFloatFusedMultiAddTest(); }, + []() { return new CDoubleFusedMultiAddTest(); }, + // []() { return new CCompareTest(); }, + // []() { return new CRegAllocTest(); }, + // []() { return new CRandomAluTest(true); }, + // []() { return new CRandomAluTest(false); }, + // []() { return new CRandomAluTest2(true); }, + // []() { return new CRandomAluTest2(false); }, + // []() { return new CRandomAluTest3(true); }, + // []() { return new CRandomAluTest3(false); }, + // []() { return new CShiftTest(0); }, + // []() { return new CShiftTest(12); }, + // []() { return new CShiftTest(31); }, + // []() { return new CShiftTest(32); }, + // []() { return new CShiftTest(44); }, + // []() { return new CCrc32Test("Hello World!", 0x67FCDACC); }, + // []() { return new CCursorTest(); }, + // []() { return new CLogicTest(0, false, ~0, false); }, + // []() { return new CLogicTest(0, false, ~0, true); }, + // []() { return new CLogicTest(0, true, ~0, false); }, + // []() { return new CLogicTest(0, true, ~0, true); }, + // []() { return new CLogicTest(0x01234567, false, 0x8000, true); }, + // []() { return new CLogicTest(0x01234567, false, ~0x8000, true); }, + // []() { return new CLogicTest(0x89ABCDEF, false, 0x01234567, true); }, + // []() { return new CMultTest(true); }, + // []() { return new CMultTest(false); }, + // []() { return new CDivTest(true); }, + // []() { return new CDivTest(false); }, + // []() { return new CMemAccessTest(); }, + // []() { return new CMemAccessIdxTest(); }, + // []() { return new CMemAccess8Test(); }, + // []() { return new CMemAccess16Test(); }, + // []() { return new CMemAccessRefTest(); }, + // []() { return new CHugeJumpTest(); }, + // []() { return new CHugeJumpTestLiteral(); }, + // []() { return new CNestedIfTest(); }, + // []() { return new CLzcTest(); }, + // []() { return new CAliasTest(); }, + // []() { return new CAliasTest2(); }, + // []() { return new CFpuTest(); }, + // []() { return new CFpIntMixTest(); }, + // []() { return new CSimpleMdTest(); }, + // []() { return new CMdTest(); }, + // []() { return new CMdLogicTest(); }, + // []() { return new CMdAddTest(); }, + // []() { return new CMdSubTest(); }, + // []() { return new CMdUnpackTest(); }, + // []() { return new CMdCmpTest(); }, + // []() { return new CMdMinMaxTest(); }, + // []() { return new CMdFpTest(); }, + // []() { return new CMdFpFlagTest(); }, + // []() { return new CMdCallTest(); }, + // []() { return new CMdMemAccessTest(); }, + // []() { return new CMdManipTest(); }, + // []() { return new CMdShiftTest(0); }, + // []() { return new CMdShiftTest(15); }, + // []() { return new CMdShiftTest(16); }, + // []() { return new CMdShiftTest(31); }, + // []() { return new CMdShiftTest(32); }, + // []() { return new CMdShiftTest(38); }, + // []() { return new CAlu64Test(); }, + // //negative / positive + // []() { return new CConditionTest(false, 0xFFFFFFFE, 0xFFFFFFFE); }, + // []() { return new CConditionTest(false, 0x00000002, 0xFFFFFFFE); }, + // []() { return new CConditionTest(false, 0xFFFFFFFE, 0x00000002); }, + // []() { return new CConditionTest(false, 0x00000002, 0x00000002); }, + // []() { return new CConditionTest(true, 0xFFFFFFFE, 0xFFFFFFFE); }, + // []() { return new CConditionTest(true, 0x00000002, 0xFFFFFFFE); }, + // []() { return new CConditionTest(true, 0xFFFFFFFE, 0x00000002); }, + // []() { return new CConditionTest(true, 0x00000002, 0x00000002); }, + // //negative / negative + // []() { return new CConditionTest(false, 0xFFFFFFF0, 0xFFFFFFF0); }, + // []() { return new CConditionTest(false, 0xFFFFFF00, 0xFFFFFFF0); }, + // []() { return new CConditionTest(false, 0xFFFFFFF0, 0xFFFFFF00); }, + // []() { return new CConditionTest(false, 0xFFFFFF00, 0xFFFFFF00); }, + // []() { return new CConditionTest(true, 0xFFFFFFF0, 0xFFFFFFF0); }, + // []() { return new CConditionTest(true, 0xFFFFFF00, 0xFFFFFFF0); }, + // []() { return new CConditionTest(true, 0xFFFFFFF0, 0xFFFFFF00); }, + // []() { return new CConditionTest(true, 0xFFFFFF00, 0xFFFFFF00); }, + // //positive / positive + // []() { return new CConditionTest(false, 0x0000000F, 0x0000000F); }, + // []() { return new CConditionTest(false, 0x000000FF, 0x0000000F); }, + // []() { return new CConditionTest(false, 0x0000000F, 0x000000FF); }, + // []() { return new CConditionTest(false, 0x000000FF, 0x000000FF); }, + // []() { return new CConditionTest(true, 0x0000000F, 0x0000000F); }, + // []() { return new CConditionTest(true, 0x000000FF, 0x0000000F); }, + // []() { return new CConditionTest(true, 0x0000000F, 0x000000FF); }, + // []() { return new CConditionTest(true, 0x000000FF, 0x000000FF); }, + // []() { return new CCmp64Test(false, false, 0xFEDCBA9876543210ULL, 0x012389AB4567CDEFULL); }, + // []() { return new CCmp64Test(false, true, 0xFEDCBA9876543210ULL, 0x012389AB4567CDEFULL); }, + // []() { return new CCmp64Test(true, true, 0xFEDCBA9876543210ULL, 0x012389AB4567CDEFULL); }, + // []() { return new CCmp64Test(false, false, 0xFFFFFFFFF6543210ULL, 0xFFFFFFFFF567CDEFULL); }, + // []() { return new CCmp64Test(false, true, 0xFFFFFFFFF6543210ULL, 0xFFFFFFFFF567CDEFULL); }, + // []() { return new CCmp64Test(true, true, 0xFFFFFFFFF6543210ULL, 0xFFFFFFFFF567CDEFULL); }, + // []() { return new CCmp64Test(false, false, 0x100000000, 0x100000000); }, + // []() { return new CCmp64Test(false, true, 0x100000000, 0x100000000); }, + // []() { return new CCmp64Test(true, true, 0x100000000, 0x100000000); }, + // []() { return new CCmp64Test(false, true, 0, 0x80ULL); }, + // []() { return new CCmp64Test(false, true, 0, 0xFFFFFFFFFFFFFF80ULL); }, + // []() { return new CCmp64Test(true, true, 0, 0xFFFFFFFFFFFFFF80ULL); }, + // []() { return new CLogic64Test(); }, + // []() { return new CShift64Test(0); }, + // []() { return new CShift64Test(12); }, + // []() { return new CShift64Test(32); }, + // []() { return new CShift64Test(52); }, + // []() { return new CShift64Test(63); }, + // []() { return new CShift64Test(64); }, + // []() { return new CShift64Test(76); }, + // []() { return new CMerge64Test(); }, + // []() { return new CMemAccess64Test(); }, + // []() { return new CCall64Test(); }, + []() { return new CExternJumpTest(); }}; int main(int argc, const char** argv) {