From 663905eaca2859c3119c0ad39ec0ca71c83c8b13 Mon Sep 17 00:00:00 2001 From: Mahmood - Zer0xFF <5013823+Zer0xFF@users.noreply.github.com> Date: Tue, 2 Jun 2020 19:54:05 +0100 Subject: [PATCH 1/6] add fused multiply add op --- include/Jitter.h | 2 ++ include/Jitter_Statement.h | 1 + src/Jitter.cpp | 30 ++++++++++++++++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/include/Jitter.h b/include/Jitter.h index 7a1dd06d..5ea97160 100644 --- a/include/Jitter.h +++ b/include/Jitter.h @@ -152,6 +152,7 @@ namespace Jitter void FP_Max(); void FP_Min(); void FP_Mul(); + void FP_MulAdd(); void FP_Div(); void FP_Cmp(CONDITION); void FP_Neg(); @@ -285,6 +286,7 @@ namespace Jitter void InsertUnaryStatement(Jitter::OPERATION); void InsertBinaryStatement(Jitter::OPERATION); + void InsertTernaryStatement(Jitter::OPERATION); void InsertUnaryMdStatement(Jitter::OPERATION); void InsertBinaryMdStatement(Jitter::OPERATION); diff --git a/include/Jitter_Statement.h b/include/Jitter_Statement.h index ddba03d9..46a4c769 100644 --- a/include/Jitter_Statement.h +++ b/include/Jitter_Statement.h @@ -139,6 +139,7 @@ namespace Jitter OP_FP_ADD, OP_FP_SUB, OP_FP_MUL, + OP_FP_MULADD, OP_FP_DIV, OP_FP_SQRT, OP_FP_RSQRT, diff --git a/src/Jitter.cpp b/src/Jitter.cpp index fac3e7b3..67914efc 100644 --- a/src/Jitter.cpp +++ b/src/Jitter.cpp @@ -987,6 +987,21 @@ void CJitter::FP_Mul() m_shadow.Push(tempSym); } +void CJitter::FP_MulAdd() +{ + SymbolPtr tempSym = MakeSymbol(SYM_FP_TMP_SINGLE, m_nextTemporary++); + + STATEMENT statement; + statement.op = OP_FP_MULADD; + statement.src3 = MakeSymbolRef(m_shadow.Pull()); + statement.src2 = MakeSymbolRef(m_shadow.Pull()); + statement.src1 = MakeSymbolRef(m_shadow.Pull()); + statement.dst = MakeSymbolRef(tempSym); + InsertStatement(statement); + + m_shadow.Push(tempSym); +} + void CJitter::FP_Div() { SymbolPtr tempSym = MakeSymbol(SYM_FP_TMP_SINGLE, m_nextTemporary++); @@ -1615,6 +1630,21 @@ void CJitter::InsertBinaryStatement(Jitter::OPERATION 
operation) m_shadow.Push(tempSym); } +void CJitter::InsertTernaryStatement(Jitter::OPERATION operation) +{ + auto tempSym = MakeSymbol(SYM_TEMPORARY, m_nextTemporary++); + + STATEMENT statement; + statement.op = operation; + statement.src3 = MakeSymbolRef(m_shadow.Pull()); + statement.src2 = MakeSymbolRef(m_shadow.Pull()); + statement.src1 = MakeSymbolRef(m_shadow.Pull()); + statement.dst = MakeSymbolRef(tempSym); + InsertStatement(statement); + + m_shadow.Push(tempSym); +} + void CJitter::InsertUnaryMdStatement(Jitter::OPERATION operation) { auto tempSym = MakeSymbol(SYM_TEMPORARY128, m_nextTemporary++); From 02b79e359e31550c5bf26fccf55bad680225c1d8 Mon Sep 17 00:00:00 2001 From: Zer0xFF <5013823+Zer0xFF@users.noreply.github.com> Date: Tue, 28 Dec 2021 00:06:07 +0000 Subject: [PATCH 2/6] implement FPU fused multiply-add and negated-multiply-add x86 --- include/Jitter.h | 1 + include/Jitter_CodeGen_x86.h | 11 +++++++++++ include/Jitter_Statement.h | 1 + include/X86Assembler.h | 3 +++ src/Jitter.cpp | 17 ++++++++++++++++- src/Jitter_CodeGen_x86_Fpu_Avx.cpp | 19 +++++++++++++++++++ src/X86Assembler_Avx.cpp | 10 ++++++++++ 7 files changed, 61 insertions(+), 1 deletion(-) diff --git a/include/Jitter.h b/include/Jitter.h index 5ea97160..27c61347 100644 --- a/include/Jitter.h +++ b/include/Jitter.h @@ -153,6 +153,7 @@ namespace Jitter void FP_Min(); void FP_Mul(); void FP_MulAdd(); + void FP_MulSub(); void FP_Div(); void FP_Cmp(CONDITION); void FP_Neg(); diff --git a/include/Jitter_CodeGen_x86.h b/include/Jitter_CodeGen_x86.h index 803632fa..20ff9b89 100644 --- a/include/Jitter_CodeGen_x86.h +++ b/include/Jitter_CodeGen_x86.h @@ -107,6 +107,16 @@ namespace Jitter static OpEdAvxType OpEdAvx() { return &CX86Assembler::VmulssEd; } }; + struct FPUOP_MULADD213 : public FPUOP_BASE + { + static OpEdAvxType OpEdAvx() { return &CX86Assembler::Vfmadd213ssVo; } + }; + + struct FPUOP_MULSUB213 : public FPUOP_BASE + { + static OpEdAvxType OpEdAvx() { return 
&CX86Assembler::Vfnmadd213ssVo; } + }; + struct FPUOP_DIV : public FPUOP_BASE { static OpEdType OpEd() { return &CX86Assembler::DivssEd; } @@ -722,6 +732,7 @@ namespace Jitter //FPUOP AVX template void Emit_Fpu_Avx_MemMem(const STATEMENT&); template void Emit_Fpu_Avx_MemMemMem(const STATEMENT&); + template void Emit_Fpu_Avx_MemMemMemMem(const STATEMENT&); void Emit_Fp_Avx_Cmp_VarMemMem(const STATEMENT&); void Emit_Fp_Avx_Rsqrt_MemMem(const STATEMENT&); diff --git a/include/Jitter_Statement.h b/include/Jitter_Statement.h index 46a4c769..3ab24dab 100644 --- a/include/Jitter_Statement.h +++ b/include/Jitter_Statement.h @@ -140,6 +140,7 @@ namespace Jitter OP_FP_SUB, OP_FP_MUL, OP_FP_MULADD, + OP_FP_MULSUB, OP_FP_DIV, OP_FP_SQRT, OP_FP_RSQRT, diff --git a/include/X86Assembler.h b/include/X86Assembler.h index 7023372a..637edb6b 100644 --- a/include/X86Assembler.h +++ b/include/X86Assembler.h @@ -463,6 +463,9 @@ class CX86Assembler void VcmppsVo(XMMREGISTER, XMMREGISTER, const CAddress&, SSE_CMP_TYPE); void VblendpsVo(XMMREGISTER, XMMREGISTER, const CAddress&, uint8); + void Vfmadd213ssVo(XMMREGISTER, XMMREGISTER, const CAddress&); + void Vfnmadd213ssVo(XMMREGISTER, XMMREGISTER, const CAddress&); + void VshufpsVo(XMMREGISTER, XMMREGISTER, const CAddress&, uint8); private: diff --git a/src/Jitter.cpp b/src/Jitter.cpp index 67914efc..0acfb3aa 100644 --- a/src/Jitter.cpp +++ b/src/Jitter.cpp @@ -993,9 +993,24 @@ void CJitter::FP_MulAdd() STATEMENT statement; statement.op = OP_FP_MULADD; - statement.src3 = MakeSymbolRef(m_shadow.Pull()); + statement.src1 = MakeSymbolRef(m_shadow.Pull()); statement.src2 = MakeSymbolRef(m_shadow.Pull()); + statement.src3 = MakeSymbolRef(m_shadow.Pull()); + statement.dst = MakeSymbolRef(tempSym); + InsertStatement(statement); + + m_shadow.Push(tempSym); +} + +void CJitter::FP_MulSub() +{ + SymbolPtr tempSym = MakeSymbol(SYM_FP_TMP_SINGLE, m_nextTemporary++); + + STATEMENT statement; + statement.op = OP_FP_MULSUB; statement.src1 = 
MakeSymbolRef(m_shadow.Pull()); + statement.src2 = MakeSymbolRef(m_shadow.Pull()); + statement.src3 = MakeSymbolRef(m_shadow.Pull()); statement.dst = MakeSymbolRef(tempSym); InsertStatement(statement); diff --git a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp index 318e172b..316f3541 100644 --- a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp +++ b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp @@ -27,6 +27,23 @@ void CCodeGen_x86::Emit_Fpu_Avx_MemMemMem(const STATEMENT& statement) m_assembler.VmovssEd(MakeMemoryFpSingleSymbolAddress(dst), dstRegister); } +template +void CCodeGen_x86::Emit_Fpu_Avx_MemMemMemMem(const STATEMENT& statement) +{ + auto dst = statement.dst->GetSymbol().get(); + auto src1 = statement.src1->GetSymbol().get(); + auto src2 = statement.src2->GetSymbol().get(); + auto src3 = statement.src3->GetSymbol().get(); + + auto dstRegister = CX86Assembler::xMM0; + auto src2Register = CX86Assembler::xMM1; + + m_assembler.VmovssEd(dstRegister, MakeMemoryFpSingleSymbolAddress(src1)); + m_assembler.VmovssEd(src2Register, MakeMemoryFpSingleSymbolAddress(src2)); + ((m_assembler).*(FPUOP::OpEdAvx()))(dstRegister, src2Register, MakeMemoryFpSingleSymbolAddress(src3)); + m_assembler.VmovssEd(MakeMemoryFpSingleSymbolAddress(dst), dstRegister); +} + void CCodeGen_x86::Emit_Fp_Avx_Cmp_VarMemMem(const STATEMENT& statement) { auto dst = statement.dst->GetSymbol().get(); @@ -101,6 +118,8 @@ void CCodeGen_x86::Emit_Fp_Avx_ToIntTrunc_RelRel(const STATEMENT& statement) CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_fpuAvxConstMatchers[] = { + { OP_FP_MULADD, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMemMem }, + { OP_FP_MULSUB, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMemMem }, { OP_FP_ADD, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, 
&CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, { OP_FP_SUB, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, { OP_FP_MUL, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, diff --git a/src/X86Assembler_Avx.cpp b/src/X86Assembler_Avx.cpp index abee2ba0..98f3f2cf 100644 --- a/src/X86Assembler_Avx.cpp +++ b/src/X86Assembler_Avx.cpp @@ -448,6 +448,16 @@ void CX86Assembler::VblendpsVo(XMMREGISTER dst, XMMREGISTER src1, const CAddress WriteByte(mask); } +void CX86Assembler::Vfmadd213ssVo(XMMREGISTER dst, const XMMREGISTER src1, const CAddress& src2) +{ + WriteVexVoOp(VEX_OPCODE_MAP_66_38, 0xA9, dst, src1, src2); +} + +void CX86Assembler::Vfnmadd213ssVo(XMMREGISTER dst, const XMMREGISTER src1, const CAddress& src2) +{ + WriteVexVoOp(VEX_OPCODE_MAP_66_38, 0xAD, dst, src1, src2); +} + void CX86Assembler::VshufpsVo(XMMREGISTER dst, XMMREGISTER src1, const CAddress& src2, uint8 shuffleByte) { WriteVexVoOp(VEX_OPCODE_MAP_NONE, 0xC6, dst, src1, src2); From f7fc559486a97dc9548a8957a36c583280dcb80d Mon Sep 17 00:00:00 2001 From: Zer0xFF <5013823+Zer0xFF@users.noreply.github.com> Date: Tue, 28 Dec 2021 00:06:37 +0000 Subject: [PATCH 3/6] add MD muladd/mulsub jitter op --- include/Jitter.h | 2 ++ include/Jitter_Statement.h | 3 +++ src/Jitter.cpp | 30 ++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+) diff --git a/include/Jitter.h b/include/Jitter.h index 27c61347..6b2bc657 100644 --- a/include/Jitter.h +++ b/include/Jitter.h @@ -230,6 +230,8 @@ namespace Jitter void MD_UnpackUpperHW(); void MD_UnpackUpperWD(); void MD_Xor(); + void MD_MulAdd(); + void MD_MulSub(); CCodeGen* GetCodeGen(); diff --git a/include/Jitter_Statement.h b/include/Jitter_Statement.h index 3ab24dab..d069ebbe 100644 --- a/include/Jitter_Statement.h +++ b/include/Jitter_Statement.h @@ -136,6 +136,9 @@ namespace Jitter OP_MD_CMPLT_S, OP_MD_CMPGT_S, + 
OP_MD_MULADD, + OP_MD_MULSUB, + + OP_FP_ADD, OP_FP_SUB, OP_FP_MUL, diff --git a/src/Jitter.cpp b/src/Jitter.cpp index 0acfb3aa..b8a9b46d 100644 --- a/src/Jitter.cpp +++ b/src/Jitter.cpp @@ -1615,6 +1615,36 @@ void CJitter::MD_ToSingle() InsertUnaryMdStatement(OP_MD_TOSINGLE); } +void CJitter::MD_MulAdd() +{ + SymbolPtr tempSym = MakeSymbol(SYM_TEMPORARY128, m_nextTemporary++); + + STATEMENT statement; + statement.op = OP_MD_MULADD; + statement.src1 = MakeSymbolRef(m_shadow.Pull()); + statement.src2 = MakeSymbolRef(m_shadow.Pull()); + statement.src3 = MakeSymbolRef(m_shadow.Pull()); + statement.dst = MakeSymbolRef(tempSym); + InsertStatement(statement); + + m_shadow.Push(tempSym); +} + +void CJitter::MD_MulSub() +{ + SymbolPtr tempSym = MakeSymbol(SYM_TEMPORARY128, m_nextTemporary++); + + STATEMENT statement; + statement.op = OP_MD_MULSUB; + statement.src1 = MakeSymbolRef(m_shadow.Pull()); + statement.src2 = MakeSymbolRef(m_shadow.Pull()); + statement.src3 = MakeSymbolRef(m_shadow.Pull()); + statement.dst = MakeSymbolRef(tempSym); + InsertStatement(statement); + + m_shadow.Push(tempSym); +} + //Generic Statement Inserters //------------------------------------------------ From c95787e22df9938f376def66458d21e87cf2be40 Mon Sep 17 00:00:00 2001 From: Zer0xFF <5013823+Zer0xFF@users.noreply.github.com> Date: Tue, 28 Dec 2021 00:46:50 +0000 Subject: [PATCH 4/6] implement MD fused multiply-add and negated-multiply-add x86-64 --- include/Jitter_CodeGen_x86.h | 11 +++++++++ include/X86Assembler.h | 2 ++ src/Jitter_CodeGen_x86_Md_Avx.cpp | 39 +++++++++++++++++++++++++++++++ src/X86Assembler_Avx.cpp | 10 ++++++++ 4 files changed, 62 insertions(+) diff --git a/include/Jitter_CodeGen_x86.h b/include/Jitter_CodeGen_x86.h index 20ff9b89..a633b6dc 100644 --- a/include/Jitter_CodeGen_x86.h +++ b/include/Jitter_CodeGen_x86.h @@ -400,6 +400,16 @@ namespace Jitter static OpVoType OpVoAvx() { return &CX86Assembler::Vcvtdq2psVo; } }; + struct MDOP_MULADD213 : public MDOP_BASE + { 
+ static OpVoAvxType OpVoAvx() { return &CX86Assembler::Vfmadd213psVo; } + }; + + struct MDOP_MULSUB213 : public MDOP_BASE + { + static OpVoAvxType OpVoAvx() { return &CX86Assembler::Vfnmadd213psVo; } + }; + //MDOP SHIFT ----------------------------------------------------- struct MDOP_SHIFT_BASE { @@ -744,6 +754,7 @@ namespace Jitter template void Emit_Md_Avx_VarVar(const STATEMENT&); template void Emit_Md_Avx_VarVarVar(const STATEMENT&); template void Emit_Md_Avx_VarVarVarRev(const STATEMENT&); + template void Emit_Md_Avx_VarVarVarVar(const STATEMENT&); template void Emit_Md_Avx_Shift_VarVarCst(const STATEMENT&); void Emit_Md_Avx_Mov_RegVar(const STATEMENT&); diff --git a/include/X86Assembler.h b/include/X86Assembler.h index 637edb6b..f2870c57 100644 --- a/include/X86Assembler.h +++ b/include/X86Assembler.h @@ -465,6 +465,8 @@ class CX86Assembler void VblendpsVo(XMMREGISTER, XMMREGISTER, const CAddress&, uint8); void Vfmadd213ssVo(XMMREGISTER, XMMREGISTER, const CAddress&); void Vfnmadd213ssVo(XMMREGISTER, XMMREGISTER, const CAddress&); + void Vfmadd213psVo(XMMREGISTER, XMMREGISTER, const CAddress&); + void Vfnmadd213psVo(XMMREGISTER, XMMREGISTER, const CAddress&); void VshufpsVo(XMMREGISTER, XMMREGISTER, const CAddress&, uint8); diff --git a/src/Jitter_CodeGen_x86_Md_Avx.cpp b/src/Jitter_CodeGen_x86_Md_Avx.cpp index 316af96f..77f1350f 100644 --- a/src/Jitter_CodeGen_x86_Md_Avx.cpp +++ b/src/Jitter_CodeGen_x86_Md_Avx.cpp @@ -45,6 +45,42 @@ void CCodeGen_x86::Emit_Md_Avx_VarVarVarRev(const STATEMENT& statement) CommitSymbolRegisterMdAvx(dst, dstRegister); } +template +void CCodeGen_x86::Emit_Md_Avx_VarVarVarVar(const STATEMENT& statement) +{ + auto dst = statement.dst->GetSymbol().get(); + auto src1 = statement.src1->GetSymbol().get(); + auto src2 = statement.src2->GetSymbol().get(); + auto src3 = statement.src3->GetSymbol().get(); + + auto tempRegister = CX86Assembler::xMM3; + auto dstRegister = PrepareSymbolRegisterDefMd(dst, CX86Assembler::xMM0); + auto 
src1Register = PrepareSymbolRegisterUseMdAvx(src1, CX86Assembler::xMM0); + + if(dstRegister != src1Register) + { + m_assembler.VmovapsVo(tempRegister, CX86Assembler::MakeXmmRegisterAddress(src1Register)); + } + + auto src2Register = PrepareSymbolRegisterUseMdAvx(src2, CX86Assembler::xMM2); + ((m_assembler).*(MDOP::OpVoAvx()))(src1Register, src2Register, MakeVariable128SymbolAddress(src3)); + + // hack: reg optimisation doesn't take overriding values into account, so we need to copy and restore + if(dstRegister != src1Register) + { + if(dst->m_type != SYM_REGISTER128) + { + CommitSymbolRegisterMdAvx(dst, src1Register); + } + else + { + m_assembler.VmovapsVo(dstRegister, CX86Assembler::MakeXmmRegisterAddress(src1Register)); + } + m_assembler.VmovapsVo(src1Register, CX86Assembler::MakeXmmRegisterAddress(tempRegister)); + } + +} + template void CCodeGen_x86::Emit_Md_Avx_Shift_VarVarCst(const STATEMENT& statement) { @@ -620,6 +656,9 @@ CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_mdAvxConstMatchers[] = { OP_MD_TOWORD_TRUNCATE, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_Avx_VarVar }, { OP_MD_TOSINGLE, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_Avx_VarVar }, + {OP_MD_MULADD, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, &CCodeGen_x86::Emit_Md_Avx_VarVarVarVar}, + {OP_MD_MULSUB, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, &CCodeGen_x86::Emit_Md_Avx_VarVarVarVar}, + { OP_MD_EXPAND, MATCH_VARIABLE128, MATCH_VARIABLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_Avx_Expand_VarVar }, { OP_MD_EXPAND, MATCH_VARIABLE128, MATCH_CONSTANT, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_Avx_Expand_VarCst }, diff --git a/src/X86Assembler_Avx.cpp b/src/X86Assembler_Avx.cpp index 98f3f2cf..9046c78c 100644 --- a/src/X86Assembler_Avx.cpp +++ b/src/X86Assembler_Avx.cpp @@ -458,6 +458,16 @@ void CX86Assembler::Vfnmadd213ssVo(XMMREGISTER dst, const 
XMMREGISTER src1, cons WriteVexVoOp(VEX_OPCODE_MAP_66_38, 0xAD, dst, src1, src2); } +void CX86Assembler::Vfmadd213psVo(XMMREGISTER dst, const XMMREGISTER src1, const CAddress& src2) +{ + WriteVexVoOp(VEX_OPCODE_MAP_66_38, 0xA8, dst, src1, src2); +} + +void CX86Assembler::Vfnmadd213psVo(XMMREGISTER dst, const XMMREGISTER src1, const CAddress& src2) +{ + WriteVexVoOp(VEX_OPCODE_MAP_66_38, 0xAC, dst, src1, src2); +} + void CX86Assembler::VshufpsVo(XMMREGISTER dst, XMMREGISTER src1, const CAddress& src2, uint8 shuffleByte) { WriteVexVoOp(VEX_OPCODE_MAP_NONE, 0xC6, dst, src1, src2); From c692e31f5f70e3a1401e9bb32bc2689b969568c1 Mon Sep 17 00:00:00 2001 From: Zer0xFF <5013823+Zer0xFF@users.noreply.github.com> Date: Tue, 28 Dec 2021 00:47:20 +0000 Subject: [PATCH 5/6] add tests --- build_cmake/CMakeLists.txt | 2 + tests/DoubleFusedMultiAddTest.cpp | 81 +++++++++++ tests/DoubleFusedMultiAddTest.h | 37 +++++ tests/FloatFusedMultiAddTest.cpp | 117 +++++++++++++++ tests/FloatFusedMultiAddTest.h | 45 ++++++ tests/Main.cpp | 229 +++++++++++++++--------------- 6 files changed, 398 insertions(+), 113 deletions(-) create mode 100644 tests/DoubleFusedMultiAddTest.cpp create mode 100644 tests/DoubleFusedMultiAddTest.h create mode 100644 tests/FloatFusedMultiAddTest.cpp create mode 100644 tests/FloatFusedMultiAddTest.h diff --git a/build_cmake/CMakeLists.txt b/build_cmake/CMakeLists.txt index 2cf245d5..7ee7e752 100644 --- a/build_cmake/CMakeLists.txt +++ b/build_cmake/CMakeLists.txt @@ -124,6 +124,8 @@ set(CodeGenTest_SRC ../tests/ExternJumpTest.h ../tests/FpIntMixTest.cpp ../tests/FpuTest.cpp + ../tests/FloatFusedMultiAddTest.cpp + ../tests/DoubleFusedMultiAddTest.cpp ../tests/HugeJumpTest.cpp ../tests/HugeJumpTestLiteral.cpp ../tests/HugeJumpTestLiteral.h diff --git a/tests/DoubleFusedMultiAddTest.cpp b/tests/DoubleFusedMultiAddTest.cpp new file mode 100644 index 00000000..bf1adbb2 --- /dev/null +++ b/tests/DoubleFusedMultiAddTest.cpp @@ -0,0 +1,81 @@ +#include 
"DoubleFusedMultiAddTest.h" +#include "MemStream.h" + +CDoubleFusedMultiAddTest::CDoubleFusedMultiAddTest() +{ +} + +CDoubleFusedMultiAddTest::~CDoubleFusedMultiAddTest() +{ +} + +void CDoubleFusedMultiAddTest::Compile(Jitter::CJitter& jitter) +{ + Framework::CMemStream codeStream; + jitter.SetStream(&codeStream); + + jitter.Begin(); + { + jitter.MD_PushRel(offsetof(CONTEXT, number3)); + jitter.MD_PushRel(offsetof(CONTEXT, number2)); + jitter.MD_PushRel(offsetof(CONTEXT, number1)); + jitter.MD_MulAdd(); + jitter.MD_PullRel(offsetof(CONTEXT, res1)); + + jitter.MD_PushRel(offsetof(CONTEXT, number4)); + jitter.MD_PushRel(offsetof(CONTEXT, number2)); + jitter.MD_PushRel(offsetof(CONTEXT, number2)); + jitter.MD_MulAdd(); + jitter.MD_PullRel(offsetof(CONTEXT, res2)); + + jitter.MD_PushRel(offsetof(CONTEXT, number3)); + jitter.MD_PushRel(offsetof(CONTEXT, number4)); + jitter.MD_PushRel(offsetof(CONTEXT, number2)); + jitter.MD_MulAdd(); + jitter.MD_PullRel(offsetof(CONTEXT, res3)); + + jitter.MD_PushRel(offsetof(CONTEXT, number3)); + jitter.MD_PushRel(offsetof(CONTEXT, number2)); + jitter.MD_PushRel(offsetof(CONTEXT, number1)); + jitter.MD_MulS(); + jitter.MD_AddS(); + jitter.MD_PullRel(offsetof(CONTEXT, res4)); + + jitter.MD_PushRel(offsetof(CONTEXT, number4)); + jitter.MD_PushRel(offsetof(CONTEXT, number2)); + jitter.MD_PushRel(offsetof(CONTEXT, number2)); + jitter.MD_MulS(); + jitter.MD_AddS(); + jitter.MD_PullRel(offsetof(CONTEXT, res5)); + + jitter.MD_PushRel(offsetof(CONTEXT, number3)); + jitter.MD_PushRel(offsetof(CONTEXT, number4)); + jitter.MD_PushRel(offsetof(CONTEXT, number2)); + jitter.MD_MulS(); + jitter.MD_AddS(); + jitter.MD_PullRel(offsetof(CONTEXT, res6)); + } + jitter.End(); + + m_function = CMemoryFunction(codeStream.GetBuffer(), codeStream.GetSize()); +} + +void CDoubleFusedMultiAddTest::Run() +{ + memset(&m_context, 0, sizeof(CONTEXT)); + m_context.number1[1] = 1 ; + m_context.number2[1] = 2 ; + m_context.number3[1] = 4 ; + m_context.number4[1] = 16; + 
m_context.number1[2] = 1 ; + m_context.number2[2] = 2 ; + m_context.number3[2] = 4 ; + m_context.number4[2] = 16; + m_function(&m_context); + for(int i = 0; i < 4; ++i) + { + TEST_VERIFY(m_context.res1[i] == m_context.res4[i]); + TEST_VERIFY(m_context.res2[i] == m_context.res5[i]); + TEST_VERIFY(m_context.res3[i] == m_context.res6[i]); + } +} diff --git a/tests/DoubleFusedMultiAddTest.h b/tests/DoubleFusedMultiAddTest.h new file mode 100644 index 00000000..d2ea4d48 --- /dev/null +++ b/tests/DoubleFusedMultiAddTest.h @@ -0,0 +1,37 @@ +#pragma once + +#include "Test.h" +#include "MemoryFunction.h" +#include "Align16.h" + +class CDoubleFusedMultiAddTest : public CTest +{ +public: + CDoubleFusedMultiAddTest(); + virtual ~CDoubleFusedMultiAddTest(); + + void Compile(Jitter::CJitter&) override; + void Run() override; + +private: + struct CONTEXT + { + ALIGN16 + + uint8 number1[16]; + uint8 number2[16]; + uint8 number3[16]; + uint8 number4[16]; + + + uint8 res1[16]; + uint8 res2[16]; + uint8 res3[16]; + uint8 res4[16]; + uint8 res5[16]; + uint8 res6[16]; + }; + + CONTEXT m_context; + CMemoryFunction m_function; +}; diff --git a/tests/FloatFusedMultiAddTest.cpp b/tests/FloatFusedMultiAddTest.cpp new file mode 100644 index 00000000..5d925f10 --- /dev/null +++ b/tests/FloatFusedMultiAddTest.cpp @@ -0,0 +1,117 @@ +#include "FloatFusedMultiAddTest.h" +#include "MemStream.h" + +CFloatFusedMultiAddTest::CFloatFusedMultiAddTest() +{ +} + +CFloatFusedMultiAddTest::~CFloatFusedMultiAddTest() +{ +} + +void CFloatFusedMultiAddTest::Compile(Jitter::CJitter& jitter) +{ + Framework::CMemStream codeStream; + jitter.SetStream(&codeStream); + + jitter.Begin(); + { + jitter.FP_PushSingle(offsetof(CONTEXT, number3)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_PushSingle(offsetof(CONTEXT, number1)); + jitter.FP_MulAdd(); + jitter.FP_PullSingle(offsetof(CONTEXT, res1)); + + jitter.FP_PushSingle(offsetof(CONTEXT, number4)); + jitter.FP_PushSingle(offsetof(CONTEXT, 
number2)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_MulAdd(); + jitter.FP_PullSingle(offsetof(CONTEXT, res2)); + + jitter.FP_PushSingle(offsetof(CONTEXT, number3)); + jitter.FP_PushSingle(offsetof(CONTEXT, number4)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_MulAdd(); + jitter.FP_PullSingle(offsetof(CONTEXT, res3)); + + jitter.FP_PushSingle(offsetof(CONTEXT, number3)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_PushSingle(offsetof(CONTEXT, number1)); + jitter.FP_MulSub(); + jitter.FP_PullSingle(offsetof(CONTEXT, res4)); + + jitter.FP_PushSingle(offsetof(CONTEXT, number4)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_MulSub(); + jitter.FP_PullSingle(offsetof(CONTEXT, res5)); + + jitter.FP_PushSingle(offsetof(CONTEXT, number3)); + jitter.FP_PushSingle(offsetof(CONTEXT, number4)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_MulSub(); + jitter.FP_PullSingle(offsetof(CONTEXT, res6)); + + jitter.FP_PushSingle(offsetof(CONTEXT, number3)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_PushSingle(offsetof(CONTEXT, number1)); + jitter.FP_Mul(); + jitter.FP_Add(); + jitter.FP_PullSingle(offsetof(CONTEXT, res1_org)); + + jitter.FP_PushSingle(offsetof(CONTEXT, number4)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_Mul(); + jitter.FP_Add(); + jitter.FP_PullSingle(offsetof(CONTEXT, res2_org)); + + jitter.FP_PushSingle(offsetof(CONTEXT, number3)); + jitter.FP_PushSingle(offsetof(CONTEXT, number4)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_Mul(); + jitter.FP_Add(); + jitter.FP_PullSingle(offsetof(CONTEXT, res3_org)); + + jitter.FP_PushSingle(offsetof(CONTEXT, number3)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_PushSingle(offsetof(CONTEXT, number1)); + jitter.FP_Mul(); + jitter.FP_Sub(); + 
jitter.FP_PullSingle(offsetof(CONTEXT, res4_org)); + + jitter.FP_PushSingle(offsetof(CONTEXT, number4)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_Mul(); + jitter.FP_Sub(); + jitter.FP_PullSingle(offsetof(CONTEXT, res5_org)); + + jitter.FP_PushSingle(offsetof(CONTEXT, number3)); + jitter.FP_PushSingle(offsetof(CONTEXT, number4)); + jitter.FP_PushSingle(offsetof(CONTEXT, number2)); + jitter.FP_Mul(); + jitter.FP_Sub(); + jitter.FP_PullSingle(offsetof(CONTEXT, res6_org)); + } + jitter.End(); + + m_function = CMemoryFunction(codeStream.GetBuffer(), codeStream.GetSize()); +} + +void CFloatFusedMultiAddTest::Run() +{ + memset(&m_context, 0, sizeof(CONTEXT)); + m_context.number1 = 1.0f; + m_context.number2 = 2.0f; + m_context.number3 = -4.0f; + m_context.number4 = 16.0f; + m_function(&m_context); + TEST_VERIFY(m_context.res1 == m_context.res1_org); + TEST_VERIFY(m_context.res2 == m_context.res2_org); + TEST_VERIFY(m_context.res3 == m_context.res3_org); + + TEST_VERIFY(m_context.res4 == m_context.res4_org); + TEST_VERIFY(m_context.res5 == m_context.res5_org); + TEST_VERIFY(m_context.res6 == m_context.res6_org); +} diff --git a/tests/FloatFusedMultiAddTest.h b/tests/FloatFusedMultiAddTest.h new file mode 100644 index 00000000..b5cb93d9 --- /dev/null +++ b/tests/FloatFusedMultiAddTest.h @@ -0,0 +1,45 @@ +#pragma once + +#include "Test.h" +#include "MemoryFunction.h" +#include "Align16.h" + +class CFloatFusedMultiAddTest : public CTest +{ +public: + CFloatFusedMultiAddTest(); + virtual ~CFloatFusedMultiAddTest(); + + void Compile(Jitter::CJitter&) override; + void Run() override; + +private: + struct CONTEXT + { + ALIGN16 + + float number1; + float number2; + float number3; + float number4; + + + float res1; + float res2; + float res3; + float res4; + float res5; + float res6; + + + float res1_org; + float res2_org; + float res3_org; + float res4_org; + float res5_org; + float res6_org; + }; + + 
CONTEXT m_context; + CMemoryFunction m_function; +}; diff --git a/tests/Main.cpp b/tests/Main.cpp index 1a38743d..14c873e8 100644 --- a/tests/Main.cpp +++ b/tests/Main.cpp @@ -47,122 +47,125 @@ #include "LzcTest.h" #include "NestedIfTest.h" #include "ExternJumpTest.h" +#include "FloatFusedMultiAddTest.h" +#include "DoubleFusedMultiAddTest.h" -typedef std::function TestFactoryFunction; +typedef std::function TestFactoryFunction; static const TestFactoryFunction s_factories[] = -{ - [] () { return new CCompareTest(); }, - [] () { return new CRegAllocTest(); }, - [] () { return new CRandomAluTest(true); }, - [] () { return new CRandomAluTest(false); }, - [] () { return new CRandomAluTest2(true); }, - [] () { return new CRandomAluTest2(false); }, - [] () { return new CRandomAluTest3(true); }, - [] () { return new CRandomAluTest3(false); }, - [] () { return new CShiftTest(0); }, - [] () { return new CShiftTest(12); }, - [] () { return new CShiftTest(31); }, - [] () { return new CShiftTest(32); }, - [] () { return new CShiftTest(44); }, - [] () { return new CCrc32Test("Hello World!", 0x67FCDACC); }, - [] () { return new CCursorTest(); }, - [] () { return new CLogicTest(0, false, ~0, false); }, - [] () { return new CLogicTest(0, false, ~0, true); }, - [] () { return new CLogicTest(0, true, ~0, false); }, - [] () { return new CLogicTest(0, true, ~0, true); }, - [] () { return new CLogicTest(0x01234567, false, 0x8000, true); }, - [] () { return new CLogicTest(0x01234567, false, ~0x8000, true); }, - [] () { return new CLogicTest(0x89ABCDEF, false, 0x01234567, true); }, - [] () { return new CMultTest(true); }, - [] () { return new CMultTest(false); }, - [] () { return new CDivTest(true); }, - [] () { return new CDivTest(false); }, - [] () { return new CMemAccessTest(); }, - [] () { return new CMemAccessIdxTest(); }, - [] () { return new CMemAccess8Test(); }, - [] () { return new CMemAccess16Test(); }, - [] () { return new CMemAccessRefTest(); }, - [] () { return new 
CHugeJumpTest(); }, - [] () { return new CHugeJumpTestLiteral(); }, - [] () { return new CNestedIfTest(); }, - [] () { return new CLzcTest(); }, - [] () { return new CAliasTest(); }, - [] () { return new CAliasTest2(); }, - [] () { return new CFpuTest(); }, - [] () { return new CFpIntMixTest(); }, - [] () { return new CSimpleMdTest(); }, - [] () { return new CMdTest(); }, - [] () { return new CMdLogicTest(); }, - [] () { return new CMdAddTest(); }, - [] () { return new CMdSubTest(); }, - [] () { return new CMdUnpackTest(); }, - [] () { return new CMdCmpTest(); }, - [] () { return new CMdMinMaxTest(); }, - [] () { return new CMdFpTest(); }, - [] () { return new CMdFpFlagTest(); }, - [] () { return new CMdCallTest(); }, - [] () { return new CMdMemAccessTest(); }, - [] () { return new CMdManipTest(); }, - [] () { return new CMdShiftTest(0); }, - [] () { return new CMdShiftTest(15); }, - [] () { return new CMdShiftTest(16); }, - [] () { return new CMdShiftTest(31); }, - [] () { return new CMdShiftTest(32); }, - [] () { return new CMdShiftTest(38); }, - [] () { return new CAlu64Test(); }, - //negative / positive - [] () { return new CConditionTest(false, 0xFFFFFFFE, 0xFFFFFFFE); }, - [] () { return new CConditionTest(false, 0x00000002, 0xFFFFFFFE); }, - [] () { return new CConditionTest(false, 0xFFFFFFFE, 0x00000002); }, - [] () { return new CConditionTest(false, 0x00000002, 0x00000002); }, - [] () { return new CConditionTest(true, 0xFFFFFFFE, 0xFFFFFFFE); }, - [] () { return new CConditionTest(true, 0x00000002, 0xFFFFFFFE); }, - [] () { return new CConditionTest(true, 0xFFFFFFFE, 0x00000002); }, - [] () { return new CConditionTest(true, 0x00000002, 0x00000002); }, - //negative / negative - []() { return new CConditionTest(false, 0xFFFFFFF0, 0xFFFFFFF0); }, - []() { return new CConditionTest(false, 0xFFFFFF00, 0xFFFFFFF0); }, - []() { return new CConditionTest(false, 0xFFFFFFF0, 0xFFFFFF00); }, - []() { return new CConditionTest(false, 0xFFFFFF00, 0xFFFFFF00); }, - []() 
{ return new CConditionTest(true, 0xFFFFFFF0, 0xFFFFFFF0); }, - []() { return new CConditionTest(true, 0xFFFFFF00, 0xFFFFFFF0); }, - []() { return new CConditionTest(true, 0xFFFFFFF0, 0xFFFFFF00); }, - []() { return new CConditionTest(true, 0xFFFFFF00, 0xFFFFFF00); }, - //positive / positive - []() { return new CConditionTest(false, 0x0000000F, 0x0000000F); }, - []() { return new CConditionTest(false, 0x000000FF, 0x0000000F); }, - []() { return new CConditionTest(false, 0x0000000F, 0x000000FF); }, - []() { return new CConditionTest(false, 0x000000FF, 0x000000FF); }, - []() { return new CConditionTest(true, 0x0000000F, 0x0000000F); }, - []() { return new CConditionTest(true, 0x000000FF, 0x0000000F); }, - []() { return new CConditionTest(true, 0x0000000F, 0x000000FF); }, - []() { return new CConditionTest(true, 0x000000FF, 0x000000FF); }, - [] () { return new CCmp64Test(false, false, 0xFEDCBA9876543210ULL, 0x012389AB4567CDEFULL); }, - [] () { return new CCmp64Test(false, true, 0xFEDCBA9876543210ULL, 0x012389AB4567CDEFULL); }, - [] () { return new CCmp64Test(true, true, 0xFEDCBA9876543210ULL, 0x012389AB4567CDEFULL); }, - [] () { return new CCmp64Test(false, false, 0xFFFFFFFFF6543210ULL, 0xFFFFFFFFF567CDEFULL); }, - [] () { return new CCmp64Test(false, true, 0xFFFFFFFFF6543210ULL, 0xFFFFFFFFF567CDEFULL); }, - [] () { return new CCmp64Test(true, true, 0xFFFFFFFFF6543210ULL, 0xFFFFFFFFF567CDEFULL); }, - [] () { return new CCmp64Test(false, false, 0x100000000, 0x100000000); }, - [] () { return new CCmp64Test(false, true, 0x100000000, 0x100000000); }, - [] () { return new CCmp64Test(true , true, 0x100000000, 0x100000000); }, - [] () { return new CCmp64Test(false, true, 0, 0x80ULL); }, - [] () { return new CCmp64Test(false, true, 0, 0xFFFFFFFFFFFFFF80ULL); }, - [] () { return new CCmp64Test(true, true, 0, 0xFFFFFFFFFFFFFF80ULL); }, - [] () { return new CLogic64Test(); }, - [] () { return new CShift64Test(0); }, - [] () { return new CShift64Test(12); }, - [] () { return new 
CShift64Test(32); }, - [] () { return new CShift64Test(52); }, - [] () { return new CShift64Test(63); }, - [] () { return new CShift64Test(64); }, - [] () { return new CShift64Test(76); }, - [] () { return new CMerge64Test(); }, - [] () { return new CMemAccess64Test(); }, - [] () { return new CCall64Test(); }, - [] () { return new CExternJumpTest(); } -}; + { + []() { return new CFloatFusedMultiAddTest(); }, + []() { return new CDoubleFusedMultiAddTest(); }, + // []() { return new CCompareTest(); }, + // []() { return new CRegAllocTest(); }, + // []() { return new CRandomAluTest(true); }, + // []() { return new CRandomAluTest(false); }, + // []() { return new CRandomAluTest2(true); }, + // []() { return new CRandomAluTest2(false); }, + // []() { return new CRandomAluTest3(true); }, + // []() { return new CRandomAluTest3(false); }, + // []() { return new CShiftTest(0); }, + // []() { return new CShiftTest(12); }, + // []() { return new CShiftTest(31); }, + // []() { return new CShiftTest(32); }, + // []() { return new CShiftTest(44); }, + // []() { return new CCrc32Test("Hello World!", 0x67FCDACC); }, + // []() { return new CCursorTest(); }, + // []() { return new CLogicTest(0, false, ~0, false); }, + // []() { return new CLogicTest(0, false, ~0, true); }, + // []() { return new CLogicTest(0, true, ~0, false); }, + // []() { return new CLogicTest(0, true, ~0, true); }, + // []() { return new CLogicTest(0x01234567, false, 0x8000, true); }, + // []() { return new CLogicTest(0x01234567, false, ~0x8000, true); }, + // []() { return new CLogicTest(0x89ABCDEF, false, 0x01234567, true); }, + // []() { return new CMultTest(true); }, + // []() { return new CMultTest(false); }, + // []() { return new CDivTest(true); }, + // []() { return new CDivTest(false); }, + // []() { return new CMemAccessTest(); }, + // []() { return new CMemAccessIdxTest(); }, + // []() { return new CMemAccess8Test(); }, + // []() { return new CMemAccess16Test(); }, + // []() { return new 
CMemAccessRefTest(); }, + // []() { return new CHugeJumpTest(); }, + // []() { return new CHugeJumpTestLiteral(); }, + // []() { return new CNestedIfTest(); }, + // []() { return new CLzcTest(); }, + // []() { return new CAliasTest(); }, + // []() { return new CAliasTest2(); }, + // []() { return new CFpuTest(); }, + // []() { return new CFpIntMixTest(); }, + // []() { return new CSimpleMdTest(); }, + // []() { return new CMdTest(); }, + // []() { return new CMdLogicTest(); }, + // []() { return new CMdAddTest(); }, + // []() { return new CMdSubTest(); }, + // []() { return new CMdUnpackTest(); }, + // []() { return new CMdCmpTest(); }, + // []() { return new CMdMinMaxTest(); }, + // []() { return new CMdFpTest(); }, + // []() { return new CMdFpFlagTest(); }, + // []() { return new CMdCallTest(); }, + // []() { return new CMdMemAccessTest(); }, + // []() { return new CMdManipTest(); }, + // []() { return new CMdShiftTest(0); }, + // []() { return new CMdShiftTest(15); }, + // []() { return new CMdShiftTest(16); }, + // []() { return new CMdShiftTest(31); }, + // []() { return new CMdShiftTest(32); }, + // []() { return new CMdShiftTest(38); }, + // []() { return new CAlu64Test(); }, + // //negative / positive + // []() { return new CConditionTest(false, 0xFFFFFFFE, 0xFFFFFFFE); }, + // []() { return new CConditionTest(false, 0x00000002, 0xFFFFFFFE); }, + // []() { return new CConditionTest(false, 0xFFFFFFFE, 0x00000002); }, + // []() { return new CConditionTest(false, 0x00000002, 0x00000002); }, + // []() { return new CConditionTest(true, 0xFFFFFFFE, 0xFFFFFFFE); }, + // []() { return new CConditionTest(true, 0x00000002, 0xFFFFFFFE); }, + // []() { return new CConditionTest(true, 0xFFFFFFFE, 0x00000002); }, + // []() { return new CConditionTest(true, 0x00000002, 0x00000002); }, + // //negative / negative + // []() { return new CConditionTest(false, 0xFFFFFFF0, 0xFFFFFFF0); }, + // []() { return new CConditionTest(false, 0xFFFFFF00, 0xFFFFFFF0); }, + // []() { 
return new CConditionTest(false, 0xFFFFFFF0, 0xFFFFFF00); }, + // []() { return new CConditionTest(false, 0xFFFFFF00, 0xFFFFFF00); }, + // []() { return new CConditionTest(true, 0xFFFFFFF0, 0xFFFFFFF0); }, + // []() { return new CConditionTest(true, 0xFFFFFF00, 0xFFFFFFF0); }, + // []() { return new CConditionTest(true, 0xFFFFFFF0, 0xFFFFFF00); }, + // []() { return new CConditionTest(true, 0xFFFFFF00, 0xFFFFFF00); }, + // //positive / positive + // []() { return new CConditionTest(false, 0x0000000F, 0x0000000F); }, + // []() { return new CConditionTest(false, 0x000000FF, 0x0000000F); }, + // []() { return new CConditionTest(false, 0x0000000F, 0x000000FF); }, + // []() { return new CConditionTest(false, 0x000000FF, 0x000000FF); }, + // []() { return new CConditionTest(true, 0x0000000F, 0x0000000F); }, + // []() { return new CConditionTest(true, 0x000000FF, 0x0000000F); }, + // []() { return new CConditionTest(true, 0x0000000F, 0x000000FF); }, + // []() { return new CConditionTest(true, 0x000000FF, 0x000000FF); }, + // []() { return new CCmp64Test(false, false, 0xFEDCBA9876543210ULL, 0x012389AB4567CDEFULL); }, + // []() { return new CCmp64Test(false, true, 0xFEDCBA9876543210ULL, 0x012389AB4567CDEFULL); }, + // []() { return new CCmp64Test(true, true, 0xFEDCBA9876543210ULL, 0x012389AB4567CDEFULL); }, + // []() { return new CCmp64Test(false, false, 0xFFFFFFFFF6543210ULL, 0xFFFFFFFFF567CDEFULL); }, + // []() { return new CCmp64Test(false, true, 0xFFFFFFFFF6543210ULL, 0xFFFFFFFFF567CDEFULL); }, + // []() { return new CCmp64Test(true, true, 0xFFFFFFFFF6543210ULL, 0xFFFFFFFFF567CDEFULL); }, + // []() { return new CCmp64Test(false, false, 0x100000000, 0x100000000); }, + // []() { return new CCmp64Test(false, true, 0x100000000, 0x100000000); }, + // []() { return new CCmp64Test(true, true, 0x100000000, 0x100000000); }, + // []() { return new CCmp64Test(false, true, 0, 0x80ULL); }, + // []() { return new CCmp64Test(false, true, 0, 0xFFFFFFFFFFFFFF80ULL); }, + // []() { 
return new CCmp64Test(true, true, 0, 0xFFFFFFFFFFFFFF80ULL); }, + // []() { return new CLogic64Test(); }, + // []() { return new CShift64Test(0); }, + // []() { return new CShift64Test(12); }, + // []() { return new CShift64Test(32); }, + // []() { return new CShift64Test(52); }, + // []() { return new CShift64Test(63); }, + // []() { return new CShift64Test(64); }, + // []() { return new CShift64Test(76); }, + // []() { return new CMerge64Test(); }, + // []() { return new CMemAccess64Test(); }, + // []() { return new CCall64Test(); }, + []() { return new CExternJumpTest(); }}; int main(int argc, const char** argv) { From 73b2b821a7765bd244308bfde4dff6e2b200aa40 Mon Sep 17 00:00:00 2001 From: Zer0xFF <5013823+Zer0xFF@users.noreply.github.com> Date: Tue, 28 Dec 2021 14:37:25 +0000 Subject: [PATCH 6/6] simplify, copy src1 value to dst before operation --- src/Jitter_CodeGen_x86_Md_Avx.cpp | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/src/Jitter_CodeGen_x86_Md_Avx.cpp b/src/Jitter_CodeGen_x86_Md_Avx.cpp index 77f1350f..6cc2add0 100644 --- a/src/Jitter_CodeGen_x86_Md_Avx.cpp +++ b/src/Jitter_CodeGen_x86_Md_Avx.cpp @@ -53,32 +53,17 @@ void CCodeGen_x86::Emit_Md_Avx_VarVarVarVar(const STATEMENT& statement) auto src2 = statement.src2->GetSymbol().get(); auto src3 = statement.src3->GetSymbol().get(); - auto tempRegister = CX86Assembler::xMM3; auto dstRegister = PrepareSymbolRegisterDefMd(dst, CX86Assembler::xMM0); - auto src1Register = PrepareSymbolRegisterUseMdAvx(src1, CX86Assembler::xMM0); + auto src1Register = PrepareSymbolRegisterUseMdAvx(src1, CX86Assembler::xMM1); + auto src2Register = PrepareSymbolRegisterUseMdAvx(src2, CX86Assembler::xMM2); if(dstRegister != src1Register) { - m_assembler.VmovapsVo(tempRegister, CX86Assembler::MakeXmmRegisterAddress(src1Register)); - } - - auto src2Register = PrepareSymbolRegisterUseMdAvx(src2, CX86Assembler::xMM2); - ((m_assembler).*(MDOP::OpVoAvx()))(src1Register, src2Register, 
MakeVariable128SymbolAddress(src3)); - - // hack: reg optimisation doesnt take overrding values into account, so we need to copy and restore - if(dstRegister != src1Register) - { - if(dst->m_type != SYM_REGISTER128) - { - CommitSymbolRegisterMdAvx(dst, src1Register); - } - else - { - m_assembler.VmovapsVo(dstRegister, CX86Assembler::MakeXmmRegisterAddress(src1Register)); - } - m_assembler.VmovapsVo(src1Register, CX86Assembler::MakeXmmRegisterAddress(tempRegister)); + m_assembler.VmovapsVo(dstRegister, CX86Assembler::MakeXmmRegisterAddress(src1Register)); } + ((m_assembler).*(MDOP::OpVoAvx()))(dstRegister, src2Register, MakeVariable128SymbolAddress(src3)); + CommitSymbolRegisterMdAvx(dst, dstRegister); } template