From 998e0686dab3d1fc005998a466cbb727d665c4c8 Mon Sep 17 00:00:00 2001 From: Mahmood - Zer0xFF <5013823+Zer0xFF@users.noreply.github.com> Date: Tue, 9 Jun 2020 17:27:53 +0100 Subject: [PATCH 01/12] Initial work on allowing FPU operation to use register allocation --- include/Jitter_CodeGen_x86.h | 7 +++++ src/Jitter_CodeGen_x86.cpp | 35 +++++++++++++++++++++++ src/Jitter_CodeGen_x86_Fpu_Avx.cpp | 46 +++++++++++++++++++++++++++--- src/Jitter_RegAlloc.cpp | 8 ++++-- 4 files changed, 90 insertions(+), 6 deletions(-) diff --git a/include/Jitter_CodeGen_x86.h b/include/Jitter_CodeGen_x86.h index 803632fa..266dbb8d 100644 --- a/include/Jitter_CodeGen_x86.h +++ b/include/Jitter_CodeGen_x86.h @@ -727,6 +727,11 @@ namespace Jitter void Emit_Fp_Avx_Rsqrt_MemMem(const STATEMENT&); void Emit_Fp_Avx_Rcpl_MemMem(const STATEMENT&); void Emit_Fp_Avx_Mov_RelSRelI32(const STATEMENT&); + + void Emit_Fp_Avx_Mov_RelSReg(const STATEMENT&); + void Emit_Fp_Avx_Mov_RegSRelS(const STATEMENT&); + void Emit_Fp_Avx_Mov_RegSRelI32(const STATEMENT&); + void Emit_Fp_Avx_ToIntTrunc_RelRel(const STATEMENT&); //MDOP AVX @@ -770,6 +775,8 @@ namespace Jitter CX86Assembler::XMMREGISTER PrepareSymbolRegisterDefMd(CSymbol*, CX86Assembler::XMMREGISTER); CX86Assembler::XMMREGISTER PrepareSymbolRegisterUseMdAvx(CSymbol*, CX86Assembler::XMMREGISTER); void CommitSymbolRegisterMdAvx(CSymbol*, CX86Assembler::XMMREGISTER); + CX86Assembler::XMMREGISTER PrepareSymbolRegisterUseFpuAvx(CSymbol*, CX86Assembler::XMMREGISTER); + void CommitSymbolRegisterFpuAvx(CSymbol*, CX86Assembler::XMMREGISTER); virtual CX86Assembler::REGISTER PrepareRefSymbolRegisterUse(CSymbol*, CX86Assembler::REGISTER) = 0; diff --git a/src/Jitter_CodeGen_x86.cpp b/src/Jitter_CodeGen_x86.cpp index 67b0f8c1..1f3bde0d 100644 --- a/src/Jitter_CodeGen_x86.cpp +++ b/src/Jitter_CodeGen_x86.cpp @@ -1342,3 +1342,38 @@ void CCodeGen_x86::CommitSymbolRegisterMdAvx(CSymbol* symbol, CX86Assembler::XMM break; } } + +CX86Assembler::XMMREGISTER 
CCodeGen_x86::PrepareSymbolRegisterUseFpuAvx(CSymbol* symbol, CX86Assembler::XMMREGISTER preferedRegister) +{ + switch(symbol->m_type) + { + case SYM_REGISTER128: + return m_mdRegisters[symbol->m_valueLow]; + break; + case SYM_TEMPORARY128: + case SYM_RELATIVE128: + m_assembler.VmovssEd(preferedRegister, MakeMemoryFpSingleSymbolAddress(symbol)); + return preferedRegister; + break; + default: + throw std::runtime_error("Invalid symbol type."); + break; + } +} + +void CCodeGen_x86::CommitSymbolRegisterFpuAvx(CSymbol* symbol, CX86Assembler::XMMREGISTER usedRegister) +{ + switch(symbol->m_type) + { + case SYM_REGISTER128: + assert(usedRegister == m_mdRegisters[symbol->m_valueLow]); + break; + case SYM_TEMPORARY128: + case SYM_RELATIVE128: + m_assembler.VmovssEd(MakeMemory128SymbolAddress(symbol), usedRegister); + break; + default: + throw std::runtime_error("Invalid symbol type."); + break; + } +} diff --git a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp index 318e172b..2ee2bc9f 100644 --- a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp +++ b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp @@ -19,12 +19,11 @@ void CCodeGen_x86::Emit_Fpu_Avx_MemMemMem(const STATEMENT& statement) auto src1 = statement.src1->GetSymbol().get(); auto src2 = statement.src2->GetSymbol().get(); - auto dstRegister = CX86Assembler::xMM0; - auto src1Register = CX86Assembler::xMM1; + auto dstRegister = PrepareSymbolRegisterDefMd(dst, CX86Assembler::xMM0); + auto src1Register = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM1); - m_assembler.VmovssEd(src1Register, MakeMemoryFpSingleSymbolAddress(src1)); ((m_assembler).*(FPUOP::OpEdAvx()))(dstRegister, src1Register, MakeMemoryFpSingleSymbolAddress(src2)); - m_assembler.VmovssEd(MakeMemoryFpSingleSymbolAddress(dst), dstRegister); + CommitSymbolRegisterFpuAvx(dst, dstRegister); } void CCodeGen_x86::Emit_Fp_Avx_Cmp_VarMemMem(const STATEMENT& statement) @@ -75,6 +74,41 @@ void CCodeGen_x86::Emit_Fp_Avx_Rcpl_MemMem(const STATEMENT& statement) 
m_assembler.VmovssEd(MakeMemoryFpSingleSymbolAddress(dst), resultRegister); } +void CCodeGen_x86::Emit_Fp_Avx_Mov_RegSRelI32(const STATEMENT& statement) +{ + auto dst = statement.dst->GetSymbol().get(); + auto src1 = statement.src1->GetSymbol().get(); + + assert(dst->m_type == SYM_REGISTER128); + assert(src1->m_type == SYM_FP_REL_INT32); + auto dstRegister = PrepareSymbolRegisterDefMd(dst, CX86Assembler::xMM0); + m_assembler.Vcvtsi2ssEd(dstRegister, CX86Assembler::MakeIndRegOffAddress(CX86Assembler::rBP, src1->m_valueLow)); +} + +void CCodeGen_x86::Emit_Fp_Avx_Mov_RelSReg(const STATEMENT& statement) +{ + auto dst = statement.dst->GetSymbol().get(); + auto src1 = statement.src1->GetSymbol().get(); + + assert(dst->m_type == SYM_FP_REL_SINGLE); + assert(src1->m_type == SYM_REGISTER128); + auto src1Register = PrepareSymbolRegisterDefMd(src1, CX86Assembler::xMM1); + m_assembler.VmovssEd(MakeMemoryFpSingleSymbolAddress(dst), src1Register); +} + + +void CCodeGen_x86::Emit_Fp_Avx_Mov_RegSRelS(const STATEMENT& statement) +{ + auto dst = statement.dst->GetSymbol().get(); + auto src1 = statement.src1->GetSymbol().get(); + + assert(dst->m_type == SYM_REGISTER128); + assert(src1->m_type == SYM_FP_REL_SINGLE); + auto dstRegister = PrepareSymbolRegisterDefMd(dst, CX86Assembler::xMM1); + m_assembler.VmovssEd(dstRegister, MakeMemoryFpSingleSymbolAddress(src1)); +} + + void CCodeGen_x86::Emit_Fp_Avx_Mov_RelSRelI32(const STATEMENT& statement) { auto dst = statement.dst->GetSymbol().get(); @@ -114,6 +148,10 @@ CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_fpuAvxConstMatchers[] = { OP_FP_RSQRT, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Rsqrt_MemMem }, { OP_FP_RCPL, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Rcpl_MemMem }, + { OP_MOV, MATCH_REGISTER128, MATCH_RELATIVE_FP_INT32, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Mov_RegSRelI32 }, + { OP_MOV, MATCH_RELATIVE_FP_SINGLE, 
MATCH_REGISTER128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Mov_RelSReg }, + { OP_MOV, MATCH_REGISTER128, MATCH_RELATIVE_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Mov_RegSRelS }, + { OP_MOV, MATCH_RELATIVE_FP_SINGLE, MATCH_RELATIVE_FP_INT32, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Mov_RelSRelI32 }, { OP_FP_TOINT_TRUNC, MATCH_RELATIVE_FP_SINGLE, MATCH_RELATIVE_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_ToIntTrunc_RelRel }, diff --git a/src/Jitter_RegAlloc.cpp b/src/Jitter_RegAlloc.cpp index 2e3fb92a..357a7adf 100644 --- a/src/Jitter_RegAlloc.cpp +++ b/src/Jitter_RegAlloc.cpp @@ -196,6 +196,7 @@ void CJitter::AssociateSymbolsToRegisters(SymbolRegAllocInfo& symbolRegAllocs) c return (symbolType == SYM_RELATIVE) || (symbolType == SYM_TEMPORARY) || (symbolType == SYM_REL_REFERENCE) || (symbolType == SYM_TMP_REFERENCE) || + (symbolType == SYM_FP_REL_SINGLE) || (symbolType == SYM_FP_TMP_SINGLE) || || (symbolType == SYM_FP_REL_INT32) (symbolType == SYM_RELATIVE128) || (symbolType == SYM_TEMPORARY128); }; @@ -245,7 +246,7 @@ void CJitter::AssociateSymbolsToRegisters(SymbolRegAllocInfo& symbolRegAllocs) c auto registerIterator = std::end(availableRegisters); auto registerIteratorEnd = std::end(availableRegisters); auto registerSymbolType = SYM_REGISTER; - if((symbol->m_type == SYM_RELATIVE) || (symbol->m_type == SYM_TEMPORARY)) + if((symbol->m_type == SYM_RELATIVE) || (symbol->m_type == SYM_TEMPORARY) || (symbol->m_type == SYM_FP_REL_INT32)) { registerIterator = availableRegisters.lower_bound(SYM_REGISTER); registerIteratorEnd = availableRegisters.upper_bound(SYM_REGISTER); @@ -257,7 +258,10 @@ void CJitter::AssociateSymbolsToRegisters(SymbolRegAllocInfo& symbolRegAllocs) c registerIteratorEnd = availableRegisters.upper_bound(SYM_REGISTER); registerSymbolType = SYM_REG_REFERENCE; } - else if((symbol->m_type == SYM_RELATIVE128) || (symbol->m_type == SYM_TEMPORARY128)) + else if( + (symbol->m_type == SYM_RELATIVE128) || 
(symbol->m_type == SYM_TEMPORARY128) || + (symbol->m_type == SYM_FP_REL_SINGLE) || (symbol->m_type == SYM_FP_TMP_SINGLE) + ) { registerIterator = availableRegisters.lower_bound(SYM_REGISTER128); registerIteratorEnd = availableRegisters.upper_bound(SYM_REGISTER128); From d9301be0b804d73c504d2bd80f230365b9f64264 Mon Sep 17 00:00:00 2001 From: Mahmood - Zer0xFF <5013823+Zer0xFF@users.noreply.github.com> Date: Wed, 3 Jun 2020 17:27:01 +0100 Subject: [PATCH 02/12] change matcher to variable type --- src/Jitter_CodeGen_x86_Fpu_Avx.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp index 2ee2bc9f..e46f6a04 100644 --- a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp +++ b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp @@ -135,12 +135,12 @@ void CCodeGen_x86::Emit_Fp_Avx_ToIntTrunc_RelRel(const STATEMENT& statement) CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_fpuAvxConstMatchers[] = { - { OP_FP_ADD, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, - { OP_FP_SUB, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, - { OP_FP_MUL, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, - { OP_FP_DIV, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, - { OP_FP_MAX, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, - { OP_FP_MIN, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, + { OP_FP_ADD, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, + { OP_FP_SUB, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_NIL, 
&CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, + { OP_FP_MUL, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, + { OP_FP_DIV, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, + { OP_FP_MAX, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, + { OP_FP_MIN, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, { OP_FP_CMP, MATCH_VARIABLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Cmp_VarMemMem }, From 379ad1a301f0ff36b60976c72a62c0e3305304e9 Mon Sep 17 00:00:00 2001 From: Mahmood - Zer0xFF <5013823+Zer0xFF@users.noreply.github.com> Date: Wed, 3 Jun 2020 23:59:53 +0100 Subject: [PATCH 03/12] add more register FPU operations + fix --- include/Jitter_CodeGen_x86.h | 8 +- include/X86Assembler.h | 2 + src/Jitter_CodeGen_x86.cpp | 31 +++- src/Jitter_CodeGen_x86_Fpu_Avx.cpp | 226 +++++++++++++++++++++++++---- src/X86Assembler_Avx.cpp | 5 + 5 files changed, 242 insertions(+), 30 deletions(-) diff --git a/include/Jitter_CodeGen_x86.h b/include/Jitter_CodeGen_x86.h index 266dbb8d..23e16875 100644 --- a/include/Jitter_CodeGen_x86.h +++ b/include/Jitter_CodeGen_x86.h @@ -721,7 +721,12 @@ namespace Jitter //FPUOP AVX template void Emit_Fpu_Avx_MemMem(const STATEMENT&); - template void Emit_Fpu_Avx_MemMemMem(const STATEMENT&); + template void Emit_Fpu_Avx_VarVarVar(const STATEMENT&); + + void Emit_Fp_Avx_Neg_VarVar(const STATEMENT&); + void Emit_Fp_Avx_Abs_VarVar(const STATEMENT&); + void Emit_Fp_Avx_Mov_Reg32RelI32(const STATEMENT&); + void Emit_Fp_Avx_Mov_Reg128Rel(const STATEMENT&); void Emit_Fp_Avx_Cmp_VarMemMem(const STATEMENT&); void Emit_Fp_Avx_Rsqrt_MemMem(const STATEMENT&); @@ -775,6 +780,7 @@ namespace Jitter CX86Assembler::XMMREGISTER PrepareSymbolRegisterDefMd(CSymbol*, CX86Assembler::XMMREGISTER); CX86Assembler::XMMREGISTER 
PrepareSymbolRegisterUseMdAvx(CSymbol*, CX86Assembler::XMMREGISTER); void CommitSymbolRegisterMdAvx(CSymbol*, CX86Assembler::XMMREGISTER); + CX86Assembler::XMMREGISTER PrepareSymbolRegisterDefFpu(CSymbol*, CX86Assembler::XMMREGISTER); CX86Assembler::XMMREGISTER PrepareSymbolRegisterUseFpuAvx(CSymbol*, CX86Assembler::XMMREGISTER); void CommitSymbolRegisterFpuAvx(CSymbol*, CX86Assembler::XMMREGISTER); diff --git a/include/X86Assembler.h b/include/X86Assembler.h index 7023372a..d814cdf5 100644 --- a/include/X86Assembler.h +++ b/include/X86Assembler.h @@ -454,6 +454,8 @@ class CX86Assembler void VcmpltpsVo(XMMREGISTER, XMMREGISTER, const CAddress&); void VcmpgtpsVo(XMMREGISTER, XMMREGISTER, const CAddress&); + void VxorpsVo(XMMREGISTER, XMMREGISTER, const CAddress&); + void VminpsVo(XMMREGISTER, XMMREGISTER, const CAddress&); void VmaxpsVo(XMMREGISTER, XMMREGISTER, const CAddress&); diff --git a/src/Jitter_CodeGen_x86.cpp b/src/Jitter_CodeGen_x86.cpp index 1f3bde0d..d0a85b64 100644 --- a/src/Jitter_CodeGen_x86.cpp +++ b/src/Jitter_CodeGen_x86.cpp @@ -1342,6 +1342,24 @@ void CCodeGen_x86::CommitSymbolRegisterMdAvx(CSymbol* symbol, CX86Assembler::XMM break; } } +CX86Assembler::XMMREGISTER CCodeGen_x86::PrepareSymbolRegisterDefFpu(CSymbol* symbol, CX86Assembler::XMMREGISTER preferedRegister) +{ + switch(symbol->m_type) + { + case SYM_REGISTER128: + return m_mdRegisters[symbol->m_valueLow]; + break; + case SYM_FP_REL_SINGLE: + case SYM_FP_TMP_SINGLE: + case SYM_TEMPORARY128: + case SYM_RELATIVE128: + return preferedRegister; + break; + default: + throw std::runtime_error("Invalid symbol type."); + break; + } +} CX86Assembler::XMMREGISTER CCodeGen_x86::PrepareSymbolRegisterUseFpuAvx(CSymbol* symbol, CX86Assembler::XMMREGISTER preferedRegister) { @@ -1352,7 +1370,12 @@ CX86Assembler::XMMREGISTER CCodeGen_x86::PrepareSymbolRegisterUseFpuAvx(CSymbol* break; case SYM_TEMPORARY128: case SYM_RELATIVE128: - m_assembler.VmovssEd(preferedRegister, 
MakeMemoryFpSingleSymbolAddress(symbol)); + m_assembler.VmovssEd(preferedRegister, MakeMemory128SymbolAddress(symbol)); + return preferedRegister; + break; + case SYM_FP_REL_SINGLE: + case SYM_FP_TMP_SINGLE: + m_assembler.VmovssEd(preferedRegister, MakeMemoryFpSingleSymbolAddress(symbol)); return preferedRegister; break; default: @@ -1370,7 +1393,11 @@ void CCodeGen_x86::CommitSymbolRegisterFpuAvx(CSymbol* symbol, CX86Assembler::XM break; case SYM_TEMPORARY128: case SYM_RELATIVE128: - m_assembler.VmovssEd(MakeMemory128SymbolAddress(symbol), usedRegister); + m_assembler.VmovssEd(MakeMemory128SymbolAddress(symbol), usedRegister); + break; + case SYM_FP_REL_SINGLE: + case SYM_FP_TMP_SINGLE: + m_assembler.VmovssEd(MakeMemoryFpSingleSymbolAddress(symbol), usedRegister); break; default: throw std::runtime_error("Invalid symbol type."); diff --git a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp index e46f6a04..35e11ddf 100644 --- a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp +++ b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp @@ -8,21 +8,60 @@ void CCodeGen_x86::Emit_Fpu_Avx_MemMem(const STATEMENT& statement) auto dst = statement.dst->GetSymbol().get(); auto src1 = statement.src1->GetSymbol().get(); - ((m_assembler).*(FPUOP::OpEdAvx()))(CX86Assembler::xMM0, CX86Assembler::xMM0, MakeMemoryFpSingleSymbolAddress(src1)); - m_assembler.VmovssEd(MakeMemoryFpSingleSymbolAddress(dst), CX86Assembler::xMM0); + auto dstRegister = PrepareSymbolRegisterDefFpu(dst, CX86Assembler::xMM0); + auto src1Register = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM1); + + ((m_assembler).*(FPUOP::OpEdAvx()))(dstRegister, dstRegister, CX86Assembler::MakeXmmRegisterAddress(src1Register)); + + CommitSymbolRegisterFpuAvx(dst, dstRegister); + +} + +void CCodeGen_x86::Emit_Fp_Avx_Neg_VarVar(const STATEMENT& statement) +{ + auto dst = statement.dst->GetSymbol().get(); + auto src1 = statement.src1->GetSymbol().get(); + + auto dstRegister = PrepareSymbolRegisterDefFpu(dst, 
CX86Assembler::xMM0); + auto src1Register = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM1); + auto tmpXMMRegister = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM2); + auto tmpXMM2Register = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM3); + + m_assembler.VxorpsVo(tmpXMMRegister, tmpXMMRegister, CX86Assembler::MakeXmmRegisterAddress(tmpXMMRegister)); + m_assembler.VsubpsVo(tmpXMM2Register, tmpXMMRegister, CX86Assembler::MakeXmmRegisterAddress(src1Register)); + m_assembler.VminpsVo(dstRegister, tmpXMM2Register, CX86Assembler::MakeXmmRegisterAddress(src1Register)); + CommitSymbolRegisterFpuAvx(dst, dstRegister); +} + +void CCodeGen_x86::Emit_Fp_Avx_Abs_VarVar(const STATEMENT& statement) +{ + auto dst = statement.dst->GetSymbol().get(); + auto src1 = statement.src1->GetSymbol().get(); + + auto dstRegister = PrepareSymbolRegisterDefFpu(dst, CX86Assembler::xMM0); + auto src1Register = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM1); + auto tmpXMMRegister = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM2); + auto tmpXMM2Register = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM3); + + m_assembler.VxorpsVo(tmpXMMRegister, tmpXMMRegister, CX86Assembler::MakeXmmRegisterAddress(tmpXMMRegister)); + m_assembler.VsubpsVo(tmpXMM2Register, tmpXMMRegister, CX86Assembler::MakeXmmRegisterAddress(src1Register)); + m_assembler.VmaxpsVo(dstRegister, tmpXMM2Register, CX86Assembler::MakeXmmRegisterAddress(src1Register)); + CommitSymbolRegisterFpuAvx(dst, dstRegister); + } template -void CCodeGen_x86::Emit_Fpu_Avx_MemMemMem(const STATEMENT& statement) +void CCodeGen_x86::Emit_Fpu_Avx_VarVarVar(const STATEMENT& statement) { auto dst = statement.dst->GetSymbol().get(); auto src1 = statement.src1->GetSymbol().get(); auto src2 = statement.src2->GetSymbol().get(); - auto dstRegister = PrepareSymbolRegisterDefMd(dst, CX86Assembler::xMM0); + auto dstRegister = PrepareSymbolRegisterDefFpu(dst, CX86Assembler::xMM0); auto src1Register = 
PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM1); + auto src2Register = PrepareSymbolRegisterUseFpuAvx(src2, CX86Assembler::xMM2); - ((m_assembler).*(FPUOP::OpEdAvx()))(dstRegister, src1Register, MakeMemoryFpSingleSymbolAddress(src2)); + ((m_assembler).*(FPUOP::OpEdAvx()))(dstRegister, src1Register, CX86Assembler::MakeXmmRegisterAddress(src2Register)); CommitSymbolRegisterFpuAvx(dst, dstRegister); } @@ -33,12 +72,14 @@ void CCodeGen_x86::Emit_Fp_Avx_Cmp_VarMemMem(const STATEMENT& statement) auto src2 = statement.src2->GetSymbol().get(); auto dstReg = PrepareSymbolRegisterDef(dst, CX86Assembler::rAX); - auto cmpReg = CX86Assembler::xMM0; - auto resReg = CX86Assembler::xMM1; + auto src1Register = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM0); + auto src2Register = PrepareSymbolRegisterUseFpuAvx(src2, CX86Assembler::xMM1); + auto cmpReg = CX86Assembler::xMM2; + auto resReg = CX86Assembler::xMM3; auto conditionCode = GetSseConditionCode(statement.jmpCondition); - m_assembler.VmovssEd(cmpReg, MakeMemoryFpSingleSymbolAddress(src1)); - m_assembler.VcmpssEd(resReg, cmpReg, MakeMemoryFpSingleSymbolAddress(src2), conditionCode); + m_assembler.VmovssEd(cmpReg, CX86Assembler::MakeXmmRegisterAddress(src1Register)); + m_assembler.VcmpssEd(resReg, cmpReg, CX86Assembler::MakeXmmRegisterAddress(src2Register), conditionCode); m_assembler.VmovdVo(CX86Assembler::MakeRegisterAddress(dstReg), resReg); CommitSymbolRegister(dst, dstReg); @@ -49,15 +90,18 @@ void CCodeGen_x86::Emit_Fp_Avx_Rsqrt_MemMem(const STATEMENT& statement) auto dst = statement.dst->GetSymbol().get(); auto src1 = statement.src1->GetSymbol().get(); + {StatementList list = {statement}; + DumpStatementList(list);} + auto tmpIntRegister = CX86Assembler::rAX; - auto resultRegister = CX86Assembler::xMM0; - auto sqrtRegister = CX86Assembler::xMM1; + auto resultRegister = PrepareSymbolRegisterDefFpu(dst, CX86Assembler::xMM0); + auto sqrtRegister = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM1); 
- m_assembler.VsqrtssEd(sqrtRegister, CX86Assembler::xMM0, MakeMemoryFpSingleSymbolAddress(src1)); + m_assembler.VsqrtssEd(sqrtRegister, CX86Assembler::xMM0, CX86Assembler::MakeXmmRegisterAddress(sqrtRegister)); m_assembler.MovId(tmpIntRegister, 0x3F800000); m_assembler.VmovdVo(resultRegister, CX86Assembler::MakeRegisterAddress(tmpIntRegister)); m_assembler.VdivssEd(resultRegister, resultRegister, CX86Assembler::MakeXmmRegisterAddress(sqrtRegister)); - m_assembler.VmovssEd(MakeMemoryFpSingleSymbolAddress(dst), resultRegister); + CommitSymbolRegisterFpuAvx(dst, resultRegister); } void CCodeGen_x86::Emit_Fp_Avx_Rcpl_MemMem(const STATEMENT& statement) @@ -66,12 +110,15 @@ void CCodeGen_x86::Emit_Fp_Avx_Rcpl_MemMem(const STATEMENT& statement) auto src1 = statement.src1->GetSymbol().get(); auto tmpIntRegister = CX86Assembler::rAX; - auto resultRegister = CX86Assembler::xMM0; + auto resultRegister = PrepareSymbolRegisterDefFpu(dst, CX86Assembler::xMM0); + auto src1Register = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM1); m_assembler.MovId(tmpIntRegister, 0x3F800000); m_assembler.VmovdVo(resultRegister, CX86Assembler::MakeRegisterAddress(tmpIntRegister)); - m_assembler.VdivssEd(resultRegister, resultRegister, MakeMemoryFpSingleSymbolAddress(src1)); - m_assembler.VmovssEd(MakeMemoryFpSingleSymbolAddress(dst), resultRegister); + m_assembler.VdivssEd(resultRegister, resultRegister, CX86Assembler::MakeXmmRegisterAddress(src1Register)); + + CommitSymbolRegisterFpuAvx(dst, resultRegister); + } void CCodeGen_x86::Emit_Fp_Avx_Mov_RegSRelI32(const STATEMENT& statement) @@ -85,6 +132,22 @@ void CCodeGen_x86::Emit_Fp_Avx_Mov_RegSRelI32(const STATEMENT& statement) m_assembler.Vcvtsi2ssEd(dstRegister, CX86Assembler::MakeIndRegOffAddress(CX86Assembler::rBP, src1->m_valueLow)); } +void CCodeGen_x86::Emit_Fp_Avx_Mov_Reg32RelI32(const STATEMENT& statement) +{ + auto dst = statement.dst->GetSymbol().get(); + auto src1 = statement.src1->GetSymbol().get(); + + assert(dst->m_type 
== SYM_REGISTER); + assert(src1->m_type == SYM_FP_REL_INT32); + auto dstRegister = CX86Assembler::xMM3; + auto dstReg = PrepareSymbolRegisterDef(dst, CX86Assembler::rAX); + + m_assembler.Vcvtsi2ssEd(dstRegister, CX86Assembler::MakeIndRegOffAddress(CX86Assembler::rBP, src1->m_valueLow)); + m_assembler.VmovdVo(CX86Assembler::MakeRegisterAddress(dstReg), dstRegister); + + CommitSymbolRegister(dst, dstReg); +} + void CCodeGen_x86::Emit_Fp_Avx_Mov_RelSReg(const STATEMENT& statement) { auto dst = statement.dst->GetSymbol().get(); @@ -108,6 +171,21 @@ void CCodeGen_x86::Emit_Fp_Avx_Mov_RegSRelS(const STATEMENT& statement) m_assembler.VmovssEd(dstRegister, MakeMemoryFpSingleSymbolAddress(src1)); } +void CCodeGen_x86::Emit_Fp_Avx_Mov_Reg128Rel(const STATEMENT& statement) +{ + auto dst = statement.dst->GetSymbol().get(); + auto src1 = statement.src1->GetSymbol().get(); + + assert(dst->m_type == SYM_REGISTER128); + assert(src1->m_type == SYM_RELATIVE); + auto dstRegister = PrepareSymbolRegisterDefFpu(dst, CX86Assembler::xMM1); + auto src1Reg = PrepareSymbolRegisterUse(src1, CX86Assembler::rAX); + + m_assembler.VmovdVo(dstRegister, CX86Assembler::MakeRegisterAddress(src1Reg)); + m_assembler.VshufpsVo(dstRegister, dstRegister, CX86Assembler::MakeXmmRegisterAddress(dstRegister), 0x00); + CommitSymbolRegisterFpuAvx(dst, dstRegister); +} + void CCodeGen_x86::Emit_Fp_Avx_Mov_RelSRelI32(const STATEMENT& statement) { @@ -126,34 +204,128 @@ void CCodeGen_x86::Emit_Fp_Avx_ToIntTrunc_RelRel(const STATEMENT& statement) auto dst = statement.dst->GetSymbol().get(); auto src1 = statement.src1->GetSymbol().get(); - assert(dst->m_type == SYM_FP_REL_SINGLE); - assert(src1->m_type == SYM_FP_REL_SINGLE); - - m_assembler.Vcvttss2siEd(CX86Assembler::rAX, CX86Assembler::MakeIndRegOffAddress(CX86Assembler::rBP, src1->m_valueLow)); - m_assembler.MovGd(CX86Assembler::MakeIndRegOffAddress(CX86Assembler::rBP, dst->m_valueLow), CX86Assembler::rAX); + auto src1Reg = PrepareSymbolRegisterUseFpuAvx(src1, 
CX86Assembler::xMM1); + + m_assembler.Vcvttss2siEd(CX86Assembler::rAX, CX86Assembler::MakeXmmRegisterAddress(src1Reg)); + if( + dst->m_type == SYM_RELATIVE128 || dst->m_type == SYM_TEMPORARY128 + || dst->m_type == SYM_FP_REL_SINGLE || dst->m_type == SYM_FP_TMP_SINGLE + || dst->m_type == SYM_REGISTER128 + ) + { + auto dstReg = PrepareSymbolRegisterDefFpu(dst, CX86Assembler::xMM0); + m_assembler.VmovdVo(dstReg, CX86Assembler::MakeRegisterAddress(CX86Assembler::rAX)); + CommitSymbolRegisterFpuAvx(dst, dstReg); + } + else if(dst->m_type == SYM_RELATIVE || dst->m_type == SYM_TEMPORARY) + { + m_assembler.MovGd(CX86Assembler::MakeIndRegOffAddress(CX86Assembler::rBP, dst->m_valueLow), CX86Assembler::rAX); + } + else + { + throw std::exception(); + } } CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_fpuAvxConstMatchers[] = { - { OP_FP_ADD, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, - { OP_FP_SUB, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, - { OP_FP_MUL, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, - { OP_FP_DIV, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, - { OP_FP_MAX, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, - { OP_FP_MIN, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_VARIABLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, + { OP_FP_ADD, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_SUB, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MUL, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_DIV, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, 
&CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MAX, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MIN, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + + { OP_FP_ADD, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_SUB, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MUL, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_DIV, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MAX, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MIN, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + + + { OP_FP_ADD, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_SUB, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MUL, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_DIV, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MAX, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MIN, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + + { OP_FP_ADD, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, 
&CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_SUB, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MUL, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_DIV, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MAX, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MIN, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + + + { OP_FP_ADD, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_SUB, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MUL, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_DIV, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MAX, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MIN, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + + + { OP_FP_ADD, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_SUB, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MUL, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_DIV, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, 
&CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MAX, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MIN, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + + { OP_FP_ADD, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_SUB, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MUL, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_DIV, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MAX, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MIN, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + + { OP_FP_ADD, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_SUB, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MUL, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_DIV, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MAX, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MIN, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + + + { OP_FP_ABS, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, MATCH_NIL, 
&CCodeGen_x86::Emit_Fp_Avx_Abs_VarVar }, + { OP_FP_ABS, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Abs_VarVar }, + { OP_FP_ABS, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Abs_VarVar }, + + { OP_FP_NEG, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Neg_VarVar }, + { OP_FP_NEG, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Neg_VarVar }, + { OP_FP_NEG, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Neg_VarVar }, { OP_FP_CMP, MATCH_VARIABLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Cmp_VarMemMem }, + { OP_FP_CMP, MATCH_VARIABLE, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Cmp_VarMemMem }, + { OP_FP_CMP, MATCH_VARIABLE, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Cmp_VarMemMem }, + { OP_FP_CMP, MATCH_VARIABLE, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Cmp_VarMemMem }, + + { OP_FP_RSQRT, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Rsqrt_MemMem }, + { OP_FP_RSQRT, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Rsqrt_MemMem }, + { OP_FP_RSQRT, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Rsqrt_MemMem }, + // { OP_FP_SQRT, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMem }, + { OP_FP_SQRT, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMem }, + { OP_FP_SQRT, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMem }, + { OP_FP_RCPL, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Rcpl_MemMem }, + 
{ OP_FP_RCPL, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Rcpl_MemMem }, { OP_FP_SQRT, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMem }, { OP_FP_RSQRT, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Rsqrt_MemMem }, { OP_FP_RCPL, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Rcpl_MemMem }, + { OP_MOV, MATCH_VARIABLE128, MATCH_VARIABLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Mov_Reg128Rel }, + { OP_MOV, MATCH_VARIABLE, MATCH_RELATIVE_FP_INT32, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Mov_Reg32RelI32 }, { OP_MOV, MATCH_REGISTER128, MATCH_RELATIVE_FP_INT32, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Mov_RegSRelI32 }, { OP_MOV, MATCH_RELATIVE_FP_SINGLE, MATCH_REGISTER128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Mov_RelSReg }, { OP_MOV, MATCH_REGISTER128, MATCH_RELATIVE_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Mov_RegSRelS }, { OP_MOV, MATCH_RELATIVE_FP_SINGLE, MATCH_RELATIVE_FP_INT32, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Mov_RelSRelI32 }, { OP_FP_TOINT_TRUNC, MATCH_RELATIVE_FP_SINGLE, MATCH_RELATIVE_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_ToIntTrunc_RelRel }, + { OP_FP_TOINT_TRUNC, MATCH_RELATIVE_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_ToIntTrunc_RelRel }, + { OP_FP_TOINT_TRUNC, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_ToIntTrunc_RelRel }, + { OP_FP_TOINT_TRUNC, MATCH_VARIABLE128, MATCH_RELATIVE_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_ToIntTrunc_RelRel }, { OP_MOV, MATCH_NIL, MATCH_NIL, MATCH_NIL, MATCH_NIL, nullptr }, }; diff --git a/src/X86Assembler_Avx.cpp b/src/X86Assembler_Avx.cpp index abee2ba0..b8e7607a 100644 --- a/src/X86Assembler_Avx.cpp +++ b/src/X86Assembler_Avx.cpp @@ 
-416,6 +416,11 @@ void CX86Assembler::VcmpgtpsVo(XMMREGISTER dst, XMMREGISTER src1, const CAddress VcmppsVo(dst, src1, src2, SSE_CMP_NLE); } +void CX86Assembler::VxorpsVo(XMMREGISTER dst, XMMREGISTER src1, const CAddress& src2) +{ + WriteVexVoOp(VEX_OPCODE_MAP_NONE, 0x57, dst, src1, src2); +} + void CX86Assembler::VminpsVo(XMMREGISTER dst, XMMREGISTER src1, const CAddress& src2) { WriteVexVoOp(VEX_OPCODE_MAP_NONE, 0x5D, dst, src1, src2); From 35ff560533a869e9e6c6ffdc6094d15ef78fe42b Mon Sep 17 00:00:00 2001 From: Mahmood - Zer0xFF <5013823+Zer0xFF@users.noreply.github.com> Date: Tue, 9 Jun 2020 17:39:19 +0100 Subject: [PATCH 04/12] fix OP_NEG + add more matches --- src/Jitter_CodeGen_x86_Fpu_Avx.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp index 35e11ddf..22de0743 100644 --- a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp +++ b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp @@ -22,14 +22,15 @@ void CCodeGen_x86::Emit_Fp_Avx_Neg_VarVar(const STATEMENT& statement) auto dst = statement.dst->GetSymbol().get(); auto src1 = statement.src1->GetSymbol().get(); + auto tmpIntRegister = CX86Assembler::rAX; + auto tmpXMMRegister = CX86Assembler::xMM2; + auto tmpXMM2Register = CX86Assembler::xMM3; auto dstRegister = PrepareSymbolRegisterDefFpu(dst, CX86Assembler::xMM0); auto src1Register = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM1); - auto tmpXMMRegister = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM2); - auto tmpXMM2Register = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM3); - m_assembler.VxorpsVo(tmpXMMRegister, tmpXMMRegister, CX86Assembler::MakeXmmRegisterAddress(tmpXMMRegister)); - m_assembler.VsubpsVo(tmpXMM2Register, tmpXMMRegister, CX86Assembler::MakeXmmRegisterAddress(src1Register)); - m_assembler.VminpsVo(dstRegister, tmpXMM2Register, CX86Assembler::MakeXmmRegisterAddress(src1Register)); + m_assembler.MovId(tmpIntRegister, 0x80000000); + 
m_assembler.VmovdVo(tmpXMMRegister, CX86Assembler::MakeRegisterAddress(tmpIntRegister)); + m_assembler.VxorpsVo(dstRegister, src1Register, CX86Assembler::MakeXmmRegisterAddress(tmpXMMRegister)); CommitSymbolRegisterFpuAvx(dst, dstRegister); } @@ -47,7 +48,6 @@ void CCodeGen_x86::Emit_Fp_Avx_Abs_VarVar(const STATEMENT& statement) m_assembler.VsubpsVo(tmpXMM2Register, tmpXMMRegister, CX86Assembler::MakeXmmRegisterAddress(src1Register)); m_assembler.VmaxpsVo(dstRegister, tmpXMM2Register, CX86Assembler::MakeXmmRegisterAddress(src1Register)); CommitSymbolRegisterFpuAvx(dst, dstRegister); - } template @@ -288,11 +288,12 @@ CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_fpuAvxConstMatchers[] = { OP_FP_MAX, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, { OP_FP_MIN, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, - + { OP_FP_ABS, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Abs_VarVar }, { OP_FP_ABS, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Abs_VarVar }, { OP_FP_ABS, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Abs_VarVar }, { OP_FP_ABS, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Abs_VarVar }, + { OP_FP_NEG, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Neg_VarVar }, { OP_FP_NEG, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Neg_VarVar }, { OP_FP_NEG, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Neg_VarVar }, { OP_FP_NEG, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Neg_VarVar }, From b4e8f4406816d2dfa21b594e2cebcb0eb10b8f85 Mon Sep 17 00:00:00 2001 From: Mahmood 
- Zer0xFF <5013823+Zer0xFF@users.noreply.github.com> Date: Thu, 4 Jun 2020 15:50:10 +0100 Subject: [PATCH 05/12] resolve SYM_FP_REL_INT32 relative address --- src/Jitter_CodeGen_x86.cpp | 1 + src/Jitter_CodeGen_x86_Fpu.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/Jitter_CodeGen_x86.cpp b/src/Jitter_CodeGen_x86.cpp index d0a85b64..db0186eb 100644 --- a/src/Jitter_CodeGen_x86.cpp +++ b/src/Jitter_CodeGen_x86.cpp @@ -1373,6 +1373,7 @@ CX86Assembler::XMMREGISTER CCodeGen_x86::PrepareSymbolRegisterUseFpuAvx(CSymbol* m_assembler.VmovssEd(preferedRegister, MakeMemory128SymbolAddress(symbol)); return preferedRegister; break; + case SYM_FP_REL_INT32: case SYM_FP_REL_SINGLE: case SYM_FP_TMP_SINGLE: m_assembler.VmovssEd(preferedRegister, MakeMemoryFpSingleSymbolAddress(symbol)); diff --git a/src/Jitter_CodeGen_x86_Fpu.cpp b/src/Jitter_CodeGen_x86_Fpu.cpp index 5ab774d8..22b57ecc 100644 --- a/src/Jitter_CodeGen_x86_Fpu.cpp +++ b/src/Jitter_CodeGen_x86_Fpu.cpp @@ -20,6 +20,7 @@ CX86Assembler::CAddress CCodeGen_x86::MakeMemoryFpSingleSymbolAddress(CSymbol* s { switch(symbol->m_type) { + case SYM_FP_REL_INT32: case SYM_FP_REL_SINGLE: return MakeRelativeFpSingleSymbolAddress(symbol); break; From b4f11eb62886f31ba163d1fb8e0955e26aeefa43 Mon Sep 17 00:00:00 2001 From: Mahmood - Zer0xFF <5013823+Zer0xFF@users.noreply.github.com> Date: Thu, 4 Jun 2020 15:53:19 +0100 Subject: [PATCH 06/12] fix Emit_Fp_Avx_Abs_VarVar --- src/Jitter_CodeGen_x86_Fpu_Avx.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp index 22de0743..81cd1314 100644 --- a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp +++ b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp @@ -41,8 +41,8 @@ void CCodeGen_x86::Emit_Fp_Avx_Abs_VarVar(const STATEMENT& statement) auto dstRegister = PrepareSymbolRegisterDefFpu(dst, CX86Assembler::xMM0); auto src1Register = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM1); - auto tmpXMMRegister = 
PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM2); - auto tmpXMM2Register = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM3); + auto tmpXMMRegister = CX86Assembler::xMM2; + auto tmpXMM2Register = CX86Assembler::xMM3; m_assembler.VxorpsVo(tmpXMMRegister, tmpXMMRegister, CX86Assembler::MakeXmmRegisterAddress(tmpXMMRegister)); m_assembler.VsubpsVo(tmpXMM2Register, tmpXMMRegister, CX86Assembler::MakeXmmRegisterAddress(src1Register)); From 626cf27f2b82b433cd90eeb4a1c64a5cfaa8d630 Mon Sep 17 00:00:00 2001 From: Mahmood - Zer0xFF <5013823+Zer0xFF@users.noreply.github.com> Date: Thu, 4 Jun 2020 16:00:08 +0100 Subject: [PATCH 07/12] cleanup --- src/Jitter_CodeGen_x86_Fpu_Avx.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp index 81cd1314..937c342e 100644 --- a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp +++ b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp @@ -144,8 +144,6 @@ void CCodeGen_x86::Emit_Fp_Avx_Mov_Reg32RelI32(const STATEMENT& statement) m_assembler.Vcvtsi2ssEd(dstRegister, CX86Assembler::MakeIndRegOffAddress(CX86Assembler::rBP, src1->m_valueLow)); m_assembler.VmovdVo(CX86Assembler::MakeRegisterAddress(dstReg), dstRegister); - - CommitSymbolRegister(dst, dstReg); } void CCodeGen_x86::Emit_Fp_Avx_Mov_RelSReg(const STATEMENT& statement) @@ -183,7 +181,6 @@ void CCodeGen_x86::Emit_Fp_Avx_Mov_Reg128Rel(const STATEMENT& statement) m_assembler.VmovdVo(dstRegister, CX86Assembler::MakeRegisterAddress(src1Reg)); m_assembler.VshufpsVo(dstRegister, dstRegister, CX86Assembler::MakeXmmRegisterAddress(dstRegister), 0x00); - CommitSymbolRegisterFpuAvx(dst, dstRegister); } From 780fcc162829bf09c094531f105d9d0797c17d44 Mon Sep 17 00:00:00 2001 From: Mahmood - Zer0xFF <5013823+Zer0xFF@users.noreply.github.com> Date: Tue, 9 Jun 2020 17:42:33 +0100 Subject: [PATCH 08/12] cleanup --- src/Jitter_CodeGen_x86_Fpu_Avx.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp 
b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp index 937c342e..68dfc08d 100644 --- a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp +++ b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp @@ -90,9 +90,6 @@ void CCodeGen_x86::Emit_Fp_Avx_Rsqrt_MemMem(const STATEMENT& statement) auto dst = statement.dst->GetSymbol().get(); auto src1 = statement.src1->GetSymbol().get(); - {StatementList list = {statement}; - DumpStatementList(list);} - auto tmpIntRegister = CX86Assembler::rAX; auto resultRegister = PrepareSymbolRegisterDefFpu(dst, CX86Assembler::xMM0); auto sqrtRegister = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM1); @@ -180,7 +177,6 @@ void CCodeGen_x86::Emit_Fp_Avx_Mov_Reg128Rel(const STATEMENT& statement) auto src1Reg = PrepareSymbolRegisterUse(src1, CX86Assembler::rAX); m_assembler.VmovdVo(dstRegister, CX86Assembler::MakeRegisterAddress(src1Reg)); - m_assembler.VshufpsVo(dstRegister, dstRegister, CX86Assembler::MakeXmmRegisterAddress(dstRegister), 0x00); } From 50b1158fbfa51ecf718bf5b26d9e826c402f90ab Mon Sep 17 00:00:00 2001 From: Mahmood - Zer0xFF <5013823+Zer0xFF@users.noreply.github.com> Date: Tue, 9 Jun 2020 17:48:12 +0100 Subject: [PATCH 09/12] fix build --- src/Jitter_RegAlloc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Jitter_RegAlloc.cpp b/src/Jitter_RegAlloc.cpp index 357a7adf..d4b1c694 100644 --- a/src/Jitter_RegAlloc.cpp +++ b/src/Jitter_RegAlloc.cpp @@ -196,7 +196,7 @@ void CJitter::AssociateSymbolsToRegisters(SymbolRegAllocInfo& symbolRegAllocs) c return (symbolType == SYM_RELATIVE) || (symbolType == SYM_TEMPORARY) || (symbolType == SYM_REL_REFERENCE) || (symbolType == SYM_TMP_REFERENCE) || - (symbolType == SYM_FP_REL_SINGLE) || (symbolType == SYM_FP_TMP_SINGLE) || || (symbolType == SYM_FP_REL_INT32) + (symbolType == SYM_FP_REL_SINGLE) || (symbolType == SYM_FP_TMP_SINGLE) || (symbolType == SYM_FP_REL_INT32) || (symbolType == SYM_RELATIVE128) || (symbolType == SYM_TEMPORARY128); }; From 75fc2dc8d7c1e52190fabba6493fe5df48ab3cef Mon Sep 17 
00:00:00 2001 From: Mahmood - Zer0xFF <5013823+Zer0xFF@users.noreply.github.com> Date: Fri, 25 Dec 2020 17:23:50 +0000 Subject: [PATCH 10/12] implement VandpsVo --- include/X86Assembler.h | 1 + src/X86Assembler_Avx.cpp | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/include/X86Assembler.h b/include/X86Assembler.h index d814cdf5..dc2980e5 100644 --- a/include/X86Assembler.h +++ b/include/X86Assembler.h @@ -446,6 +446,7 @@ class CX86Assembler void VpshufbVo(XMMREGISTER, XMMREGISTER, const CAddress&); void VpmovmskbVo(REGISTER, XMMREGISTER); + void VandpsVo(XMMREGISTER, XMMREGISTER, const CAddress&); void VaddpsVo(XMMREGISTER, XMMREGISTER, const CAddress&); void VsubpsVo(XMMREGISTER, XMMREGISTER, const CAddress&); void VmulpsVo(XMMREGISTER, XMMREGISTER, const CAddress&); diff --git a/src/X86Assembler_Avx.cpp b/src/X86Assembler_Avx.cpp index b8e7607a..1540b55a 100644 --- a/src/X86Assembler_Avx.cpp +++ b/src/X86Assembler_Avx.cpp @@ -386,6 +386,11 @@ void CX86Assembler::VpmovmskbVo(REGISTER dst, XMMREGISTER src) WriteVexVoOp(VEX_OPCODE_MAP_66, 0xD7, static_cast(dst), CX86Assembler::xMM0, CX86Assembler::MakeXmmRegisterAddress(src)); } +void CX86Assembler::VandpsVo(XMMREGISTER dst, XMMREGISTER src1, const CAddress& src2) +{ + WriteVexVoOp(VEX_OPCODE_MAP_NONE, 0x54, dst, src1, src2); +} + void CX86Assembler::VaddpsVo(XMMREGISTER dst, XMMREGISTER src1, const CAddress& src2) { WriteVexVoOp(VEX_OPCODE_MAP_NONE, 0x58, dst, src1, src2); From 09d2edf5753ad19a3a21892868757bdbb642576e Mon Sep 17 00:00:00 2001 From: Mahmood - Zer0xFF <5013823+Zer0xFF@users.noreply.github.com> Date: Fri, 25 Dec 2020 17:25:28 +0000 Subject: [PATCH 11/12] update Emit_Fp_Avx_Abs_VarVar() to use VandpsVo --- src/Jitter_CodeGen_x86_Fpu_Avx.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp index 68dfc08d..eff5337a 100644 --- a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp +++ b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp
@@ -42,11 +42,10 @@ void CCodeGen_x86::Emit_Fp_Avx_Abs_VarVar(const STATEMENT& statement) auto dstRegister = PrepareSymbolRegisterDefFpu(dst, CX86Assembler::xMM0); auto src1Register = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM1); auto tmpXMMRegister = CX86Assembler::xMM2; - auto tmpXMM2Register = CX86Assembler::xMM3; - m_assembler.VxorpsVo(tmpXMMRegister, tmpXMMRegister, CX86Assembler::MakeXmmRegisterAddress(tmpXMMRegister)); - m_assembler.VsubpsVo(tmpXMM2Register, tmpXMMRegister, CX86Assembler::MakeXmmRegisterAddress(src1Register)); - m_assembler.VmaxpsVo(dstRegister, tmpXMM2Register, CX86Assembler::MakeXmmRegisterAddress(src1Register)); + m_assembler.VpcmpeqdVo(tmpXMMRegister, tmpXMMRegister, CX86Assembler::MakeXmmRegisterAddress(tmpXMMRegister)); + m_assembler.VpsrldVo(tmpXMMRegister, tmpXMMRegister, 1); + m_assembler.VandpsVo(dstRegister, tmpXMMRegister, CX86Assembler::MakeXmmRegisterAddress(src1Register)); CommitSymbolRegisterFpuAvx(dst, dstRegister); } From 22f7e7217596c5c3717d1e31d0033946ffe7e6e6 Mon Sep 17 00:00:00 2001 From: Mahmood - Zer0xFF <5013823+Zer0xFF@users.noreply.github.com> Date: Fri, 25 Dec 2020 17:44:10 +0000 Subject: [PATCH 12/12] Clean up, use avx instruction to avoid mixing instructions --- src/Jitter_CodeGen_x86_Fpu_Avx.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp index eff5337a..3e539bd2 100644 --- a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp +++ b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp @@ -22,14 +22,12 @@ void CCodeGen_x86::Emit_Fp_Avx_Neg_VarVar(const STATEMENT& statement) auto dst = statement.dst->GetSymbol().get(); auto src1 = statement.src1->GetSymbol().get(); - auto tmpIntRegister = CX86Assembler::rAX; auto tmpXMMRegister = CX86Assembler::xMM2; - auto tmpXMM2Register = CX86Assembler::xMM3; auto dstRegister = PrepareSymbolRegisterDefFpu(dst, CX86Assembler::xMM0); auto src1Register = PrepareSymbolRegisterUseFpuAvx(src1, 
CX86Assembler::xMM1); - m_assembler.MovId(tmpIntRegister, 0x80000000); - m_assembler.VmovdVo(tmpXMMRegister, CX86Assembler::MakeRegisterAddress(tmpIntRegister)); + m_assembler.VpcmpeqdVo(tmpXMMRegister, tmpXMMRegister, CX86Assembler::MakeXmmRegisterAddress(tmpXMMRegister)); + m_assembler.VpslldVo(tmpXMMRegister, tmpXMMRegister, 31); m_assembler.VxorpsVo(dstRegister, src1Register, CX86Assembler::MakeXmmRegisterAddress(tmpXMMRegister)); CommitSymbolRegisterFpuAvx(dst, dstRegister); }