diff --git a/include/Jitter_CodeGen_x86.h b/include/Jitter_CodeGen_x86.h index 803632fa..23e16875 100644 --- a/include/Jitter_CodeGen_x86.h +++ b/include/Jitter_CodeGen_x86.h @@ -721,12 +721,22 @@ namespace Jitter //FPUOP AVX template void Emit_Fpu_Avx_MemMem(const STATEMENT&); - template void Emit_Fpu_Avx_MemMemMem(const STATEMENT&); + template void Emit_Fpu_Avx_VarVarVar(const STATEMENT&); + + void Emit_Fp_Avx_Neg_VarVar(const STATEMENT&); + void Emit_Fp_Avx_Abs_VarVar(const STATEMENT&); + void Emit_Fp_Avx_Mov_Reg32RelI32(const STATEMENT&); + void Emit_Fp_Avx_Mov_Reg128Rel(const STATEMENT&); void Emit_Fp_Avx_Cmp_VarMemMem(const STATEMENT&); void Emit_Fp_Avx_Rsqrt_MemMem(const STATEMENT&); void Emit_Fp_Avx_Rcpl_MemMem(const STATEMENT&); void Emit_Fp_Avx_Mov_RelSRelI32(const STATEMENT&); + + void Emit_Fp_Avx_Mov_RelSReg(const STATEMENT&); + void Emit_Fp_Avx_Mov_RegSRelS(const STATEMENT&); + void Emit_Fp_Avx_Mov_RegSRelI32(const STATEMENT&); + void Emit_Fp_Avx_ToIntTrunc_RelRel(const STATEMENT&); //MDOP AVX @@ -770,6 +780,9 @@ namespace Jitter CX86Assembler::XMMREGISTER PrepareSymbolRegisterDefMd(CSymbol*, CX86Assembler::XMMREGISTER); CX86Assembler::XMMREGISTER PrepareSymbolRegisterUseMdAvx(CSymbol*, CX86Assembler::XMMREGISTER); void CommitSymbolRegisterMdAvx(CSymbol*, CX86Assembler::XMMREGISTER); + CX86Assembler::XMMREGISTER PrepareSymbolRegisterDefFpu(CSymbol*, CX86Assembler::XMMREGISTER); + CX86Assembler::XMMREGISTER PrepareSymbolRegisterUseFpuAvx(CSymbol*, CX86Assembler::XMMREGISTER); + void CommitSymbolRegisterFpuAvx(CSymbol*, CX86Assembler::XMMREGISTER); virtual CX86Assembler::REGISTER PrepareRefSymbolRegisterUse(CSymbol*, CX86Assembler::REGISTER) = 0; diff --git a/include/X86Assembler.h b/include/X86Assembler.h index 7023372a..dc2980e5 100644 --- a/include/X86Assembler.h +++ b/include/X86Assembler.h @@ -446,6 +446,7 @@ class CX86Assembler void VpshufbVo(XMMREGISTER, XMMREGISTER, const CAddress&); void VpmovmskbVo(REGISTER, XMMREGISTER); + void 
VandpsVo(XMMREGISTER, XMMREGISTER, const CAddress&); void VaddpsVo(XMMREGISTER, XMMREGISTER, const CAddress&); void VsubpsVo(XMMREGISTER, XMMREGISTER, const CAddress&); void VmulpsVo(XMMREGISTER, XMMREGISTER, const CAddress&); @@ -454,6 +455,8 @@ class CX86Assembler void VcmpltpsVo(XMMREGISTER, XMMREGISTER, const CAddress&); void VcmpgtpsVo(XMMREGISTER, XMMREGISTER, const CAddress&); + void VxorpsVo(XMMREGISTER, XMMREGISTER, const CAddress&); + void VminpsVo(XMMREGISTER, XMMREGISTER, const CAddress&); void VmaxpsVo(XMMREGISTER, XMMREGISTER, const CAddress&); diff --git a/src/Jitter_CodeGen_x86.cpp b/src/Jitter_CodeGen_x86.cpp index 67b0f8c1..db0186eb 100644 --- a/src/Jitter_CodeGen_x86.cpp +++ b/src/Jitter_CodeGen_x86.cpp @@ -1342,3 +1342,66 @@ void CCodeGen_x86::CommitSymbolRegisterMdAvx(CSymbol* symbol, CX86Assembler::XMM break; } } +CX86Assembler::XMMREGISTER CCodeGen_x86::PrepareSymbolRegisterDefFpu(CSymbol* symbol, CX86Assembler::XMMREGISTER preferedRegister) +{ + switch(symbol->m_type) + { + case SYM_REGISTER128: + return m_mdRegisters[symbol->m_valueLow]; + break; + case SYM_FP_REL_SINGLE: + case SYM_FP_TMP_SINGLE: + case SYM_TEMPORARY128: + case SYM_RELATIVE128: + return preferedRegister; + break; + default: + throw std::runtime_error("Invalid symbol type."); + break; + } +} + +CX86Assembler::XMMREGISTER CCodeGen_x86::PrepareSymbolRegisterUseFpuAvx(CSymbol* symbol, CX86Assembler::XMMREGISTER preferedRegister) +{ + switch(symbol->m_type) + { + case SYM_REGISTER128: + return m_mdRegisters[symbol->m_valueLow]; + break; + case SYM_TEMPORARY128: + case SYM_RELATIVE128: + m_assembler.VmovssEd(preferedRegister, MakeMemory128SymbolAddress(symbol)); + return preferedRegister; + break; + case SYM_FP_REL_INT32: + case SYM_FP_REL_SINGLE: + case SYM_FP_TMP_SINGLE: + m_assembler.VmovssEd(preferedRegister, MakeMemoryFpSingleSymbolAddress(symbol)); + return preferedRegister; + break; + default: + throw std::runtime_error("Invalid symbol type."); + break; + } +} + +void 
CCodeGen_x86::CommitSymbolRegisterFpuAvx(CSymbol* symbol, CX86Assembler::XMMREGISTER usedRegister) +{ + switch(symbol->m_type) + { + case SYM_REGISTER128: + assert(usedRegister == m_mdRegisters[symbol->m_valueLow]); + break; + case SYM_TEMPORARY128: + case SYM_RELATIVE128: + m_assembler.VmovssEd(MakeMemory128SymbolAddress(symbol), usedRegister); + break; + case SYM_FP_REL_SINGLE: + case SYM_FP_TMP_SINGLE: + m_assembler.VmovssEd(MakeMemoryFpSingleSymbolAddress(symbol), usedRegister); + break; + default: + throw std::runtime_error("Invalid symbol type."); + break; + } +} diff --git a/src/Jitter_CodeGen_x86_Fpu.cpp b/src/Jitter_CodeGen_x86_Fpu.cpp index 5ab774d8..22b57ecc 100644 --- a/src/Jitter_CodeGen_x86_Fpu.cpp +++ b/src/Jitter_CodeGen_x86_Fpu.cpp @@ -20,6 +20,7 @@ CX86Assembler::CAddress CCodeGen_x86::MakeMemoryFpSingleSymbolAddress(CSymbol* s { switch(symbol->m_type) { + case SYM_FP_REL_INT32: case SYM_FP_REL_SINGLE: return MakeRelativeFpSingleSymbolAddress(symbol); break; diff --git a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp index 318e172b..3e539bd2 100644 --- a/src/Jitter_CodeGen_x86_Fpu_Avx.cpp +++ b/src/Jitter_CodeGen_x86_Fpu_Avx.cpp @@ -8,23 +8,58 @@ void CCodeGen_x86::Emit_Fpu_Avx_MemMem(const STATEMENT& statement) auto dst = statement.dst->GetSymbol().get(); auto src1 = statement.src1->GetSymbol().get(); - ((m_assembler).*(FPUOP::OpEdAvx()))(CX86Assembler::xMM0, CX86Assembler::xMM0, MakeMemoryFpSingleSymbolAddress(src1)); - m_assembler.VmovssEd(MakeMemoryFpSingleSymbolAddress(dst), CX86Assembler::xMM0); + auto dstRegister = PrepareSymbolRegisterDefFpu(dst, CX86Assembler::xMM0); + auto src1Register = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM1); + + ((m_assembler).*(FPUOP::OpEdAvx()))(dstRegister, dstRegister, CX86Assembler::MakeXmmRegisterAddress(src1Register)); + + CommitSymbolRegisterFpuAvx(dst, dstRegister); + +} + +void CCodeGen_x86::Emit_Fp_Avx_Neg_VarVar(const STATEMENT& statement) +{ + auto dst = 
statement.dst->GetSymbol().get(); + auto src1 = statement.src1->GetSymbol().get(); + + auto tmpXMMRegister = CX86Assembler::xMM2; + auto dstRegister = PrepareSymbolRegisterDefFpu(dst, CX86Assembler::xMM0); + auto src1Register = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM1); + + m_assembler.VpcmpeqdVo(tmpXMMRegister, tmpXMMRegister, CX86Assembler::MakeXmmRegisterAddress(tmpXMMRegister)); + m_assembler.VpslldVo(tmpXMMRegister, tmpXMMRegister, 31); + m_assembler.VxorpsVo(dstRegister, src1Register, CX86Assembler::MakeXmmRegisterAddress(tmpXMMRegister)); + CommitSymbolRegisterFpuAvx(dst, dstRegister); +} + +void CCodeGen_x86::Emit_Fp_Avx_Abs_VarVar(const STATEMENT& statement) +{ + auto dst = statement.dst->GetSymbol().get(); + auto src1 = statement.src1->GetSymbol().get(); + + auto dstRegister = PrepareSymbolRegisterDefFpu(dst, CX86Assembler::xMM0); + auto src1Register = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM1); + auto tmpXMMRegister = CX86Assembler::xMM2; + + m_assembler.VpcmpeqdVo(tmpXMMRegister, tmpXMMRegister, CX86Assembler::MakeXmmRegisterAddress(tmpXMMRegister)); + m_assembler.VpsrldVo(tmpXMMRegister, tmpXMMRegister, 1); + m_assembler.VandpsVo(dstRegister, tmpXMMRegister, CX86Assembler::MakeXmmRegisterAddress(src1Register)); + CommitSymbolRegisterFpuAvx(dst, dstRegister); } template -void CCodeGen_x86::Emit_Fpu_Avx_MemMemMem(const STATEMENT& statement) +void CCodeGen_x86::Emit_Fpu_Avx_VarVarVar(const STATEMENT& statement) { auto dst = statement.dst->GetSymbol().get(); auto src1 = statement.src1->GetSymbol().get(); auto src2 = statement.src2->GetSymbol().get(); - auto dstRegister = CX86Assembler::xMM0; - auto src1Register = CX86Assembler::xMM1; + auto dstRegister = PrepareSymbolRegisterDefFpu(dst, CX86Assembler::xMM0); + auto src1Register = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM1); + auto src2Register = PrepareSymbolRegisterUseFpuAvx(src2, CX86Assembler::xMM2); - m_assembler.VmovssEd(src1Register, 
MakeMemoryFpSingleSymbolAddress(src1)); - ((m_assembler).*(FPUOP::OpEdAvx()))(dstRegister, src1Register, MakeMemoryFpSingleSymbolAddress(src2)); - m_assembler.VmovssEd(MakeMemoryFpSingleSymbolAddress(dst), dstRegister); + ((m_assembler).*(FPUOP::OpEdAvx()))(dstRegister, src1Register, CX86Assembler::MakeXmmRegisterAddress(src2Register)); + CommitSymbolRegisterFpuAvx(dst, dstRegister); } void CCodeGen_x86::Emit_Fp_Avx_Cmp_VarMemMem(const STATEMENT& statement) @@ -34,12 +69,14 @@ void CCodeGen_x86::Emit_Fp_Avx_Cmp_VarMemMem(const STATEMENT& statement) auto src2 = statement.src2->GetSymbol().get(); auto dstReg = PrepareSymbolRegisterDef(dst, CX86Assembler::rAX); - auto cmpReg = CX86Assembler::xMM0; - auto resReg = CX86Assembler::xMM1; + auto src1Register = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM0); + auto src2Register = PrepareSymbolRegisterUseFpuAvx(src2, CX86Assembler::xMM1); + auto cmpReg = CX86Assembler::xMM2; + auto resReg = CX86Assembler::xMM3; auto conditionCode = GetSseConditionCode(statement.jmpCondition); - m_assembler.VmovssEd(cmpReg, MakeMemoryFpSingleSymbolAddress(src1)); - m_assembler.VcmpssEd(resReg, cmpReg, MakeMemoryFpSingleSymbolAddress(src2), conditionCode); + m_assembler.VmovssEd(cmpReg, CX86Assembler::MakeXmmRegisterAddress(src1Register)); + m_assembler.VcmpssEd(resReg, cmpReg, CX86Assembler::MakeXmmRegisterAddress(src2Register), conditionCode); m_assembler.VmovdVo(CX86Assembler::MakeRegisterAddress(dstReg), resReg); CommitSymbolRegister(dst, dstReg); @@ -51,14 +88,14 @@ void CCodeGen_x86::Emit_Fp_Avx_Rsqrt_MemMem(const STATEMENT& statement) auto src1 = statement.src1->GetSymbol().get(); auto tmpIntRegister = CX86Assembler::rAX; - auto resultRegister = CX86Assembler::xMM0; - auto sqrtRegister = CX86Assembler::xMM1; + auto resultRegister = PrepareSymbolRegisterDefFpu(dst, CX86Assembler::xMM0); + auto sqrtRegister = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM1); - m_assembler.VsqrtssEd(sqrtRegister, CX86Assembler::xMM0, 
MakeMemoryFpSingleSymbolAddress(src1)); + /* Compute the sqrt into a scratch register: sqrtRegister may be src1's allocated register, which must not be clobbered. */ auto srcRegister = sqrtRegister; sqrtRegister = CX86Assembler::xMM3; m_assembler.VsqrtssEd(sqrtRegister, sqrtRegister, CX86Assembler::MakeXmmRegisterAddress(srcRegister)); m_assembler.MovId(tmpIntRegister, 0x3F800000); m_assembler.VmovdVo(resultRegister, CX86Assembler::MakeRegisterAddress(tmpIntRegister)); m_assembler.VdivssEd(resultRegister, resultRegister, CX86Assembler::MakeXmmRegisterAddress(sqrtRegister)); - m_assembler.VmovssEd(MakeMemoryFpSingleSymbolAddress(dst), resultRegister); + CommitSymbolRegisterFpuAvx(dst, resultRegister); } void CCodeGen_x86::Emit_Fp_Avx_Rcpl_MemMem(const STATEMENT& statement) { @@ -67,14 +104,79 @@ auto dst = statement.dst->GetSymbol().get(); auto src1 = statement.src1->GetSymbol().get(); auto tmpIntRegister = CX86Assembler::rAX; - auto resultRegister = CX86Assembler::xMM0; + auto resultRegister = PrepareSymbolRegisterDefFpu(dst, CX86Assembler::xMM0); + auto src1Register = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM1); if(src1Register == resultRegister) { /* dst aliases src1: save src1 to a scratch register before resultRegister is overwritten with 1.0 below */ m_assembler.VmovssEd(CX86Assembler::xMM3, CX86Assembler::MakeXmmRegisterAddress(src1Register)); src1Register = CX86Assembler::xMM3; } m_assembler.MovId(tmpIntRegister, 0x3F800000); m_assembler.VmovdVo(resultRegister, CX86Assembler::MakeRegisterAddress(tmpIntRegister)); - m_assembler.VdivssEd(resultRegister, resultRegister, MakeMemoryFpSingleSymbolAddress(src1)); - m_assembler.VmovssEd(MakeMemoryFpSingleSymbolAddress(dst), resultRegister); + m_assembler.VdivssEd(resultRegister, resultRegister, CX86Assembler::MakeXmmRegisterAddress(src1Register)); + + CommitSymbolRegisterFpuAvx(dst, resultRegister); + +} + +void CCodeGen_x86::Emit_Fp_Avx_Mov_RegSRelI32(const STATEMENT& statement) +{ + auto dst = statement.dst->GetSymbol().get(); + auto src1 = statement.src1->GetSymbol().get(); + + assert(dst->m_type == SYM_REGISTER128); + assert(src1->m_type == SYM_FP_REL_INT32); + auto dstRegister = PrepareSymbolRegisterDefMd(dst, CX86Assembler::xMM0); + m_assembler.Vcvtsi2ssEd(dstRegister, CX86Assembler::MakeIndRegOffAddress(CX86Assembler::rBP, src1->m_valueLow)); +} + +void CCodeGen_x86::Emit_Fp_Avx_Mov_Reg32RelI32(const STATEMENT& statement) +{ + 
auto dst = statement.dst->GetSymbol().get(); + auto src1 = statement.src1->GetSymbol().get(); + + /* NOTE(review): the matcher uses MATCH_VARIABLE for dst, so dst may also be a memory symbol; the commit below handles both cases. */ + assert(src1->m_type == SYM_FP_REL_INT32); + auto dstRegister = CX86Assembler::xMM3; + auto dstReg = PrepareSymbolRegisterDef(dst, CX86Assembler::rAX); + + m_assembler.Vcvtsi2ssEd(dstRegister, CX86Assembler::MakeIndRegOffAddress(CX86Assembler::rBP, src1->m_valueLow)); + m_assembler.VmovdVo(CX86Assembler::MakeRegisterAddress(dstReg), dstRegister); CommitSymbolRegister(dst, dstReg); +} + +void CCodeGen_x86::Emit_Fp_Avx_Mov_RelSReg(const STATEMENT& statement) +{ + auto dst = statement.dst->GetSymbol().get(); + auto src1 = statement.src1->GetSymbol().get(); + + assert(dst->m_type == SYM_FP_REL_SINGLE); + assert(src1->m_type == SYM_REGISTER128); + auto src1Register = PrepareSymbolRegisterUseMdAvx(src1, CX86Assembler::xMM1); + m_assembler.VmovssEd(MakeMemoryFpSingleSymbolAddress(dst), src1Register); } + +void CCodeGen_x86::Emit_Fp_Avx_Mov_RegSRelS(const STATEMENT& statement) +{ + auto dst = statement.dst->GetSymbol().get(); + auto src1 = statement.src1->GetSymbol().get(); + + assert(dst->m_type == SYM_REGISTER128); + assert(src1->m_type == SYM_FP_REL_SINGLE); + auto dstRegister = PrepareSymbolRegisterDefMd(dst, CX86Assembler::xMM1); + m_assembler.VmovssEd(dstRegister, MakeMemoryFpSingleSymbolAddress(src1)); +} + +void CCodeGen_x86::Emit_Fp_Avx_Mov_Reg128Rel(const STATEMENT& statement) +{ + auto dst = statement.dst->GetSymbol().get(); + auto src1 = statement.src1->GetSymbol().get(); + + /* NOTE(review): the matcher uses MATCH_VARIABLE128 / MATCH_VARIABLE here, */ + /* so dst may be in memory and src1 may be a register; the commit below stores dst. */ + auto dstRegister = PrepareSymbolRegisterDefFpu(dst, CX86Assembler::xMM1); + auto src1Reg = PrepareSymbolRegisterUse(src1, CX86Assembler::rAX); + + m_assembler.VmovdVo(dstRegister, CX86Assembler::MakeRegisterAddress(src1Reg)); CommitSymbolRegisterFpuAvx(dst, dstRegister); +} + + void CCodeGen_x86::Emit_Fp_Avx_Mov_RelSRelI32(const STATEMENT& statement) { auto dst = statement.dst->GetSymbol().get(); @@ -92,30 +194,129 @@ void 
CCodeGen_x86::Emit_Fp_Avx_ToIntTrunc_RelRel(const STATEMENT& statement) auto dst = statement.dst->GetSymbol().get(); auto src1 = statement.src1->GetSymbol().get(); - assert(dst->m_type == SYM_FP_REL_SINGLE); - assert(src1->m_type == SYM_FP_REL_SINGLE); + auto src1Reg = PrepareSymbolRegisterUseFpuAvx(src1, CX86Assembler::xMM1); - m_assembler.Vcvttss2siEd(CX86Assembler::rAX, CX86Assembler::MakeIndRegOffAddress(CX86Assembler::rBP, src1->m_valueLow)); - m_assembler.MovGd(CX86Assembler::MakeIndRegOffAddress(CX86Assembler::rBP, dst->m_valueLow), CX86Assembler::rAX); + m_assembler.Vcvttss2siEd(CX86Assembler::rAX, CX86Assembler::MakeXmmRegisterAddress(src1Reg)); + if( + dst->m_type == SYM_RELATIVE128 || dst->m_type == SYM_TEMPORARY128 + || dst->m_type == SYM_FP_REL_SINGLE || dst->m_type == SYM_FP_TMP_SINGLE + || dst->m_type == SYM_REGISTER128 + ) + { + auto dstReg = PrepareSymbolRegisterDefFpu(dst, CX86Assembler::xMM0); + m_assembler.VmovdVo(dstReg, CX86Assembler::MakeRegisterAddress(CX86Assembler::rAX)); + CommitSymbolRegisterFpuAvx(dst, dstReg); + } + else if(dst->m_type == SYM_RELATIVE || dst->m_type == SYM_TEMPORARY) + { + m_assembler.MovGd(CX86Assembler::MakeIndRegOffAddress(CX86Assembler::rBP, dst->m_valueLow), CX86Assembler::rAX); + } + else + { + throw std::exception(); + } } CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_fpuAvxConstMatchers[] = { - { OP_FP_ADD, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, - { OP_FP_SUB, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, - { OP_FP_MUL, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, - { OP_FP_DIV, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, - { OP_FP_MAX, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, 
MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, - { OP_FP_MIN, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem }, + { OP_FP_ADD, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_SUB, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MUL, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_DIV, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MAX, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MIN, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + + { OP_FP_ADD, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_SUB, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MUL, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_DIV, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MAX, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MIN, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + + + { OP_FP_ADD, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_SUB, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, 
&CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MUL, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_DIV, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MAX, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MIN, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + + { OP_FP_ADD, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_SUB, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MUL, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_DIV, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MAX, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MIN, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + + + { OP_FP_ADD, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_SUB, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MUL, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_DIV, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MAX, MATCH_VARIABLE128, MATCH_VARIABLE128, 
MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MIN, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + + + { OP_FP_ADD, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_SUB, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MUL, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_DIV, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MAX, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MIN, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + + { OP_FP_ADD, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_SUB, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MUL, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_DIV, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MAX, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MIN, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + + { OP_FP_ADD, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_SUB, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, 
MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MUL, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_DIV, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MAX, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + { OP_FP_MIN, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_VarVarVar }, + + { OP_FP_ABS, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Abs_VarVar }, + { OP_FP_ABS, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Abs_VarVar }, + { OP_FP_ABS, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Abs_VarVar }, + { OP_FP_ABS, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Abs_VarVar }, + + { OP_FP_NEG, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Neg_VarVar }, + { OP_FP_NEG, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Neg_VarVar }, + { OP_FP_NEG, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Neg_VarVar }, + { OP_FP_NEG, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Neg_VarVar }, { OP_FP_CMP, MATCH_VARIABLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Cmp_VarMemMem }, + { OP_FP_CMP, MATCH_VARIABLE, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Cmp_VarMemMem }, + { OP_FP_CMP, MATCH_VARIABLE, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Cmp_VarMemMem }, + { OP_FP_CMP, MATCH_VARIABLE, 
MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Cmp_VarMemMem }, + + { OP_FP_RSQRT, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Rsqrt_MemMem }, + { OP_FP_RSQRT, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Rsqrt_MemMem }, + { OP_FP_RSQRT, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Rsqrt_MemMem }, + // { OP_FP_SQRT, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMem }, + { OP_FP_SQRT, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMem }, + { OP_FP_SQRT, MATCH_MEMORY_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMem }, + { OP_FP_RCPL, MATCH_VARIABLE128, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Rcpl_MemMem }, + { OP_FP_RCPL, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Rcpl_MemMem }, { OP_FP_SQRT, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMem }, { OP_FP_RSQRT, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Rsqrt_MemMem }, { OP_FP_RCPL, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Rcpl_MemMem }, + { OP_MOV, MATCH_VARIABLE128, MATCH_VARIABLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Mov_Reg128Rel }, + { OP_MOV, MATCH_VARIABLE, MATCH_RELATIVE_FP_INT32, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Mov_Reg32RelI32 }, + { OP_MOV, MATCH_REGISTER128, MATCH_RELATIVE_FP_INT32, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Mov_RegSRelI32 }, + { OP_MOV, MATCH_RELATIVE_FP_SINGLE, MATCH_REGISTER128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Mov_RelSReg }, + { OP_MOV, MATCH_REGISTER128, MATCH_RELATIVE_FP_SINGLE, MATCH_NIL, MATCH_NIL, 
&CCodeGen_x86::Emit_Fp_Avx_Mov_RegSRelS }, + { OP_MOV, MATCH_RELATIVE_FP_SINGLE, MATCH_RELATIVE_FP_INT32, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_Mov_RelSRelI32 }, { OP_FP_TOINT_TRUNC, MATCH_RELATIVE_FP_SINGLE, MATCH_RELATIVE_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_ToIntTrunc_RelRel }, + { OP_FP_TOINT_TRUNC, MATCH_RELATIVE_FP_SINGLE, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_ToIntTrunc_RelRel }, + { OP_FP_TOINT_TRUNC, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_ToIntTrunc_RelRel }, + { OP_FP_TOINT_TRUNC, MATCH_VARIABLE128, MATCH_RELATIVE_FP_SINGLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Fp_Avx_ToIntTrunc_RelRel }, { OP_MOV, MATCH_NIL, MATCH_NIL, MATCH_NIL, MATCH_NIL, nullptr }, }; diff --git a/src/Jitter_RegAlloc.cpp b/src/Jitter_RegAlloc.cpp index 2e3fb92a..d4b1c694 100644 --- a/src/Jitter_RegAlloc.cpp +++ b/src/Jitter_RegAlloc.cpp @@ -196,6 +196,7 @@ void CJitter::AssociateSymbolsToRegisters(SymbolRegAllocInfo& symbolRegAllocs) c return (symbolType == SYM_RELATIVE) || (symbolType == SYM_TEMPORARY) || (symbolType == SYM_REL_REFERENCE) || (symbolType == SYM_TMP_REFERENCE) || + (symbolType == SYM_FP_REL_SINGLE) || (symbolType == SYM_FP_TMP_SINGLE) || (symbolType == SYM_FP_REL_INT32) || (symbolType == SYM_RELATIVE128) || (symbolType == SYM_TEMPORARY128); }; @@ -245,7 +246,7 @@ void CJitter::AssociateSymbolsToRegisters(SymbolRegAllocInfo& symbolRegAllocs) c auto registerIterator = std::end(availableRegisters); auto registerIteratorEnd = std::end(availableRegisters); auto registerSymbolType = SYM_REGISTER; - if((symbol->m_type == SYM_RELATIVE) || (symbol->m_type == SYM_TEMPORARY)) + if((symbol->m_type == SYM_RELATIVE) || (symbol->m_type == SYM_TEMPORARY) || (symbol->m_type == SYM_FP_REL_INT32)) { registerIterator = availableRegisters.lower_bound(SYM_REGISTER); registerIteratorEnd = availableRegisters.upper_bound(SYM_REGISTER); @@ -257,7 +258,10 @@ void 
CJitter::AssociateSymbolsToRegisters(SymbolRegAllocInfo& symbolRegAllocs) c registerIteratorEnd = availableRegisters.upper_bound(SYM_REGISTER); registerSymbolType = SYM_REG_REFERENCE; } - else if((symbol->m_type == SYM_RELATIVE128) || (symbol->m_type == SYM_TEMPORARY128)) + else if( + (symbol->m_type == SYM_RELATIVE128) || (symbol->m_type == SYM_TEMPORARY128) || + (symbol->m_type == SYM_FP_REL_SINGLE) || (symbol->m_type == SYM_FP_TMP_SINGLE) + ) { registerIterator = availableRegisters.lower_bound(SYM_REGISTER128); registerIteratorEnd = availableRegisters.upper_bound(SYM_REGISTER128); diff --git a/src/X86Assembler_Avx.cpp b/src/X86Assembler_Avx.cpp index abee2ba0..1540b55a 100644 --- a/src/X86Assembler_Avx.cpp +++ b/src/X86Assembler_Avx.cpp @@ -386,6 +386,11 @@ void CX86Assembler::VpmovmskbVo(REGISTER dst, XMMREGISTER src) WriteVexVoOp(VEX_OPCODE_MAP_66, 0xD7, static_cast(dst), CX86Assembler::xMM0, CX86Assembler::MakeXmmRegisterAddress(src)); } +void CX86Assembler::VandpsVo(XMMREGISTER dst, XMMREGISTER src1, const CAddress& src2) +{ + WriteVexVoOp(VEX_OPCODE_MAP_NONE, 0x54, dst, src1, src2); +} + void CX86Assembler::VaddpsVo(XMMREGISTER dst, XMMREGISTER src1, const CAddress& src2) { WriteVexVoOp(VEX_OPCODE_MAP_NONE, 0x58, dst, src1, src2); @@ -416,6 +421,11 @@ void CX86Assembler::VcmpgtpsVo(XMMREGISTER dst, XMMREGISTER src1, const CAddress VcmppsVo(dst, src1, src2, SSE_CMP_NLE); } +void CX86Assembler::VxorpsVo(XMMREGISTER dst, XMMREGISTER src1, const CAddress& src2) +{ + WriteVexVoOp(VEX_OPCODE_MAP_NONE, 0x57, dst, src1, src2); +} + void CX86Assembler::VminpsVo(XMMREGISTER dst, XMMREGISTER src1, const CAddress& src2) { WriteVexVoOp(VEX_OPCODE_MAP_NONE, 0x5D, dst, src1, src2);