diff --git a/examples_tests b/examples_tests index 829ea34183..1a9b50718a 160000 --- a/examples_tests +++ b/examples_tests @@ -1 +1 @@ -Subproject commit 829ea34183a0a62a3bd68ded4dd9e451b97126d4 +Subproject commit 1a9b50718aafe8a53229e1f5aa231b64441ac8f4 diff --git a/include/ICameraSceneNode.h b/include/ICameraSceneNode.h index e3975e3802..577b6d0fb6 100644 --- a/include/ICameraSceneNode.h +++ b/include/ICameraSceneNode.h @@ -6,6 +6,9 @@ #ifndef __NBL_I_CAMERA_SCENE_NODE_H_INCLUDED__ #define __NBL_I_CAMERA_SCENE_NODE_H_INCLUDED__ +#include +#include + #include "ISceneNode.h" #include "matrixutil.h" @@ -46,17 +49,17 @@ class ICameraSceneNode : public ISceneNode The function will figure it out if you've set an orthogonal matrix. \param projection The new projection matrix of the camera. */ - virtual void setProjectionMatrix(const core::matrix4SIMD& projection) =0; + virtual void setProjectionMatrix(const hlsl::float32_t4x4& projection) =0; //! Gets the current projection matrix of the camera. /** \return The current projection matrix of the camera. */ - inline const core::matrix4SIMD& getProjectionMatrix() const { return projMatrix; } + inline const hlsl::float32_t4x4& getProjectionMatrix() const { return projMatrix; } //! Gets the current view matrix of the camera. /** \return The current view matrix of the camera. */ - virtual const core::matrix3x4SIMD& getViewMatrix() const =0; + virtual const hlsl::float32_t3x4& getViewMatrix() const =0; - virtual const core::matrix4SIMD& getConcatenatedMatrix() const =0; + virtual const hlsl::float32_t4x4& getConcatenatedMatrix() const =0; #if 0 //! It is possible to send mouse and key events to the camera. /** Most cameras may ignore this input, but camera scene nodes @@ -198,7 +201,7 @@ class ICameraSceneNode : public ISceneNode float ZFar; // Z-value of the far view-plane. // actual projection matrix used - core::matrix4SIMD projMatrix; + hlsl::float32_t4x4 projMatrix; bool leftHanded; }; diff --git a/include/matrix3x4SIMD.h b/include/matrix3x4SIMD.h deleted file mode 100644 index d52f305cec..0000000000 --- a/include/matrix3x4SIMD.h +++ /dev/null @@ -1,263 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef __NBL_MATRIX3X4SIMD_H_INCLUDED__ -#define __NBL_MATRIX3X4SIMD_H_INCLUDED__ - -#include "vectorSIMD.h" -#include "quaternion.h" - -namespace nbl::core -{ - -class matrix4x3; - -#define _NBL_MATRIX_ALIGNMENT _NBL_SIMD_ALIGNMENT -static_assert(_NBL_MATRIX_ALIGNMENT>=_NBL_VECTOR_ALIGNMENT,"Matrix must be equally or more aligned than vector!"); - -//! Equivalent of GLSL's mat4x3 -class matrix3x4SIMD// : private AllocationOverrideBase<_NBL_MATRIX_ALIGNMENT> EBO inheritance problem w.r.t `rows[3]` -{ - public: - _NBL_STATIC_INLINE_CONSTEXPR uint32_t VectorCount = 3u; - vectorSIMDf rows[VectorCount]; - - explicit matrix3x4SIMD( const vectorSIMDf& _r0 = vectorSIMDf(1.f, 0.f, 0.f, 0.f), - const vectorSIMDf& _r1 = vectorSIMDf(0.f, 1.f, 0.f, 0.f), - const vectorSIMDf& _r2 = vectorSIMDf(0.f, 0.f, 1.f, 0.f)) : rows{_r0, _r1, _r2} - { - } - - matrix3x4SIMD( float _a00, float _a01, float _a02, float _a03, - float _a10, float _a11, float _a12, float _a13, - float _a20, float _a21, float _a22, float _a23) - : matrix3x4SIMD(vectorSIMDf(_a00, _a01, _a02, _a03), - vectorSIMDf(_a10, _a11, _a12, _a13), - vectorSIMDf(_a20, _a21, _a22, _a23)) - { - } - - explicit matrix3x4SIMD(const float* const _data) - { - if (!_data) - return; - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] = vectorSIMDf(_data + 4*i); - } - matrix3x4SIMD(const float* const _data, bool ALIGNED) - { - if (!_data) - return; - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] = vectorSIMDf(_data + 4*i, ALIGNED); - } - - float* pointer() { return rows[0].pointer; } - const float* pointer() const { return rows[0].pointer; } - - inline matrix3x4SIMD& set(const matrix4x3& _retarded); - inline matrix4x3 getAsRetardedIrrlichtMatrix() const; - - static inline matrix3x4SIMD concatenateBFollowedByA(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b); - - static inline matrix3x4SIMD concatenateBFollowedByAPrecisely(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b); - - inline matrix3x4SIMD& concatenateAfter(const matrix3x4SIMD& _other) - { - return *this = concatenateBFollowedByA(*this, _other); - } - - inline matrix3x4SIMD& concatenateBefore(const matrix3x4SIMD& _other) - { - return *this = concatenateBFollowedByA(_other, *this); - } - - inline matrix3x4SIMD& concatenateAfterPrecisely(const matrix3x4SIMD& _other) - { - return *this = concatenateBFollowedByAPrecisely(*this, _other); - } - - inline matrix3x4SIMD& concatenateBeforePrecisely(const matrix3x4SIMD& _other) - { - return *this = concatenateBFollowedByAPrecisely(_other, *this); - } - - inline bool operator==(const matrix3x4SIMD& _other) - { - return !(*this != _other); - } - - inline bool operator!=(const matrix3x4SIMD& _other); - - - inline matrix3x4SIMD operator-() const - { - matrix3x4SIMD retval; - retval.rows[0] = -rows[0]; - retval.rows[1] = -rows[1]; - retval.rows[2] = -rows[2]; - return retval; - } - - - inline matrix3x4SIMD& operator+=(const matrix3x4SIMD& _other); - inline matrix3x4SIMD operator+(const matrix3x4SIMD& _other) const - { - matrix3x4SIMD retval(*this); - return retval += _other; - } - - inline matrix3x4SIMD& operator-=(const matrix3x4SIMD& _other); - inline matrix3x4SIMD operator-(const matrix3x4SIMD& _other) const - { - matrix3x4SIMD retval(*this); - return retval -= _other; - } - - inline matrix3x4SIMD& operator*=(float _scalar); - inline matrix3x4SIMD operator*(float _scalar) const - { - matrix3x4SIMD retval(*this); - return retval *= _scalar; - } - - inline matrix3x4SIMD& setTranslation(const vectorSIMDf& _translation) - { - // no faster way of doing it? - rows[0].w = _translation.x; - rows[1].w = _translation.y; - rows[2].w = _translation.z; - return *this; - } - inline vectorSIMDf getTranslation() const; - inline vectorSIMDf getTranslation3D() const; - - inline matrix3x4SIMD& setScale(const vectorSIMDf& _scale); - - inline vectorSIMDf getScale() const; - - inline void transformVect(vectorSIMDf& _out, const vectorSIMDf& _in) const; - inline void transformVect(vectorSIMDf& _in_out) const - { - transformVect(_in_out, _in_out); - } - - inline void pseudoMulWith4x1(vectorSIMDf& _out, const vectorSIMDf& _in) const; - inline void pseudoMulWith4x1(vectorSIMDf& _in_out) const - { - pseudoMulWith4x1(_in_out,_in_out); - } - - inline void mulSub3x3WithNx1(vectorSIMDf& _out, const vectorSIMDf& _in) const; - inline void mulSub3x3WithNx1(vectorSIMDf& _in_out) const - { - mulSub3x3WithNx1(_in_out, _in_out); - } - - inline static matrix3x4SIMD buildCameraLookAtMatrixLH( - const vectorSIMDf& position, - const vectorSIMDf& target, - const vectorSIMDf& upVector); - inline static matrix3x4SIMD buildCameraLookAtMatrixRH( - const vectorSIMDf& position, - const vectorSIMDf& target, - const vectorSIMDf& upVector); - - inline matrix3x4SIMD& setRotation(const quaternion& _quat); - - inline matrix3x4SIMD& setScaleRotationAndTranslation( const vectorSIMDf& _scale, - const quaternion& _quat, - const vectorSIMDf& _translation); - - inline vectorSIMDf getPseudoDeterminant() const - { - vectorSIMDf tmp; - return determinant_helper(tmp); - } - - inline bool getInverse(matrix3x4SIMD& _out) const; - bool makeInverse() - { - matrix3x4SIMD tmp; - - if (getInverse(tmp)) - { - *this = tmp; - return true; - } - return false; - } - - // - inline bool getSub3x3InverseTranspose(matrix3x4SIMD& _out) const; - - // - inline bool getSub3x3InverseTransposePacked(float outRows[9]) const - { - matrix3x4SIMD tmp; - if (!getSub3x3InverseTranspose(tmp)) - return false; - - float* _out = outRows; - for (auto i=0; i<3; i++) - { - const auto& row = tmp.rows[i]; - for (auto j=0; j<3; j++) - *(_out++) = row[j]; - } - - return true; - } - - // - inline core::matrix3x4SIMD getSub3x3TransposeCofactors() const; - - // - inline void setTransformationCenter(const vectorSIMDf& _center, const vectorSIMDf& _translation); - - // - static inline matrix3x4SIMD buildAxisAlignedBillboard( - const vectorSIMDf& camPos, - const vectorSIMDf& center, - const vectorSIMDf& translation, - const vectorSIMDf& axis, - const vectorSIMDf& from); - - - // - float& operator()(size_t _i, size_t _j) { return rows[_i].pointer[_j]; } - const float& operator()(size_t _i, size_t _j) const { return rows[_i].pointer[_j]; } - - // - inline const vectorSIMDf& operator[](size_t _rown) const { return rows[_rown]; } - inline vectorSIMDf& operator[](size_t _rown) { return rows[_rown]; } - - private: - static inline vectorSIMDf doJob(const __m128& a, const matrix3x4SIMD& _mtx); - - // really need that dvec<2> or wider - inline __m128d halfRowAsDouble(size_t _n, bool _0) const; - static inline __m128d doJob_d(const __m128d& _a0, const __m128d& _a1, const matrix3x4SIMD& _mtx, bool _xyHalf); - - vectorSIMDf determinant_helper(vectorSIMDf& r1crossr2) const - { - r1crossr2 = core::cross(rows[1], rows[2]); - return core::dot(rows[0], r1crossr2); - } -}; - -inline matrix3x4SIMD concatenateBFollowedByA(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b) -{ - return matrix3x4SIMD::concatenateBFollowedByA(_a, _b); -} -/* -inline matrix3x4SIMD concatenateBFollowedByAPrecisely(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b) -{ - return matrix3x4SIMD::concatenateBFollowedByAPrecisely(_a, _b); -} -*/ - -} - -#endif diff --git a/include/matrix3x4SIMD_impl.h b/include/matrix3x4SIMD_impl.h deleted file mode 100644 index 0e9022efd0..0000000000 --- a/include/matrix3x4SIMD_impl.h +++ /dev/null @@ -1,470 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef _NBL_MATRIX3X4SIMD_IMPL_H_INCLUDED_ -#define _NBL_MATRIX3X4SIMD_IMPL_H_INCLUDED_ - -#include "matrix3x4SIMD.h" -#include "nbl/core/math/glslFunctions.tcc" - -namespace nbl::core -{ - -// TODO: move to another implementation header -inline quaternion::quaternion(const matrix3x4SIMD& m) -{ - const vectorSIMDf one(1.f); - auto Qx = m.rows[0].xxxx()^vectorSIMDu32(0,0,0x80000000u,0x80000000u); - auto Qy = m.rows[1].yyyy()^vectorSIMDu32(0,0x80000000u,0,0x80000000u); - auto Qz = m.rows[2].zzzz()^vectorSIMDu32(0,0x80000000u,0x80000000u,0); - - auto tmp = one+Qx+Qy+Qz; - auto invscales = inversesqrt(tmp)*0.5f; - auto scales = tmp*invscales*0.5f; - - // TODO: speed this up - if (tmp.x > 0.0f) - { - X = (m(2, 1) - m(1, 2)) * invscales.x; - Y = (m(0, 2) - m(2, 0)) * invscales.x; - Z = (m(1, 0) - m(0, 1)) * invscales.x; - W = scales.x; - } - else - { - if (tmp.y>0.f) - { - X = scales.y; - Y = (m(0, 1) + m(1, 0)) * invscales.y; - Z = (m(2, 0) + m(0, 2)) * invscales.y; - W = (m(2, 1) - m(1, 2)) * invscales.y; - } - else if (tmp.z>0.f) - { - X = (m(0, 1) + m(1, 0)) * invscales.z; - Y = scales.z; - Z = (m(1, 2) + m(2, 1)) * invscales.z; - W = (m(0, 2) - m(2, 0)) * invscales.z; - } - else - { - X = (m(0, 2) + m(2, 0)) * invscales.w; - Y = (m(1, 2) + m(2, 1)) * invscales.w; - Z = scales.w; - W = (m(1, 0) - m(0, 1)) * invscales.w; - } - } - - *this = normalize(*this); -} - -inline bool matrix3x4SIMD::operator!=(const matrix3x4SIMD& _other) -{ - for (size_t i = 0u; i < VectorCount; ++i) - if ((rows[i] != _other.rows[i]).any()) - return true; - return false; -} - -inline matrix3x4SIMD& matrix3x4SIMD::operator+=(const matrix3x4SIMD& _other) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] += _other.rows[i]; - return *this; -} -inline matrix3x4SIMD& matrix3x4SIMD::operator-=(const matrix3x4SIMD& _other) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] -= _other.rows[i]; - return *this; -} -inline matrix3x4SIMD& matrix3x4SIMD::operator*=(float _scalar) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] *= _scalar; - return *this; -} - -#ifdef __NBL_COMPILE_WITH_SSE3 -#define BROADCAST32(fpx) _MM_SHUFFLE(fpx, fpx, fpx, fpx) -#define BUILD_XORMASKF(_x_, _y_, _z_, _w_) _mm_setr_epi32(_x_ ? 0x80000000u:0x0u, _y_ ? 0x80000000u:0x0u, _z_ ? 0x80000000u:0x0u, _w_ ? 0x80000000u:0x0u) -#define BUILD_MASKF(_x_, _y_, _z_, _w_) _mm_setr_epi32(_x_*0xffffffff, _y_*0xffffffff, _z_*0xffffffff, _w_*0xffffffff) - -inline matrix3x4SIMD matrix3x4SIMD::concatenateBFollowedByA(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b) -{ -#ifdef _NBL_DEBUG - assert(is_aligned_to(&_a, _NBL_SIMD_ALIGNMENT)); - assert(is_aligned_to(&_b, _NBL_SIMD_ALIGNMENT)); -#endif // _NBL_DEBUG - __m128 r0 = _a.rows[0].getAsRegister(); - __m128 r1 = _a.rows[1].getAsRegister(); - __m128 r2 = _a.rows[2].getAsRegister(); - - matrix3x4SIMD out; - out.rows[0] = matrix3x4SIMD::doJob(r0, _b); - out.rows[1] = matrix3x4SIMD::doJob(r1, _b); - out.rows[2] = matrix3x4SIMD::doJob(r2, _b); - - return out; -} - -inline matrix3x4SIMD matrix3x4SIMD::concatenateBFollowedByAPrecisely(const matrix3x4SIMD& _a, const matrix3x4SIMD& _b) -{ - __m128d r00 = _a.halfRowAsDouble(0u, true); - __m128d r01 = _a.halfRowAsDouble(0u, false); - __m128d r10 = _a.halfRowAsDouble(1u, true); - __m128d r11 = _a.halfRowAsDouble(1u, false); - __m128d r20 = _a.halfRowAsDouble(2u, true); - __m128d r21 = _a.halfRowAsDouble(2u, false); - - matrix3x4SIMD out; - - const __m128i mask0011 = BUILD_MASKF(0, 0, 1, 1); - - __m128 second = _mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r00, r01, _b, false)); - out.rows[0] = vectorSIMDf(_mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r00, r01, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - - second = _mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r10, r11, _b, false)); - out.rows[1] = vectorSIMDf(_mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r10, r11, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - - second = _mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r20, r21, _b, false)); - out.rows[2] = vectorSIMDf(_mm_cvtpd_ps(matrix3x4SIMD::doJob_d(r20, r21, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - - return out; -} - -inline vectorSIMDf matrix3x4SIMD::getTranslation() const -{ - __m128 xmm0 = _mm_unpackhi_ps(rows[0].getAsRegister(), rows[1].getAsRegister()); // (0z,1z,0w,1w) - __m128 xmm1 = _mm_unpackhi_ps(rows[2].getAsRegister(), _mm_setr_ps(0.f, 0.f, 0.f, 1.f)); // (2z,3z,2w,3w) - __m128 xmm2 = _mm_movehl_ps(xmm1, xmm0);// (0w,1w,2w,3w) - - return xmm2; -} -inline vectorSIMDf matrix3x4SIMD::getTranslation3D() const -{ - __m128 xmm0 = _mm_unpackhi_ps(rows[0].getAsRegister(), rows[1].getAsRegister()); // (0z,1z,0w,1w) - __m128 xmm1 = _mm_unpackhi_ps(rows[2].getAsRegister(), _mm_setzero_ps()); // (2z,0,2w,0) - __m128 xmm2 = _mm_movehl_ps(xmm1, xmm0);// (0w,1w,2w,0) - - return xmm2; -} - -inline matrix3x4SIMD& matrix3x4SIMD::setScale(const core::vectorSIMDf& _scale) -{ - const vectorSIMDu32 mask0001 = vectorSIMDu32(BUILD_MASKF(0, 0, 0, 1)); - const vectorSIMDu32 mask0010 = vectorSIMDu32(BUILD_MASKF(0, 0, 1, 0)); - const vectorSIMDu32 mask0100 = vectorSIMDu32(BUILD_MASKF(0, 1, 0, 0)); - const vectorSIMDu32 mask1000 = vectorSIMDu32(BUILD_MASKF(1, 0, 0, 0)); - - const vectorSIMDu32& scaleAlias = reinterpret_cast(_scale); - - vectorSIMDu32& rowAlias0 = reinterpret_cast(rows[0]); - vectorSIMDu32& rowAlias1 = reinterpret_cast(rows[1]); - vectorSIMDu32& rowAlias2 = reinterpret_cast(rows[2]); - rowAlias0 = (scaleAlias & reinterpret_cast(mask1000)) | (rowAlias0 & reinterpret_cast(mask0001)); - rowAlias1 = (scaleAlias & reinterpret_cast(mask0100)) | (rowAlias1 & reinterpret_cast(mask0001)); - rowAlias2 = (scaleAlias & reinterpret_cast(mask0010)) | (rowAlias2 & reinterpret_cast(mask0001)); - - return *this; -} - -inline core::vectorSIMDf matrix3x4SIMD::getScale() const -{ - // xmm4-7 will now become columuns of B - __m128 xmm4 = rows[0].getAsRegister(); - __m128 xmm5 = rows[1].getAsRegister(); - __m128 xmm6 = rows[2].getAsRegister(); - __m128 xmm7 = _mm_setzero_ps(); - // g==0 - __m128 xmm0 = _mm_unpacklo_ps(xmm4, xmm5); - __m128 xmm1 = _mm_unpacklo_ps(xmm6, xmm7); // (2x,g,2y,g) - __m128 xmm2 = _mm_unpackhi_ps(xmm4, xmm5); - __m128 xmm3 = _mm_unpackhi_ps(xmm6, xmm7); // (2z,g,2w,g) - xmm4 = _mm_movelh_ps(xmm1, xmm0); //(0x,1x,2x,g) - xmm5 = _mm_movehl_ps(xmm1, xmm0); - xmm6 = _mm_movelh_ps(xmm3, xmm2); //(0z,1z,2z,g) - - // See http://www.robertblum.com/articles/2005/02/14/decomposing-matrices - // We have to do the full calculation. - xmm0 = _mm_mul_ps(xmm4, xmm4);// column 0 squared - xmm1 = _mm_mul_ps(xmm5, xmm5);// column 1 squared - xmm2 = _mm_mul_ps(xmm6, xmm6);// column 2 squared - xmm4 = _mm_hadd_ps(xmm0, xmm1); - xmm5 = _mm_hadd_ps(xmm2, xmm7); - xmm6 = _mm_hadd_ps(xmm4, xmm5); - - return _mm_sqrt_ps(xmm6); -} - -inline void matrix3x4SIMD::transformVect(vectorSIMDf& _out, const vectorSIMDf& _in) const -{ - vectorSIMDf r0 = rows[0] * _in, - r1 = rows[1] * _in, - r2 = rows[2] * _in; - - _out = - _mm_hadd_ps( - _mm_hadd_ps(r0.getAsRegister(), r1.getAsRegister()), - _mm_hadd_ps(r2.getAsRegister(), _mm_set1_ps(0.25f)) - ); -} - -inline void matrix3x4SIMD::pseudoMulWith4x1(vectorSIMDf& _out, const vectorSIMDf& _in) const -{ - __m128i mask1110 = BUILD_MASKF(1, 1, 1, 0); - _out = (_in & mask1110) | _mm_castps_si128(vectorSIMDf(0.f, 0.f, 0.f, 1.f).getAsRegister()); - transformVect(_out); -} - -inline void matrix3x4SIMD::mulSub3x3WithNx1(vectorSIMDf& _out, const vectorSIMDf& _in) const -{ - auto maskedIn = _in & BUILD_MASKF(1, 1, 1, 0); - vectorSIMDf r0 = rows[0] * maskedIn, - r1 = rows[1] * maskedIn, - r2 = rows[2] * maskedIn; - - _out = - _mm_hadd_ps( - _mm_hadd_ps(r0.getAsRegister(), r1.getAsRegister()), - _mm_hadd_ps(r2.getAsRegister(), _mm_setzero_ps()) - ); -} - - -inline matrix3x4SIMD matrix3x4SIMD::buildCameraLookAtMatrixLH( - const core::vectorSIMDf& position, - const core::vectorSIMDf& target, - const core::vectorSIMDf& upVector) -{ - const core::vectorSIMDf zaxis = core::normalize(target - position); - const core::vectorSIMDf xaxis = core::normalize(core::cross(upVector, zaxis)); - const core::vectorSIMDf yaxis = core::cross(zaxis, xaxis); - - matrix3x4SIMD r; - r.rows[0] = xaxis; - r.rows[1] = yaxis; - r.rows[2] = zaxis; - r.rows[0].w = -dot(xaxis, position)[0]; - r.rows[1].w = -dot(yaxis, position)[0]; - r.rows[2].w = -dot(zaxis, position)[0]; - - return r; -} -inline matrix3x4SIMD matrix3x4SIMD::buildCameraLookAtMatrixRH( - const core::vectorSIMDf& position, - const core::vectorSIMDf& target, - const core::vectorSIMDf& upVector) -{ - const core::vectorSIMDf zaxis = core::normalize(position - target); - const core::vectorSIMDf xaxis = core::normalize(core::cross(upVector, zaxis)); - const core::vectorSIMDf yaxis = core::cross(zaxis, xaxis); - - matrix3x4SIMD r; - r.rows[0] = xaxis; - r.rows[1] = yaxis; - r.rows[2] = zaxis; - r.rows[0].w = -dot(xaxis, position)[0]; - r.rows[1].w = -dot(yaxis, position)[0]; - r.rows[2].w = -dot(zaxis, position)[0]; - - return r; -} - -inline matrix3x4SIMD& matrix3x4SIMD::setRotation(const core::quaternion& _quat) -{ - const vectorSIMDu32 mask0001 = vectorSIMDu32(BUILD_MASKF(0, 0, 0, 1)); - const __m128i mask1110 = BUILD_MASKF(1, 1, 1, 0); - - const core::vectorSIMDf& quat = reinterpret_cast(_quat); - rows[0] = ((quat.yyyy() * ((quat.yxwx() & mask1110) * vectorSIMDf(2.f))) + (quat.zzzz() * (quat.zwxx() & mask1110) * vectorSIMDf(2.f, -2.f, 2.f, 0.f))) | (reinterpret_cast(rows[0]) & (mask0001)); - rows[0].x = 1.f - rows[0].x; - - rows[1] = ((quat.zzzz() * ((quat.wzyx() & mask1110) * vectorSIMDf(2.f))) + (quat.xxxx() * (quat.yxwx() & mask1110) * vectorSIMDf(2.f, 2.f, -2.f, 0.f))) | (reinterpret_cast(rows[1]) & (mask0001)); - rows[1].y = 1.f - rows[1].y; - - rows[2] = ((quat.xxxx() * ((quat.zwxx() & mask1110) * vectorSIMDf(2.f))) + (quat.yyyy() * (quat.wzyx() & mask1110) * vectorSIMDf(-2.f, 2.f, 2.f, 0.f))) | (reinterpret_cast(rows[2]) & (mask0001)); - rows[2].z = 1.f - rows[2].z; - - return *this; -} - -inline matrix3x4SIMD& matrix3x4SIMD::setScaleRotationAndTranslation(const vectorSIMDf& _scale, const core::quaternion& _quat, const vectorSIMDf& _translation) -{ - const __m128i mask1110 = BUILD_MASKF(1, 1, 1, 0); - - const vectorSIMDf& quat = reinterpret_cast(_quat); - const vectorSIMDf dblScale = (_scale * 2.f) & mask1110; - - vectorSIMDf mlt = dblScale ^ BUILD_XORMASKF(0, 1, 0, 0); - rows[0] = ((quat.yyyy() * ((quat.yxwx() & mask1110) * dblScale)) + (quat.zzzz() * (quat.zwxx() & mask1110) * mlt)); - rows[0].x = _scale.x - rows[0].x; - - mlt = dblScale ^ BUILD_XORMASKF(0, 0, 1, 0); - rows[1] = ((quat.zzzz() * ((quat.wzyx() & mask1110) * dblScale)) + (quat.xxxx() * (quat.yxwx() & mask1110) * mlt)); - rows[1].y = _scale.y - rows[1].y; - - mlt = dblScale ^ BUILD_XORMASKF(1, 0, 0, 0); - rows[2] = ((quat.xxxx() * ((quat.zwxx() & mask1110) * dblScale)) + (quat.yyyy() * (quat.wzyx() & mask1110) * mlt)); - rows[2].z = _scale.z - rows[2].z; - - setTranslation(_translation); - - return *this; -} - - -inline bool matrix3x4SIMD::getInverse(matrix3x4SIMD& _out) const //! SUBOPTIMAL - OPTIMIZE! -{ - auto translation = getTranslation(); - // `tmp` will have columns in its `rows` - core::matrix4SIMD tmp; - auto* cols = tmp.rows; - if (!getSub3x3InverseTranspose(reinterpret_cast(tmp))) - return false; - - // find inverse post-translation - cols[3] = -cols[0]*translation.xxxx()-cols[1]*translation.yyyy()-cols[2]*translation.zzzz(); - - // columns into rows - _out = transpose(tmp).extractSub3x4(); - - return true; -} - -inline bool matrix3x4SIMD::getSub3x3InverseTranspose(core::matrix3x4SIMD& _out) const -{ - vectorSIMDf r1crossr2; - const vectorSIMDf d = determinant_helper(r1crossr2); - if (core::iszero(d.x, FLT_MIN)) - return false; - auto rcp = core::reciprocal(d); - - // matrix of cofactors * 1/det - _out = getSub3x3TransposeCofactors(); - _out.rows[0] *= rcp; - _out.rows[1] *= rcp; - _out.rows[2] *= rcp; - - return true; -} - -inline core::matrix3x4SIMD matrix3x4SIMD::getSub3x3TransposeCofactors() const -{ - core::matrix3x4SIMD _out; - _out.rows[0] = core::cross(rows[1], rows[2]); - _out.rows[1] = core::cross(rows[2], rows[0]); - _out.rows[2] = core::cross(rows[0], rows[1]); - return _out; -} - -// TODO: Double check this!- -inline void matrix3x4SIMD::setTransformationCenter(const core::vectorSIMDf& _center, const core::vectorSIMDf& _translation) -{ - core::vectorSIMDf r0 = rows[0] * _center; - core::vectorSIMDf r1 = rows[1] * _center; - core::vectorSIMDf r2 = rows[2] * _center; - core::vectorSIMDf r3(0.f, 0.f, 0.f, 1.f); - - __m128 col3 = _mm_hadd_ps(_mm_hadd_ps(r0.getAsRegister(), r1.getAsRegister()), _mm_hadd_ps(r2.getAsRegister(), r3.getAsRegister())); - const vectorSIMDf vcol3 = _center - _translation - col3; - - for (size_t i = 0u; i < VectorCount; ++i) - rows[i].w = vcol3.pointer[i]; -} - - -// TODO: Double check this! -inline matrix3x4SIMD matrix3x4SIMD::buildAxisAlignedBillboard( - const core::vectorSIMDf& camPos, - const core::vectorSIMDf& center, - const core::vectorSIMDf& translation, - const core::vectorSIMDf& axis, - const core::vectorSIMDf& from) -{ - // axis of rotation - const core::vectorSIMDf up = core::normalize(axis); - const core::vectorSIMDf forward = core::normalize(camPos - center); - const core::vectorSIMDf right = core::normalize(core::cross(up, forward)); - - // correct look vector - const core::vectorSIMDf look = core::cross(right, up); - - // rotate from to - // axis multiplication by sin - const core::vectorSIMDf vs = core::cross(look, from); - - // cosinus angle - const core::vectorSIMDf ca = core::cross(from, look); - - const core::vectorSIMDf vt(up * (core::vectorSIMDf(1.f) - ca)); - const core::vectorSIMDf wt = vt * up.yzxx(); - const core::vectorSIMDf vtuppca = vt * up + ca; - - matrix3x4SIMD mat; - core::vectorSIMDf& row0 = mat.rows[0]; - core::vectorSIMDf& row1 = mat.rows[1]; - core::vectorSIMDf& row2 = mat.rows[2]; - - row0 = vtuppca & BUILD_MASKF(1, 0, 0, 0); - row1 = vtuppca & BUILD_MASKF(0, 1, 0, 0); - row2 = vtuppca & BUILD_MASKF(0, 0, 1, 0); - - row0 += (wt.xxzx() + vs.xzyx() * core::vectorSIMDf(1.f, 1.f, -1.f, 1.f)) & BUILD_MASKF(0, 1, 1, 0); - row1 += (wt.xxyx() + vs.zxxx() * core::vectorSIMDf(-1.f, 1.f, 1.f, 1.f)) & BUILD_MASKF(1, 0, 1, 0); - row2 += (wt.zyxx() + vs.yxxx() * core::vectorSIMDf(1.f, -1.f, 1.f, 1.f)) & BUILD_MASKF(1, 1, 0, 0); - - mat.setTransformationCenter(center, translation); - return mat; -} - - - -inline vectorSIMDf matrix3x4SIMD::doJob(const __m128& a, const matrix3x4SIMD& _mtx) -{ - __m128 r0 = _mtx.rows[0].getAsRegister(); - __m128 r1 = _mtx.rows[1].getAsRegister(); - __m128 r2 = _mtx.rows[2].getAsRegister(); - - const __m128i mask = _mm_setr_epi32(0, 0, 0, 0xffffffff); - - vectorSIMDf res; - res = _mm_mul_ps(_mm_shuffle_ps(a, a, BROADCAST32(0)), r0); - res += _mm_mul_ps(_mm_shuffle_ps(a, a, BROADCAST32(1)), r1); - res += _mm_mul_ps(_mm_shuffle_ps(a, a, BROADCAST32(2)), r2); - res += vectorSIMDf(a) & mask; // always 0 0 0 a3 -- no shuffle needed - return res; - } - -inline __m128d matrix3x4SIMD::halfRowAsDouble(size_t _n, bool _0) const -{ - return _mm_cvtps_pd(_0 ? rows[_n].xyxx().getAsRegister() : rows[_n].zwxx().getAsRegister()); -} -inline __m128d matrix3x4SIMD::doJob_d(const __m128d& _a0, const __m128d& _a1, const matrix3x4SIMD& _mtx, bool _xyHalf) -{ - __m128d r0 = _mtx.halfRowAsDouble(0u, _xyHalf); - __m128d r1 = _mtx.halfRowAsDouble(1u, _xyHalf); - __m128d r2 = _mtx.halfRowAsDouble(2u, _xyHalf); - - const __m128d mask01 = _mm_castsi128_pd(_mm_setr_epi32(0, 0, 0xffffffff, 0xffffffff)); - - __m128d res; - res = _mm_mul_pd(_mm_shuffle_pd(_a0, _a0, 0), r0); - res = _mm_add_pd(res, _mm_mul_pd(_mm_shuffle_pd(_a0, _a0, 3), r1)); - res = _mm_add_pd(res, _mm_mul_pd(_mm_shuffle_pd(_a1, _a1, 0), r2)); - if (!_xyHalf) - res = _mm_add_pd(res, _mm_and_pd(_a1, mask01)); - return res; -} - -#undef BUILD_MASKF -#undef BUILD_XORMASKF -#undef BROADCAST32 -#else -#error "no implementation" -#endif - -} // nbl::core - -#endif diff --git a/include/matrix4SIMD.h b/include/matrix4SIMD.h deleted file mode 100644 index 03126c61f7..0000000000 --- a/include/matrix4SIMD.h +++ /dev/null @@ -1,385 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef __NBL_MATRIX4SIMD_H_INCLUDED__ -#define __NBL_MATRIX4SIMD_H_INCLUDED__ - -#include "matrix3x4SIMD.h" - -namespace nbl -{ -namespace core -{ - -template -class aabbox3d; - - -class matrix4SIMD// : public AlignedBase<_NBL_SIMD_ALIGNMENT> don't inherit from AlignedBase (which is empty) because member `rows[4]` inherits from it as well -{ - public: - _NBL_STATIC_INLINE_CONSTEXPR uint32_t VectorCount = 4u; - vectorSIMDf rows[VectorCount]; - - inline explicit matrix4SIMD(const vectorSIMDf& _r0 = vectorSIMDf(1.f, 0.f, 0.f, 0.f), - const vectorSIMDf& _r1 = vectorSIMDf(0.f, 1.f, 0.f, 0.f), - const vectorSIMDf& _r2 = vectorSIMDf(0.f, 0.f, 1.f, 0.f), - const vectorSIMDf& _r3 = vectorSIMDf(0.f, 0.f, 0.f, 1.f)) - : rows{ _r0, _r1, _r2, _r3 } - { - } - - inline matrix4SIMD( float _a00, float _a01, float _a02, float _a03, - float _a10, float _a11, float _a12, float _a13, - float _a20, float _a21, float _a22, float _a23, - float _a30, float _a31, float _a32, float _a33) - : matrix4SIMD( vectorSIMDf(_a00, _a01, _a02, _a03), - vectorSIMDf(_a10, _a11, _a12, _a13), - vectorSIMDf(_a20, _a21, _a22, _a23), - vectorSIMDf(_a30, _a31, _a32, _a33)) - { - } - - inline explicit matrix4SIMD(const float* const _data) - { - if (!_data) - return; - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] = vectorSIMDf(_data + 4 * i); - } - inline matrix4SIMD(const float* const _data, bool ALIGNED) - { - if (!_data) - return; - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] = vectorSIMDf(_data + 4 * i, ALIGNED); - } - - inline explicit matrix4SIMD(const matrix3x4SIMD& smallMat) - { - *reinterpret_cast(this) = smallMat; - rows[3].set(0.f,0.f,0.f,1.f); - } - - inline matrix3x4SIMD extractSub3x4() const - { - return matrix3x4SIMD(rows[0],rows[1],rows[2]); - } - - //! Access by row - inline const vectorSIMDf& getRow(size_t _rown) const{ return rows[_rown]; } - inline vectorSIMDf& getRow(size_t _rown) { return rows[_rown]; } - - //! Access by element - inline float operator()(size_t _i, size_t _j) const { return rows[_i].pointer[_j]; } - inline float& operator()(size_t _i, size_t _j) { return rows[_i].pointer[_j]; } - - //! Access for memory - inline const float* pointer() const {return rows[0].pointer;} - inline float* pointer() {return rows[0].pointer;} - - - inline bool operator==(const matrix4SIMD& _other) const - { - return !(*this != _other); - } - inline bool operator!=(const matrix4SIMD& _other) const; - - inline matrix4SIMD& operator+=(const matrix4SIMD& _other); - inline matrix4SIMD operator+(const matrix4SIMD& _other) const - { - matrix4SIMD r{*this}; - return r += _other; - } - - inline matrix4SIMD& operator-=(const matrix4SIMD& _other); - inline matrix4SIMD operator-(const matrix4SIMD& _other) const - { - matrix4SIMD r{*this}; - return r -= _other; - } - - inline matrix4SIMD& operator*=(float _scalar); - inline matrix4SIMD operator*(float _scalar) const - { - matrix4SIMD r{*this}; - return r *= _scalar; - } - - static inline matrix4SIMD concatenateBFollowedByA(const matrix4SIMD& _a, const matrix4SIMD& _b); - static inline matrix4SIMD concatenateBFollowedByAPrecisely(const matrix4SIMD& _a, const matrix4SIMD& _b); - - inline bool isIdentity() const - { - return *this == matrix4SIMD(); - } - inline bool isIdentity(float _tolerance) const; - - inline bool isOrthogonal() const - { - return concatenateBFollowedByA(transpose(*this), *this).isIdentity(); - } - inline bool isOrthogonal(float _tolerance) const - { - return concatenateBFollowedByA(transpose(*this), *this).isIdentity(_tolerance); - } - - inline matrix4SIMD& setScale(const core::vectorSIMDf& _scale); - inline matrix4SIMD& setScale(float _scale) - { - return setScale(vectorSIMDf(_scale)); - } - - inline void setTranslation(const float* _t) - { - for (size_t i = 0u; i < 3u; ++i) - rows[i].w = _t[i]; - } - //! Takes into account only x,y,z components of _t - inline void setTranslation(const vectorSIMDf& _t) - { - setTranslation(_t.pointer); - } - inline void setTranslation(const vector3d& _t) - { - setTranslation(&_t.X); - } - - //! Returns last column of the matrix. - inline vectorSIMDf getTranslation() const; - - //! Returns translation part of the matrix (w component is always 0). - inline vectorSIMDf getTranslation3D() const; - - enum class E_MATRIX_INVERSE_PRECISION - { - EMIP_FAST_RECIPROCAL, - EMIP_32BIT, - EMIP_64BBIT - }; - - template - inline bool getInverseTransform(matrix4SIMD& _out) const - { - if constexpr (precision == E_MATRIX_INVERSE_PRECISION::EMIP_64BBIT) - { - double a = rows[0][0], b = rows[0][1], c = rows[0][2], d = rows[0][3]; - double e = rows[1][0], f = rows[1][1], g = rows[1][2], h = rows[1][3]; - double i = rows[2][0], j = rows[2][1], k = rows[2][2], l = rows[2][3]; - double m = rows[3][0], n = rows[3][1], o = rows[3][2], p = rows[3][3]; - - double kp_lo = k * p - l * o; - double jp_ln = j * p - l * n; - double jo_kn = j * o - k * n; - double ip_lm = i * p - l * m; - double io_km = i * o - k * m; - double in_jm = i * n - j * m; - - double a11 = +(f * kp_lo - g * jp_ln + h * jo_kn); - double a12 = -(e * kp_lo - g * ip_lm + h * io_km); - double a13 = +(e * jp_ln - f * ip_lm + h * in_jm); - double a14 = -(e * jo_kn - f * io_km + g * in_jm); - - double det = a * a11 + b * a12 + c * a13 + d * a14; - - if (core::iszero(det, DBL_MIN)) - return false; - - double invDet = 1.0 / det; - - _out.rows[0][0] = a11 * invDet; - _out.rows[1][0] = a12 * invDet; - _out.rows[2][0] = a13 * invDet; - _out.rows[3][0] = a14 * invDet; - - _out.rows[0][1] = -(b * kp_lo - c * jp_ln + d * jo_kn) * invDet; - _out.rows[1][1] = +(a * kp_lo - c * ip_lm + d * io_km) * invDet; - _out.rows[2][1] = -(a * jp_ln - b * ip_lm + d * in_jm) * invDet; - _out.rows[3][1] = +(a * jo_kn - b * io_km + c * in_jm) * invDet; - - double gp_ho = g * p - h * o; - double fp_hn = f * p - h * n; - double fo_gn = f * o - g * n; - double ep_hm = e * p - h * m; - double eo_gm = e * o - g * m; - double en_fm = e * n - f * m; - - _out.rows[0][2] = +(b * gp_ho - c * fp_hn + d * fo_gn) * invDet; - _out.rows[1][2] = -(a * gp_ho - c * ep_hm + d * eo_gm) * invDet; - _out.rows[2][2] = +(a * fp_hn - b * ep_hm + d * en_fm) * invDet; - _out.rows[3][2] = -(a * fo_gn - b * eo_gm + c * en_fm) * invDet; - - double gl_hk = g * l - h * k; - double fl_hj = f * l - h * j; - double fk_gj = f * k - g * j; - double el_hi = e * l - h * i; - double ek_gi = e * k - g * i; - double ej_fi = e * j - f * i; - - _out.rows[0][3] = -(b * gl_hk - c * fl_hj + d * fk_gj) * invDet; - _out.rows[1][3] = +(a * gl_hk - c * el_hi + d * ek_gi) * invDet; - _out.rows[2][3] = -(a * fl_hj - b * el_hi + d * ej_fi) * invDet; - _out.rows[3][3] = +(a * fk_gj - b * ek_gi + c * ej_fi) * invDet; - - return true; - } - else - { - auto mat2mul = [](vectorSIMDf _A, vectorSIMDf _B) - { - return _A*_B.xwxw()+_A.yxwz()*_B.zyzy(); - }; - auto mat2adjmul = [](vectorSIMDf _A, vectorSIMDf _B) - { - return _A.wwxx()*_B-_A.yyzz()*_B.zwxy(); - }; - auto mat2muladj = [](vectorSIMDf _A, vectorSIMDf _B) - { - return _A*_B.wxwx()-_A.yxwz()*_B.zyzy(); - }; - - vectorSIMDf A = _mm_movelh_ps(rows[0].getAsRegister(), rows[1].getAsRegister()); - vectorSIMDf B = _mm_movehl_ps(rows[1].getAsRegister(), rows[0].getAsRegister()); - vectorSIMDf C = _mm_movelh_ps(rows[2].getAsRegister(), rows[3].getAsRegister()); - vectorSIMDf D = _mm_movehl_ps(rows[3].getAsRegister(), rows[2].getAsRegister()); - - vectorSIMDf allDets = vectorSIMDf(_mm_shuffle_ps(rows[0].getAsRegister(),rows[2].getAsRegister(),_MM_SHUFFLE(2,0,2,0)))* - vectorSIMDf(_mm_shuffle_ps(rows[1].getAsRegister(),rows[3].getAsRegister(),_MM_SHUFFLE(3,1,3,1))) - - - vectorSIMDf(_mm_shuffle_ps(rows[0].getAsRegister(),rows[2].getAsRegister(),_MM_SHUFFLE(3,1,3,1)))* - vectorSIMDf(_mm_shuffle_ps(rows[1].getAsRegister(),rows[3].getAsRegister(),_MM_SHUFFLE(2,0,2,0))); - - auto detA = allDets.xxxx(); - auto detB = allDets.yyyy(); - auto detC = allDets.zzzz(); - auto detD = allDets.wwww(); - - // https://lxjk.github.io/2017/09/03/Fast-4x4-Matrix-Inverse-with-SSE-SIMD-Explained.html - auto D_C = mat2adjmul(D, C); - // A#B - auto A_B = mat2adjmul(A, B); - // X# = |D|A - B(D#C) - auto X_ = detD*A - mat2mul(B, D_C); - // W# = |A|D - C(A#B) - auto W_ = detA*D - mat2mul(C, A_B); - - // |M| = |A|*|D| + ... (continue later) - auto detM = detA*detD; - - // Y# = |B|C - D(A#B)# - auto Y_ = detB*C - mat2muladj(D, A_B); - // Z# = |C|B - A(D#C)# - auto Z_ = detC*B - mat2muladj(A, D_C); - - // |M| = |A|*|D| + |B|*|C| ... (continue later) - detM += detB*detC; - - // tr((A#B)(D#C)) - __m128 tr = (A_B*D_C.xzyw()).getAsRegister(); - tr = _mm_hadd_ps(tr, tr); - tr = _mm_hadd_ps(tr, tr); - // |M| = |A|*|D| + |B|*|C| - tr((A#B)(D#C) - detM -= tr; - - if (core::iszero(detM.x, FLT_MIN)) - return false; - - vectorSIMDf rDetM; - - // (1/|M|, -1/|M|, -1/|M|, 1/|M|) - if constexpr (precision == E_MATRIX_INVERSE_PRECISION::EMIP_FAST_RECIPROCAL) - rDetM = vectorSIMDf(1.f, -1.f, -1.f, 1.f)*core::reciprocal(detM); - else if constexpr (precision == E_MATRIX_INVERSE_PRECISION::EMIP_32BIT) - rDetM = vectorSIMDf(1.f, -1.f, -1.f, 1.f).preciseDivision(detM); - - X_ *= rDetM; - Y_ *= rDetM; - Z_ *= rDetM; - W_ *= rDetM; - - // apply adjugate and store, here we combine adjugate shuffle and store shuffle - _out.rows[0] = _mm_shuffle_ps(X_.getAsRegister(), Y_.getAsRegister(), _MM_SHUFFLE(1, 3, 1, 3)); - _out.rows[1] = _mm_shuffle_ps(X_.getAsRegister(), Y_.getAsRegister(), _MM_SHUFFLE(0, 2, 0, 2)); - _out.rows[2] = _mm_shuffle_ps(Z_.getAsRegister(), W_.getAsRegister(), _MM_SHUFFLE(1, 3, 1, 3)); - _out.rows[3] = _mm_shuffle_ps(Z_.getAsRegister(), W_.getAsRegister(), _MM_SHUFFLE(0, 2, 0, 2)); - - return true; - } - } - - inline vectorSIMDf sub3x3TransformVect(const vectorSIMDf& _in) const; - - inline void transformVect(vectorSIMDf& _out, const vectorSIMDf& _in) const; - inline void transformVect(vectorSIMDf& _vector) const - { - transformVect(_vector, _vector); - } - - inline void translateVect(vectorSIMDf& _vect) const - { - _vect += getTranslation(); - } - - bool isBoxInFrustum(const aabbox3d& bbox); - - bool perspectiveTransformVect(core::vectorSIMDf& inOutVec) - { - transformVect(inOutVec); - const bool inFront = inOutVec[3] > 0.f; - inOutVec /= inOutVec.wwww(); - return inFront; - } - - core::vector2di fragCoordTransformVect(const core::vectorSIMDf& _in, const core::dimension2du& viewportDimensions) - { - core::vectorSIMDf pos(_in); - pos.w = 1.f; - if (perspectiveTransformVect(pos)) - core::vector2di(-0x80000000, -0x80000000); - - pos[0] *= 0.5f; - pos[1] *= 0.5f; - pos[0] += 0.5f; - pos[1] += 0.5f; - - return core::vector2di(pos[0] * float(viewportDimensions.Width), pos[1] * float(viewportDimensions.Height)); - } - - static inline matrix4SIMD buildProjectionMatrixPerspectiveFovRH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar); - static inline matrix4SIMD buildProjectionMatrixPerspectiveFovLH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar); - - static inline matrix4SIMD buildProjectionMatrixOrthoRH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar); - static inline matrix4SIMD buildProjectionMatrixOrthoLH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar); - - //! Access by row - inline const vectorSIMDf& operator[](size_t _rown) const { return rows[_rown]; } - //! Access by row - inline vectorSIMDf& operator[](size_t _rown) { return rows[_rown]; } - - private: - //! TODO: implement a dvec<2> - inline __m128d halfRowAsDouble(size_t _n, bool _firstHalf) const; - static inline __m128d concat64_helper(const __m128d& _a0, const __m128d& _a1, const matrix4SIMD& _mtx, bool _firstHalf); -}; - -inline matrix4SIMD operator*(float _scalar, const matrix4SIMD& _mtx) -{ - return _mtx * _scalar; -} - -inline matrix4SIMD concatenateBFollowedByA(const matrix4SIMD& _a, const matrix4SIMD& _b) -{ - return matrix4SIMD::concatenateBFollowedByA(_a, _b); -} -/* -inline matrix4SIMD concatenateBFollowedByAPrecisely(const matrix4SIMD& _a, const matrix4SIMD& _b) -{ - return matrix4SIMD::concatenateBFollowedByAPrecisely(_a, _b); -} -*/ - - -}} // nbl::core - -#endif diff --git a/include/matrix4SIMD_impl.h b/include/matrix4SIMD_impl.h deleted file mode 100644 index 02484e7a4c..0000000000 --- a/include/matrix4SIMD_impl.h +++ /dev/null @@ -1,299 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef __NBL_MATRIX4SIMD_IMPL_H_INCLUDED__ -#define __NBL_MATRIX4SIMD_IMPL_H_INCLUDED__ - -#include "matrix4SIMD.h" -#include "nbl/core/math/glslFunctions.tcc" -#include "aabbox3d.h" - -namespace nbl -{ -namespace core -{ - - -inline bool matrix4SIMD::operator!=(const matrix4SIMD& _other) const -{ - for (size_t i = 0u; i < VectorCount; ++i) - if ((rows[i] != _other.rows[i]).any()) - return true; - return false; -} - -inline matrix4SIMD& matrix4SIMD::operator+=(const matrix4SIMD& _other) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] += _other.rows[i]; - return *this; -} - -inline matrix4SIMD& matrix4SIMD::operator-=(const matrix4SIMD& _other) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] -= _other.rows[i]; - return *this; -} - -inline matrix4SIMD& matrix4SIMD::operator*=(float _scalar) -{ - for (size_t i = 0u; i < VectorCount; ++i) - rows[i] *= _scalar; - return *this; -} - -inline bool matrix4SIMD::isIdentity(float _tolerance) const -{ - return core::equals(*this, matrix4SIMD(), core::ROUNDING_ERROR()); -} - -#ifdef __NBL_COMPILE_WITH_SSE3 -#define BROADCAST32(fpx) _MM_SHUFFLE(fpx, fpx, fpx, fpx) -#define BUILD_MASKF(_x_, _y_, _z_, _w_) _mm_setr_epi32(_x_*0xffffffff, _y_*0xffffffff, _z_*0xffffffff, _w_*0xffffffff) -inline matrix4SIMD matrix4SIMD::concatenateBFollowedByA(const matrix4SIMD& _a, const matrix4SIMD& _b) -{ - auto calcRow = [](const __m128& _row, const matrix4SIMD& _mtx) - { - __m128 r0 = _mtx.rows[0].getAsRegister(); - __m128 r1 = _mtx.rows[1].getAsRegister(); - __m128 r2 = _mtx.rows[2].getAsRegister(); - __m128 r3 = _mtx.rows[3].getAsRegister(); - - __m128 res; - res = _mm_mul_ps(_mm_shuffle_ps(_row, _row, BROADCAST32(0)), r0); - res = _mm_add_ps(res, _mm_mul_ps(_mm_shuffle_ps(_row, _row, BROADCAST32(1)), r1)); - res = _mm_add_ps(res, _mm_mul_ps(_mm_shuffle_ps(_row, _row, BROADCAST32(2)), r2)); - res = _mm_add_ps(res, _mm_mul_ps(_mm_shuffle_ps(_row, _row, BROADCAST32(3)), r3)); - return res; - }; - - matrix4SIMD r; - for (size_t i = 0u; i < 4u; ++i) - r.rows[i] = calcRow(_a.rows[i].getAsRegister(), _b); - - return r; -} -inline matrix4SIMD matrix4SIMD::concatenateBFollowedByAPrecisely(const matrix4SIMD& _a, const matrix4SIMD& _b) -{ - matrix4SIMD out; - - __m128i mask0011 = BUILD_MASKF(0, 0, 1, 1); - __m128 second; - - { - __m128d r00 = _a.halfRowAsDouble(0u, true); - __m128d r01 = _a.halfRowAsDouble(0u, false); - second = _mm_cvtpd_ps(concat64_helper(r00, r01, _b, false)); - out.rows[0] = vectorSIMDf(_mm_cvtpd_ps(concat64_helper(r00, r01, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - } - - { - __m128d r10 = _a.halfRowAsDouble(1u, true); - __m128d r11 = _a.halfRowAsDouble(1u, false); - second = _mm_cvtpd_ps(concat64_helper(r10, r11, _b, false)); - out.rows[1] = vectorSIMDf(_mm_cvtpd_ps(concat64_helper(r10, r11, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - } - - { - __m128d r20 = _a.halfRowAsDouble(2u, true); - __m128d r21 = _a.halfRowAsDouble(2u, false); - second = _mm_cvtpd_ps(concat64_helper(r20, r21, _b, false)); - out.rows[2] = vectorSIMDf(_mm_cvtpd_ps(concat64_helper(r20, r21, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - } - - { - __m128d r30 = _a.halfRowAsDouble(3u, true); - __m128d r31 = _a.halfRowAsDouble(3u, false); - second = _mm_cvtpd_ps(concat64_helper(r30, r31, _b, false)); - out.rows[3] = vectorSIMDf(_mm_cvtpd_ps(concat64_helper(r30, r31, _b, true))) | _mm_castps_si128((vectorSIMDf(_mm_movelh_ps(second, second)) & mask0011).getAsRegister()); - } - - return out; -} - -inline matrix4SIMD& matrix4SIMD::setScale(const core::vectorSIMDf& _scale) -{ - const __m128i mask0001 = BUILD_MASKF(0, 0, 0, 1); - - rows[0] = (_scale & BUILD_MASKF(1, 0, 0, 0)) | _mm_castps_si128((rows[0] & mask0001).getAsRegister()); - rows[1] = (_scale & BUILD_MASKF(0, 1, 0, 0)) | _mm_castps_si128((rows[1] & mask0001).getAsRegister()); - rows[2] = (_scale & BUILD_MASKF(0, 0, 1, 0)) | _mm_castps_si128((rows[2] & mask0001).getAsRegister()); - rows[3] = vectorSIMDf(0.f, 0.f, 0.f, 1.f); - - return *this; -} - -//! Returns last column of the matrix. -inline vectorSIMDf matrix4SIMD::getTranslation() const -{ - __m128 tmp1 = _mm_unpackhi_ps(rows[0].getAsRegister(), rows[1].getAsRegister()); // (0z,1z,0w,1w) - __m128 tmp2 = _mm_unpackhi_ps(rows[2].getAsRegister(), rows[3].getAsRegister()); // (2z,3z,2w,3w) - __m128 col3 = _mm_movehl_ps(tmp1, tmp2);// (0w,1w,2w,3w) - - return col3; -} -//! Returns translation part of the matrix (w component is always 0). -inline vectorSIMDf matrix4SIMD::getTranslation3D() const -{ - __m128 tmp1 = _mm_unpackhi_ps(rows[0].getAsRegister(), rows[1].getAsRegister()); // (0z,1z,0w,1w) - __m128 tmp2 = _mm_unpackhi_ps(rows[2].getAsRegister(), _mm_setzero_ps()); // (2z,0,2w,0) - __m128 transl = _mm_movehl_ps(tmp1, tmp2);// (0w,1w,2w,0) - - return transl; -} - -inline vectorSIMDf matrix4SIMD::sub3x3TransformVect(const vectorSIMDf& _in) const -{ - matrix4SIMD cp{*this}; - vectorSIMDf out = _in & BUILD_MASKF(1, 1, 1, 0); - transformVect(out); - return out; -} - -inline void matrix4SIMD::transformVect(vectorSIMDf& _out, const vectorSIMDf& _in) const -{ - vectorSIMDf r[4]; - for (size_t i = 0u; i < VectorCount; ++i) - r[i] = rows[i] * _in; - - _out = _mm_hadd_ps( - _mm_hadd_ps(r[0].getAsRegister(), r[1].getAsRegister()), - _mm_hadd_ps(r[2].getAsRegister(), r[3].getAsRegister()) - ); -} - -inline matrix4SIMD matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) -{ - const float h = core::reciprocal(tanf(fieldOfViewRadians*0.5f)); - _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero - const float w = h / aspectRatio; - - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix4SIMD m; - m.rows[0] = vectorSIMDf(w, 0.f, 0.f, 0.f); - m.rows[1] = vectorSIMDf(0.f, -h, 0.f, 0.f); - m.rows[2] = vectorSIMDf(0.f, 0.f, -zFar/(zFar-zNear), -zNear*zFar/(zFar-zNear)); - m.rows[3] = vectorSIMDf(0.f, 0.f, -1.f, 0.f); - - return m; -} -inline matrix4SIMD matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) -{ - const float h = core::reciprocal(tanf(fieldOfViewRadians*0.5f)); - _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero - const float w = h / aspectRatio; - - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix4SIMD m; - m.rows[0] = vectorSIMDf(w, 0.f, 0.f, 0.f); - m.rows[1] = vectorSIMDf(0.f, -h, 0.f, 0.f); - m.rows[2] = vectorSIMDf(0.f, 0.f, zFar/(zFar-zNear), -zNear*zFar/(zFar-zNear)); - m.rows[3] = vectorSIMDf(0.f, 0.f, 1.f, 0.f); - - return m; -} - -inline matrix4SIMD matrix4SIMD::buildProjectionMatrixOrthoRH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) -{ - _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix4SIMD m; - m.rows[0] = vectorSIMDf(2.f/widthOfViewVolume, 0.f, 0.f, 0.f); - m.rows[1] = vectorSIMDf(0.f, -2.f/heightOfViewVolume, 0.f, 0.f); - m.rows[2] = vectorSIMDf(0.f, 0.f, -1.f/(zFar-zNear), -zNear/(zFar-zNear)); - m.rows[3] = vectorSIMDf(0.f, 0.f, 0.f, 1.f); - - return m; -} -inline matrix4SIMD matrix4SIMD::buildProjectionMatrixOrthoLH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) -{ - _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero - _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero - - matrix4SIMD m; - m.rows[0] = vectorSIMDf(2.f/widthOfViewVolume, 0.f, 0.f, 0.f); - m.rows[1] = vectorSIMDf(0.f, -2.f/heightOfViewVolume, 0.f, 0.f); - m.rows[2] = vectorSIMDf(0.f, 0.f, 1.f/(zFar-zNear), -zNear/(zFar-zNear)); - m.rows[3] = vectorSIMDf(0.f, 0.f, 0.f, 1.f); - - return m; -} - - - -inline __m128d matrix4SIMD::halfRowAsDouble(size_t _n, bool _firstHalf) const -{ - return _mm_cvtps_pd(_firstHalf ? rows[_n].xyxx().getAsRegister() : rows[_n].zwxx().getAsRegister()); -} -inline __m128d matrix4SIMD::concat64_helper(const __m128d& _a0, const __m128d& _a1, const matrix4SIMD& _mtx, bool _firstHalf) -{ - __m128d r0 = _mtx.halfRowAsDouble(0u, _firstHalf); - __m128d r1 = _mtx.halfRowAsDouble(1u, _firstHalf); - __m128d r2 = _mtx.halfRowAsDouble(2u, _firstHalf); - __m128d r3 = _mtx.halfRowAsDouble(3u, _firstHalf); - - //const __m128d mask01 = _mm_castsi128_pd(_mm_setr_epi32(0, 0, 0xffffffff, 0xffffffff)); - - __m128d res; - res = _mm_mul_pd(_mm_shuffle_pd(_a0, _a0, 0), r0); - res = _mm_add_pd(res, _mm_mul_pd(_mm_shuffle_pd(_a0, _a0, 3/*0b11*/), r1)); - res = _mm_add_pd(res, _mm_mul_pd(_mm_shuffle_pd(_a1, _a1, 0), r2)); - res = _mm_add_pd(res, _mm_mul_pd(_mm_shuffle_pd(_a1, _a1, 3/*0b11*/), r3)); - return res; -} - -#undef BUILD_MASKF -#undef BROADCAST32 -#else -#error "no implementation" -#endif - -inline bool matrix4SIMD::isBoxInFrustum(const aabbox3d& bbox) -{ - vectorSIMDf MinEdge, MaxEdge; - MinEdge.set(bbox.MinEdge); - MaxEdge.set(bbox.MaxEdge); - MinEdge.w = 1.f; - MaxEdge.w = 1.f; - - - auto getClosestDP = [&MinEdge,&MaxEdge](const vectorSIMDf& toDot) -> float - { - return dot(mix(MaxEdge,MinEdge,toDot struct Instance final { @@ -221,18 +221,18 @@ class ITopLevelAccelerationStructure : public IDescriptor, public IAccelerationS template struct StaticInstance final { - core::matrix3x4SIMD transform = core::matrix3x4SIMD(); + hlsl::float32_t3x4 transform = hlsl::float32_t3x4(); Instance base = {}; }; template struct MatrixMotionInstance final { - core::matrix3x4SIMD transform[2] = {core::matrix3x4SIMD(),core::matrix3x4SIMD()}; + hlsl::float32_t3x4 transform[2] = {hlsl::float32_t3x4(),hlsl::float32_t3x4()}; Instance base = {}; }; struct SRT { - // TODO: some operators to convert back and forth from `core::matrix3x4SIMD + // TODO: some operators to convert back and forth from `hlsl::float32_t3x4 float sx; float a; diff --git a/include/nbl/asset/IAnimationLibrary.h b/include/nbl/asset/IAnimationLibrary.h index 9665349103..d650cb25d9 100644 --- a/include/nbl/asset/IAnimationLibrary.h +++ b/include/nbl/asset/IAnimationLibrary.h @@ -34,7 +34,7 @@ class IAnimationLibrary : public virtual core::IReferenceCounted translation[2] = translation[1] = translation[0] = 0.f; quat = core::vectorSIMDu32(128u,128u,128u,255u); // should be (0,0,0,1) encoded } - Keyframe(const core::vectorSIMDf& _scale, const core::quaternion& _quat, const CQuantQuaternionCache* quantCache, const core::vectorSIMDf& _translation) + Keyframe(const core::vectorSIMDf& _scale, const hlsl::quaternion& _quat, const CQuantQuaternionCache* quantCache, const core::vectorSIMDf& _translation) { std::copy(_translation.pointer,_translation.pointer+3,translation); quat = quantCache->template quantize(_quat); @@ -42,13 +42,13 @@ class IAnimationLibrary : public virtual core::IReferenceCounted //scale = ; } - inline core::quaternion getRotation() const + inline hlsl::quaternion getRotation() const { const void* _pix[4] = {&quat,nullptr,nullptr,nullptr}; double out[4]; decodePixels(_pix,out,0u,0u); auto q = core::normalize(core::vectorSIMDf(out[0],out[1],out[2],out[3])); - return reinterpret_cast(&q)[0]; + return reinterpret_cast*>(&q)[0]; } inline core::vectorSIMDf getScale() const diff --git a/include/nbl/asset/ICPUSkeleton.h b/include/nbl/asset/ICPUSkeleton.h index 1049798268..7418e46ce3 100644 --- a/include/nbl/asset/ICPUSkeleton.h +++ b/include/nbl/asset/ICPUSkeleton.h @@ -42,15 +42,15 @@ class ICPUSkeleton final : public ISkeleton, public IAsset } //! - inline const core::matrix3x4SIMD& getDefaultTransformMatrix(base_t::joint_id_t jointID) const + inline const hlsl::float32_t3x4& getDefaultTransformMatrix(base_t::joint_id_t jointID) const { const uint8_t* ptr = reinterpret_cast(m_defaultTransforms.buffer->getPointer()); - return reinterpret_cast(ptr+m_defaultTransforms.offset)[jointID]; + return reinterpret_cast(ptr+m_defaultTransforms.offset)[jointID]; } - inline core::matrix3x4SIMD& getDefaultTransformMatrix(base_t::joint_id_t jointID) + inline hlsl::float32_t3x4& getDefaultTransformMatrix(base_t::joint_id_t jointID) { assert(isMutable()); - return const_cast(const_cast(this)->getDefaultTransformMatrix(jointID)); + return const_cast(const_cast(this)->getDefaultTransformMatrix(jointID)); } //! diff --git a/include/nbl/asset/ISkeleton.h b/include/nbl/asset/ISkeleton.h index 7960ca4eef..03ba3af4ea 100644 --- a/include/nbl/asset/ISkeleton.h +++ b/include/nbl/asset/ISkeleton.h @@ -62,7 +62,7 @@ class ISkeleton : public virtual core::IReferenceCounted return; assert(m_parentJointIDs.buffer->getSize()>=m_parentJointIDs.offset+sizeof(joint_id_t)*m_jointCount); - assert(m_defaultTransforms.buffer->getSize()>=m_defaultTransforms.offset+sizeof(core::matrix3x4SIMD)*m_jointCount); + assert(m_defaultTransforms.buffer->getSize()>=m_defaultTransforms.offset+sizeof(hlsl::float32_t3x4)*m_jointCount); } virtual ~ISkeleton() { diff --git a/include/nbl/asset/utils/CQuantQuaternionCache.h b/include/nbl/asset/utils/CQuantQuaternionCache.h index 8e46dffb0a..a51549d24d 100644 --- a/include/nbl/asset/utils/CQuantQuaternionCache.h +++ b/include/nbl/asset/utils/CQuantQuaternionCache.h @@ -60,7 +60,7 @@ class CQuantQuaternionCache : public CDirQuantCacheBase - value_type_t quantize(const core::quaternion& quat) + value_type_t quantize(const hlsl::quaternion& quat) { return Base::quantize<4u,CacheFormat>(reinterpret_cast(quat)); } diff --git a/include/nbl/builtin/hlsl/camera/view_matrix.hlsl b/include/nbl/builtin/hlsl/camera/view_matrix.hlsl new file mode 100644 index 0000000000..7752d9b6eb --- /dev/null +++ b/include/nbl/builtin/hlsl/camera/view_matrix.hlsl @@ -0,0 +1,51 @@ +#ifndef _NBL_BUILTIN_HLSL_CAMERA_VIEW_MATRIX_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CAMERA_VIEW_MATRIX_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ + +// /Arek: glm:: for normalize till dot product is fixed (ambiguity with glm namespace + linker issues) +template +inline matrix buildCameraLookAtMatrixLH( + const vector& position, + const vector& target, + const vector& upVector) +{ + const vector zaxis = hlsl::normalize(target - position); + const vector xaxis = hlsl::normalize(hlsl::cross(upVector, zaxis)); + const vector yaxis = hlsl::cross(zaxis, xaxis); + + matrix r; + r[0] = vector(xaxis, -hlsl::dot(xaxis, position)); + r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); + r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); + + return r; +} + +template +inline matrix buildCameraLookAtMatrixRH( + const vector& position, + const vector& target, + const vector& upVector) +{ + const vector zaxis = hlsl::normalize(position - target); + const vector xaxis = hlsl::normalize(hlsl::cross(upVector, zaxis)); + const vector yaxis = hlsl::cross(zaxis, xaxis); + + matrix r; + r[0] = vector(xaxis, -hlsl::dot(xaxis, position)); + r[1] = vector(yaxis, -hlsl::dot(yaxis, position)); + r[2] = vector(zaxis, -hlsl::dot(zaxis, position)); + + return r; +} + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/cpp_compat/unroll.hlsl b/include/nbl/builtin/hlsl/cpp_compat/unroll.hlsl new file mode 100644 index 0000000000..36bcd944c6 --- /dev/null +++ b/include/nbl/builtin/hlsl/cpp_compat/unroll.hlsl @@ -0,0 +1,12 @@ +#ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_UNROLL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CPP_COMPAT_UNROLL_INCLUDED_ + +#ifdef __HLSL_VERSION +#define NBL_UNROLL [unroll] +#define NBL_UNROLL_LIMITED(LIMIT) [unroll(LIMIT)] +#else +#define NBL_UNROLL // can't be bothered / TODO +#define NBL_UNROLL_LIMITED(LIMIT) +#endif + +#endif diff --git a/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl b/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl new file mode 100644 index 0000000000..9e42747b28 --- /dev/null +++ b/include/nbl/builtin/hlsl/math/quaternion/quaternion.hlsl @@ -0,0 +1,104 @@ +// Copyright (C) 2019 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" +// For conditions of distribution and use, see copyright notice in nabla.h +// See the original file in irrlicht source for authors + +#ifndef _NBL_BUILTIN_HLSL_MATH_QUATERNION_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_QUATERNION_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ + +//! Quaternion class for representing rotations. +/** It provides cheap combinations and avoids gimbal locks. +Also useful for interpolations. */ + +template +struct quaternion +{ + // i*data[0] + j*data[1] + k*data[2] + data[3] + using vec_t = vector; + vector data; + + //! creates identity quaternion + static inline quaternion create() + { + quaternion q; + q.data = vector(0.0f, 0.0f, 0.0f, 1.0f); + + return q; + } + + static inline quaternion create(float_t x, float_t y, float_t z, float_t w) + { + quaternion q; + q.data = vector(x, y, z, w); + + return q; + } + + static inline quaternion create(NBL_CONST_REF_ARG(quaternion) other) + { + return other; + } + + static inline quaternion create(float_t pitch, float_t yaw, float_t roll) + { + float angle; + + angle = roll * 0.5f; + const float sr = sinf(angle); + const float cr = cosf(angle); + + angle = pitch * 0.5f; + const float sp = sinf(angle); + const float cp = cos(angle); + + angle = yaw * 0.5f; + const float sy = sinf(angle); + const float cy = cosf(angle); + + const float cpcy = cp * cy; + const float spcy = sp * cy; + const float cpsy = cp * sy; + const float spsy = sp * sy; + + quaternion output; + output.data[3] = cr * cp * cy + sr * sp * sy; // w + output.data[0] = cr * sp * cy + sr * cp * sy; // x + output.data[1] = cr * cp * sy - sr * sp * cy; // y + output.data[2] = sr * cp * cy - cr * sp * sy; // z + + return output; + } + + // TODO: + //explicit quaternion(NBL_CONST_REF_ARG(float32_t3x4) m) {} + + inline quaternion operator*(float_t scalar) + { + quaternion output; + output.data = data * scalar; + return output; + } + + inline quaternion operator*(NBL_CONST_REF_ARG(quaternion) other) + { + return quaternion::create( + data.w * other.data.w - data.x * other.x - data.y * other.data.y - data.z * other.data.z, + data.w * other.data.x + data.x * other.w + data.y * other.data.z - data.z * other.data.y, + data.w * other.data.y - data.x * other.z + data.y * other.data.w + data.z * other.data.x, + data.w * other.data.z + data.x * other.y - data.y * other.data.x + data.z * other.data.w + ); + } +}; + +} // end namespace core +} // nbl + +#endif + diff --git a/include/nbl/builtin/hlsl/math/quaternion/quaternion_impl.hlsl b/include/nbl/builtin/hlsl/math/quaternion/quaternion_impl.hlsl new file mode 100644 index 0000000000..d00d9ce2c4 --- /dev/null +++ b/include/nbl/builtin/hlsl/math/quaternion/quaternion_impl.hlsl @@ -0,0 +1,25 @@ +// Copyright (C) 2019 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine" and was originally part of the "Irrlicht Engine" +// For conditions of distribution and use, see copyright notice in nabla.h +// See the original file in irrlicht source for authors + +#ifndef _NBL_BUILTIN_HLSL_MATH_QUATERNION_IMPL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_QUATERNION_IMPL_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ + +namespace quaternion_impl +{ + +} + +} // end namespace core +} // nbl + +#endif + diff --git a/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl new file mode 100644 index 0000000000..d1a628ccc0 --- /dev/null +++ b/include/nbl/builtin/hlsl/matrix_utils/transformation_matrix_utils.hlsl @@ -0,0 +1,203 @@ +#ifndef _NBL_BUILTIN_HLSL_MATRIX_UTILS_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATRIX_UTILS_TRANSFORMATION_MATRIX_UTILS_INCLUDED_ +#include +// TODO: remove this header when deleting vectorSIMDf.hlsl +#ifndef __HLSL_VERSION +#include +#include "vectorSIMD.h" +#endif +#include +#include "nbl/builtin/hlsl/cpp_compat/unroll.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +template +MatT diagonal(float diagonal = 1) +{ + MatT output; + + NBL_UNROLL_LIMITED(4) + for (uint32_t i = 0; i < matrix_traits::RowCount; ++i) + NBL_UNROLL_LIMITED(4) + for (uint32_t j = 0; j < matrix_traits::ColumnCount; ++j) + output[i][j] = 0; + + NBL_UNROLL_LIMITED(4) + for (uint32_t diag = 0; diag < matrix_traits::RowCount; ++diag) + output[diag][diag] = diagonal; + + return output; +} + +template +MatT identity() +{ + // TODO + // static_assert(MatT::Square); + return diagonal(1); +} + +// TODO: this is temporary function, delete when removing vectorSIMD +#ifndef __HLSL_VERSION +template +inline core::vectorSIMDf transformVector(NBL_CONST_REF_ARG(matrix) mat, NBL_CONST_REF_ARG(core::vectorSIMDf) vec) +{ + core::vectorSIMDf output; + float32_t4 tmp; + for (int i = 0; i < 4; ++i) // rather do that that reinterpret_cast for safety + tmp[i] = output[i]; + + for (int i = 0; i < 4; ++i) + output[i] = hlsl::dot(mat[i], tmp); + + return output; +} +#endif +template +inline matrix getMatrix3x4As4x4(NBL_CONST_REF_ARG(matrix) mat) +{ + matrix output; + for (int i = 0; i < 3; ++i) + output[i] = mat[i]; + output[3] = float32_t4(0.0f, 0.0f, 0.0f, 1.0f); + + return output; +} + +template +inline matrix getSub3x3(NBL_CONST_REF_ARG(matrix) mat) +{ + return matrix(mat); +} + +template +inline matrix getAs64BitPrecisionMatrix(NBL_CONST_REF_ARG(matrix) mat) +{ + matrix output; + for (int i = 0; i < N; ++i) + output[i] = mat[i]; + + return output; +} + +namespace transformation_matrix_utils_impl +{ + // This function calculates determinant using the scalar triple product. + template + inline T determinant_helper(NBL_CONST_REF_ARG(matrix) mat, NBL_REF_ARG(vector) r1crossr2) + { + r1crossr2 = hlsl::cross(mat[1], mat[2]); + return hlsl::dot(mat[0], r1crossr2); + } +} + +//! returs adjugate of the cofactor (sub 3x3) matrix +template +inline matrix getSub3x3TransposeCofactors(NBL_CONST_REF_ARG(matrix) mat) +{ + static_assert(N >= 3 && M >= 3); + + matrix output; + vector row0 = vector(mat[0]); + vector row1 = vector(mat[1]); + vector row2 = vector(mat[2]); + output[0] = hlsl::cross(row1, row2); + output[1] = hlsl::cross(row2, row0); + output[2] = hlsl::cross(row0, row1); + + output[0] = hlsl::cross(row0, row1); + + return output; +} + +template +inline bool getSub3x3InverseTranspose(NBL_CONST_REF_ARG(matrix) matIn, NBL_CONST_REF_ARG(matrix) matOut) +{ + matrix matIn3x3 = getSub3x3(matIn); + vector r1crossr2; + T d = transformation_matrix_utils_impl::determinant_helper(matIn3x3, r1crossr2); + if (abs(d) <= FLT_MIN) + return false; + auto rcp = T(1.0f)/d; + + // matrix of cofactors * 1/det + matOut = getSub3x3TransposeCofactors(matIn3x3); + matOut[0] *= rcp; + matOut[1] *= rcp; + matOut[2] *= rcp; + + return true; +} + +// TODO: use portable_float when merged +//! multiplies matrices a and b, 3x4 matrices are treated as 4x4 matrices with 4th row set to (0, 0, 0 ,1) +template +inline matrix concatenateBFollowedByA(NBL_CONST_REF_ARG(matrix) a, NBL_CONST_REF_ARG(const matrix) b) +{ + // TODO + // static_assert(N == 3 || N == 4); + + const matrix a4x4 = getMatrix3x4As4x4(a); + const matrix b4x4 = getMatrix3x4As4x4(b); + return matrix(mul(a4x4, b4x4)); +} + +template +inline void setScale(NBL_REF_ARG(matrix) outMat, NBL_CONST_REF_ARG(vector) scale) +{ + // TODO + // static_assert(N == 3 || N == 4); + + outMat[0][0] = scale[0]; + outMat[1][1] = scale[1]; + outMat[2][2] = scale[2]; +} + +//! Replaces curent rocation and scale by rotation represented by quaternion `quat`, leaves 4th row and 4th colum unchanged +template +inline void setRotation(NBL_REF_ARG(matrix) outMat, NBL_CONST_REF_ARG(nbl::hlsl::quaternion) quat) +{ + // TODO + //static_assert(N == 3 || N == 4); + + outMat[0] = vector( + 1 - 2 * (quat.data.y * quat.data.y + quat.data.z * quat.data.z), + 2 * (quat.data.x * quat.data.y - quat.data.z * quat.data.w), + 2 * (quat.data.x * quat.data.z + quat.data.y * quat.data.w), + + outMat[0][3] + ); + + outMat[1] = vector( + 2 * (quat.data.x * quat.data.y + quat.data.z * quat.data.w), + 1 - 2 * (quat.data.x * quat.data.x + quat.data.z * quat.data.z), + 2 * (quat.data.y * quat.data.z - quat.data.x * quat.data.w), + outMat[1][3] + ); + + outMat[2] = vector( + 2 * (quat.data.x * quat.data.z - quat.data.y * quat.data.w), + 2 * (quat.data.y * quat.data.z + quat.data.x * quat.data.w), + 1 - 2 * (quat.data.x * quat.data.x + quat.data.y * quat.data.y), + outMat[2][3] + ); +} + +template +inline void setTranslation(NBL_REF_ARG(matrix) outMat, NBL_CONST_REF_ARG(vector) translation) +{ + // TODO + // static_assert(N == 3 || N == 4); + + outMat[0].w = translation.x; + outMat[1].w = translation.y; + outMat[2].w = translation.z; +} + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/projection/projection.hlsl b/include/nbl/builtin/hlsl/projection/projection.hlsl new file mode 100644 index 0000000000..caff793083 --- /dev/null +++ b/include/nbl/builtin/hlsl/projection/projection.hlsl @@ -0,0 +1,81 @@ +#ifndef _NBL_BUILTIN_HLSL_PROJECTION_PROJECTION_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PROJECTION_PROJECTION_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ +// TODO: use glm instead for c++ +template +inline matrix buildProjectionMatrixPerspectiveFovRH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) +{ + const float h = core::reciprocal(tanf(fieldOfViewRadians * 0.5f)); + _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero + const float w = h / aspectRatio; + + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero + + matrix m; + m[0] = vector(w, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -h, 0.f, 0.f); + m[2] = vector(0.f, 0.f, -zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); + m[3] = vector(0.f, 0.f, -1.f, 0.f); + + return m; +} +template +inline matrix buildProjectionMatrixPerspectiveFovLH(float fieldOfViewRadians, float aspectRatio, float zNear, float zFar) +{ + const float h = core::reciprocal(tanf(fieldOfViewRadians * 0.5f)); + _NBL_DEBUG_BREAK_IF(aspectRatio == 0.f); //division by zero + const float w = h / aspectRatio; + + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero + + matrix m; + m[0] = vector(w, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -h, 0.f, 0.f); + m[2] = vector(0.f, 0.f, zFar / (zFar - zNear), -zNear * zFar / (zFar - zNear)); + m[3] = vector(0.f, 0.f, 1.f, 0.f); + + return m; +} + +template +inline matrix buildProjectionMatrixOrthoRH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) +{ + _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero + + matrix m; + m[0] = vector(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); + m[2] = vector(0.f, 0.f, -1.f / (zFar - zNear), -zNear / (zFar - zNear)); + m[3] = vector(0.f, 0.f, 0.f, 1.f); + + return m; +} + +template +inline matrix buildProjectionMatrixOrthoLH(float widthOfViewVolume, float heightOfViewVolume, float zNear, float zFar) +{ + _NBL_DEBUG_BREAK_IF(widthOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(heightOfViewVolume == 0.f); //division by zero + _NBL_DEBUG_BREAK_IF(zNear == zFar); //division by zero + + matrix m; + m[0] = vector(2.f / widthOfViewVolume, 0.f, 0.f, 0.f); + m[1] = vector(0.f, -2.f / heightOfViewVolume, 0.f, 0.f); + m[2] = vector(0.f, 0.f, 1.f / (zFar - zNear), -zNear / (zFar - zNear)); + m[3] = vector(0.f, 0.f, 0.f, 1.f); + + return m; +} + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/vector_utils/vector_utils.hlsl b/include/nbl/builtin/hlsl/vector_utils/vector_utils.hlsl new file mode 100644 index 0000000000..e1fa9dd3a0 --- /dev/null +++ b/include/nbl/builtin/hlsl/vector_utils/vector_utils.hlsl @@ -0,0 +1,21 @@ +#ifndef _NBL_BUILTIN_HLSL_VECTOR_UTILS_VECTOR_UTILS_INCLUDED_ +#define _NBL_BUILTIN_HLSL_VECTOR_UTILS_VECTOR_UTILS_INCLUDED_ + +#include + +namespace nbl +{ +namespace hlsl +{ + +// TODO: why cant I NBL_CONST_REF_ARG(vector) +template +inline T lengthsquared(vector vec) +{ + return dot(vec, vec); +} + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/core/declarations.h b/include/nbl/core/declarations.h index 9aa708a793..466ea988aa 100644 --- a/include/nbl/core/declarations.h +++ b/include/nbl/core/declarations.h @@ -50,7 +50,6 @@ #include "nbl/core/math/colorutil.h" #include "nbl/core/math/rational.h" #include "nbl/core/math/plane3dSIMD.h" -#include "nbl/core/math/matrixutil.h" // memory #include "nbl/core/memory/memory.h" #include "nbl/core/memory/new_delete.h" diff --git a/include/nbl/core/definitions.h b/include/nbl/core/definitions.h index c08af6ad74..5913c2c8f2 100644 --- a/include/nbl/core/definitions.h +++ b/include/nbl/core/definitions.h @@ -15,8 +15,4 @@ #include "nbl/core/math/floatutil.tcc" #include "nbl/core/math/glslFunctions.tcc" -// implementations [deprecated] -#include "matrix3x4SIMD_impl.h" -#include "matrix4SIMD_impl.h" - #endif \ No newline at end of file diff --git a/include/nbl/core/math/floatutil.tcc b/include/nbl/core/math/floatutil.tcc index 71c8bd2da7..f20db5dec2 100644 --- a/include/nbl/core/math/floatutil.tcc +++ b/include/nbl/core/math/floatutil.tcc @@ -5,9 +5,8 @@ #ifndef __NBL_CORE_FLOAT_UTIL_TCC_INCLUDED__ #define __NBL_CORE_FLOAT_UTIL_TCC_INCLUDED__ - +#include "vectorSIMD.h" #include "nbl/core/math/floatutil.h" -#include "matrix4SIMD.h" namespace nbl { @@ -29,16 +28,6 @@ NBL_FORCE_INLINE vectorSIMDf ROUNDING_ERROR() { return vectorSIMDf(ROUNDING_ERROR()); } -template<> -NBL_FORCE_INLINE matrix3x4SIMD ROUNDING_ERROR() -{ - return matrix3x4SIMD(ROUNDING_ERROR(),ROUNDING_ERROR(),ROUNDING_ERROR()); -} -template<> -NBL_FORCE_INLINE matrix4SIMD ROUNDING_ERROR() -{ - return matrix4SIMD(ROUNDING_ERROR(),ROUNDING_ERROR(),ROUNDING_ERROR(),ROUNDING_ERROR()); -} template NBL_FORCE_INLINE T ROUNDING_ERROR() { diff --git a/include/nbl/core/math/glslFunctions.tcc b/include/nbl/core/math/glslFunctions.tcc index 205585965b..b8326b41d1 100644 --- a/include/nbl/core/math/glslFunctions.tcc +++ b/include/nbl/core/math/glslFunctions.tcc @@ -8,7 +8,6 @@ #include "nbl/core/declarations.h" #include "nbl/core/math/floatutil.tcc" -#include "matrix4SIMD.h" #include #include @@ -280,21 +279,6 @@ NBL_FORCE_INLINE vectorSIMDf cross(const vectorSIMDf& a, const vect #endif } -template<> -NBL_FORCE_INLINE matrix4SIMD transpose(const matrix4SIMD& m) -{ - core::matrix4SIMD retval; - __m128 a0 = m.rows[0].getAsRegister(), a1 = m.rows[1].getAsRegister(), a2 = m.rows[2].getAsRegister(), a3 = m.rows[3].getAsRegister(); - _MM_TRANSPOSE4_PS(a0, a1, a2, a3); - retval.rows[0] = a0; - retval.rows[1] = a1; - retval.rows[2] = a2; - retval.rows[3] = a3; - return retval; -} - - - template<> NBL_FORCE_INLINE bool equals(const vectorSIMDf& a, const vectorSIMDf& b, const vectorSIMDf& tolerance) { @@ -307,22 +291,6 @@ NBL_FORCE_INLINE bool equals(const core::vector3df& a, const core::vector3df& b, auto la = a-tolerance; return ha.X>=b.X&&ha.Y>=b.Y&&ha.Z>=b.Z && la.X<=b.X&&la.Y<=b.Y&&la.Z<=b.Z; } -template<> -NBL_FORCE_INLINE bool equals(const matrix4SIMD& a, const matrix4SIMD& b, const matrix4SIMD& tolerance) -{ - for (size_t i = 0u; i(a.rows[i], b.rows[i], tolerance.rows[i])) - return false; - return true; -} -template<> -NBL_FORCE_INLINE bool equals(const matrix3x4SIMD& a, const matrix3x4SIMD& b, const matrix3x4SIMD& tolerance) -{ - for (size_t i = 0u; i(a.rows[i], b.rows[i], tolerance[i])) - return false; - return true; -} template NBL_FORCE_INLINE bool equals(const T& a, const T& b, const T& tolerance) { diff --git a/include/nbl/core/math/matrixutil.h b/include/nbl/core/math/matrixutil.h deleted file mode 100644 index afe7955c9b..0000000000 --- a/include/nbl/core/math/matrixutil.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#ifndef _NBL_MATRIX_UTIL_H_INCLUDED_ -#define _NBL_MATRIX_UTIL_H_INCLUDED_ - -#include "matrix4SIMD.h" -#include "matrix3x4SIMD.h" - -namespace nbl::core -{ - - -//! TODO: OPTIMIZE THIS, DON'T PROMOTE THE MATRIX IF DON'T HAVE TO -inline matrix4SIMD concatenateBFollowedByA(const matrix4SIMD& _a, const matrix3x4SIMD& _b) -{ - return concatenateBFollowedByA(_a, matrix4SIMD(_b)); -} -/* -inline matrix4SIMD concatenateBFollowedByAPrecisely(const matrix4SIMD& _a, const matrix3x4SIMD& _b) -{ - return concatenateBFollowedByAPrecisely(_a, matrix4SIMD(_b)); -} -*/ - -} - -#endif diff --git a/include/nbl/core/math/plane3dSIMD.h b/include/nbl/core/math/plane3dSIMD.h index 891ed1300c..23099f0d61 100644 --- a/include/nbl/core/math/plane3dSIMD.h +++ b/include/nbl/core/math/plane3dSIMD.h @@ -3,11 +3,12 @@ // For conditions of distribution and use, see copyright notice in nabla.h // See the original file in irrlicht source for authors +#include "vectorSIMD.h" +#include + #ifndef __NBL_CORE_PLANE_3D_H_INCLUDED__ #define __NBL_CORE_PLANE_3D_H_INCLUDED__ -#include "matrix3x4SIMD.h" - namespace nbl { namespace core @@ -99,14 +100,19 @@ class plane3dSIMDf : private vectorSIMDf } //! - static inline plane3dSIMDf transform(const plane3dSIMDf& _in, const matrix3x4SIMD& _mat) + static inline plane3dSIMDf transform(const plane3dSIMDf& _in, const hlsl::float32_t3x4& _mat) { - matrix3x4SIMD inv; - _mat.getInverse(inv); + hlsl::float32_t4x4 inv = hlsl::getMatrix3x4As4x4(_mat); + hlsl::inverse(inv); vectorSIMDf normal(_in.getNormal()); // transform by inverse transpose - return plane3dSIMDf(inv.rows[0]*normal.xxxx()+inv.rows[1]*normal.yyyy()+inv.rows[2]*normal.zzzz()+(normal.wwww()&BUILD_MASKF(0,0,0,1))); + hlsl::float32_t4 planeEq = inv[0] * hlsl::float32_t4(normal.x) + inv[1] * hlsl::float32_t4(normal.y) + inv[2] * hlsl::float32_t4(normal.z) + (hlsl::float32_t4(0, 0, 0, normal.w)); + vectorSIMDf planeEqSIMD; + for (int i = 0; i < 4; ++i) + planeEqSIMD[i] = planeEq[i]; + + return plane3dSIMDf(planeEqSIMD); #undef BUILD_MASKF } diff --git a/include/nbl/ext/Bullet/BulletUtility.h b/include/nbl/ext/Bullet/BulletUtility.h index 507adbceda..450c20c50d 100644 --- a/include/nbl/ext/Bullet/BulletUtility.h +++ b/include/nbl/ext/Bullet/BulletUtility.h @@ -64,8 +64,8 @@ namespace Bullet3 return convert(vec); } - inline core::matrix3x4SIMD convertbtTransform(const btTransform &trans) { - core::matrix3x4SIMD mat; + inline hlsl::float32_t3x4 convertbtTransform(const btTransform &trans) { + hlsl::float32_t3x4 mat; for (uint32_t i = 0; i < 3u; ++i) { mat.rows[i] = frombtVec3(trans.getBasis().getRow(i)); @@ -75,7 +75,7 @@ namespace Bullet3 return mat; } - inline btTransform convertMatrixSIMD(const core::matrix3x4SIMD &mat) { + inline btTransform convertMatrixSIMD(const hlsl::float32_t3x4 &mat) { btTransform transform; //Calling makeSafe3D on rows erases translation so save it diff --git a/include/nbl/ext/Bullet/CPhysicsWorld.h b/include/nbl/ext/Bullet/CPhysicsWorld.h index d6529a2565..cfaf70d6d6 100644 --- a/include/nbl/ext/Bullet/CPhysicsWorld.h +++ b/include/nbl/ext/Bullet/CPhysicsWorld.h @@ -24,7 +24,7 @@ class CPhysicsWorld : public core::IReferenceCounted struct RigidBodyData { btCollisionShape *shape; - core::matrix3x4SIMD trans; + hlsl::float32_t3x4 trans; core::vectorSIMDf inertia; float mass; }; diff --git a/include/nbl/ext/DebugDraw/CDraw3DLine.h b/include/nbl/ext/DebugDraw/CDraw3DLine.h index 68cd64e9c1..86b874f9d1 100644 --- a/include/nbl/ext/DebugDraw/CDraw3DLine.h +++ b/include/nbl/ext/DebugDraw/CDraw3DLine.h @@ -33,7 +33,7 @@ class CDraw3DLine : public core::IReferenceCounted } - void setData(const core::matrix4SIMD& viewProjMat, const core::vector>& linesData) + void setData(const hlsl::float32_t4x4& viewProjMat, const core::vector>& linesData) { m_viewProj = viewProjMat; m_lines = linesData; @@ -45,7 +45,7 @@ class CDraw3DLine : public core::IReferenceCounted m_lines.clear(); } - void setLine(const core::matrix4SIMD& viewProjMat, + void setLine(const hlsl::float32_t4x4& viewProjMat, float fromX, float fromY, float fromZ, float toX, float toY, float toZ, float r, float g, float b, float a @@ -54,7 +54,7 @@ class CDraw3DLine : public core::IReferenceCounted m_lines = core::vector>{ std::pair(S3DLineVertex{{ fromX, fromY, fromZ }, { r, g, b, a }}, S3DLineVertex{{ toX, toY, toZ }, { r, g, b, a }}) }; } - void addLine(const core::matrix4SIMD& viewProjMat, + void addLine(const hlsl::float32_t4x4& viewProjMat, float fromX, float fromY, float fromZ, float toX, float toY, float toZ, float r, float g, float b, float a @@ -73,7 +73,7 @@ class CDraw3DLine : public core::IReferenceCounted m_lines.insert(m_lines.end(), linesData.begin(), linesData.end()); } - void setViewProjMatrix(const core::matrix4SIMD& viewProjMat) + void setViewProjMatrix(const hlsl::float32_t4x4& viewProjMat) { m_viewProj = viewProjMat; } @@ -91,7 +91,7 @@ class CDraw3DLine : public core::IReferenceCounted */ void recordToCommandBuffer(video::IGPUCommandBuffer* cmdBuffer, video::IGPUGraphicsPipeline* graphics_pipeline); - inline void addBox(const core::aabbox3df& box, float r, float g, float b, float a, const core::matrix3x4SIMD& tform=core::matrix3x4SIMD()) + inline void addBox(const core::aabbox3df& box, float r, float g, float b, float a, const hlsl::float32_t3x4& tform=hlsl::float32_t3x4()) { auto addLine = [&](auto s, auto e) -> void { @@ -128,7 +128,7 @@ class CDraw3DLine : public core::IReferenceCounted core::smart_refctd_ptr m_device; core::smart_refctd_ptr m_linesBuffer = nullptr; core::smart_refctd_ptr m_rpindependent_pipeline; - core::matrix4SIMD m_viewProj; + hlsl::float32_t4x4 m_viewProj; core::vector> m_lines; const uint32_t alignments[1] = { sizeof(S3DLineVertex) }; }; diff --git a/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h b/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h index 678adf59a9..440a1ca463 100644 --- a/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h +++ b/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h @@ -56,8 +56,8 @@ class EnvmapImportanceSampling float x,y,z; }; #define vec4 core::vectorSIMDf - #define mat4 core::matrix4SIMD - #define mat4x3 core::matrix3x4SIMD + #define mat4 hlsl::float32_t4x4 + #define mat4x3 hlsl::float32_t3x4 #include "nbl/builtin/glsl/ext/EnvmapImportanceSampling/structs.glsl" #undef uint #undef vec4 diff --git a/include/nbl/ext/MitsubaLoader/CElementShape.h b/include/nbl/ext/MitsubaLoader/CElementShape.h index 205023afea..c1725963b2 100644 --- a/include/nbl/ext/MitsubaLoader/CElementShape.h +++ b/include/nbl/ext/MitsubaLoader/CElementShape.h @@ -225,7 +225,7 @@ class CElementShape : public IElement std::string getLogName() const override { return "shape"; } - inline core::matrix3x4SIMD getAbsoluteTransform() const + inline hlsl::float32_t3x4 getAbsoluteTransform() const { auto local = transform.matrix.extractSub3x4(); // TODO restore at some point (and make it actually work??) diff --git a/include/nbl/ext/MitsubaLoader/CElementTransform.h b/include/nbl/ext/MitsubaLoader/CElementTransform.h index d518f69e6c..88864f7365 100644 --- a/include/nbl/ext/MitsubaLoader/CElementTransform.h +++ b/include/nbl/ext/MitsubaLoader/CElementTransform.h @@ -35,7 +35,7 @@ class CElementTransform : public IElement } */ - core::matrix4SIMD matrix; + hlsl::float32_t4x4 matrix; }; } diff --git a/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h b/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h index e61ab3fa87..fd28d881db 100644 --- a/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h +++ b/include/nbl/ext/MitsubaLoader/CMitsubaLoader.h @@ -28,7 +28,7 @@ class CMitsubaMaterialCompilerFrontend; //#include "nbl/builtin/glsl/ext/MitsubaLoader/instance_data_struct.glsl" #define uint uint32_t #define uvec2 uint64_t -#define mat4x3 nbl::core::matrix3x4SIMD +#define mat4x3 hlsl::float32_t3x4 #define nbl_glsl_MC_material_data_t asset::material_compiler::material_data_t struct nbl_glsl_ext_Mitsuba_Loader_instance_data_t { @@ -71,13 +71,13 @@ class CMitsubaLoader : public asset::IRenderpassIndependentPipelineLoader // core::vector getMesh(SContext& ctx, uint32_t hierarchyLevel, CElementShape* shape); - core::vector loadShapeGroup(SContext& ctx, uint32_t hierarchyLevel, const CElementShape::ShapeGroup* shapegroup, const core::matrix3x4SIMD& relTform); - SContext::shape_ass_type loadBasicShape(SContext& ctx, uint32_t hierarchyLevel, CElementShape* shape, const core::matrix3x4SIMD& relTform); + core::vector loadShapeGroup(SContext& ctx, uint32_t hierarchyLevel, const CElementShape::ShapeGroup* shapegroup, const hlsl::float32_t3x4& relTform); + SContext::shape_ass_type loadBasicShape(SContext& ctx, uint32_t hierarchyLevel, CElementShape* shape, const hlsl::float32_t3x4& relTform); void cacheTexture(SContext& ctx, uint32_t hierarchyLevel, const CElementTexture* texture, const CMitsubaMaterialCompilerFrontend::E_IMAGE_VIEW_SEMANTIC semantic); void cacheEmissionProfile(SContext& ctx, const CElementEmissionProfile* profile); - SContext::bsdf_type getBSDFtreeTraversal(SContext& ctx, const CElementBSDF* bsdf, const CElementEmitter* emitter, core::matrix4SIMD tform); + SContext::bsdf_type getBSDFtreeTraversal(SContext& ctx, const CElementBSDF* bsdf, const CElementEmitter* emitter, hlsl::float32_t4x4 tform); SContext::bsdf_type genBSDFtreeTraversal(SContext& ctx, const CElementBSDF* bsdf); template diff --git a/include/nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.h b/include/nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.h index 42bad88655..8aaf9083fd 100644 --- a/include/nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.h +++ b/include/nbl/ext/MitsubaLoader/CMitsubaMaterialCompilerFrontend.h @@ -43,7 +43,7 @@ class CMitsubaMaterialCompilerFrontend explicit CMitsubaMaterialCompilerFrontend(const SContext* _ctx) : m_loaderContext(_ctx) {} front_and_back_t compileToIRTree(asset::material_compiler::IR* ir, const CElementBSDF* _bsdf); - EmitterNode* createEmitterNode(asset::material_compiler::IR* ir, const CElementEmitter* _emitter, core::matrix4SIMD transform); + EmitterNode* createEmitterNode(asset::material_compiler::IR* ir, const CElementEmitter* _emitter, hlsl::float32_t4x4 transform); private: using tex_ass_type = std::tuple,core::smart_refctd_ptr,float>; diff --git a/include/nbl/ext/MitsubaLoader/PropertyElement.h b/include/nbl/ext/MitsubaLoader/PropertyElement.h index ac257bd4b3..ce2acd967a 100644 --- a/include/nbl/ext/MitsubaLoader/PropertyElement.h +++ b/include/nbl/ext/MitsubaLoader/PropertyElement.h @@ -6,7 +6,6 @@ #define __PROPERTY_ELEMENT_H_INCLUDED__ #include "nbl/core/declarations.h" -#include "matrix4SIMD.h" #include namespace nbl @@ -202,7 +201,7 @@ struct SPropertyElementData bool bvalue; const char* svalue; core::vectorSIMDf vvalue; // rgb, srgb, vector, point - core::matrix4SIMD mvalue; // matrix, translate, rotate, scale, lookat + hlsl::float32_t4x4 mvalue; // matrix, translate, rotate, scale, lookat }; }; @@ -302,15 +301,15 @@ template<> struct SPropertyElementData::get_typename struct SPropertyElementData::get_typename { using type = void; }; template<> struct SPropertyElementData::get_typename -{ using type = core::matrix4SIMD; }; +{ using type = hlsl::float32_t4x4; }; template<> struct SPropertyElementData::get_typename -{ using type = core::matrix4SIMD; }; +{ using type = hlsl::float32_t4x4; }; template<> struct SPropertyElementData::get_typename -{ using type = core::matrix4SIMD; }; +{ using type = hlsl::float32_t4x4; }; template<> struct SPropertyElementData::get_typename -{ using type = core::matrix4SIMD; }; +{ using type = hlsl::float32_t4x4; }; template<> struct SPropertyElementData::get_typename -{ using type = core::matrix4SIMD; }; +{ using type = hlsl::float32_t4x4; }; template<> struct SPropertyElementData::get_typename { using type = void; }; @@ -321,7 +320,7 @@ class CPropertyElementManager static std::pair createPropertyData(const char* _el, const char** _atts); static bool retrieveBooleanValue(const std::string& _data, bool& success); - static core::matrix4SIMD retrieveMatrix(const std::string& _data, bool& success); + static hlsl::float32_t4x4 retrieveMatrix(const std::string& _data, bool& success); static core::vectorSIMDf retrieveVector(const std::string& _data, bool& success); static core::vectorSIMDf retrieveHex(const std::string& _data, bool& success); diff --git a/include/nbl/ext/MitsubaLoader/SContext.h b/include/nbl/ext/MitsubaLoader/SContext.h index 687f97054d..9777edf6f0 100644 --- a/include/nbl/ext/MitsubaLoader/SContext.h +++ b/include/nbl/ext/MitsubaLoader/SContext.h @@ -193,7 +193,7 @@ struct SContext struct SInstanceData { - SInstanceData(core::matrix3x4SIMD _tform, SContext::bsdf_type _bsdf, const std::string& _id, const CElementEmitter& _emitterFront, const CElementEmitter& _emitterBack) : + SInstanceData(hlsl::float32_t3x4 _tform, SContext::bsdf_type _bsdf, const std::string& _id, const CElementEmitter& _emitterFront, const CElementEmitter& _emitterBack) : tform(_tform), bsdf(_bsdf), #if defined(_NBL_DEBUG) || defined(_NBL_RELWITHDEBINFO) bsdf_id(_id), @@ -201,7 +201,7 @@ struct SContext emitter{_emitterFront, _emitterBack} {} - core::matrix3x4SIMD tform; + hlsl::float32_t3x4 tform; SContext::bsdf_type bsdf; #if defined(_NBL_DEBUG) || defined(_NBL_RELWITHDEBINFO) std::string bsdf_id; diff --git a/include/nbl/scene/ISkinInstanceCache.h b/include/nbl/scene/ISkinInstanceCache.h index 6cb18160d4..2eb83b4aac 100644 --- a/include/nbl/scene/ISkinInstanceCache.h +++ b/include/nbl/scene/ISkinInstanceCache.h @@ -19,7 +19,7 @@ class ISkinInstanceCache : public virtual core::IReferenceCounted // main pseudo-pool properties using joint_t = ITransformTree::node_t; - using skinning_matrix_t = core::matrix3x4SIMD; + using skinning_matrix_t = hlsl::float32_t3x4; using recomputed_stamp_t = ITransformTree::recomputed_stamp_t; using inverse_bind_pose_offset_t = uint32_t; @@ -35,7 +35,7 @@ class ISkinInstanceCache : public virtual core::IReferenceCounted static inline constexpr uint32_t inverse_bind_pose_offset_prop_ix = 3u; // for the inverse bind pose pool - using inverse_bind_pose_t = core::matrix3x4SIMD; + using inverse_bind_pose_t = hlsl::float32_t3x4; static inline constexpr uint32_t inverse_bind_pose_prop_ix = 0u; diff --git a/include/nbl/scene/ISkinInstanceCacheManager.h b/include/nbl/scene/ISkinInstanceCacheManager.h index 5a5e3f5881..474a8a3eaa 100644 --- a/include/nbl/scene/ISkinInstanceCacheManager.h +++ b/include/nbl/scene/ISkinInstanceCacheManager.h @@ -466,7 +466,7 @@ class ISkinInstanceCacheManager : public virtual core::IReferenceCounted } struct DebugPushConstants { - core::matrix4SIMD viewProjectionMatrix; + hlsl::float32_t4x4 viewProjectionMatrix; core::vector4df_SIMD lineColor; core::vector3df aabbColor; uint32_t skinCount; diff --git a/include/nbl/video/IGPUAccelerationStructure.h b/include/nbl/video/IGPUAccelerationStructure.h index 1bb4fb0c66..3c10a255a2 100644 --- a/include/nbl/video/IGPUAccelerationStructure.h +++ b/include/nbl/video/IGPUAccelerationStructure.h @@ -272,7 +272,7 @@ class IGPUBottomLevelAccelerationStructure : public asset::IBottomLevelAccelerat // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkCmdBuildAccelerationStructuresIndirectKHR-pInfos-03809 // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkCmdBuildAccelerationStructuresIndirectKHR-pInfos-03810 // https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkBuildAccelerationStructuresKHR-pInfos-03773 - if (Base::invalidInputBuffer(geometry.transform,buildRangeInfo.transformByteOffset,1u,sizeof(core::matrix3x4SIMD),sizeof(core::vectorSIMDf))) + if (Base::invalidInputBuffer(geometry.transform,buildRangeInfo.transformByteOffset,1u,sizeof(hlsl::float32_t3x4),sizeof(core::vectorSIMDf))) return false; } else @@ -622,7 +622,7 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr inline PolymorphicInstance(const PolymorphicInstance&) = default; inline PolymorphicInstance(PolymorphicInstance&&) = default; - // I made all these assignment operators because of the `core::matrix3x4SIMD` alignment and keeping `type` correct at all times + // I made all these assignment operators because of the `hlsl::float32_t3x4` alignment and keeping `type` correct at all times inline PolymorphicInstance& operator=(const StaticInstance& _static) { type = INSTANCE_TYPE::STATIC; @@ -657,7 +657,7 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr static_assert(std::is_same_v,uint32_t>); // these must be 0 as per vulkan spec uint32_t reservedMotionFlags = 0u; - // I don't do an actual union because the preceeding members don't play nicely with alignment of `core::matrix3x4SIMD` and Vulkan requires this struct to be packed + // I don't do an actual union because the preceeding members don't play nicely with alignment of `hlsl::float32_t3x4` and Vulkan requires this struct to be packed SRTMotionInstance largestUnionMember = {}; static_assert(alignof(SRTMotionInstance)==8ull); diff --git a/include/vectorSIMD.h b/include/vectorSIMD.h index 9b09f95c97..6144dc446f 100644 --- a/include/vectorSIMD.h +++ b/include/vectorSIMD.h @@ -887,6 +887,42 @@ namespace core } }; + // temporary solution until vectorSIMD gets deleted + inline hlsl::float32_t4 convertToHLSLVector(const vectorSIMDf& vec) + { + hlsl::float32_t4 retval; + retval.x = vec.x; + retval.y = vec.y; + retval.z = vec.z; + retval.w = vec.w; + + return retval; + } + + // temporary solution until vectorSIMD gets deleted + inline vectorSIMDf constructVecorSIMDFromHLSLVector(const hlsl::float32_t4& vec) + { + vectorSIMDf retval; + retval.x = vec.x; + retval.y = vec.y; + retval.z = vec.z; + retval.w = vec.w; + + return retval; + } + + // temporary solution until vectorSIMD gets deleted + inline vectorSIMDf constructVecorSIMDFromHLSLVector(const hlsl::float32_t3& vec) + { + vectorSIMDf retval; + retval.x = vec.x; + retval.y = vec.y; + retval.z = vec.z; + retval.w = 0.0f; + + return retval; + } + } // end namespace core } // end namespace nbl diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index e8798499f9..b0c8a14d2f 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -157,7 +157,10 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/ieee754/impl.hlsl") # utility LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/array_accessors.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/vector_utils/vector_traits.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/vector_utils/vector_utils.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/matrix_utils/matrix_traits.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/matrix_utils/transformation_matrix_utils.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/projection/projection.hlsl") #spirv intrinsics LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/spirv_intrinsics/core.hlsl") @@ -179,6 +182,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/matrix.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/promote.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/vector.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/impl/intrinsics_impl.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/unroll.hlsl") #glsl compat LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/glsl_compat/core.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/glsl_compat/subgroup_arithmetic.hlsl") @@ -228,6 +232,9 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/angle_adding.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quadratic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/cubic.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quartic.hlsl") +#quaternions +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quaternion/quaternion.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quaternion/quaternion_impl.hlsl") #extra math LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quadrature/gauss_legendre/gauss_legendre.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quadrature/gauss_legendre/impl.hlsl")