diff --git a/inc/zoo/meta/BitmaskMaker.h b/inc/zoo/meta/BitmaskMaker.h index 4c8008ac..1134773b 100644 --- a/inc/zoo/meta/BitmaskMaker.h +++ b/inc/zoo/meta/BitmaskMaker.h @@ -43,6 +43,8 @@ struct BitmaskMaker { static_assert(0xF0F0 == BitmaskMaker::value); static_assert(0xEDFEDFED == BitmaskMaker::value); + + }} // zoo::meta #endif diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index fec26f22..7691a728 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -4,6 +4,7 @@ #include "zoo/meta/log.h" +#include #include #ifdef _MSC_VER @@ -12,6 +13,16 @@ namespace zoo { namespace swar { +template +struct SWAR; + +template struct Literals_t { + constexpr static void (SWAR::*value)() = nullptr; +}; + +template +constexpr Literals_t Literals{}; + using u64 = uint64_t; using u32 = uint32_t; using u16 = uint16_t; @@ -52,6 +63,7 @@ constexpr std::make_unsigned_t lsbIndex(T v) noexcept { template struct SWAR { using type = std::make_unsigned_t; + constexpr static auto Literal = Literals; constexpr static inline type NBits = NBits_, BitWidth = sizeof(T) * 8, @@ -62,13 +74,53 @@ struct SWAR { AllOnes = ~std::make_unsigned_t{0} >> PaddingBitsCount, // Also constructed in RobinHood utils: possible bug? LeastSignificantBit = meta::BitmaskMaker{1}, NBits>::value, MostSignificantBit = LeastSignificantBit << (NBits - 1), - LeastSignificantLaneMask = - sizeof(T) * 8 == NBits ? // needed to avoid shifting all bits - ~T(0) : - ~(~T(0) << NBits), + LeastSignificantLaneMask = []() { + if constexpr (NBits < sizeof(T) * 8) { + return (T(1) << NBits) - 1; + } else { + return ~T(0); + } + }(), // Use LowerBits in favor of ~MostSignificantBit to not pollute // "don't care" bits when non-power-of-two bit lane sizes are supported - LowerBits = MostSignificantBit - LeastSignificantBit; + LowerBits = MostSignificantBit - LeastSignificantBit, + MaxUnsignedLaneValue = LeastSignificantLaneMask; + + template + constexpr static auto from_range(InputIt first, InputIt last) noexcept { + auto result = T{0}; + for (; first != last; ++first) { + result = (result << NBits) | *first; + } + return result; + } + + template + constexpr static auto from_array(const U (&values)[Lanes]) noexcept { + using std::begin; using std::end; + return SWAR{from_range(begin(values), end(values))}; + } + + template + constexpr static auto from_array(const std::array &values) noexcept { + using std::begin; using std::end; + return SWAR{from_range(begin(values), end(values))}; + } + + constexpr SWAR(const std::array &array) : m_v{from_range(array.begin(), array.end())} {} + + template > + constexpr + SWAR(Literals_t, const Arg (&values)[N]) : m_v{from_array(values)} {} + + constexpr std::array to_array() const noexcept { + std::array result = {}; + for (int i = 0; i < Lanes; ++i) { + auto otherEnd = Lanes - i - 1; + result[otherEnd] = at(i); + } + return result; + } SWAR() = default; constexpr explicit SWAR(T v): m_v(v) {} @@ -161,6 +213,12 @@ struct SWAR { T m_v; }; +template +SWAR(Literals_t, const Arg (&values)[SWAR::Lanes]) -> SWAR; + +template +SWAR(Literals_t, const std::array::Lanes>&) -> SWAR; + /// Defining operator== on base SWAR types is entirely too error prone. Force a verbose invocation. template constexpr auto horizontalEquality(SWAR left, SWAR right) { @@ -231,6 +289,10 @@ template struct BooleanSWAR: SWAR { using Base = SWAR; + template + constexpr BooleanSWAR(Literals_t, const bool (&values)[N]) + : Base(Literals, values) { this->m_v <<= (NBits - 1); } + // Booleanness is stored in the MSBs static constexpr auto MaskMSB = broadcast(Base(T(1) << (NBits -1))); @@ -240,7 +302,7 @@ struct BooleanSWAR: SWAR { static constexpr auto MaskNonLSB = ~MaskLSB; static constexpr auto MaskNonMSB = ~MaskMSB; constexpr explicit BooleanSWAR(T v): Base(v) {} - + constexpr BooleanSWAR clear(int bit) const noexcept { constexpr auto Bit = T(1) << (NBits - 1); return this->m_v ^ (Bit << (NBits * bit)); } @@ -256,7 +318,7 @@ struct BooleanSWAR: SWAR { constexpr auto operator ~() const noexcept { return BooleanSWAR(Base{Base::MostSignificantBit} ^ *this); } - + constexpr auto operator not() const noexcept { return BooleanSWAR(MaskMSB ^ *this); } @@ -305,6 +367,12 @@ struct BooleanSWAR: SWAR { convertToBooleanSWAR(SWAR arg) noexcept; }; +template +BooleanSWAR( + Literals_t, + const bool (&values)[BooleanSWAR::Lanes]) + -> BooleanSWAR; + template constexpr BooleanSWAR convertToBooleanSWAR(SWAR arg) noexcept { @@ -381,7 +449,7 @@ greaterEqual(SWAR left, SWAR right) noexcept { using S = swar::SWAR; const auto h = S::MostSignificantBit, x = left.value(), y = right.value(); // x=left, y= right is x < y const auto z = (x|h) - (y&~h); - // bitwise ternary median! + // bitwise ternary median! const auto t = h & ~median(x, ~y, z); return ~BooleanSWAR{static_cast(t)}; // ~(x= y } diff --git a/inc/zoo/swar/math.h b/inc/zoo/swar/math.h new file mode 100644 index 00000000..d089fb15 --- /dev/null +++ b/inc/zoo/swar/math.h @@ -0,0 +1,64 @@ +#pragma once +#include "SWAR.h" + +namespace zoo::math { + +template +constexpr static +std::enable_if_t, bool> +isPowerOfTwo(IntegerType x) noexcept { + return x && (x & (x - 1)) == 0; +} + +template +constexpr static +std::enable_if_t, bool> +isPowerOfTwo() noexcept { + return isPowerOfTwo(X); +} + + +template +constexpr static +std::enable_if_t< + std::is_integral_v && + isPowerOfTwo(), size_t> +moduloPowerOfTwo(IntegerType x) noexcept { + return x & (N - 1); +} + +} + +namespace zoo::swar { +template +constexpr static auto subtractOneUnsafe(S x) noexcept { + constexpr auto Ones = S::LeastSignificantBit; + auto x_minus_1 = S{x.value() - Ones}; + return x_minus_1; +} +// todo subtract K unsafe using BitmaskMaker +// todo subtract K "saturated" using BitmaskMaker + +template +constexpr static auto isPowerOfTwo(S x) noexcept { + constexpr auto NBits = S::NBits; + using T = typename S::type; + auto greater_than_0 = greaterEqual(x, S{0}); + auto x_minus_1 = subtractOneUnsafe(x); + auto zero = equals(S{x_minus_1.value() & x.value()}, S{0}); + return greater_than_0 & zero; +} + +template +constexpr static +std::enable_if_t(), S> +moduloPowerOfTwo(const S x) noexcept { + constexpr auto N_minus_1 = N - 1; + constexpr auto N_in_lanes = zoo::meta::BitmaskMaker::value; + auto y = x.value() & N_in_lanes; + return S{y}; +} + + +} // namespace zoo::swar + diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 411d9ce1..57209c37 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -122,7 +122,7 @@ else() set( ZOO_TEST_SOURCES ${CATCH2_MAIN_SOURCE} ${TYPE_ERASURE_SOURCES} ${ALGORITHM_SOURCES} - ${SWAR_SOURCES} + ${SWAR_SOURCES} ${MISCELLANEA_SOURCES} ) diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 1628e222..b814f0fd 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -1,13 +1,13 @@ #include "zoo/swar/associative_iteration.h" #include "catch2/catch.hpp" +#include "math.h" +#include "zoo/swar/math.h" -#include -#include -#include -#include - - +// #include +// #include +// #include +// #include using namespace zoo; using namespace zoo::swar; @@ -33,6 +33,117 @@ using S32_32 = SWAR<32, uint32_t>; using S64_64 = SWAR<64, uint64_t>; +static_assert(SWAR<16, u64>::MaxUnsignedLaneValue == 65535); +static_assert(SWAR<16, u32>::MaxUnsignedLaneValue == 65535); +static_assert(SWAR<8, u32>::MaxUnsignedLaneValue == 255); +static_assert(SWAR<4, u32>::MaxUnsignedLaneValue == 15); +static_assert(SWAR<2, u32>::MaxUnsignedLaneValue == 3); + +static_assert(SWAR{zoo::swar::Literals<32, zoo::swar::u64>, {2, 1}}.value() == 0x00000002'00000001); +static_assert(SWAR{Literals<32, u64>, {1, 2}}.value() == 0x00000001'00000002); + +static_assert(SWAR{Literals<16, u64>, {4, 3, 2, 1}}.value() == 0x0004'0003'0002'0001); +static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.value() == 0x0001'0002'0003'0004); + +static_assert(SWAR{Literals<16, u32>, {2, 1}}.value() == 0x0002'0001); +static_assert(SWAR{Literals<16, u32>, {1, 2}}.value() == 0x0001'0002); + +static_assert(SWAR{Literals<8, u32>, {4, 3, 2, 1}}.value() == 0x04'03'02'01); +static_assert(SWAR{Literals<8, u32>, {1, 2, 3, 4}}.value() == 0x01'02'03'04); +static_assert(SWAR{Literals<8, u32>, {1, 2, 3, 4}}.value() == 0x01'02'03'04); + +static_assert(SWAR{Literals<8, u16>, {2, 1}}.value() == 0x0201); +static_assert(SWAR{Literals<8, u16>, {1, 2}}.value() == 0x0102); + +static_assert(SWAR{Literals<4, u8>, {2, 1}}.value() == 0x21); +static_assert(SWAR{Literals<4, u8>, {1, 2}}.value() == 0x12); + +// Little-endian +static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.at(0) == 4); +static_assert(SWAR{Literals<16, u64>, {1, 2, 3, 4}}.at(1) == 3); + +// Macro required because initializer lists are not constexpr +#define ARRAY_TEST(SwarType, ...) \ + static_assert([]() { \ + using S = SwarType; \ + constexpr auto arry = std::array{__VA_ARGS__}; \ + constexpr auto test_array = S{S::Literal, {__VA_ARGS__}}.to_array(); \ + static_assert(arry.size() == S::Lanes); \ + for (auto i = 0; i < S::Lanes; ++i) { \ + if (arry[i] != test_array.at(i)) { \ + return false; \ + } \ + } \ + return true; \ + }()); \ + +ARRAY_TEST(S16_64, 1, 2, 3, 4); +ARRAY_TEST(S16_64, 4, 3, 2, 1); + +ARRAY_TEST(S8_32, 255, 255, 255, 255); +ARRAY_TEST(S8_64, 255, 255, 255, 255, 255, 255, 255, 255); + +ARRAY_TEST(S16_32, 65534, 65534); +ARRAY_TEST(S16_64, 65534, 65534, 65534, 65534); + +using BS = BooleanSWAR<4, u16>; +static_assert(BS{Literals<4, u16>, {0, 0, 0, 0}}.value() == 0b0000'0000'0000'0000); +static_assert(BS{Literals<4, u16>, {1, 0, 0, 0}}.value() == 0b1000'0000'0000'0000); +static_assert(BS{Literals<4, u16>, {0, 1, 0, 0}}.value() == 0b0000'1000'0000'0000); +static_assert(BS{Literals<4, u16>, {0, 0, 1, 0}}.value() == 0b0000'0000'1000'0000); +static_assert(BS{Literals<4, u16>, {0, 0, 0, 1}}.value() == 0b0000'0000'0000'1000); +static_assert(BS{Literals<4, u16>, {1, 0, 0, 0}}.value() == 0b1000'0000'0000'0000); + + +namespace equality { +using S = SWAR<8, u32>; +using BS = BooleanSWAR<8, u32>; +template +constexpr auto laneWiseEqualsTest( + const typename S::type (&left)[S::Lanes], + const typename S::type (&right)[S::Lanes], + const bool (&expected)[S::Lanes]) { + return equals(S{S::Literal, left}, S{S::Literal, right}).value() + == BS{BS::Literal, expected}.value(); +} +static_assert(laneWiseEqualsTest({1, 2, 3, 4}, {1, 2, 3, 4}, {1, 1, 1, 1})); +static_assert(laneWiseEqualsTest({1, 2, 3, 4}, {5, 6, 7, 8}, {0, 0, 0, 0})); +static_assert(laneWiseEqualsTest({1, 2, 3, 4}, {5, 2, 7, 4}, {0, 1, 0, 1})); +} + +namespace math_test { +static_assert(math::isPowerOfTwo()); +static_assert(math::moduloPowerOfTwo<4>(0) == 0); +static_assert(math::moduloPowerOfTwo<8>(9) == 1); +static_assert(math::moduloPowerOfTwo<4096>(4097) == 1); + +using S = SWAR<8, u32>; +using BS = BooleanSWAR<8, u32>; +template +constexpr auto powerOfTwoTest( + const typename S::type (&input)[S::Lanes], + const bool (&expected)[S::Lanes]) { + return isPowerOfTwo(S{S::Literal, input}).value() == BS{BS::Literal, expected}.value(); +} +static_assert(powerOfTwoTest({1, 2, 3, 4}, {1, 1, 0, 1})); +static_assert(powerOfTwoTest({2, 3, 64, 77}, {1, 0, 1, 0})); +static_assert(powerOfTwoTest({3, 65, 128, 0}, {0, 0, 1, 1})); +static_assert(powerOfTwoTest({256, 7, 11, 101}, {1, 0, 0, 0})); +static_assert(powerOfTwoTest({2, 64, 128, 7}, {1, 1, 1, 0})); + +template +constexpr auto moduloSwarTest( + const typename S::type (&input)[S::Lanes], + const typename S::type (&expected)[S::Lanes]) { + return moduloPowerOfTwo(S{S::Literal, input}).value() == S{S::Literal, expected}.value(); +} +static_assert(moduloPowerOfTwo<4>(S{0}).value() == 0); +static_assert(moduloSwarTest<4>({0, 2, 4, 6}, {0, 2, 0, 2})); +static_assert(moduloSwarTest<4>({1, 3, 5, 7}, {1, 3, 1, 3})); +static_assert(moduloSwarTest<8>({9, 8, 16, 7}, {1, 0, 0, 7})); +static_assert(moduloSwarTest<16>({17, 32, 64, 127}, {1, 0, 0, 15})); +} + namespace Multiplication { static_assert(~int64_t(0) == negate(S4_64{S4_64::LeastSignificantBit}).value()); @@ -357,7 +468,7 @@ TEST_CASE( const auto left = S2_16{0}.blitElement(1, i); const auto right = S2_16{S2_16::AllOnes}.blitElement(1, i-1); const auto test = S2_16{0}.blitElement(1, 2); - CHECK(test.value() == greaterEqual<2, u16>(left, right).value()); + CHECK(test.value() == greaterEqual<2, u16>(left, right).value()); } } SECTION("single") { @@ -365,7 +476,7 @@ TEST_CASE( const auto large = S4_32{0}.blitElement(1, i+1); const auto small = S4_32{S4_32::AllOnes}.blitElement(1, i-1); const auto test = S4_32{0}.blitElement(1, 8); - CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); + CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); } } SECTION("allLanes") { @@ -373,7 +484,7 @@ TEST_CASE( const auto small = S4_32(S4_32::LeastSignificantBit * (i-1)); const auto large = S4_32(S4_32::LeastSignificantBit * (i+1)); const auto test = S4_32(S4_32::LeastSignificantBit * 8); - CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); + CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); } } } @@ -425,7 +536,7 @@ TEST_CASE( "BooleanSWAR MSBtoLaneMask", "[swar]" ) { - // BooleanSWAR as a mask: + // BooleanSWAR as a mask: auto bswar =BooleanSWAR<4, u32>(0x0808'0000); auto mask = S4_32(0x0F0F'0000); CHECK(bswar.MSBtoLaneMask().value() == mask.value()); @@ -452,6 +563,7 @@ TEST_CASE( CHECK(SWAR<4, u16>(0x0400).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0100), SWAR<4, u16>(0x0300)).value()); CHECK(SWAR<4, u16>(0x0B00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0300)).value()); CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0700)).value()); - CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value()); - CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value()); + CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value()); + CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value()); } +