From e82dbddfc223fc9850fc630f4bef4f5a53f93528 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 4 Jun 2024 02:00:45 -0700 Subject: [PATCH 01/26] woah holy shit --- inc/zoo/swar/SWAR.h | 10 +++- inc/zoo/swar/associative_iteration.h | 83 +++++++++++++++++++++++++++- test/swar/BasicOperations.cpp | 22 ++++++-- 3 files changed, 103 insertions(+), 12 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 508ba0ac..8c0c3acb 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -4,6 +4,7 @@ #include "zoo/meta/log.h" +#include #include #ifdef _MSC_VER @@ -75,6 +76,7 @@ struct SWAR { SignificantBitsCount = BitWidth - PaddingBitsCount, AllOnes = ~std::make_unsigned_t{0} >> PaddingBitsCount, // Also constructed in RobinHood utils: possible bug? LeastSignificantBit = meta::BitmaskMaker{1}, NBits>::value, + AllOnesInFirstLane = AllOnes >> (NBits * (Lanes - 1)), MostSignificantBit = LeastSignificantBit << (NBits - 1), LeastSignificantLaneMask = sizeof(T) * 8 == NBits ? // needed to avoid shifting all bits @@ -254,7 +256,7 @@ struct BooleanSWAR: SWAR { static constexpr auto MaskNonLSB = ~MaskLSB; static constexpr auto MaskNonMSB = ~MaskMSB; constexpr explicit BooleanSWAR(T v): Base(v) {} - + constexpr BooleanSWAR clear(int bit) const noexcept { constexpr auto Bit = T(1) << (NBits - 1); return this->m_v ^ (Bit << (NBits * bit)); } @@ -270,7 +272,7 @@ struct BooleanSWAR: SWAR { constexpr auto operator ~() const noexcept { return BooleanSWAR(Base{Base::MostSignificantBit} ^ *this); } - + constexpr auto operator not() const noexcept { return BooleanSWAR(MaskMSB ^ *this); } @@ -395,7 +397,7 @@ greaterEqual(SWAR left, SWAR right) noexcept { using S = swar::SWAR; const auto h = S::MostSignificantBit, x = left.value(), y = right.value(); // x=left, y= right is x < y const auto z = (x|h) - (y&~h); - // bitwise ternary median! + // bitwise ternary median! const auto t = h & ~median(x, ~y, z); return ~BooleanSWAR{static_cast(t)}; // ~(x= y } @@ -476,4 +478,6 @@ static_assert( 0x0706050403020100ull ); +static_assert(SWAR<4, uint16_t>::AllOnesInFirstLane == 0b0000'0000'0000'1111); + }} diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 587c5ce6..490ac983 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -1,7 +1,10 @@ #ifndef ZOO_SWAR_ASSOCIATIVE_ITERATION_H #define ZOO_SWAR_ASSOCIATIVE_ITERATION_H +#include "zoo/meta/BitmaskMaker.h" #include "zoo/swar/SWAR.h" +#include +#include //#define ZOO_DEVELOPMENT_DEBUGGING #ifdef ZOO_DEVELOPMENT_DEBUGGING @@ -256,13 +259,16 @@ constexpr auto makeLaneMaskFromLSB(SWAR input) { return impl::makeLaneMaskFromMSB_and_LSB(lsbCopiedToMSB, lsb); } +// TODO WRITE TEST FOR THIS template constexpr auto makeLaneMaskFromMSB(SWAR input) { using S = SWAR; auto msb = input & S{S::MostSignificantBit}; - auto msbCopiedToLSB = S{msb.value() >> (NB - 1)}; + B val = msb.value() >> (NB - 1); + auto msbCopiedToLSB = S{val}; return impl::makeLaneMaskFromMSB_and_LSB(msb, msbCopiedToLSB); } +static_assert(makeLaneMaskFromMSB(SWAR<4, uint16_t>{0b1001'0000'0000'0000}).value() == 0b1111'0000'0000'0000); template struct ArithmeticResultTriplet { @@ -392,8 +398,13 @@ template< typename CountHalver > constexpr auto associativeOperatorIterated_regressive( - Base base, Base neutral, IterationCount count, IterationCount forSquaring, - Operator op, unsigned log2Count, CountHalver ch + Base base, + Base neutral, + IterationCount count, + IterationCount forSquaring, + Operator op, + unsigned log2Count, + CountHalver ch ) { auto result = neutral; if(!log2Count) { return result; } @@ -483,6 +494,72 @@ constexpr auto exponentiation_OverflowUnsafe_SpecificBitCount( ); } +using S = SWAR<4, uint16_t>; + +template +constexpr auto labelNumBits(S input) { + using T = typename S::type; + auto mask = makeLaneMaskFromMSB(input).value(); + T result = 0; + for(auto i = 0; i < S::Lanes; ++i) { + result = result + T(mask & 1); + mask = mask >> 1; + } + return S{result}; +} +static_assert(labelNumBits(S{0b0001'0010'0011'0100}).value() == S{0b0001'0011'0111'1111}.value()); +// HOLY SHIT THIS IS ALSO ASSOCIATIVE ITERATION! + +template +constexpr auto shiftOp(S left, S right, S counts) { + using T = typename S::type; + auto mask = makeLaneMaskFromMSB(counts).value(); + auto inputLsbCleared = left.value() & ~S{S::LeastSignificantBit}.value(); + auto shifted = inputLsbCleared >> 1; + T res = (shifted & mask) | (left.value() & ~mask); + return S{res}; +}; +using S = SWAR<4, uint16_t>; +static_assert(shiftOp(S{0b1000'1000'1000'1000}, S{0b0001'0001'0001'0001}, S{0b1000'1000'1000'0000}).value() == 0b0100'0100'0100'1000); + +template +constexpr auto rightShift_LaneWise( + SWAR input, SWAR numShifts +) { + using S = SWAR; + + auto operation = [](auto left, auto right, auto counts) { + auto mask = makeLaneMaskFromMSB(counts).value(); + auto inputLsbCleared = left.value() & ~S{S::LeastSignificantBit}.value(); + auto shifted = inputLsbCleared >> 1; + T res = (shifted & mask) | (left.value() & ~mask); + return S{res}; + }; + + auto halver = [](auto counts) { + auto msbCleared = counts & ~S{S::MostSignificantBit}; + T halved = msbCleared.value() >> T{1}; + return S{halved}; + }; + + return associativeOperatorIterated_regressive( + input, + S{1}, + numShifts, + S{S::MostSignificantBit}, + operation, + ActualBits, + halver + ); +} + +static_assert(1 >> 0 == 1); + +static_assert(rightShift_LaneWise<4>( + SWAR<4, uint16_t>{0b1000'1000'1000'1000}, + SWAR<4, uint16_t>{0b0001'0001'0001'0001} +).value() == 0b0100'0100'0100'0100); + template constexpr auto multiplication_OverflowUnsafe( SWAR multiplicand, diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 1628e222..87d1241f 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -348,6 +348,16 @@ GE_MSB_ON_TEST( 0x0123'4567, 0x8888'8888) +TEST_CASE( + "right shift lanewise", + "[swar][jamie]" +) { + CHECK(rightShift_LaneWise<4>( + SWAR<4, uint16_t>{0b1000'1000'1000'1000}, + SWAR<4, uint16_t>{0b0001'0001'0001'0001} + ).value() == 0b0100'0100'0100'0100); +} + TEST_CASE( "greaterEqualMSBOn", "[swar][unsigned-swar]" @@ -357,7 +367,7 @@ TEST_CASE( const auto left = S2_16{0}.blitElement(1, i); const auto right = S2_16{S2_16::AllOnes}.blitElement(1, i-1); const auto test = S2_16{0}.blitElement(1, 2); - CHECK(test.value() == greaterEqual<2, u16>(left, right).value()); + CHECK(test.value() == greaterEqual<2, u16>(left, right).value()); } } SECTION("single") { @@ -365,7 +375,7 @@ TEST_CASE( const auto large = S4_32{0}.blitElement(1, i+1); const auto small = S4_32{S4_32::AllOnes}.blitElement(1, i-1); const auto test = S4_32{0}.blitElement(1, 8); - CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); + CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); } } SECTION("allLanes") { @@ -373,7 +383,7 @@ TEST_CASE( const auto small = S4_32(S4_32::LeastSignificantBit * (i-1)); const auto large = S4_32(S4_32::LeastSignificantBit * (i+1)); const auto test = S4_32(S4_32::LeastSignificantBit * 8); - CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); + CHECK(test.value() == greaterEqual<4, u32>(large, small).value()); } } } @@ -425,7 +435,7 @@ TEST_CASE( "BooleanSWAR MSBtoLaneMask", "[swar]" ) { - // BooleanSWAR as a mask: + // BooleanSWAR as a mask: auto bswar =BooleanSWAR<4, u32>(0x0808'0000); auto mask = S4_32(0x0F0F'0000); CHECK(bswar.MSBtoLaneMask().value() == mask.value()); @@ -452,6 +462,6 @@ TEST_CASE( CHECK(SWAR<4, u16>(0x0400).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0100), SWAR<4, u16>(0x0300)).value()); CHECK(SWAR<4, u16>(0x0B00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0300)).value()); CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0700)).value()); - CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value()); - CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value()); + CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value()); + CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value()); } From 54f30a3bac0a65255d022a4163651eca421490ce Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 4 Jun 2024 02:26:55 -0700 Subject: [PATCH 02/26] wip --- inc/zoo/swar/associative_iteration.h | 48 ++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 490ac983..6534e6ba 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -507,8 +507,53 @@ constexpr auto labelNumBits(S input) { } return S{result}; } -static_assert(labelNumBits(S{0b0001'0010'0011'0100}).value() == S{0b0001'0011'0111'1111}.value()); // HOLY SHIT THIS IS ALSO ASSOCIATIVE ITERATION! +using S = SWAR<4, uint16_t>; + +template +constexpr S shiftLeftAppendOne(S input, BS should) { + using T = typename S::type; + auto mask = makeLaneMaskFromMSB(should).value(); + auto inputMsbCleared = input.value() & ~S{S::MostSignificantBit}.value(); + T shifted = inputMsbCleared << 1; + constexpr auto ones = S::LeastSignificantBit; + T appendedOnes = shifted | ones; + T result = (appendedOnes & mask) | (input.value() & ~mask); + return S{result}; +} + +static_assert(shiftLeftAppendOne( + S{0b0000'0001'0100'1000}, + S{0b0000'1000'1000'1000}).value() == + 0b0000'0011'1001'0001); + +template +constexpr auto labelNumBits_ai( + SWAR numShifts +) { + using S = SWAR; + + auto operation = [](auto left, auto _, auto counts) { + return shiftLeftAppendOne(left, counts); + }; + + auto halver = [](auto counts) { + auto msbCleared = counts & ~S{S::MostSignificantBit}; + T halved = msbCleared.value() >> T{1}; + return S{halved}; + }; + + return associativeOperatorIterated_regressive( + S{0}, + S{0}, + numShifts, + S{S::MostSignificantBit}, + operation, + ActualBits, + halver + ); +} +static_assert(labelNumBits_ai<4>(S{0b0001'0010'0011'0100}).value() == S{0b0001'0011'0111'1111}.value()); template constexpr auto shiftOp(S left, S right, S counts) { @@ -519,7 +564,6 @@ constexpr auto shiftOp(S left, S right, S counts) { T res = (shifted & mask) | (left.value() & ~mask); return S{res}; }; -using S = SWAR<4, uint16_t>; static_assert(shiftOp(S{0b1000'1000'1000'1000}, S{0b0001'0001'0001'0001}, S{0b1000'1000'1000'0000}).value() == 0b0100'0100'0100'1000); template From 657f65935142eca46f12d43b73670387a78f9991 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 4 Jun 2024 02:55:48 -0700 Subject: [PATCH 03/26] WIP --- inc/zoo/swar/associative_iteration.h | 44 ++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 6534e6ba..d5118d91 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -521,12 +521,37 @@ constexpr S shiftLeftAppendOne(S input, BS should) { T result = (appendedOnes & mask) | (input.value() & ~mask); return S{result}; } - static_assert(shiftLeftAppendOne( S{0b0000'0001'0100'1000}, S{0b0000'1000'1000'1000}).value() == 0b0000'0011'1001'0001); +constexpr auto maskedOperation = [](auto input, auto mask, auto op) { + auto output = op(input); + return (output & mask) | (input & ~mask); +}; + +template +constexpr S shiftLeftNAppendN(S input, S numTimes, BS should) { + using T = typename S::type; + auto inputMsbCleared = input.value() & ~S{S::MostSignificantBit}.value(); + auto mask = makeLaneMaskFromMSB(should).value(); + + T res = maskedOperation(inputMsbCleared, mask, [](auto input) { + auto shifted = input << 1; + constexpr auto ones = S::LeastSignificantBit; + auto appendedOnes = shifted | ones; + return appendedOnes; + }); + + return S{res}; +} +static_assert(shiftLeftNAppendN( + S{0b0000'0001'0100'1000}, + S{0b0000'0001'0100'1000}, + S{0b1000'1000'1000'1000}).value() == + 0b0001'0011'1001'0001); + template constexpr auto labelNumBits_ai( SWAR numShifts @@ -556,7 +581,20 @@ constexpr auto labelNumBits_ai( static_assert(labelNumBits_ai<4>(S{0b0001'0010'0011'0100}).value() == S{0b0001'0011'0111'1111}.value()); template -constexpr auto shiftOp(S left, S right, S counts) { +constexpr auto shiftRightOnce_lanewise(S left, S counts) { + using T = typename S::type; + auto mask = makeLaneMaskFromMSB(counts).value(); + auto inputLsbCleared = left.value() & ~S{S::LeastSignificantBit}.value(); + auto shifted = inputLsbCleared >> 1; + T res = (shifted & mask) | (left.value() & ~mask); + return S{res}; +}; +static_assert(shiftRightOnce_lanewise(S{0b1000'1000'1000'1000}, S{0b1000'1000'1000'0000}).value() == 0b0100'0100'0100'1000); +static_assert(shiftRightOnce_lanewise(S{0b1111'1000'1111'1000}, S{0b1000'1000'1000'0000}).value() == 0b0111'0100'0111'1000); +static_assert(shiftRightOnce_lanewise(S{0b1111'1111'1111'1111}, S{0b1000'1000'1000'1000}).value() == 0b0111'0111'0111'0111); + +template +constexpr auto shiftOp_2(S left, S right, S counts) { using T = typename S::type; auto mask = makeLaneMaskFromMSB(counts).value(); auto inputLsbCleared = left.value() & ~S{S::LeastSignificantBit}.value(); @@ -564,7 +602,7 @@ constexpr auto shiftOp(S left, S right, S counts) { T res = (shifted & mask) | (left.value() & ~mask); return S{res}; }; -static_assert(shiftOp(S{0b1000'1000'1000'1000}, S{0b0001'0001'0001'0001}, S{0b1000'1000'1000'0000}).value() == 0b0100'0100'0100'1000); +static_assert(shiftOp(S{0b1000'1000'1000'1000}, S{0b0001'0010'0001'0001}, S{0b1000'1000'1000'0000}).value() == 0b0100'0100'0100'1000); template constexpr auto rightShift_LaneWise( From a872b5ca174fb85c10ab18c0d775b3331c98e773 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 4 Jun 2024 03:15:35 -0700 Subject: [PATCH 04/26] wip --- inc/zoo/swar/SWAR.h | 14 +++++++++++++- inc/zoo/swar/associative_iteration.h | 6 ++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 8c0c3acb..ff95125f 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -13,10 +13,14 @@ namespace zoo { namespace swar { +namespace type_defs { +using u128 = __uint128_t; using u64 = uint64_t; using u32 = uint32_t; using u16 = uint16_t; -using u8 = std::uint8_t; +using u8 = uint8_t; +} +using namespace type_defs; template constexpr uint64_t popcount(uint64_t a) noexcept { @@ -86,6 +90,14 @@ struct SWAR { // "don't care" bits when non-power-of-two bit lane sizes are supported LowerBits = MostSignificantBit - LeastSignificantBit; + static_assert(std::is_unsigned_v, + "You should not use an unsigned type as the base for a SWAR type. " + "If you have used `int` or `long`, please use `uint32_t` or `uint64_t` instead. " + "This type parameter is only used to determine the total width of the SWAR register. " + "The signed-ness of the type has no *intentional* semantic meaning to what you're defining and " + "furthermore, some bitwise operations are different for signed and unsigned types." + ); + SWAR() = default; constexpr explicit SWAR(T v): m_v(v) {} constexpr explicit operator T() const noexcept { return m_v; } diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index d5118d91..6e11fff5 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -494,7 +494,9 @@ constexpr auto exponentiation_OverflowUnsafe_SpecificBitCount( ); } -using S = SWAR<4, uint16_t>; +using namespace zoo::swar::type_defs; +using S = SWAR<4, u16>; +static_assert(SWAR<4, u16>::LeastSignificantBit == 0b0001'0001'0001'0001); template constexpr auto labelNumBits(S input) { @@ -602,7 +604,7 @@ constexpr auto shiftOp_2(S left, S right, S counts) { T res = (shifted & mask) | (left.value() & ~mask); return S{res}; }; -static_assert(shiftOp(S{0b1000'1000'1000'1000}, S{0b0001'0010'0001'0001}, S{0b1000'1000'1000'0000}).value() == 0b0100'0100'0100'1000); +static_assert(shiftOp_2(S{0b1000'1000'1000'1000}, S{0b0001'0010'0001'0001}, S{0b1000'1000'1000'0000}).value() == 0b0100'0100'0100'1000); template constexpr auto rightShift_LaneWise( From 8398c2feef95b14f416c1f0cfab0a1c52c4dbf01 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 4 Jun 2024 17:30:43 -0700 Subject: [PATCH 05/26] damn --- inc/zoo/swar/associative_iteration.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 6e11fff5..be6042b5 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -528,6 +528,16 @@ static_assert(shiftLeftAppendOne( S{0b0000'1000'1000'1000}).value() == 0b0000'0011'1001'0001); +template +constexpr auto createShiftMask(S input) { + constexpr auto two = S{meta::BitmaskMaker::value}; + constexpr auto one = S::LeastSignificantBit; + typename S::type v = exponentiation_OverflowUnsafe_SpecificBitCount(two, input).value() - one; + return S{v}; +} +static_assert(createShiftMask(S{0b0000'0001'0010'0011}).value() == 0b0000'0001'0011'0111); +static_assert(createShiftMask(S{0b0100'0001'0010'0011}).value() == 0b1111'0001'0011'0111); + constexpr auto maskedOperation = [](auto input, auto mask, auto op) { auto output = op(input); return (output & mask) | (input & ~mask); From d87f9c1d8a9602b3e7c32d4559035ee18f491652 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 4 Jun 2024 20:03:46 -0700 Subject: [PATCH 06/26] wip --- inc/zoo/swar/associative_iteration.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index be6042b5..d722d81d 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -538,6 +538,22 @@ constexpr auto createShiftMask(S input) { static_assert(createShiftMask(S{0b0000'0001'0010'0011}).value() == 0b0000'0001'0011'0111); static_assert(createShiftMask(S{0b0100'0001'0010'0011}).value() == 0b1111'0001'0011'0111); +static_assert(S::LeastSignificantLaneMask == 0x00'00'00'00'FF); + +// static_assert(S::laneMask(0) == 0b0000'0000'0000'1111); + +template +constexpr auto thing (S input, S shifts) { + auto minimumMask = createShiftMask(shifts); + auto inputMasked = input & minimumMask; + + auto result = 0; + for (int i = 0; i < S::Lanes; i++) { + auto laneMask + auto firstElement = inputMasked + } +} + constexpr auto maskedOperation = [](auto input, auto mask, auto op) { auto output = op(input); return (output & mask) | (input & ~mask); From 0740d8b15896312cb048b69033bde230f5de7bd8 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 4 Jun 2024 20:31:08 -0700 Subject: [PATCH 07/26] well, it works! --- README.md | 2 +- inc/zoo/swar/SWAR.h | 19 ++++++++---- inc/zoo/swar/associative_iteration.h | 43 +++++++++++++++++++--------- 3 files changed, 43 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 2efdedd9..08b24f58 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT) +[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT) [![C++ CI](https://github.com/thecppzoo/zoo/actions/workflows/master.yaml/badge.svg)](https://github.com/thecppzoo/zoo/actions/workflows/master.yaml) ## Build suggestion diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index ff95125f..f44edc60 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -82,10 +82,13 @@ struct SWAR { LeastSignificantBit = meta::BitmaskMaker{1}, NBits>::value, AllOnesInFirstLane = AllOnes >> (NBits * (Lanes - 1)), MostSignificantBit = LeastSignificantBit << (NBits - 1), - LeastSignificantLaneMask = - sizeof(T) * 8 == NBits ? // needed to avoid shifting all bits - ~T(0) : - ~(~T(0) << NBits), + LeastSignificantLaneMask = []() { + if constexpr (NBits < sizeof(T) * 8) { + return (T(1) << NBits) - 1; + } else { + return ~T(0); + } + }(), // Use LowerBits in favor of ~MostSignificantBit to not pollute // "don't care" bits when non-power-of-two bit lane sizes are supported LowerBits = MostSignificantBit - LeastSignificantBit; @@ -118,9 +121,13 @@ struct SWAR { SWAR_BINARY_OPERATORS_X_LIST #undef X + constexpr static T laneMask(int laneIndex) noexcept { + return LeastSignificantLaneMask << (NBits * laneIndex); + } + // Returns lane at position with other lanes cleared. - constexpr T isolateLane(int position) const noexcept { - return m_v & (LeastSignificantLaneMask << (NBits * position)); + constexpr T isolateLane(int laneIndex) const noexcept { + return m_v & laneMask(laneIndex); } // Returns lane value at position, in lane 0, rest of SWAR cleared. diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index d722d81d..701296ba 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -528,31 +528,46 @@ static_assert(shiftLeftAppendOne( S{0b0000'1000'1000'1000}).value() == 0b0000'0011'1001'0001); + +/** Transforms a number into a number into a binary tally. + * E.g. 0b0011 (3) -> 0b0111 */ template -constexpr auto createShiftMask(S input) { +constexpr auto base2TallyTransform(S input) { constexpr auto two = S{meta::BitmaskMaker::value}; constexpr auto one = S::LeastSignificantBit; typename S::type v = exponentiation_OverflowUnsafe_SpecificBitCount(two, input).value() - one; return S{v}; } -static_assert(createShiftMask(S{0b0000'0001'0010'0011}).value() == 0b0000'0001'0011'0111); -static_assert(createShiftMask(S{0b0100'0001'0010'0011}).value() == 0b1111'0001'0011'0111); - -static_assert(S::LeastSignificantLaneMask == 0x00'00'00'00'FF); - -// static_assert(S::laneMask(0) == 0b0000'0000'0000'1111); template -constexpr auto thing (S input, S shifts) { - auto minimumMask = createShiftMask(shifts); - auto inputMasked = input & minimumMask; - - auto result = 0; +constexpr auto rightShift_Plural(S input, S shifts) { + using T = typename S::type; + auto minimumMask = ~base2TallyTransform(shifts); + T inputMasked = input.value() & minimumMask.value(); + T result = 0; for (int i = 0; i < S::Lanes; i++) { - auto laneMask - auto firstElement = inputMasked + T currentNumShift = shifts.at(i); + auto shifted = inputMasked >> currentNumShift; + auto laneMask = S::laneMask(i); + auto thisResult = shifted & laneMask; + result |= thisResult; } + return S{result}; } +static_assert(rightShift_Plural( + S{0b0000'1000'1000'1000}, + S{0b0100'0011'0010'0001} +).value() == 0b0000'0001'0010'0100); + +static_assert(base2TallyTransform(S{0b0000'0001'0010'0011}).value() == 0b0000'0001'0011'0111); +static_assert(base2TallyTransform(S{0b0100'0001'0010'0011}).value() == 0b1111'0001'0011'0111); + +static_assert(S::LeastSignificantLaneMask == 0b0000'0000'0000'1111); +static_assert(S::laneMask(0) == 0b0000'0000'0000'1111); +static_assert(S::laneMask(1) == 0b0000'0000'1111'0000); +static_assert(S::laneMask(2) == 0b0000'1111'0000'0000); +static_assert(S::laneMask(3) == 0b1111'0000'0000'0000); +static_assert(S{S::laneMask(3)}.at(3) == 0b0000'0000'0000'1111); constexpr auto maskedOperation = [](auto input, auto mask, auto op) { auto output = op(input); From d8011c9cb9f048fa22d0331ac17563f74c209d89 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 4 Jun 2024 20:36:17 -0700 Subject: [PATCH 08/26] move --- inc/zoo/swar/associative_iteration.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 701296ba..c87c4f9f 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -538,6 +538,8 @@ constexpr auto base2TallyTransform(S input) { typename S::type v = exponentiation_OverflowUnsafe_SpecificBitCount(two, input).value() - one; return S{v}; } +static_assert(base2TallyTransform(S{0b0000'0001'0010'0011}).value() == 0b0000'0001'0011'0111); +static_assert(base2TallyTransform(S{0b0100'0001'0010'0011}).value() == 0b1111'0001'0011'0111); template constexpr auto rightShift_Plural(S input, S shifts) { @@ -554,14 +556,12 @@ constexpr auto rightShift_Plural(S input, S shifts) { } return S{result}; } + static_assert(rightShift_Plural( S{0b0000'1000'1000'1000}, S{0b0100'0011'0010'0001} ).value() == 0b0000'0001'0010'0100); -static_assert(base2TallyTransform(S{0b0000'0001'0010'0011}).value() == 0b0000'0001'0011'0111); -static_assert(base2TallyTransform(S{0b0100'0001'0010'0011}).value() == 0b1111'0001'0011'0111); - static_assert(S::LeastSignificantLaneMask == 0b0000'0000'0000'1111); static_assert(S::laneMask(0) == 0b0000'0000'0000'1111); static_assert(S::laneMask(1) == 0b0000'0000'1111'0000); From ad5ca7a31ab31773d0e1fa79a272ea9cf0106fab Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 4 Jun 2024 20:37:21 -0700 Subject: [PATCH 09/26] cleanup --- inc/zoo/swar/associative_iteration.h | 116 --------------------------- 1 file changed, 116 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index c87c4f9f..8aae6a1a 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -569,122 +569,6 @@ static_assert(S::laneMask(2) == 0b0000'1111'0000'0000); static_assert(S::laneMask(3) == 0b1111'0000'0000'0000); static_assert(S{S::laneMask(3)}.at(3) == 0b0000'0000'0000'1111); -constexpr auto maskedOperation = [](auto input, auto mask, auto op) { - auto output = op(input); - return (output & mask) | (input & ~mask); -}; - -template -constexpr S shiftLeftNAppendN(S input, S numTimes, BS should) { - using T = typename S::type; - auto inputMsbCleared = input.value() & ~S{S::MostSignificantBit}.value(); - auto mask = makeLaneMaskFromMSB(should).value(); - - T res = maskedOperation(inputMsbCleared, mask, [](auto input) { - auto shifted = input << 1; - constexpr auto ones = S::LeastSignificantBit; - auto appendedOnes = shifted | ones; - return appendedOnes; - }); - - return S{res}; -} -static_assert(shiftLeftNAppendN( - S{0b0000'0001'0100'1000}, - S{0b0000'0001'0100'1000}, - S{0b1000'1000'1000'1000}).value() == - 0b0001'0011'1001'0001); - -template -constexpr auto labelNumBits_ai( - SWAR numShifts -) { - using S = SWAR; - - auto operation = [](auto left, auto _, auto counts) { - return shiftLeftAppendOne(left, counts); - }; - - auto halver = [](auto counts) { - auto msbCleared = counts & ~S{S::MostSignificantBit}; - T halved = msbCleared.value() >> T{1}; - return S{halved}; - }; - - return associativeOperatorIterated_regressive( - S{0}, - S{0}, - numShifts, - S{S::MostSignificantBit}, - operation, - ActualBits, - halver - ); -} -static_assert(labelNumBits_ai<4>(S{0b0001'0010'0011'0100}).value() == S{0b0001'0011'0111'1111}.value()); - -template -constexpr auto shiftRightOnce_lanewise(S left, S counts) { - using T = typename S::type; - auto mask = makeLaneMaskFromMSB(counts).value(); - auto inputLsbCleared = left.value() & ~S{S::LeastSignificantBit}.value(); - auto shifted = inputLsbCleared >> 1; - T res = (shifted & mask) | (left.value() & ~mask); - return S{res}; -}; -static_assert(shiftRightOnce_lanewise(S{0b1000'1000'1000'1000}, S{0b1000'1000'1000'0000}).value() == 0b0100'0100'0100'1000); -static_assert(shiftRightOnce_lanewise(S{0b1111'1000'1111'1000}, S{0b1000'1000'1000'0000}).value() == 0b0111'0100'0111'1000); -static_assert(shiftRightOnce_lanewise(S{0b1111'1111'1111'1111}, S{0b1000'1000'1000'1000}).value() == 0b0111'0111'0111'0111); - -template -constexpr auto shiftOp_2(S left, S right, S counts) { - using T = typename S::type; - auto mask = makeLaneMaskFromMSB(counts).value(); - auto inputLsbCleared = left.value() & ~S{S::LeastSignificantBit}.value(); - auto shifted = inputLsbCleared >> 1; - T res = (shifted & mask) | (left.value() & ~mask); - return S{res}; -}; -static_assert(shiftOp_2(S{0b1000'1000'1000'1000}, S{0b0001'0010'0001'0001}, S{0b1000'1000'1000'0000}).value() == 0b0100'0100'0100'1000); - -template -constexpr auto rightShift_LaneWise( - SWAR input, SWAR numShifts -) { - using S = SWAR; - - auto operation = [](auto left, auto right, auto counts) { - auto mask = makeLaneMaskFromMSB(counts).value(); - auto inputLsbCleared = left.value() & ~S{S::LeastSignificantBit}.value(); - auto shifted = inputLsbCleared >> 1; - T res = (shifted & mask) | (left.value() & ~mask); - return S{res}; - }; - - auto halver = [](auto counts) { - auto msbCleared = counts & ~S{S::MostSignificantBit}; - T halved = msbCleared.value() >> T{1}; - return S{halved}; - }; - - return associativeOperatorIterated_regressive( - input, - S{1}, - numShifts, - S{S::MostSignificantBit}, - operation, - ActualBits, - halver - ); -} - -static_assert(1 >> 0 == 1); - -static_assert(rightShift_LaneWise<4>( - SWAR<4, uint16_t>{0b1000'1000'1000'1000}, - SWAR<4, uint16_t>{0b0001'0001'0001'0001} -).value() == 0b0100'0100'0100'0100); - template constexpr auto multiplication_OverflowUnsafe( SWAR multiplicand, From 2b37cf8dc483a2041f9a8c5fcfb3dbc3c69989e4 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 4 Jun 2024 20:38:25 -0700 Subject: [PATCH 10/26] clean --- inc/zoo/swar/associative_iteration.h | 32 ---------------------------- 1 file changed, 32 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 8aae6a1a..92d31604 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -496,38 +496,6 @@ constexpr auto exponentiation_OverflowUnsafe_SpecificBitCount( using namespace zoo::swar::type_defs; using S = SWAR<4, u16>; -static_assert(SWAR<4, u16>::LeastSignificantBit == 0b0001'0001'0001'0001); - -template -constexpr auto labelNumBits(S input) { - using T = typename S::type; - auto mask = makeLaneMaskFromMSB(input).value(); - T result = 0; - for(auto i = 0; i < S::Lanes; ++i) { - result = result + T(mask & 1); - mask = mask >> 1; - } - return S{result}; -} -// HOLY SHIT THIS IS ALSO ASSOCIATIVE ITERATION! -using S = SWAR<4, uint16_t>; - -template -constexpr S shiftLeftAppendOne(S input, BS should) { - using T = typename S::type; - auto mask = makeLaneMaskFromMSB(should).value(); - auto inputMsbCleared = input.value() & ~S{S::MostSignificantBit}.value(); - T shifted = inputMsbCleared << 1; - constexpr auto ones = S::LeastSignificantBit; - T appendedOnes = shifted | ones; - T result = (appendedOnes & mask) | (input.value() & ~mask); - return S{result}; -} -static_assert(shiftLeftAppendOne( - S{0b0000'0001'0100'1000}, - S{0b0000'1000'1000'1000}).value() == - 0b0000'0011'1001'0001); - /** Transforms a number into a number into a binary tally. * E.g. 0b0011 (3) -> 0b0111 */ From 26a549467b5f215f6a91a5427b32350d00152905 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 4 Jun 2024 20:47:00 -0700 Subject: [PATCH 11/26] update name --- inc/zoo/swar/associative_iteration.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 92d31604..e97123a1 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -500,19 +500,19 @@ using S = SWAR<4, u16>; /** Transforms a number into a number into a binary tally. * E.g. 0b0011 (3) -> 0b0111 */ template -constexpr auto base2TallyTransform(S input) { +constexpr auto base2TallyTransform_Plural(S input) { constexpr auto two = S{meta::BitmaskMaker::value}; constexpr auto one = S::LeastSignificantBit; typename S::type v = exponentiation_OverflowUnsafe_SpecificBitCount(two, input).value() - one; return S{v}; } -static_assert(base2TallyTransform(S{0b0000'0001'0010'0011}).value() == 0b0000'0001'0011'0111); -static_assert(base2TallyTransform(S{0b0100'0001'0010'0011}).value() == 0b1111'0001'0011'0111); +static_assert(base2TallyTransform_Plural(S{0b0000'0001'0010'0011}).value() == 0b0000'0001'0011'0111); +static_assert(base2TallyTransform_Plural(S{0b0100'0001'0010'0011}).value() == 0b1111'0001'0011'0111); template constexpr auto rightShift_Plural(S input, S shifts) { using T = typename S::type; - auto minimumMask = ~base2TallyTransform(shifts); + auto minimumMask = ~base2TallyTransform_Plural(shifts); T inputMasked = input.value() & minimumMask.value(); T result = 0; for (int i = 0; i < S::Lanes; i++) { From ca23a823850ca6034bf2e20cb8a16f94e83bf67c Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 4 Jun 2024 20:47:30 -0700 Subject: [PATCH 12/26] fix test --- test/swar/BasicOperations.cpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 87d1241f..6682f710 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -348,16 +348,6 @@ GE_MSB_ON_TEST( 0x0123'4567, 0x8888'8888) -TEST_CASE( - "right shift lanewise", - "[swar][jamie]" -) { - CHECK(rightShift_LaneWise<4>( - SWAR<4, uint16_t>{0b1000'1000'1000'1000}, - SWAR<4, uint16_t>{0b0001'0001'0001'0001} - ).value() == 0b0100'0100'0100'0100); -} - TEST_CASE( "greaterEqualMSBOn", "[swar][unsigned-swar]" From df27e450a50fff99e71ae2e180caf29b906c4022 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 4 Jun 2024 20:49:27 -0700 Subject: [PATCH 13/26] for some reason complier is sad --- inc/zoo/swar/associative_iteration.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index e97123a1..44be9ac4 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -430,10 +430,12 @@ constexpr auto multiplication_OverflowUnsafe_SpecificBitCount( auto halver = [](auto counts) { auto msbCleared = counts & ~S{S::MostSignificantBit}; - return S{msbCleared.value() << 1}; + T res = msbCleared.value() << 1; + return S{res}; }; - multiplier = S{multiplier.value() << (NB - ActualBits)}; + T val = multiplier.value() << (NB - ActualBits); + multiplier = S{val}; return associativeOperatorIterated_regressive( multiplicand, S{0}, multiplier, S{S::MostSignificantBit}, operation, ActualBits, halver From 54fdc0315cd723b5b29cbb796d3ff50218380767 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 4 Jun 2024 20:50:50 -0700 Subject: [PATCH 14/26] rm stuff --- inc/zoo/swar/SWAR.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index f44edc60..7045c527 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -13,14 +13,10 @@ namespace zoo { namespace swar { -namespace type_defs { -using u128 = __uint128_t; using u64 = uint64_t; using u32 = uint32_t; using u16 = uint16_t; using u8 = uint8_t; -} -using namespace type_defs; template constexpr uint64_t popcount(uint64_t a) noexcept { From a5b9ccb8384bf45e805882c153a7e1bd8c7e3e10 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 4 Jun 2024 20:51:08 -0700 Subject: [PATCH 15/26] rm --- inc/zoo/swar/SWAR.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 7045c527..72946992 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -4,7 +4,6 @@ #include "zoo/meta/log.h" -#include #include #ifdef _MSC_VER @@ -16,7 +15,7 @@ namespace zoo { namespace swar { using u64 = uint64_t; using u32 = uint32_t; using u16 = uint16_t; -using u8 = uint8_t; +using u8 = std::uint8_t; template constexpr uint64_t popcount(uint64_t a) noexcept { From 9cd7c83ea2811fbc9087039167000bad0c1da664 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 4 Jun 2024 20:52:00 -0700 Subject: [PATCH 16/26] more clean --- inc/zoo/swar/associative_iteration.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 44be9ac4..a18f2b10 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -3,8 +3,6 @@ #include "zoo/meta/BitmaskMaker.h" #include "zoo/swar/SWAR.h" -#include -#include //#define ZOO_DEVELOPMENT_DEBUGGING #ifdef ZOO_DEVELOPMENT_DEBUGGING @@ -496,7 +494,6 @@ constexpr auto exponentiation_OverflowUnsafe_SpecificBitCount( ); } -using namespace zoo::swar::type_defs; using S = SWAR<4, u16>; /** Transforms a number into a number into a binary tally. From 9e52623293834512b243a0bdf38f4151aca5404d Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 4 Jun 2024 20:54:22 -0700 Subject: [PATCH 17/26] clean --- inc/zoo/swar/associative_iteration.h | 1 - test/swar/BasicOperations.cpp | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index a18f2b10..06366210 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -257,7 +257,6 @@ constexpr auto makeLaneMaskFromLSB(SWAR input) { return impl::makeLaneMaskFromMSB_and_LSB(lsbCopiedToMSB, lsb); } -// TODO WRITE TEST FOR THIS template constexpr auto makeLaneMaskFromMSB(SWAR input) { using S = SWAR; diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 6682f710..73521425 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -42,6 +42,11 @@ constexpr auto PrecisionFixtureTest = 0x89ABCDEF; constexpr auto Doubled = doublePrecision(SWAR<4, uint32_t>{PrecisionFixtureTest}); +static_assert(makeLaneMaskFromMSB(SWAR<4, uint16_t>{ + 0b1000'0000'1000'0000}).value() == + 0b1111'0000'1111'0000 +); + static_assert(0x090B0D0F == Doubled.even.value()); static_assert(0x080A0C0E == Doubled.odd.value()); static_assert(PrecisionFixtureTest == halvePrecision(Doubled.even, Doubled.odd).value()); From a43259dff5c0f4433d0ce60f15984b030139d128 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 4 Jun 2024 21:35:21 -0700 Subject: [PATCH 18/26] ok fix lol --- inc/zoo/swar/associative_iteration.h | 38 +++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 06366210..890f3e11 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -265,7 +265,6 @@ constexpr auto makeLaneMaskFromMSB(SWAR input) { auto msbCopiedToLSB = S{val}; return impl::makeLaneMaskFromMSB_and_LSB(msb, msbCopiedToLSB); } -static_assert(makeLaneMaskFromMSB(SWAR<4, uint16_t>{0b1001'0000'0000'0000}).value() == 0b1111'0000'0000'0000); template struct ArithmeticResultTriplet { @@ -506,28 +505,53 @@ constexpr auto base2TallyTransform_Plural(S input) { } static_assert(base2TallyTransform_Plural(S{0b0000'0001'0010'0011}).value() == 0b0000'0001'0011'0111); static_assert(base2TallyTransform_Plural(S{0b0100'0001'0010'0011}).value() == 0b1111'0001'0011'0111); +static_assert(base2TallyTransform_Plural(S{0b0000'0000'0000'0001}).value() == 0b0000'0000'0000'0001); template constexpr auto rightShift_Plural(S input, S shifts) { using T = typename S::type; - auto minimumMask = ~base2TallyTransform_Plural(shifts); - T inputMasked = input.value() & minimumMask.value(); + auto minimumMask = ~base2TallyTransform_Plural(shifts); // 1111'1111'1111'1110 + auto inputMasked = input.value() & minimumMask.value(); // 0000'0000'1111'0000 + T result = 0; for (int i = 0; i < S::Lanes; i++) { - T currentNumShift = shifts.at(i); - auto shifted = inputMasked >> currentNumShift; auto laneMask = S::laneMask(i); - auto thisResult = shifted & laneMask; - result |= thisResult; + auto currentShiftAmount = shifts.at(i); + auto masked = inputMasked & laneMask; + auto shifted = masked >> currentShiftAmount; + result |= shifted; } return S{result}; } +static_assert(1 >> 0 == 1); + +static_assert(rightShift_Plural( + S{0b0000'0000'1111'0001}, + S{0b0000'0000'0000'0001} +).value() == 0b0000'0000'1111'0000); + static_assert(rightShift_Plural( S{0b0000'1000'1000'1000}, S{0b0100'0011'0010'0001} ).value() == 0b0000'0001'0010'0100); +static_assert(rightShift_Plural( + S{0b1111'1111'1111'1111}, + S{0b0001'0001'0001'0001} +).value() == 0b0111'0111'0111'0111); + +static_assert(rightShift_Plural( + S{0b0000'0000'1111'0001}, + S{0b0000'0000'0000'0000} +).value() == 0b0000'0000'1111'0001); + +static_assert(rightShift_Plural( + S{0b0000'0000'1111'0001}, + S{0b0000'0000'0001'0001} +).value() == 0b0000'0000'0111'0000); + + static_assert(S::LeastSignificantLaneMask == 0b0000'0000'0000'1111); static_assert(S::laneMask(0) == 0b0000'0000'0000'1111); static_assert(S::laneMask(1) == 0b0000'0000'1111'0000); From 1e98db42c761aba9b6de46714f512554285865a4 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 4 Jun 2024 21:36:57 -0700 Subject: [PATCH 19/26] clean --- inc/zoo/swar/associative_iteration.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 890f3e11..2908e33b 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -510,8 +510,8 @@ static_assert(base2TallyTransform_Plural(S{0b0000'0000'0000'0001}).value() == 0b template constexpr auto rightShift_Plural(S input, S shifts) { using T = typename S::type; - auto minimumMask = ~base2TallyTransform_Plural(shifts); // 1111'1111'1111'1110 - auto inputMasked = input.value() & minimumMask.value(); // 0000'0000'1111'0000 + auto minimumMask = ~base2TallyTransform_Plural(shifts); + auto inputMasked = input.value() & minimumMask.value(); T result = 0; for (int i = 0; i < S::Lanes; i++) { From 123c37cc02e74e2dc27013b17b48b3067cdea1d9 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 4 Jun 2024 21:39:03 -0700 Subject: [PATCH 20/26] more test --- inc/zoo/swar/associative_iteration.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 2908e33b..f9af7365 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -526,6 +526,11 @@ constexpr auto rightShift_Plural(S input, S shifts) { static_assert(1 >> 0 == 1); +static_assert(rightShift_Plural( + S{0b0111'0111'0111'0111}, + S{0b0010'0010'0010'0010} +).value() == 0b0001'0001'0001'0001); + static_assert(rightShift_Plural( S{0b0000'0000'1111'0001}, S{0b0000'0000'0000'0001} From 65d03a0d6fe7097100793cc886b99738bfcf4d57 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Tue, 4 Jun 2024 21:46:32 -0700 Subject: [PATCH 21/26] warning about tally width --- inc/zoo/swar/associative_iteration.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index f9af7365..33205bc1 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -3,6 +3,7 @@ #include "zoo/meta/BitmaskMaker.h" #include "zoo/swar/SWAR.h" +#include //#define ZOO_DEVELOPMENT_DEBUGGING #ifdef ZOO_DEVELOPMENT_DEBUGGING @@ -495,7 +496,9 @@ constexpr auto exponentiation_OverflowUnsafe_SpecificBitCount( using S = SWAR<4, u16>; /** Transforms a number into a number into a binary tally. - * E.g. 0b0011 (3) -> 0b0111 */ + * E.g. 0b0011 (3) -> 0b0111 + * It seems that trying to get the lane width as a tally is weird and overflowy. + * */ template constexpr auto base2TallyTransform_Plural(S input) { constexpr auto two = S{meta::BitmaskMaker::value}; @@ -503,9 +506,11 @@ constexpr auto base2TallyTransform_Plural(S input) { typename S::type v = exponentiation_OverflowUnsafe_SpecificBitCount(two, input).value() - one; return S{v}; } +static_assert(base2TallyTransform_Plural(S{0b0001'0010'0011'0011}).value() == 0b0001'0011'0111'0111); static_assert(base2TallyTransform_Plural(S{0b0000'0001'0010'0011}).value() == 0b0000'0001'0011'0111); static_assert(base2TallyTransform_Plural(S{0b0100'0001'0010'0011}).value() == 0b1111'0001'0011'0111); static_assert(base2TallyTransform_Plural(S{0b0000'0000'0000'0001}).value() == 0b0000'0000'0000'0001); +static_assert(base2TallyTransform_Plural(SWAR<8, uint16_t>{0b000000111'00000101}).value() == 0b01111111'00011111); // 7 -> 5 template constexpr auto rightShift_Plural(S input, S shifts) { From 91ffbaf65fabc5336bbeb4f2120a9b83bd8fbe04 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Wed, 5 Jun 2024 14:26:09 -0700 Subject: [PATCH 22/26] rename --- inc/zoo/swar/SWAR.h | 3 --- inc/zoo/swar/associative_iteration.h | 27 ++++++++++++++++++++------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h index 72946992..eecbcec3 100644 --- a/inc/zoo/swar/SWAR.h +++ b/inc/zoo/swar/SWAR.h @@ -75,7 +75,6 @@ struct SWAR { SignificantBitsCount = BitWidth - PaddingBitsCount, AllOnes = ~std::make_unsigned_t{0} >> PaddingBitsCount, // Also constructed in RobinHood utils: possible bug? LeastSignificantBit = meta::BitmaskMaker{1}, NBits>::value, - AllOnesInFirstLane = AllOnes >> (NBits * (Lanes - 1)), MostSignificantBit = LeastSignificantBit << (NBits - 1), LeastSignificantLaneMask = []() { if constexpr (NBits < sizeof(T) * 8) { @@ -492,6 +491,4 @@ static_assert( 0x0706050403020100ull ); -static_assert(SWAR<4, uint16_t>::AllOnesInFirstLane == 0b0000'0000'0000'1111); - }} diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 33205bc1..f0bb69b1 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -495,27 +495,40 @@ constexpr auto exponentiation_OverflowUnsafe_SpecificBitCount( using S = SWAR<4, u16>; +template +constexpr auto binaryToUnaryAtMSB(T binary) { + using UT = std::make_unsigned_t; + constexpr auto + AllOnes = ~UT(0), + ZeroAtMSB_OnesRest = AllOnes >> UT(1), + MSB = ~ZeroAtMSB_OnesRest; + auto + MSB_Shifted = MSB >> binary, + unaryNearMSB = MSB - MSB_Shifted; + return unaryNearMSB << 1; +} + /** Transforms a number into a number into a binary tally. * E.g. 0b0011 (3) -> 0b0111 * It seems that trying to get the lane width as a tally is weird and overflowy. * */ template -constexpr auto base2TallyTransform_Plural(S input) { +constexpr auto binaryToUnary_Plural(S input) { constexpr auto two = S{meta::BitmaskMaker::value}; constexpr auto one = S::LeastSignificantBit; typename S::type v = exponentiation_OverflowUnsafe_SpecificBitCount(two, input).value() - one; return S{v}; } -static_assert(base2TallyTransform_Plural(S{0b0001'0010'0011'0011}).value() == 0b0001'0011'0111'0111); -static_assert(base2TallyTransform_Plural(S{0b0000'0001'0010'0011}).value() == 0b0000'0001'0011'0111); -static_assert(base2TallyTransform_Plural(S{0b0100'0001'0010'0011}).value() == 0b1111'0001'0011'0111); -static_assert(base2TallyTransform_Plural(S{0b0000'0000'0000'0001}).value() == 0b0000'0000'0000'0001); -static_assert(base2TallyTransform_Plural(SWAR<8, uint16_t>{0b000000111'00000101}).value() == 0b01111111'00011111); // 7 -> 5 +static_assert(binaryToUnary_Plural(S{0b0001'0010'0011'0011}).value() == 0b0001'0011'0111'0111); +static_assert(binaryToUnary_Plural(S{0b0000'0001'0010'0011}).value() == 0b0000'0001'0011'0111); +static_assert(binaryToUnary_Plural(S{0b0100'0001'0010'0011}).value() == 0b1111'0001'0011'0111); +static_assert(binaryToUnary_Plural(S{0b0000'0000'0000'0001}).value() == 0b0000'0000'0000'0001); +static_assert(binaryToUnary_Plural(SWAR<8, uint16_t>{0b000000111'00000101}).value() == 0b01111111'00011111); // 7 -> 5 template constexpr auto rightShift_Plural(S input, S shifts) { using T = typename S::type; - auto minimumMask = ~base2TallyTransform_Plural(shifts); + auto minimumMask = ~binaryToUnary_Plural(shifts); auto inputMasked = input.value() & minimumMask.value(); T result = 0; From 49dc06910c2b17e433385f6d4a603155c326d5ab Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Wed, 5 Jun 2024 14:54:37 -0700 Subject: [PATCH 23/26] make things prettier --- inc/zoo/swar/associative_iteration.h | 61 ++++++++++++++++++++-------- 1 file changed, 45 insertions(+), 16 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index f0bb69b1..dd52b60b 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -508,30 +508,54 @@ constexpr auto binaryToUnaryAtMSB(T binary) { return unaryNearMSB << 1; } -/** Transforms a number into a number into a binary tally. - * E.g. 0b0011 (3) -> 0b0111 - * It seems that trying to get the lane width as a tally is weird and overflowy. - * */ +/** Transforms a binary number into it's unary representation (in binary). + * E.g. 0b0011 (3) -> 0b0111 + * It seems that getting the lane width exactly is overflowy */ template constexpr auto binaryToUnary_Plural(S input) { constexpr auto two = S{meta::BitmaskMaker::value}; constexpr auto one = S::LeastSignificantBit; + constexpr auto max_size = S::LeastSignificantLaneMask; typename S::type v = exponentiation_OverflowUnsafe_SpecificBitCount(two, input).value() - one; return S{v}; } -static_assert(binaryToUnary_Plural(S{0b0001'0010'0011'0011}).value() == 0b0001'0011'0111'0111); -static_assert(binaryToUnary_Plural(S{0b0000'0001'0010'0011}).value() == 0b0000'0001'0011'0111); -static_assert(binaryToUnary_Plural(S{0b0100'0001'0010'0011}).value() == 0b1111'0001'0011'0111); -static_assert(binaryToUnary_Plural(S{0b0000'0000'0000'0001}).value() == 0b0000'0000'0000'0001); -static_assert(binaryToUnary_Plural(SWAR<8, uint16_t>{0b000000111'00000101}).value() == 0b01111111'00011111); // 7 -> 5 + +template +constexpr static bool binaryToUnary_Plural_Test() { + return binaryToUnary_Plural(SWAR{Input}).value() == Expected; +}; + +static_assert(binaryToUnary_Plural_Test<4, uint16_t, + 0b0001'0010'0011'0011, + 0b0001'0011'0111'0111 +>()); + +static_assert(binaryToUnary_Plural_Test<4, uint16_t, + 0b0000'0001'0010'0011, + 0b0000'0001'0011'0111 +>()); + +static_assert(binaryToUnary_Plural_Test<4, uint16_t, + 0b0100'0001'0010'0011, + 0b1111'0001'0011'0111 +>()); + +static_assert(binaryToUnary_Plural_Test<4, uint16_t, + 0b0000'0000'0000'0001, + 0b0000'0000'0000'0001 +>()); + +static_assert(binaryToUnary_Plural_Test<8, uint16_t, + 0b000000111'00000101, // 7 ' 5 + 0b001111111'00011111 // seven ones, fives ones! +>()); template constexpr auto rightShift_Plural(S input, S shifts) { - using T = typename S::type; auto minimumMask = ~binaryToUnary_Plural(shifts); auto inputMasked = input.value() & minimumMask.value(); - T result = 0; + typename S::type result = 0; for (int i = 0; i < S::Lanes; i++) { auto laneMask = S::laneMask(i); auto currentShiftAmount = shifts.at(i); @@ -542,12 +566,17 @@ constexpr auto rightShift_Plural(S input, S shifts) { return S{result}; } -static_assert(1 >> 0 == 1); +template +constexpr static bool rightShift_Plural_Test() { + using S = SWAR; + return rightShift_Plural(S{Input}, S{Count}).value() == Expected; +}; -static_assert(rightShift_Plural( - S{0b0111'0111'0111'0111}, - S{0b0010'0010'0010'0010} -).value() == 0b0001'0001'0001'0001); +static_assert(rightShift_Plural_Test<4, uint16_t, + 0b0111'0111'0111'0111, // input + 0b0010'0010'0010'0010, // 2 ' 2 ' 2 ' 2 + 0b0001'0001'0001'0001 // notice, input, shifted over two to right! +>()); static_assert(rightShift_Plural( S{0b0000'0000'1111'0001}, From 98565d444a31ddd071b783bd63ff25e38e2abe00 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Wed, 5 Jun 2024 14:58:18 -0700 Subject: [PATCH 24/26] prettier --- inc/zoo/swar/associative_iteration.h | 47 +++++++++++++++------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index dd52b60b..feeb0fde 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -515,7 +515,6 @@ template constexpr auto binaryToUnary_Plural(S input) { constexpr auto two = S{meta::BitmaskMaker::value}; constexpr auto one = S::LeastSignificantBit; - constexpr auto max_size = S::LeastSignificantLaneMask; typename S::type v = exponentiation_OverflowUnsafe_SpecificBitCount(two, input).value() - one; return S{v}; } @@ -578,31 +577,35 @@ static_assert(rightShift_Plural_Test<4, uint16_t, 0b0001'0001'0001'0001 // notice, input, shifted over two to right! >()); -static_assert(rightShift_Plural( - S{0b0000'0000'1111'0001}, - S{0b0000'0000'0000'0001} -).value() == 0b0000'0000'1111'0000); - -static_assert(rightShift_Plural( - S{0b0000'1000'1000'1000}, - S{0b0100'0011'0010'0001} -).value() == 0b0000'0001'0010'0100); +static_assert(rightShift_Plural_Test<4, uint16_t, + 0b0000'0000'1111'0001, + 0b0000'0000'0000'0001, + 0b0000'0000'1111'0000 +>()); -static_assert(rightShift_Plural( - S{0b1111'1111'1111'1111}, - S{0b0001'0001'0001'0001} -).value() == 0b0111'0111'0111'0111); +static_assert(rightShift_Plural_Test<4, uint16_t, + 0b0000'1000'1000'1000, + 0b0100'0011'0010'0001, + 0b0000'0001'0010'0100 +>()); -static_assert(rightShift_Plural( - S{0b0000'0000'1111'0001}, - S{0b0000'0000'0000'0000} -).value() == 0b0000'0000'1111'0001); +static_assert(rightShift_Plural_Test<4, uint16_t, + 0b1111'1111'1111'1111, + 0b0001'0001'0001'0001, + 0b0111'0111'0111'0111 +>()); -static_assert(rightShift_Plural( - S{0b0000'0000'1111'0001}, - S{0b0000'0000'0001'0001} -).value() == 0b0000'0000'0111'0000); +static_assert(rightShift_Plural_Test<4, uint16_t, + 0b0000'0000'1111'0001, + 0b0000'0000'0000'0000, + 0b0000'0000'1111'0001 +>()); +static_assert(rightShift_Plural_Test<4, uint16_t, + 0b0000'0000'1111'0001, + 0b0000'0000'0001'0001, + 0b0000'0000'0111'0000 +>()); static_assert(S::LeastSignificantLaneMask == 0b0000'0000'0000'1111); static_assert(S::laneMask(0) == 0b0000'0000'0000'1111); From 0fa63bd8adca3672e3413ca3a868a978f56e4e16 Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Wed, 5 Jun 2024 15:02:56 -0700 Subject: [PATCH 25/26] rm eduardo code for now --- inc/zoo/swar/associative_iteration.h | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index feeb0fde..0aa80079 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -494,20 +494,6 @@ constexpr auto exponentiation_OverflowUnsafe_SpecificBitCount( } using S = SWAR<4, u16>; - -template -constexpr auto binaryToUnaryAtMSB(T binary) { - using UT = std::make_unsigned_t; - constexpr auto - AllOnes = ~UT(0), - ZeroAtMSB_OnesRest = AllOnes >> UT(1), - MSB = ~ZeroAtMSB_OnesRest; - auto - MSB_Shifted = MSB >> binary, - unaryNearMSB = MSB - MSB_Shifted; - return unaryNearMSB << 1; -} - /** Transforms a binary number into it's unary representation (in binary). * E.g. 0b0011 (3) -> 0b0111 * It seems that getting the lane width exactly is overflowy */ @@ -515,6 +501,7 @@ template constexpr auto binaryToUnary_Plural(S input) { constexpr auto two = S{meta::BitmaskMaker::value}; constexpr auto one = S::LeastSignificantBit; + constexpr auto max_size = S::LeastSignificantLaneMask; typename S::type v = exponentiation_OverflowUnsafe_SpecificBitCount(two, input).value() - one; return S{v}; } From 0207738bdd60bd63ce16795efd39dca3ab68cd0d Mon Sep 17 00:00:00 2001 From: Jamie Pond <73431532+jamierpond@users.noreply.github.com> Date: Wed, 5 Jun 2024 15:05:06 -0700 Subject: [PATCH 26/26] move tests --- inc/zoo/swar/associative_iteration.h | 80 --------------------------- test/swar/BasicOperations.cpp | 82 ++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 80 deletions(-) diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h index 0aa80079..137028eb 100644 --- a/inc/zoo/swar/associative_iteration.h +++ b/inc/zoo/swar/associative_iteration.h @@ -493,7 +493,6 @@ constexpr auto exponentiation_OverflowUnsafe_SpecificBitCount( ); } -using S = SWAR<4, u16>; /** Transforms a binary number into it's unary representation (in binary). * E.g. 0b0011 (3) -> 0b0111 * It seems that getting the lane width exactly is overflowy */ @@ -506,36 +505,6 @@ constexpr auto binaryToUnary_Plural(S input) { return S{v}; } -template -constexpr static bool binaryToUnary_Plural_Test() { - return binaryToUnary_Plural(SWAR{Input}).value() == Expected; -}; - -static_assert(binaryToUnary_Plural_Test<4, uint16_t, - 0b0001'0010'0011'0011, - 0b0001'0011'0111'0111 ->()); - -static_assert(binaryToUnary_Plural_Test<4, uint16_t, - 0b0000'0001'0010'0011, - 0b0000'0001'0011'0111 ->()); - -static_assert(binaryToUnary_Plural_Test<4, uint16_t, - 0b0100'0001'0010'0011, - 0b1111'0001'0011'0111 ->()); - -static_assert(binaryToUnary_Plural_Test<4, uint16_t, - 0b0000'0000'0000'0001, - 0b0000'0000'0000'0001 ->()); - -static_assert(binaryToUnary_Plural_Test<8, uint16_t, - 0b000000111'00000101, // 7 ' 5 - 0b001111111'00011111 // seven ones, fives ones! ->()); - template constexpr auto rightShift_Plural(S input, S shifts) { auto minimumMask = ~binaryToUnary_Plural(shifts); @@ -552,55 +521,6 @@ constexpr auto rightShift_Plural(S input, S shifts) { return S{result}; } -template -constexpr static bool rightShift_Plural_Test() { - using S = SWAR; - return rightShift_Plural(S{Input}, S{Count}).value() == Expected; -}; - -static_assert(rightShift_Plural_Test<4, uint16_t, - 0b0111'0111'0111'0111, // input - 0b0010'0010'0010'0010, // 2 ' 2 ' 2 ' 2 - 0b0001'0001'0001'0001 // notice, input, shifted over two to right! ->()); - -static_assert(rightShift_Plural_Test<4, uint16_t, - 0b0000'0000'1111'0001, - 0b0000'0000'0000'0001, - 0b0000'0000'1111'0000 ->()); - -static_assert(rightShift_Plural_Test<4, uint16_t, - 0b0000'1000'1000'1000, - 0b0100'0011'0010'0001, - 0b0000'0001'0010'0100 ->()); - -static_assert(rightShift_Plural_Test<4, uint16_t, - 0b1111'1111'1111'1111, - 0b0001'0001'0001'0001, - 0b0111'0111'0111'0111 ->()); - -static_assert(rightShift_Plural_Test<4, uint16_t, - 0b0000'0000'1111'0001, - 0b0000'0000'0000'0000, - 0b0000'0000'1111'0001 ->()); - -static_assert(rightShift_Plural_Test<4, uint16_t, - 0b0000'0000'1111'0001, - 0b0000'0000'0001'0001, - 0b0000'0000'0111'0000 ->()); - -static_assert(S::LeastSignificantLaneMask == 0b0000'0000'0000'1111); -static_assert(S::laneMask(0) == 0b0000'0000'0000'1111); -static_assert(S::laneMask(1) == 0b0000'0000'1111'0000); -static_assert(S::laneMask(2) == 0b0000'1111'0000'0000); -static_assert(S::laneMask(3) == 0b1111'0000'0000'0000); -static_assert(S{S::laneMask(3)}.at(3) == 0b0000'0000'0000'1111); - template constexpr auto multiplication_OverflowUnsafe( SWAR multiplicand, diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp index 73521425..d61a3598 100644 --- a/test/swar/BasicOperations.cpp +++ b/test/swar/BasicOperations.cpp @@ -460,3 +460,85 @@ TEST_CASE( CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value()); CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value()); } + +template +constexpr static bool binaryToUnary_Plural_Test() { + return binaryToUnary_Plural(SWAR{Input}).value() == Expected; +}; + +static_assert(binaryToUnary_Plural_Test<4, uint16_t, + 0b0001'0010'0011'0011, + 0b0001'0011'0111'0111 +>()); + +static_assert(binaryToUnary_Plural_Test<4, uint16_t, + 0b0000'0001'0010'0011, + 0b0000'0001'0011'0111 +>()); + +static_assert(binaryToUnary_Plural_Test<4, uint16_t, + 0b0100'0001'0010'0011, + 0b1111'0001'0011'0111 +>()); + +static_assert(binaryToUnary_Plural_Test<4, uint16_t, + 0b0000'0000'0000'0001, + 0b0000'0000'0000'0001 +>()); + +static_assert(binaryToUnary_Plural_Test<8, uint16_t, + 0b000000111'00000101, // 7 ' 5 + 0b001111111'00011111 // seven ones, fives ones! +>()); + +template +constexpr static bool rightShift_Plural_Test() { + using S = SWAR; + return rightShift_Plural(S{Input}, S{Count}).value() == Expected; +}; + +static_assert(rightShift_Plural_Test<4, uint16_t, + 0b0111'0111'0111'0111, // input + 0b0010'0010'0010'0010, // 2 ' 2 ' 2 ' 2 + 0b0001'0001'0001'0001 // notice, input, shifted over two to right! +>()); + +static_assert(rightShift_Plural_Test<4, uint16_t, + 0b0000'0000'1111'0001, + 0b0000'0000'0000'0001, + 0b0000'0000'1111'0000 +>()); + +static_assert(rightShift_Plural_Test<4, uint16_t, + 0b0000'1000'1000'1000, + 0b0100'0011'0010'0001, + 0b0000'0001'0010'0100 +>()); + +static_assert(rightShift_Plural_Test<4, uint16_t, + 0b1111'1111'1111'1111, + 0b0001'0001'0001'0001, + 0b0111'0111'0111'0111 +>()); + +static_assert(rightShift_Plural_Test<4, uint16_t, + 0b0000'0000'1111'0001, + 0b0000'0000'0000'0000, + 0b0000'0000'1111'0001 +>()); + +static_assert(rightShift_Plural_Test<4, uint16_t, + 0b0000'0000'1111'0001, + 0b0000'0000'0001'0001, + 0b0000'0000'0111'0000 +>()); + +using S = SWAR<4, uint16_t>; +static_assert(S::LeastSignificantLaneMask == 0b0000'0000'0000'1111); +static_assert(S::laneMask(0) == 0b0000'0000'0000'1111); +static_assert(S::laneMask(1) == 0b0000'0000'1111'0000); +static_assert(S::laneMask(2) == 0b0000'1111'0000'0000); +static_assert(S::laneMask(3) == 0b1111'0000'0000'0000); +static_assert(S{S::laneMask(3)}.at(3) == 0b0000'0000'0000'1111); + +