@@ -144,7 +144,7 @@ import Control.DeepSeq (NFData (..), NFData1 (..), NFData2 (..))
144144import Control.Monad.ST (ST , runST )
145145import Data.Bifoldable (Bifoldable (.. ))
146146import Data.Bits (complement , popCount , unsafeShiftL ,
147- unsafeShiftR , (.&.) , (.|.) )
147+ unsafeShiftR , (.&.) , (.|.) , countTrailingZeros )
148148import Data.Coerce (coerce )
149149import Data.Data (Constr , Data (.. ), DataType )
150150import Data.Functor.Classes (Eq1 (.. ), Eq2 (.. ), Ord1 (.. ), Ord2 (.. ),
@@ -1622,26 +1622,27 @@ unionArrayBy :: (a -> a -> a) -> Bitmap -> Bitmap -> A.Array a -> A.Array a
16221622-- Core size reductions with GHC 9.2.2. See the Core diffs in
16231623-- https://github.com/haskell-unordered-containers/unordered-containers/pull/376.
16241624unionArrayBy f ! b1 ! b2 ! ary1 ! ary2 = A. run $ do
1625- let b' = b1 .|. b2
1626- mary <- A. new_ (popCount b' )
1625+ let bCombined = b1 .|. b2
1626+ mary <- A. new_ (popCount bCombined )
16271627 -- iterate over nonzero bits of b1 .|. b2
1628- -- it would be nice if we could shift m by more than 1 each time
1629- let ba = b1 .&. b2
1630- go ! i ! i1 ! i2 ! m
1631- | m > b' = return ()
1632- | b' .&. m == 0 = go i i1 i2 (m `unsafeShiftL` 1 )
1633- | ba .&. m /= 0 = do
1628+ let go ! i ! i1 ! i2 ! b
1629+ | b == 0 = return ()
1630+ | testBit (b1 .&. b2) = do
16341631 x1 <- A. indexM ary1 i1
16351632 x2 <- A. indexM ary2 i2
16361633 A. write mary i $! f x1 x2
1637- go (i+ 1 ) (i1+ 1 ) (i2+ 1 ) (m `unsafeShiftL` 1 )
1638- | b1 .&. m /= 0 = do
1634+ go (i+ 1 ) (i1+ 1 ) (i2+ 1 ) b'
1635+ | testBit b1 = do
16391636 A. write mary i =<< A. indexM ary1 i1
1640- go (i+ 1 ) (i1+ 1 ) i2 (m `unsafeShiftL` 1 )
1641- | otherwise = do
1637+ go (i+ 1 ) (i1+ 1 ) i2 b'
1638+ | otherwise = do
16421639 A. write mary i =<< A. indexM ary2 i2
1643- go (i+ 1 ) i1 (i2+ 1 ) (m `unsafeShiftL` 1 )
1644- go 0 0 0 (b' .&. negate b') -- XXX: b' must be non-zero
1640+ go (i+ 1 ) i1 (i2+ 1 ) b'
1641+ where
1642+ m = 1 `unsafeShiftL` (countTrailingZeros b)
1643+ testBit x = x .&. m /= 0
1644+ b' = b .&. complement m
1645+ go 0 0 0 bCombined
16451646 return mary
16461647 -- TODO: For the case where b1 .&. b2 == b1, i.e. when one is a
16471648 -- subset of the other, we could use a slightly simpler algorithm,
0 commit comments