@@ -311,8 +311,20 @@ fromListConstr = Data.mkConstr hashMapDataType "fromList" [] Data.Prefix
311311hashMapDataType :: DataType
312312hashMapDataType = Data. mkDataType " Data.HashMap.Internal.HashMap" [fromListConstr]
313313
314+ -- | This type is used to store the hash of a key, as produced with 'hash'.
314315type Hash = Word
316+
317+ -- | A bitmap as contained by a 'BitmapIndexed' node, or a 'fullNodeMask'
318+ -- corresponding to a 'Full' node.
319+ --
320+ -- Only the lower 'maxChildren' bits are used. The remaining bits must be zeros.
315321type Bitmap = Word
322+
323+ -- | 'Shift' values correspond to the level of the tree that we're currently
324+ -- operating at. At the root level the 'Shift' is @0@. For the subsequent
325+ -- levels the 'Shift' values are 'bitsPerSubkey', @2*'bitsPerSubkey'@ etc.
326+ --
327+ -- Valid values are non-negative and less than @bitSize (0 :: Word)@.
316328type Shift = Int
317329
318330instance Show2 HashMap where
@@ -2358,36 +2370,72 @@ clone ary =
23582370------------------------------------------------------------------------
23592371-- Bit twiddling
23602372
2373+ -- TODO: Name this 'bitsPerLevel'?! What is a "subkey"?
2374+ -- https://github.com/haskell-unordered-containers/unordered-containers/issues/425
2375+
2376+ -- | Number of bits that are inspected at each level of the hash tree.
2377+ --
2378+ -- This constant is named /t/ in the original /Ideal Hash Trees/ paper.
23612379bitsPerSubkey :: Int
23622380bitsPerSubkey = 5
23632381
2382+ -- | The size of a 'Full' node, i.e. @2 ^ 'bitsPerSubkey'@.
23642383maxChildren :: Int
23652384maxChildren = 1 `unsafeShiftL` bitsPerSubkey
23662385
2367- subkeyMask :: Bitmap
2386+ -- | Bit mask with the lowest 'bitsPerSubkey' bits set, i.e. @0b11111@.
2387+ subkeyMask :: Word
23682388subkeyMask = 1 `unsafeShiftL` bitsPerSubkey - 1
23692389
2370- sparseIndex :: Bitmap -> Bitmap -> Int
2371- sparseIndex b m = popCount (b .&. (m - 1 ))
2372- {-# INLINE sparseIndex #-}
2390+ -- | Given a 'Hash' and a 'Shift' that indicates the level in the tree, compute
2391+ -- the index into a 'Full' node or into the bitmap of a `BitmapIndexed` node.
2392+ --
2393+ -- >>> index 0b0010_0010 0
2394+ -- 0b0000_0010
2395+ index :: Hash -> Shift -> Int
2396+ index w s = fromIntegral $ unsafeShiftR w s .&. subkeyMask
2397+ {-# INLINE index #-}
23732398
2374- mask :: Word -> Shift -> Bitmap
2399+ -- | Given a 'Hash' and a 'Shift' that indicates the level in the tree, compute
2400+ -- the bitmap that contains only the 'index' of the hash at this level.
2401+ --
2402+ -- The result can be used for constructing one-element 'BitmapIndexed' nodes or
2403+ -- to check whether a 'BitmapIndexed' node may possibly contain the given 'Hash'.
2404+ --
2405+ -- >>> mask 0b0010_0010 0
2406+ -- 0b0100
2407+ mask :: Hash -> Shift -> Bitmap
23752408mask w s = 1 `unsafeShiftL` index w s
23762409{-# INLINE mask #-}
23772410
2378- -- | Mask out the 'bitsPerSubkey' bits used for indexing at this level
2379- -- of the tree.
2380- index :: Hash -> Shift -> Int
2381- index w s = fromIntegral $ unsafeShiftR w s .&. subkeyMask
2382- {-# INLINE index #-}
2411+ -- | This array index is computed by counting the number of bits below the
2412+ -- 'index' represented by the mask.
2413+ --
2414+ -- >>> sparseIndex 0b0110_0110 0b0010_0000
2415+ -- 2
2416+ sparseIndex
2417+ :: Bitmap
2418+ -- ^ Bitmap of a 'BitmapIndexed' node
2419+ -> Bitmap
2420+ -- ^ One-bit 'mask' corresponding to the 'index' of a hash
2421+ -> Int
2422+ -- ^ Index into the array of the 'BitmapIndexed' node
2423+ sparseIndex b m = popCount (b .&. (m - 1 ))
2424+ {-# INLINE sparseIndex #-}
2425+
2426+ -- TODO: Should be named _(bit)map_ instead of _mask_
23832427
2384- -- | A bitmask with the 'bitsPerSubkey' least significant bits set.
2428+ -- | A bitmap with the 'maxChildren' least significant bits set, i.e.
2429+ -- @0xFF_FF_FF_FF@.
23852430fullNodeMask :: Bitmap
23862431-- This needs to use 'shiftL' instead of 'unsafeShiftL', to avoid UB.
23872432-- See issue #412.
23882433fullNodeMask = complement (complement 0 `shiftL` maxChildren)
23892434{-# INLINE fullNodeMask #-}
23902435
2436+ ------------------------------------------------------------------------
2437+ -- Pointer equality
2438+
23912439-- | Check if two the two arguments are the same value. N.B. This
23922440-- function might give false negatives (due to GC moving objects.)
23932441ptrEq :: a -> a -> Bool
0 commit comments