diff --git a/.github/workflows/cabal.yaml b/.github/workflows/cabal.yaml index 3bd40a2..0854cde 100644 --- a/.github/workflows/cabal.yaml +++ b/.github/workflows/cabal.yaml @@ -11,7 +11,7 @@ jobs: strategy: fail-fast: false matrix: - ghc: # should mirror current stable releases: https://www.haskell.org/ghc/download.html + ghc: - '9.8' - '9.10' os: [ubuntu-latest, macOS-latest, windows-latest] @@ -19,7 +19,7 @@ jobs: steps: - uses: actions/checkout@v3 - - uses: haskell/actions/setup@v2 # https://github.com/haskell/actions/tree/main/setup#haskellactionssetup + - uses: haskell/actions/setup@v2 with: ghc-version: ${{ matrix.ghc }} diff --git a/.github/workflows/nix.yaml b/.github/workflows/nix.yaml index ba6acc1..3304d87 100644 --- a/.github/workflows/nix.yaml +++ b/.github/workflows/nix.yaml @@ -19,13 +19,9 @@ jobs: extra_nix_config: | access-tokens = github.com=${{ secrets.GITHUB_TOKEN }} - # follow guide: https://nix.dev/tutorials/continuous-integration-github-actions - # this uses the tokens which are revokable - uses: cachix/cachix-action@v15 with: name: jappie - # If you chose API tokens for write access OR if you have a private cache - # authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}' signingKey: '${{ secrets.CACHIX_SIGNING_KEY }}' - - run: nix-build - - run: nix-shell --run "echo OK" + - run: nix-build nix/ci.nix + - run: nix-shell --run "cabal test" diff --git a/CHANGELOG.md b/CHANGELOG.md index 17b7614..382cae7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +## 0.21.1 ++ Reverted 0.21.0 changes: restored custom Base16/Base32/Base64 encode/decode, + the GHC.Prim-based `Bytes` implementation, and `readWord8OffAddr#`-based FNV + hashing. The `base16`, `base32`, `base64`, and `text` dependencies are removed + again. ++ Deleted `Data.Memory.Internal.CompatPrim64` (unreferenced). ++ Added GitHub Actions CI (nix + cabal matrix). + ## 0.21.0 + `Data.ByteArray.Encoding`: replaced custom Base16/Base32/Base64 encode/decode diff --git a/Data/ByteArray.hs b/Data/ByteArray.hs index ea0f414..32455c6 100644 --- a/Data/ByteArray.hs +++ b/Data/ByteArray.hs @@ -9,6 +9,9 @@ -- -- This module should be imported qualified. -- +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} {-# LANGUAGE NoImplicitPrelude #-} module Data.ByteArray ( diff --git a/Data/ByteArray/Bytes.hs b/Data/ByteArray/Bytes.hs index 80c8abf..d0b29a3 100644 --- a/Data/ByteArray/Bytes.hs +++ b/Data/ByteArray/Bytes.hs @@ -7,50 +7,191 @@ -- -- Simple and efficient byte array types -- +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} +{-# LANGUAGE DeriveDataTypeable #-} module Data.ByteArray.Bytes ( Bytes ) where -import Control.DeepSeq (NFData(..)) -import Data.ByteArray.Types -import Data.Char (chr) -import Data.Foldable (toList) +import Control.DeepSeq(NFData(..)) +import GHC.Exts (unsafeCoerce#) +import GHC.Word +import GHC.Char (chr) +import GHC.Types +import GHC.Prim +import GHC.Ptr import Data.Semigroup +import Data.Foldable (toList) +import Data.Memory.PtrMethods +import Data.Memory.Internal.CompatPrim +import Data.Memory.Internal.Compat (unsafeDoIO) +import Data.ByteArray.Types import Data.Typeable -import qualified Data.ByteString as BS -import qualified Data.ByteString.Internal as BSI -import Foreign.ForeignPtr (withForeignPtr) -import Foreign.Ptr (castPtr) + -- | Simplest Byte Array -newtype Bytes = Bytes BS.ByteString +data Bytes = Bytes (MutableByteArray# RealWorld) deriving (Typeable) instance NFData Bytes where rnf b = b `seq` () instance Show Bytes where - showsPrec p (Bytes b) = showsPrec p (map (chr . fromIntegral) (BS.unpack b)) - + showsPrec p b r = showsPrec p (bytesUnpackChars b []) r instance Eq Bytes where - Bytes a == Bytes b = a == b - + (==) = bytesEq instance Ord Bytes where - compare (Bytes a) (Bytes b) = compare a b - + compare = bytesCompare instance Semigroup Bytes where - Bytes a <> Bytes b = Bytes (a <> b) - sconcat bs = Bytes $ BS.concat [b | Bytes b <- toList bs] - + b1 <> b2 = unsafeDoIO $ bytesAppend b1 b2 + sconcat = unsafeDoIO . bytesConcat . toList instance Monoid Bytes where - mempty = Bytes BS.empty + mempty = unsafeDoIO (newBytes 0) instance ByteArrayAccess Bytes where - length (Bytes b) = BS.length b - withByteArray (Bytes b) = withByteArray b - + length = bytesLength + withByteArray = withBytes instance ByteArray Bytes where - allocRet n f = do - fptr <- BSI.mallocByteString n - r <- withForeignPtr fptr (f . castPtr) - return (r, Bytes (BSI.PS fptr 0 n)) + allocRet = bytesAllocRet + +------------------------------------------------------------------------ +newBytes :: Int -> IO Bytes +newBytes (I# sz) + | booleanPrim (sz <# 0#) = error "Bytes: size must be >= 0" + | otherwise = IO $ \s -> + case newAlignedPinnedByteArray# sz 8# s of + (# s', mbarr #) -> (# s', Bytes mbarr #) + +touchBytes :: Bytes -> IO () +touchBytes (Bytes mba) = IO $ \s -> case touch# mba s of s' -> (# s', () #) +{-# INLINE touchBytes #-} + +sizeofBytes :: Bytes -> Int +sizeofBytes (Bytes mba) = I# (sizeofMutableByteArray# mba) +{-# INLINE sizeofBytes #-} + +withPtr :: Bytes -> (Ptr p -> IO a) -> IO a +withPtr b@(Bytes mba) f = do + a <- f (Ptr (byteArrayContents# (unsafeCoerce# mba))) + touchBytes b + return a +------------------------------------------------------------------------ + +bytesAlloc :: Int -> (Ptr p -> IO ()) -> IO Bytes +bytesAlloc sz f = do + ba <- newBytes sz + withPtr ba f + return ba + +bytesConcat :: [Bytes] -> IO Bytes +bytesConcat l = bytesAlloc retLen (copy l) + where + !retLen = sum $ map bytesLength l + + copy [] _ = return () + copy (x:xs) dst = do + withPtr x $ \src -> memCopy dst src chunkLen + copy xs (dst `plusPtr` chunkLen) + where + !chunkLen = bytesLength x + +bytesAppend :: Bytes -> Bytes -> IO Bytes +bytesAppend b1 b2 = bytesAlloc retLen $ \dst -> do + withPtr b1 $ \s1 -> memCopy dst s1 len1 + withPtr b2 $ \s2 -> memCopy (dst `plusPtr` len1) s2 len2 + where + !len1 = bytesLength b1 + !len2 = bytesLength b2 + !retLen = len1 + len2 + +bytesAllocRet :: Int -> (Ptr p -> IO a) -> IO (a, Bytes) +bytesAllocRet sz f = do + ba <- newBytes sz + r <- withPtr ba f + return (r, ba) + +bytesLength :: Bytes -> Int +bytesLength = sizeofBytes +{-# LANGUAGE bytesLength #-} + +withBytes :: Bytes -> (Ptr p -> IO a) -> IO a +withBytes = withPtr + +bytesEq :: Bytes -> Bytes -> Bool +bytesEq b1@(Bytes m1) b2@(Bytes m2) + | l1 /= l2 = False + | otherwise = unsafeDoIO $ IO $ \s -> loop 0# s + where + !l1@(I# len) = bytesLength b1 + !l2 = bytesLength b2 + + loop i s + | booleanPrim (i ==# len) = (# s, True #) + | otherwise = + case readWord8Array# m1 i s of + (# s', e1 #) -> case readWord8Array# m2 i s' of + (# s'', e2 #) -> + if (W8# e1) == (W8# e2) + then loop (i +# 1#) s'' + else (# s'', False #) + {-# INLINE loop #-} + +bytesCompare :: Bytes -> Bytes -> Ordering +bytesCompare b1@(Bytes m1) b2@(Bytes m2) = unsafeDoIO $ loop 0 + where + !l1 = bytesLength b1 + !l2 = bytesLength b2 + !len = min l1 l2 + + loop !i + | i == len = + if l1 == l2 + then pure EQ + else if l1 > l2 then pure GT + else pure LT + | otherwise = do + e1 <- read8 m1 i + e2 <- read8 m2 i + if e1 == e2 + then loop (i+1) + else if e1 < e2 then pure LT + else pure GT + + read8 m (I# i) = IO $ \s -> case readWord8Array# m i s of + (# s2, e #) -> (# s2, W8# e #) + +bytesUnpackChars :: Bytes -> String -> String +bytesUnpackChars (Bytes mba) xs = chunkLoop 0# + where + !len = sizeofMutableByteArray# mba + -- chunk 64 bytes at a time + chunkLoop :: Int# -> [Char] + chunkLoop idx + | booleanPrim (len ==# idx) = [] + | booleanPrim ((len -# idx) ># 63#) = + bytesLoop idx 64# (chunkLoop (idx +# 64#)) + | otherwise = + bytesLoop idx (len -# idx) xs + + bytesLoop idx chunkLenM1 paramAcc = unsafeDoIO $ + loop (idx +# chunkLenM1 -# 1#) paramAcc + where loop i acc + | booleanPrim (i ==# idx) = do + c <- rChar i + return (c : acc) + | otherwise = do + c <- rChar i + loop (i -# 1#) (c : acc) + + rChar :: Int# -> IO Char + rChar idx = IO $ \s -> + case readWord8Array# mba idx s of + (# s2, w #) -> (# s2, chr (fromIntegral (W8# w)) #) + +{- +bytesShowHex :: Bytes -> String +bytesShowHex b = showHexadecimal (withPtr b) (bytesLength b) +{-# NOINLINE bytesShowHex #-} +-} diff --git a/Data/ByteArray/Encoding.hs b/Data/ByteArray/Encoding.hs index 01ddd49..3210fb9 100644 --- a/Data/ByteArray/Encoding.hs +++ b/Data/ByteArray/Encoding.hs @@ -16,17 +16,10 @@ module Data.ByteArray.Encoding import Data.ByteArray.Types import qualified Data.ByteArray.Types as B import qualified Data.ByteArray.Methods as B -import qualified Data.ByteString as BS -import qualified Data.ByteString.Base16 as B16 -import qualified Data.ByteString.Base32 as B32 -import qualified Data.ByteString.Base64 as B64 -import qualified Data.ByteString.Base64.URL as B64URL -import Data.Base16.Types (extractBase16) -import Data.Base64.Types (extractBase64) -import qualified Data.Text as T -import Data.Memory.Encoding.Base64 (toBase64OpenBSD, fromBase64OpenBSD, - unBase64LengthUnpadded) -import Data.Memory.Internal.Compat (unsafeDoIO) +import Data.Memory.Internal.Compat +import Data.Memory.Encoding.Base16 +import Data.Memory.Encoding.Base32 +import Data.Memory.Encoding.Base64 -- $setup -- >>> :set -XOverloadedStrings @@ -81,20 +74,27 @@ data Base = Base16 -- ^ similar to hexadecimal -- "Zm9vYmFy" -- convertToBase :: (ByteArrayAccess bin, ByteArray bout) => Base -> bin -> bout -convertToBase base b = - let bs = B.convert b :: BS.ByteString - in case base of - Base16 -> B.convert $ extractBase16 (B16.encodeBase16' bs) - Base32 -> B.convert $ B32.encodeBase32' bs - Base64 -> B.convert $ extractBase64 (B64.encodeBase64' bs) - Base64URLUnpadded -> B.convert $ extractBase64 (B64URL.encodeBase64Unpadded' bs) - Base64OpenBSD -> - let binLength = B.length b - (q, r) = binLength `divMod` 3 - outLen = 4 * q + (if r == 0 then 0 else r + 1) - in B.unsafeCreate outLen $ \bout -> - withByteArray b $ \bin -> - toBase64OpenBSD bout bin binLength +convertToBase base b = case base of + Base16 -> doConvert (binLength * 2) toHexadecimal + Base32 -> let (q,r) = binLength `divMod` 5 + outLen = 8 * (if r == 0 then q else q + 1) + in doConvert outLen toBase32 + Base64 -> doConvert base64Length toBase64 + -- Base64URL -> doConvert base64Length (toBase64URL True) + Base64URLUnpadded -> doConvert base64UnpaddedLength (toBase64URL False) + Base64OpenBSD -> doConvert base64UnpaddedLength toBase64OpenBSD + where + binLength = B.length b + + base64Length = let (q,r) = binLength `divMod` 3 + in 4 * (if r == 0 then q else q+1) + + base64UnpaddedLength = let (q,r) = binLength `divMod` 3 + in 4 * q + (if r == 0 then 0 else r+1) + doConvert l f = + B.unsafeCreate l $ \bout -> + B.withByteArray b $ \bin -> + f bout bin binLength -- | Try to decode some bytes from the equivalent representation in a specific 'Base'. -- @@ -108,27 +108,55 @@ convertToBase base b = -- Trying to decode invalid data will return an error string: -- -- >>> convertFromBase Base64 ("!!!" :: ByteString) :: Either String ByteString --- Left "Base64-encoded bytestring requires padding" +-- Left "base64: input: invalid length" -- convertFromBase :: (ByteArrayAccess bin, ByteArray bout) => Base -> bin -> Either String bout -convertFromBase base b = - let bs = B.convert b :: BS.ByteString - run = fmap B.convert . mapLeft T.unpack - in case base of - Base16 -> run $ B16.decodeBase16Untyped bs - Base32 -> run $ B32.decodeBase32 bs - Base64 -> run $ B64.decodeBase64Untyped bs - Base64URLUnpadded -> run $ B64URL.decodeBase64UnpaddedUntyped bs - Base64OpenBSD -> unsafeDoIO $ - withByteArray b $ \bin -> - case unBase64LengthUnpadded (B.length b) of - Nothing -> return $ Left "base64 unpadded: input: invalid length" - Just dstLen -> do - (ret, out) <- B.allocRet dstLen $ \bout -> fromBase64OpenBSD bout bin (B.length b) - return $ case ret of - Nothing -> Right out - Just ofs -> Left ("base64 unpadded: input: invalid encoding at offset: " ++ show ofs) +convertFromBase Base16 b + | odd (B.length b) = Left "base16: input: invalid length" + | otherwise = unsafeDoIO $ do + (ret, out) <- + B.allocRet (B.length b `div` 2) $ \bout -> + B.withByteArray b $ \bin -> + fromHexadecimal bout bin (B.length b) + case ret of + Nothing -> return $ Right out + Just ofs -> return $ Left ("base16: input: invalid encoding at offset: " ++ show ofs) +convertFromBase Base32 b = unsafeDoIO $ + withByteArray b $ \bin -> do + mDstLen <- unBase32Length bin (B.length b) + case mDstLen of + Nothing -> return $ Left "base32: input: invalid length" + Just dstLen -> do + (ret, out) <- B.allocRet dstLen $ \bout -> fromBase32 bout bin (B.length b) + case ret of + Nothing -> return $ Right out + Just ofs -> return $ Left ("base32: input: invalid encoding at offset: " ++ show ofs) +convertFromBase Base64 b = unsafeDoIO $ + withByteArray b $ \bin -> do + mDstLen <- unBase64Length bin (B.length b) + case mDstLen of + Nothing -> return $ Left "base64: input: invalid length" + Just dstLen -> do + (ret, out) <- B.allocRet dstLen $ \bout -> fromBase64 bout bin (B.length b) + case ret of + Nothing -> return $ Right out + Just ofs -> return $ Left ("base64: input: invalid encoding at offset: " ++ show ofs) +convertFromBase Base64URLUnpadded b = unsafeDoIO $ + withByteArray b $ \bin -> + case unBase64LengthUnpadded (B.length b) of + Nothing -> return $ Left "base64URL unpadded: input: invalid length" + Just dstLen -> do + (ret, out) <- B.allocRet dstLen $ \bout -> fromBase64URLUnpadded bout bin (B.length b) + case ret of + Nothing -> return $ Right out + Just ofs -> return $ Left ("base64URL unpadded: input: invalid encoding at offset: " ++ show ofs) +convertFromBase Base64OpenBSD b = unsafeDoIO $ + withByteArray b $ \bin -> + case unBase64LengthUnpadded (B.length b) of + Nothing -> return $ Left "base64 unpadded: input: invalid length" + Just dstLen -> do + (ret, out) <- B.allocRet dstLen $ \bout -> fromBase64OpenBSD bout bin (B.length b) + case ret of + Nothing -> return $ Right out + Just ofs -> return $ Left ("base64 unpadded: input: invalid encoding at offset: " ++ show ofs) -mapLeft :: (a -> b) -> Either a c -> Either b c -mapLeft f (Left a) = Left (f a) -mapLeft _ (Right c) = Right c diff --git a/Data/Memory/Hash/FNV.hs b/Data/Memory/Hash/FNV.hs index 6fd9dc5..d311b1f 100644 --- a/Data/Memory/Hash/FNV.hs +++ b/Data/Memory/Hash/FNV.hs @@ -9,6 +9,8 @@ -- -- {-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} {-# LANGUAGE BangPatterns #-} module Data.Memory.Hash.FNV ( @@ -22,11 +24,14 @@ module Data.Memory.Hash.FNV , fnv1a_64 ) where -import Data.Bits (xor) -import Data.Word (Word8, Word32, Word64) -import Foreign.Ptr (Ptr) -import Foreign.Storable (peekByteOff) -import Control.DeepSeq (NFData(..)) +import Data.Bits (xor) +import Data.Memory.Internal.Compat () +import Data.Memory.Internal.Imports +import GHC.Word +import GHC.Prim hiding (Word64#, Int64#) +import GHC.Types +import GHC.Ptr +import Control.DeepSeq(NFData(..)) -- | FNV1(a) hash (32 bit variants) newtype FnvHash32 = FnvHash32 Word32 @@ -54,32 +59,48 @@ fnv1a_64_Mix8 !w (FnvHash64 acc) = FnvHash64 (0x100000001b3 * xor acc (fromInteg -- | compute FNV1 (32 bit variant) of a raw piece of memory fnv1 :: Ptr Word8 -> Int -> IO FnvHash32 -fnv1 ptr n = loop (FnvHash32 0x811c9dc5) 0 - where +fnv1 (Ptr addr) n = loop (FnvHash32 0x811c9dc5) 0 + where + loop :: FnvHash32 -> Int -> IO FnvHash32 loop !acc !i - | i == n = pure acc - | otherwise = peekByteOff ptr i >>= \v -> loop (fnv1_32_Mix8 v acc) (i + 1) + | i == n = pure $ acc + | otherwise = do + v <- read8 addr i + loop (fnv1_32_Mix8 v acc) (i + 1) -- | compute FNV1a (32 bit variant) of a raw piece of memory fnv1a :: Ptr Word8 -> Int -> IO FnvHash32 -fnv1a ptr n = loop (FnvHash32 0x811c9dc5) 0 - where +fnv1a (Ptr addr) n = loop (FnvHash32 0x811c9dc5) 0 + where + loop :: FnvHash32 -> Int -> IO FnvHash32 loop !acc !i - | i == n = pure acc - | otherwise = peekByteOff ptr i >>= \v -> loop (fnv1a_32_Mix8 v acc) (i + 1) + | i == n = pure $ acc + | otherwise = do + v <- read8 addr i + loop (fnv1a_32_Mix8 v acc) (i + 1) -- | compute FNV1 (64 bit variant) of a raw piece of memory fnv1_64 :: Ptr Word8 -> Int -> IO FnvHash64 -fnv1_64 ptr n = loop (FnvHash64 0xcbf29ce484222325) 0 - where +fnv1_64 (Ptr addr) n = loop (FnvHash64 0xcbf29ce484222325) 0 + where + loop :: FnvHash64 -> Int -> IO FnvHash64 loop !acc !i - | i == n = pure acc - | otherwise = peekByteOff ptr i >>= \v -> loop (fnv1_64_Mix8 v acc) (i + 1) + | i == n = pure $ acc + | otherwise = do + v <- read8 addr i + loop (fnv1_64_Mix8 v acc) (i + 1) -- | compute FNV1a (64 bit variant) of a raw piece of memory fnv1a_64 :: Ptr Word8 -> Int -> IO FnvHash64 -fnv1a_64 ptr n = loop (FnvHash64 0xcbf29ce484222325) 0 - where +fnv1a_64 (Ptr addr) n = loop (FnvHash64 0xcbf29ce484222325) 0 + where + loop :: FnvHash64 -> Int -> IO FnvHash64 loop !acc !i - | i == n = pure acc - | otherwise = peekByteOff ptr i >>= \v -> loop (fnv1a_64_Mix8 v acc) (i + 1) + | i == n = pure $ acc + | otherwise = do + v <- read8 addr i + loop (fnv1a_64_Mix8 v acc) (i + 1) + +read8 :: Addr# -> Int -> IO Word8 +read8 addr (I# i) = IO $ \s -> case readWord8OffAddr# addr i s of + (# s2, e #) -> (# s2, W8# e #) diff --git a/nix/ci.nix b/nix/ci.nix new file mode 100644 index 0000000..e1bb0bb --- /dev/null +++ b/nix/ci.nix @@ -0,0 +1,3 @@ +{ + ram = import ../default.nix {}; +} diff --git a/ram.cabal b/ram.cabal index 618383d..33ca67d 100644 --- a/ram.cabal +++ b/ram.cabal @@ -1,6 +1,6 @@ cabal-version: 3.0 name: ram -version: 0.21.0 +version: 0.21.1 synopsis: memory and related abstraction stuff description: This is a fork of memory. It's open to accept changes from anyone, @@ -20,10 +20,6 @@ description: * Hashing : FNV, SipHash - Legacy note: This package shouldn't be used for new software. - It only exists and is maintained to give other packages time to - rewrite their software to use better libraries. - license: BSD-3-Clause license-file: LICENSE copyright: Vincent Hanquez @@ -74,13 +70,9 @@ library exposed-modules: Data.ByteArray.Sized build-depends: , base <4.23 - , base16 >=1.0 && <2 - , base32 >=0.4 && <1 - , base64 >=1.0 && <2 , bytestring <0.13 , deepseq >=1.1 && <1.17 , ghc-prim <0.14 - , text >=1.0 && <3 -- FIXME armel or mispel is also little endian. -- might be a good idea to also add a runtime autodetect mode.