JuliaLang · rfourquet · Nov 21, 2025
diff --git a/stdlib/Random/Project.toml b/stdlib/Random/Project.toml
@@ -2,9 +2,6 @@ name = "Random"
 uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 version = "1.11.0"
 
-[deps]
-SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
-
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

diff --git a/stdlib/Random/src/RNGs.jl b/stdlib/Random/src/RNGs.jl
@@ -102,80 +102,6 @@ end
 rng_native_52(::RandomDevice) = UInt64
 
 
-## SeedHasher
-
-"""
-    Random.SeedHasher(seed=nothing)
-
-Create a `Random.SeedHasher` RNG object, which generates random bytes with the help
-of a cryptographic hash function (SHA2), via calls to [`Random.hash_seed`](@ref).
-
-Given two seeds `s1` and `s2`, the random streams generated by
-`SeedHasher(s1)` and `SeedHasher(s2)` should be distinct if and only if
-`s1` and `s2` are distinct.
-
-This RNG is used by default in `Random.seed!(::AbstractRNG, seed::Any)`, such that
-RNGs usually need only to implement `seed!(rng, ::AbstractRNG)`.
-
-This is an internal type, subject to change.
-"""
-mutable struct SeedHasher <: AbstractRNG
-    bytes::Vector{UInt8}
-    idx::Int
-    cnt::Int64
-
-    SeedHasher(seed=nothing) = seed!(new(), seed)
-end
-
-seed!(rng::SeedHasher, seeder::AbstractRNG) = seed!(rng, rand(seeder, UInt64, 4))
-seed!(rng::SeedHasher, ::Nothing) = seed!(rng, RandomDevice())
-
-function seed!(rng::SeedHasher, seed)
-    # typically, no more than 256 bits will be needed, so use
-    # SHA2_256 because it's faster
-    ctx = SHA2_256_CTX()
-    hash_seed(seed, ctx)
-    rng.bytes = SHA.digest!(ctx)::Vector{UInt8}
-    rng.idx = 0
-    rng.cnt = 0
-    rng
-end
-
-@noinline function rehash!(rng::SeedHasher)
-    # more random bytes are necessary, from now on use SHA2_512 to generate
-    # more bytes at once
-    ctx = SHA2_512_CTX()
-    SHA.update!(ctx, rng.bytes)
-    # also hash the counter, just for the extremely unlikely case where the hash of
-    # rng.bytes is equal to rng.bytes (i.e. rng.bytes is a "fixed point"), or more generally
-    # if there is a small cycle
-    SHA.update!(ctx, reinterpret(NTuple{8, UInt8}, rng.cnt += 1))
-    rng.bytes = SHA.digest!(ctx)
-    rng.idx = 0
-    rng
-end
-
-function rand(rng::SeedHasher, ::SamplerType{UInt8})
-    rng.idx < length(rng.bytes) || rehash!(rng)
-    rng.bytes[rng.idx += 1]
-end
-
-for TT = Base.BitInteger_types
-    TT === UInt8 && continue
-    @eval function rand(rng::SeedHasher, ::SamplerType{$TT})
-        xx = zero($TT)
-        for ii = 0:sizeof($TT)-1
-            xx |= (rand(rng, UInt8) % $TT) << (8 * ii)
-        end
-        xx
-    end
-end
-
-rand(rng::SeedHasher, ::SamplerType{Bool}) = rand(rng, UInt8) % Bool
-
-rng_native_52(::SeedHasher) = UInt64
-
-
 ## seeding
 
 """
@@ -244,28 +170,28 @@ function seed!(rng::AbstractRNG, seed::Any=nothing)
 end
 
 
-### hash_seed()
+### hashseed!()
 
 """
-    Random.hash_seed(seed, ctx::SHA_CTX)::AbstractVector{UInt8}
+    Random.hashseed!(ctx::SeedHasher, seed)
 
-Update `ctx` via `SHA.update!` with the content of `seed`.
+Update `ctx` via `ingest!` with the content of `seed`.
 This function is used by the [`SeedHasher`](@ref) RNG to produce
 random bytes.
 
 `seed` can currently be of type
 `Union{Integer, AbstractString, AbstractArray{UInt32}, AbstractArray{UInt64}}`,
 but modules can extend this function for types they own.
 
-`hash_seed` is "injective" : for two equivalent context objects `cn` and `cm`,
+`hashseed!` is "injective" : for two equivalent context objects `cn` and `cm`,
 if `n != m`, then `cn` and `cm` will be distinct after calling
-`hash_seed(n, cn); hash_seed(m, cm)`.
+`hashseed!(cn, n); hashseed!(cm, m)`.
 Moreover, if `n == m`, then `cn` and `cm` remain equivalent after calling
-`hash_seed(n, cn); hash_seed(m, cm)`.
+`hashseed!(cn, n); hashseed!(cm, m)`.
 """
-function hash_seed end
+function hashseed! end # generic function; methods for the supported seed types follow
 
-function hash_seed(seed::Integer, ctx::SHA_CTX)
+function hashseed!(ctx::SeedHasher, seed::Integer)
     neg = signbit(seed)
     if neg
         seed = ~seed
@@ -274,35 +200,35 @@ function hash_seed(seed::Integer, ctx::SHA_CTX)
     while true
         word = (seed % UInt32) & 0xffffffff
         seed >>>= 32
-        SHA.update!(ctx, reinterpret(NTuple{4, UInt8}, word))
+        ingest!(ctx, reinterpret(NTuple{4, UInt8}, word))
         iszero(seed) && break
     end
     # make sure the hash of negative numbers is different from the hash of positive numbers
-    neg && SHA.update!(ctx, (0x01,))
+    neg && ingest!(ctx, (0x01,))
     nothing
 end
 
-function hash_seed(seed::Union{AbstractArray{UInt32}, AbstractArray{UInt64}}, ctx::SHA_CTX)
+function hashseed!(ctx::SeedHasher, seed::Union{AbstractArray{UInt32}, AbstractArray{UInt64}})
     for xx in seed
-        SHA.update!(ctx, reinterpret(NTuple{8, UInt8}, UInt64(xx)))
+        ingest!(ctx, reinterpret(NTuple{8, UInt8}, UInt64(xx))) # 8 bytes per element
     end
-    # discriminate from hash_seed(::Integer)
-    SHA.update!(ctx, (0x10,))
+    # trailing tag byte, to discriminate from hashseed!(ctx, ::Integer)
+    ingest!(ctx, (0x10,))
 end
 
-function hash_seed(str::AbstractString, ctx::SHA_CTX)
+function hashseed!(ctx::SeedHasher, str::AbstractString)
     # convert to String such that `codeunits(str)` below is consistent between equal
     # strings of different types
     str = String(str)
-    SHA.update!(ctx, codeunits(str))
-    # signature for strings: so far, all hash_seed functions end-up hashing a multiple
+    ingest!(ctx, codeunits(str))
+    # signature for strings: so far, all hashseed! functions end up hashing a multiple
     # of 4 bytes of data, and add the signature (1 byte) at the end; so hash as many
     # bytes as necessary to have a total number of hashed bytes equal to 0 mod 4 (padding),
     # and then hash the signature 0x05; in order for strings of different lengths to have
     # different hashes, padding bytes are set equal to the number of padding bytes
     pad = 4 - mod(ncodeunits(str), 4)
     for _=1:pad
-        SHA.update!(ctx, (pad % UInt8,))
+        ingest!(ctx, (pad % UInt8,)) # each padding byte stores the padding length (1 to 4)
     end
-    SHA.update!(ctx, (0x05,))
+    ingest!(ctx, (0x05,)) # string signature byte
 end
diff --git a/stdlib/Random/src/Random.jl b/stdlib/Random/src/Random.jl
@@ -13,7 +13,6 @@ include("DSFMT.jl")
 using .DSFMT
 using Base.GMP.MPZ
 using Base.GMP: Limb
-using SHA: SHA, SHA2_256_CTX, SHA2_512_CTX, SHA_CTX
 
 using Base: BitInteger, BitInteger_types, BitUnsigned, require_one_based_indexing,
     _throw_argerror
@@ -418,6 +417,7 @@ rand!
 
 
 include("Xoshiro.jl")
+include("SeedHasher.jl")
 include("RNGs.jl")
 include("MersenneTwister.jl")
 include("generation.jl")

diff --git a/stdlib/Random/src/SeedHasher.jl b/stdlib/Random/src/SeedHasher.jl
@@ -0,0 +1,178 @@
+## SeedHasher
+
+#=
+`SeedHasher` implements the seed-mixing algorithm designed by M. E. O'Neill
+as an alternative to `std::seed_seq`, intended to produce, from a
+user-provided seed, high-quality initialization data for RNGs.
+Cf. https://www.pcg-random.org/posts/developing-a-seed_seq-alternative.html
+
+This implementation is derived from the `seed_seq_fe` C++ reference version:
+https://gist.github.com/imneme/540829265469e673d045 (MIT license).
+
+The original algorithm uses a fixed-size entropy buffer (128 or 256 bits).
+`SeedHasher` adjusts the buffer size dynamically, roughly to the size of the
+input seed, up to a maximum of 256 bits.
+=#
+
+const _SH_MAX_ENTROPY = 8 # number of stored UInt32 words of entropy
+const _SH_BUFSIZE = 32 # total length of `SeedHasher.mixer`, in UInt32 words
+@assert _SH_BUFSIZE > _SH_MAX_ENTROPY # the diff should be at least 8 for decent performance
+const _SH_XSHIFT::UInt32 = UInt32(sizeof(UInt32) * 4) # i.e. 16, half the bits of a UInt32
+
+"""
+    Random.SeedHasher(seed=nothing)
+
+Create a `Random.SeedHasher` RNG, which produces random bytes derived from the
+entropy extracted from `seed` via calls to [`Random.hashseed!`](@ref).
+
+Given two seeds `s1` and `s2`, the random streams generated by
+`SeedHasher(s1)` and `SeedHasher(s2)` should be distinct if and only if
+`s1` and `s2` are distinct.
+
+`SeedHasher` is used by default in `Random.seed!(::AbstractRNG, seed::Any)`,
+so RNGs typically need only implement `seed!(rng, ::AbstractRNG)`.
+
+!!! warning
+    `SeedHasher` is intended only for producing initialization data for other RNGs.
+    It is *not* suitable for use as a general-purpose RNG.
+
+This is an internal type, subject to change.
+"""
+mutable struct SeedHasher <: AbstractRNG
+    const mixer::Memory{UInt32} # `_SH_BUFSIZE` words: entropy store, then ingestion area
+    len::Int # size of the entropy store (in UInt32 words)
+    idx::Int # byte offset in `mixer` while ingesting, word offset while generating
+    hash_const::UInt32 # evolving constant of the hash step, multiplied at each use
+
+    SeedHasher(::UndefInitializer) =
+        new(Memory{UInt32}(undef, _SH_BUFSIZE), 0, 0, UInt32(0))
+end
+
+SeedHasher(seed=nothing) = seed!(SeedHasher(undef), seed) # allocate the buffer, then seed it
+
+seed!(rng::SeedHasher, ::Nothing) = seed!(rng, RandomDevice()) # no seed: use a RandomDevice
+
+function seed!(rng::SeedHasher, seeder::AbstractRNG)
+    # `seeder` already yields random words: fill the entropy store directly, no mixing
+    store = view(rng.mixer, 1:_SH_MAX_ENTROPY)
+    rand!(seeder, store)
+    rng.hash_const = 0x8b51f9dd # INIT_B
+    rng.len, rng.idx = _SH_MAX_ENTROPY, 0
+    return rng
+end
+
+function seed!(rng::SeedHasher, seed)
+    rng.len = 0 # empty entropy store: the first `mix_entropy!` call will initialize it
+    rng.idx = 0 # ingestion starts at the first byte of `mixer`
+    rng.hash_const = 0x43b0d7e5 # INIT_A
+    hashseed!(rng, seed) # encode `seed` as bytes, fed to `ingest!`
+    finalize!(rng) # zero-pad to a whole UInt32 word, then mix what was ingested
+    if rng.len <= 2
+        # additional mixing (`stir()` in the C++ code)
+        rng.idx = rng.len << 2 # present the current store as freshly ingested bytes
+        rng.len = 0
+        rng.hash_const = 0x43b0d7e5 # this follows the C++ code, but might not be necessary?
+        mix_entropy!(rng)
+    end
+    rng.idx = 0 # switch to generation: `idx` now counts the UInt32 words read by `rand`
+    rng.hash_const = 0x8b51f9dd # INIT_B
+    rng
+end
+
+# During seed ingestion (entropy extraction), the seed is encoded as bytes
+# (via `hashseed!`) and written verbatim into `rng.mixer`, starting at the
+# byte index `rng.idx + 1`. Once the buffer is filled for the first time,
+# this initial block is mixed to produce the initial state of the entropy
+# store, which occupies the first `_SH_MAX_ENTROPY` UInt32 words of
+# `rng.mixer`. The remaining portion of the buffer stays available to ingest
+# further bytes from the seed.
+function ingest!(rng::SeedHasher,
+                 xs::Union{AbstractArray{UInt8}, NTuple{N, UInt8}}) where N
+    # `xs` is indexed from 1 under `@inbounds` below, so reject offset-axes arrays
+    xs isa AbstractArray && require_one_based_indexing(xs)
+    mixer8 = reinterpret(UInt8, rng.mixer) # byte-level view of the UInt32 buffer
+    xsi = 0 # number of consumed bytes from xs
+    while xsi != length(xs)
+        # buffer full: fold it into the entropy store, which frees its upper side
+        rng.idx == length(mixer8) && mix_entropy!(rng)
+        (; idx) = rng
+        tocopy = min(length(xs) - xsi, length(mixer8) - idx)
+        for ii = 1:tocopy
+            @inbounds mixer8[idx + ii] = xs[xsi + ii]
+        end
+        xsi += tocopy
+        rng.idx += tocopy
+    end
+    return rng
+end
+
+function finalize!(rng)
+    bytes = reinterpret(UInt8, rng.mixer)
+    while !iszero(rng.idx & 0x3) # zero-pad up to the next UInt32 boundary
+        bytes[rng.idx += 1] = 0x00
+    end
+    return mix_entropy!(rng)
+end
+
+function mix_entropy!(rng::SeedHasher)
+    function hash(value::UInt32) # note: every call also advances `rng.hash_const`
+        value ⊻= rng.hash_const
+        rng.hash_const *= 0x931e8875
+        value *= rng.hash_const
+        value ⊻= value >> _SH_XSHIFT
+        value
+    end
+
+    function mix(x::UInt32, y::UInt32) # combine two words into one; touches no state
+        result::UInt32 = 0xca01f9dd * x - 0x4973f715 * y
+        result ⊻= result >> _SH_XSHIFT
+        result
+    end
+
+    (; mixer, len, idx) = rng
+    @assert 0 == (idx & 0x3) # the byte offset must be a whole number of UInt32 words
+    idx >>= 2 # number of `UInt32` values written into mixer
+
+    if len == 0 # nothing has been mixed in so far
+        len = rng.len = min(_SH_MAX_ENTROPY, idx)
+        for ii = 1:len # hash every word of the new entropy store in place
+            @inbounds mixer[ii] = hash(mixer[ii])
+        end
+        for isrc = 1:len, idst = 1:len # then mix each store word into all the others
+            if isrc != idst
+                @inbounds mixer[idst] = mix(mixer[idst], hash(mixer[isrc]))
+            end
+        end
+    end
+
+    for ii = len+1:idx # fold each word written past the store into every store word
+        for idst = 1:len
+            @inbounds mixer[idst] = mix(mixer[idst], hash(mixer[ii]))
+        end
+    end
+
+    rng.idx = len << 2 # byte offset just past the entropy store
+    rng
+end
+
+### generation
+
+function rand(rng::SeedHasher, ::SamplerType{UInt32})
+    pos = rng.idx + 1 # 1-based position of the entropy word to output
+    word = @inbounds rng.mixer[pos]
+    prev = rng.hash_const
+    next = prev * 0x58f38ded # MULT_B
+    word = (word ⊻ prev) * next
+    word ⊻= word >> _SH_XSHIFT
+    rng.idx = pos == rng.len ? 0 : pos # wrap around at the end of the entropy store
+    rng.hash_const = next
+    return word
+end
+
+rand(rng::SeedHasher, T::SamplerUnion(Bool, Int8, UInt8, Int16, UInt16, Int32)) =
+    rand(rng, UInt32) % T[] # one 32-bit draw, reduced to the requested type
+rand(rng::SeedHasher, T::SamplerUnion(Int64, UInt64)) =
+    (rand(rng, UInt32) % T[]) << 32 ⊻ rand(rng, UInt32) % T[] # 1st draw: high half
+rand(rng::SeedHasher, T::SamplerUnion(Int128, UInt128)) = rand_generic(rng, T[])
+
+rng_native_52(::SeedHasher) = UInt64
diff --git a/stdlib/Random/test/runtests.jl b/stdlib/Random/test/runtests.jl
@@ -13,7 +13,6 @@ using Random.DSFMT
 using Random: default_rng, Sampler, SamplerRangeFast, SamplerRangeInt, SamplerRangeNDL, MT_CACHE_F, MT_CACHE_I
 using Random: jump_128, jump_192, jump_128!, jump_192!, SeedHasher
 
-import SHA
 import Future # randjump
 
 function test_uniform(xs::AbstractArray{T}) where {T<:AbstractFloat}
@@ -1246,11 +1245,15 @@ end
 end
 
 
-@testset "seed! and hash_seed" begin
+@testset "seed! and hashseed!" begin
     function hash_seed(seed)
-        ctx = SHA.SHA2_256_CTX()
-        Random.hash_seed(seed, ctx)
-        bytes2hex(SHA.digest!(ctx))
+        # prepare SeedHasher like in seed!(::SeedHasher, seed), but without finalize!
+        rng = SeedHasher(undef)
+        rng.len = 0
+        rng.idx = 0
+        rng.hash_const = 0x43b0d7e5 # INIT_A
+        Random.hashseed!(rng, seed)
+        bytes2hex(view(reinterpret(UInt8, rng.mixer), 1:rng.idx))
     end
 
     # Test that: