Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions stdlib/Random/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@ name = "Random"
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
version = "1.11.0"

[deps]
SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Expand Down
112 changes: 19 additions & 93 deletions stdlib/Random/src/RNGs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -102,80 +102,6 @@ end
# `RandomDevice` declares `UInt64` as its `rng_native_52` type.
rng_native_52(::RandomDevice) = UInt64


## SeedHasher

"""
Random.SeedHasher(seed=nothing)

Create a `Random.SeedHasher` RNG object, which generates random bytes with the help
of a cryptographic hash function (SHA2), via calls to [`Random.hash_seed`](@ref).

Given two seeds `s1` and `s2`, the random streams generated by
`SeedHasher(s1)` and `SeedHasher(s2)` should be distinct if and only if
`s1` and `s2` are distinct.

This RNG is used by default in `Random.seed!(::AbstractRNG, seed::Any)`, such that
RNGs usually need only to implement `seed!(rng, ::AbstractRNG)`.

This is an internal type, subject to change.
"""
mutable struct SeedHasher <: AbstractRNG
    # current digest; `rand` hands it out one byte at a time
    bytes::Vector{UInt8}
    # number of bytes of `bytes` already handed out
    idx::Int
    # re-hash counter, reset on seeding and fed into every re-hash
    cnt::Int64

    # Build an uninitialized object and let `seed!` fill in every field.
    function SeedHasher(seed=nothing)
        return seed!(new(), seed)
    end
end

# Seeding from another RNG: draw four `UInt64` values from it and hash those.
function seed!(rng::SeedHasher, seeder::AbstractRNG)
    return seed!(rng, rand(seeder, UInt64, 4))
end

# Without an explicit seed, a fresh `RandomDevice` acts as the seeder.
function seed!(rng::SeedHasher, ::Nothing)
    return seed!(rng, RandomDevice())
end

# Generic seeding: feed `seed` to a hash context through `hash_seed`, and use the
# resulting digest as the first block of bytes to serve.
function seed!(rng::SeedHasher, seed)
    # SHA2_256 is used here because it is faster, and typically no more than
    # 256 bits will be needed
    hasher = SHA2_256_CTX()
    hash_seed(seed, hasher)
    rng.bytes = SHA.digest!(hasher)::Vector{UInt8}
    rng.cnt = 0
    rng.idx = 0
    return rng
end

# Replace the current digest by a new one computed from the current bytes and from
# an incremented counter, and restart reading from the first byte.
@noinline function rehash!(rng::SeedHasher)
    # more random bytes are requested: from now on SHA2_512 is used, as it yields
    # more bytes per digest
    hasher = SHA2_512_CTX()
    SHA.update!(hasher, rng.bytes)
    # the counter is hashed too, for the extremely unlikely case where the hash of
    # `rng.bytes` equals `rng.bytes` (a "fixed point"), or more generally where
    # there is a small cycle
    rng.cnt += 1
    SHA.update!(hasher, reinterpret(NTuple{8, UInt8}, rng.cnt))
    rng.bytes = SHA.digest!(hasher)
    rng.idx = 0
    return rng
end

# Serve the next byte of the digest, re-hashing first when all bytes were consumed.
function rand(rng::SeedHasher, ::SamplerType{UInt8})
    if rng.idx >= length(rng.bytes)
        rehash!(rng)
    end
    rng.idx += 1
    return rng.bytes[rng.idx]
end

# The other bit-integer types are assembled from `sizeof(TT)` consecutive random
# bytes, the first byte drawn being the least significant one.
for TT in Base.BitInteger_types
    TT === UInt8 && continue
    @eval function rand(rng::SeedHasher, ::SamplerType{$TT})
        acc = zero($TT)
        for shift in 0:8:8*(sizeof($TT) - 1)
            acc |= (rand(rng, UInt8) % $TT) << shift
        end
        return acc
    end
end

# A `Bool` is a random byte truncated with `% Bool`.
function rand(rng::SeedHasher, ::SamplerType{Bool})
    return rand(rng, UInt8) % Bool
end

rng_native_52(::SeedHasher) = UInt64


## seeding

"""
Expand Down Expand Up @@ -244,28 +170,28 @@ function seed!(rng::AbstractRNG, seed::Any=nothing)
end


### hash_seed()
### hashseed!()

"""
Random.hash_seed(seed, ctx::SHA_CTX)::AbstractVector{UInt8}
Random.hashseed!(ctx::SeedHasher, seed)

Update `ctx` via `SHA.update!` with the content of `seed`.
Update `ctx` via `ingest!` with the content of `seed`.
This function is used by the [`SeedHasher`](@ref) RNG to produce
random bytes.

`seed` can currently be of type
`Union{Integer, AbstractString, AbstractArray{UInt32}, AbstractArray{UInt64}}`,
but modules can extend this function for types they own.

`hash_seed` is "injective" : for two equivalent context objects `cn` and `cm`,
`hashseed!` is "injective" : for two equivalent context objects `cn` and `cm`,
if `n != m`, then `cn` and `cm` will be distinct after calling
`hash_seed(n, cn); hash_seed(m, cm)`.
`hashseed!(cn, n); hashseed!(cm, m)`.
Moreover, if `n == m`, then `cn` and `cm` remain equivalent after calling
`hash_seed(n, cn); hash_seed(m, cm)`.
`hashseed!(cn, n); hashseed!(cm, m)`.
"""
function hash_seed end
function hashseed! end  # zero-method stub: gives the docstring above its intended target

function hash_seed(seed::Integer, ctx::SHA_CTX)
function hashseed!(ctx::SeedHasher, seed::Integer)
neg = signbit(seed)
if neg
seed = ~seed
Expand All @@ -274,35 +200,35 @@ function hash_seed(seed::Integer, ctx::SHA_CTX)
while true
word = (seed % UInt32) & 0xffffffff
seed >>>= 32
SHA.update!(ctx, reinterpret(NTuple{4, UInt8}, word))
ingest!(ctx, reinterpret(NTuple{4, UInt8}, word))
iszero(seed) && break
end
# make sure the hash of negative numbers is different from the hash of positive numbers
neg && SHA.update!(ctx, (0x01,))
neg && ingest!(ctx, (0x01,))
nothing
end

function hash_seed(seed::Union{AbstractArray{UInt32}, AbstractArray{UInt64}}, ctx::SHA_CTX)
function hashseed!(ctx::SeedHasher, seed::Union{AbstractArray{UInt32}, AbstractArray{UInt64}})
for xx in seed
SHA.update!(ctx, reinterpret(NTuple{8, UInt8}, UInt64(xx)))
ingest!(ctx, reinterpret(NTuple{8, UInt8}, UInt64(xx)))
end
# discriminate from hash_seed(::Integer)
SHA.update!(ctx, (0x10,))
# discriminate from hashseed!(ctx, ::Integer)
ingest!(ctx, (0x10,))
end

function hash_seed(str::AbstractString, ctx::SHA_CTX)
function hashseed!(ctx::SeedHasher, str::AbstractString)
# convert to String such that `codeunits(str)` below is consistent between equal
# strings of different types
str = String(str)
SHA.update!(ctx, codeunits(str))
# signature for strings: so far, all hash_seed functions end-up hashing a multiple
ingest!(ctx, codeunits(str))
# signature for strings: so far, all hashseed! functions end-up hashing a multiple
# of 4 bytes of data, and add the signature (1 byte) at the end; so hash as many
# bytes as necessary to have a total number of hashed bytes equal to 0 mod 4 (padding),
# and then hash the signature 0x05; in order for strings of different lengths to have
# different hashes, padding bytes are set equal to the number of padding bytes
pad = 4 - mod(ncodeunits(str), 4)
for _=1:pad
SHA.update!(ctx, (pad % UInt8,))
ingest!(ctx, (pad % UInt8,))
end
SHA.update!(ctx, (0x05,))
ingest!(ctx, (0x05,))
end
2 changes: 1 addition & 1 deletion stdlib/Random/src/Random.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ include("DSFMT.jl")
using .DSFMT
using Base.GMP.MPZ
using Base.GMP: Limb
using SHA: SHA, SHA2_256_CTX, SHA2_512_CTX, SHA_CTX

using Base: BitInteger, BitInteger_types, BitUnsigned, require_one_based_indexing,
_throw_argerror
Expand Down Expand Up @@ -418,6 +417,7 @@ rand!


include("Xoshiro.jl")
include("SeedHasher.jl")
include("RNGs.jl")
include("MersenneTwister.jl")
include("generation.jl")
Expand Down
178 changes: 178 additions & 0 deletions stdlib/Random/src/SeedHasher.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
## SeedHasher

#=
`SeedHasher` implements the seed-mixing algorithm designed by M. E. O'Neill
as an alternative to `std::seed_seq`, intended to produce, from a
user-provided seed, high-quality initialization data for RNGs.
Cf. https://www.pcg-random.org/posts/developing-a-seed_seq-alternative.html

This implementation is derived from the `seed_seq_fe` C++ reference version:
https://gist.github.com/imneme/540829265469e673d045 (MIT license).

The original algorithm uses a fixed-size entropy buffer (128 or 256 bits).
`SeedHasher` adjusts the buffer size dynamically, roughly to the size of the
input seed, up to a maximum of 256 bits.
=#

# Upper bound on the size of the entropy store, in `UInt32` words (8 words = 256 bits).
const _SH_MAX_ENTROPY = 8 # number of stored UInt32 words of entropy
# Length, in `UInt32` words, of the `mixer` buffer allocated for each `SeedHasher`.
const _SH_BUFSIZE = 32
# The buffer must be larger than the entropy store: the words beyond the store are
# where seed data is written before being mixed in.
@assert _SH_BUFSIZE > _SH_MAX_ENTROPY # the diff should be at least 8 for decent performance
# Right-shift amount of the xorshift steps: 16, i.e. half the bit width of a `UInt32`.
const _SH_XSHIFT::UInt32 = UInt32(sizeof(UInt32) * 4)

"""
Random.SeedHasher(seed=nothing)

Create a `Random.SeedHasher` RNG, which produces random bytes derived from the
entropy extracted from `seed` via calls to [`Random.hashseed!`](@ref).

Given two seeds `s1` and `s2`, the random streams generated by
`SeedHasher(s1)` and `SeedHasher(s2)` should be distinct if and only if
`s1` and `s2` are distinct.

`SeedHasher` is used by default in `Random.seed!(::AbstractRNG, seed::Any)`,
so RNGs typically need only implement `seed!(rng, ::AbstractRNG)`.

!!! warning
`SeedHasher` is intended only for producing initialization data for other RNGs.
It is *not* suitable for use as a general-purpose RNG.

This is an internal type, subject to change.
"""
mutable struct SeedHasher <: AbstractRNG
    # fixed-length buffer of `_SH_BUFSIZE` words: the entropy store comes first,
    # the remaining words receive seed bytes during ingestion
    const mixer::Memory{UInt32}
    # size of the entropy store, in `UInt32` words
    len::Int
    # cursor into `mixer`: a byte offset while ingesting a seed, a word offset
    # while generating
    idx::Int
    # constant used (and updated) by the hashing steps
    hash_const::UInt32

    # Allocate the buffer only; the contents of `mixer` are left uninitialized and
    # the object is meant to be passed to `seed!` before being used.
    function SeedHasher(::UndefInitializer)
        return new(Memory{UInt32}(undef, _SH_BUFSIZE), 0, 0, UInt32(0))
    end
end

# Public constructor: allocate an unseeded object, then seed it with `seed`.
function SeedHasher(seed=nothing)
    rng = SeedHasher(undef)
    return seed!(rng, seed)
end

# Without an explicit seed, a fresh `RandomDevice` acts as the seeder.
function seed!(rng::SeedHasher, ::Nothing)
    return seed!(rng, RandomDevice())
end

# Seeding from another RNG: its output directly becomes a full-size entropy store.
function seed!(rng::SeedHasher, seeder::AbstractRNG)
    # the seeder's output is taken as-is, no seed mixing is necessary
    store = view(rng.mixer, 1:_SH_MAX_ENTROPY)
    rand!(seeder, store)
    rng.hash_const = 0x8b51f9dd # INIT_B
    rng.len = _SH_MAX_ENTROPY
    rng.idx = 0
    return rng
end

# Generic seeding: ingest the bytes emitted by `hashseed!` for `seed`, mix them into
# the entropy store, then set the object up for generation.
function seed!(rng::SeedHasher, seed)
    # start from an empty entropy store, with the ingestion constant
    rng.hash_const = 0x43b0d7e5 # INIT_A
    rng.len = 0
    rng.idx = 0
    hashseed!(rng, seed)
    finalize!(rng)
    if rng.len <= 2
        # the store is small: mix its words once more (`stir()` in the C++ code)
        nwords = rng.len
        rng.len = 0
        rng.idx = nwords << 2
        rng.hash_const = 0x43b0d7e5 # this follows the C++ code, but might not be necessary?
        mix_entropy!(rng)
    end
    # generation starts at the first word of the store, with its own constant
    rng.hash_const = 0x8b51f9dd # INIT_B
    rng.idx = 0
    return rng
end

# During seed ingestion (entropy extraction), the seed is encoded as bytes
# (via `hashseed!`) and written verbatim into `rng.mixer`, starting at the
# byte index `rng.idx + 1`. Once the buffer is filled for the first time,
# this initial block is mixed to produce the initial state of the entropy
# store, which occupies up to the first `_SH_MAX_ENTROPY` UInt32 words of
# `rng.mixer`. The remaining portion of the buffer stays available to ingest
# further bytes from the seed.
"""
    ingest!(rng::SeedHasher, xs)

Append the bytes of `xs` (an array with one-based indexing, or a tuple, of `UInt8`)
to `rng.mixer`, starting at byte offset `rng.idx`, and advance `rng.idx` accordingly.
Whenever the buffer is full, `mix_entropy!` is called to fold its content into the
entropy store, which frees the upper part of the buffer for the remaining bytes.

Return `rng`. Throw an `ArgumentError` if `xs` is an array with offset axes.
"""
function ingest!(rng::SeedHasher,
                 xs::Union{AbstractArray{UInt8}, NTuple{N, UInt8}}) where N
    # the copy loop below indexes `xs` from 1 under `@inbounds`, which is only valid
    # for one-based indexing; reject offset arrays instead of reading out of bounds
    xs isa AbstractArray && Base.require_one_based_indexing(xs)
    mixer8 = reinterpret(UInt8, rng.mixer)
    nbytes = length(xs)
    xsi = 0 # number of consumed bytes from xs
    while xsi != nbytes
        if rng.idx == length(mixer8)
            mix_entropy!(rng)
            # now, the upper side of mixer8 is free
        end
        (; idx) = rng
        # copy as much as fits in the free part of the buffer
        tocopy = min(nbytes - xsi, length(mixer8) - idx)
        for ii = 1:tocopy
            @inbounds mixer8[idx + ii] = xs[xsi + ii]
        end
        xsi += tocopy
        rng.idx += tocopy
    end
    return rng
end

# Close the ingestion phase: zero-pad the ingested bytes up to a whole number of
# `UInt32` words, then mix them into the entropy store. Returns `rng`.
#
# If no byte at all was ingested (a `hashseed!` method that writes nothing), one zero
# word is stored. Otherwise the entropy store would stay empty (`len == 0`), and
# `rand` would read uninitialized words and eventually run past the end of the buffer.
function finalize!(rng)
    mixer8 = reinterpret(UInt8, rng.mixer)
    # byte offset of the next word boundary (unchanged when already aligned)
    target = (rng.idx + 3) & ~3
    if target == 0 && rng.len == 0
        target = 4
    end
    for ii = rng.idx+1:target
        mixer8[ii] = 0x00
    end
    rng.idx = target
    return mix_entropy!(rng)
end

# Fold the words written into `rng.mixer` (the first `rng.idx >> 2` words) into the
# entropy store. On the first call after seeding starts (`rng.len == 0`), the store
# is created from the first words of the buffer; the words beyond the store are then
# mixed into every word of the store. On return, `rng.idx` is the byte offset just
# past the store, i.e. where ingestion can resume.
function mix_entropy!(rng::SeedHasher)
    # scalar hash; every call also advances `rng.hash_const`, so the number and the
    # order of the calls matter
    function hash(word::UInt32)
        word ⊻= rng.hash_const
        rng.hash_const *= 0x931e8875
        word *= rng.hash_const
        return word ⊻ (word >> _SH_XSHIFT)
    end

    # combine two words into one
    function mix(x::UInt32, y::UInt32)
        combined::UInt32 = 0xca01f9dd * x - 0x4973f715 * y
        return combined ⊻ (combined >> _SH_XSHIFT)
    end

    mixer = rng.mixer
    nstore = rng.len
    idx = rng.idx
    @assert 0 == (idx & 0x3)
    nwords = idx >> 2 # number of `UInt32` values written into mixer

    if nstore == 0 # nothing has been mixed in so far
        nstore = min(_SH_MAX_ENTROPY, nwords)
        rng.len = nstore
        for k in 1:nstore
            @inbounds mixer[k] = hash(mixer[k])
        end
        # mix every word of the store into every other word of the store
        for src in 1:nstore
            for dst in 1:nstore
                src == dst && continue
                @inbounds mixer[dst] = mix(mixer[dst], hash(mixer[src]))
            end
        end
    end

    # mix each word lying beyond the store into every word of the store
    for src in nstore+1:nwords, dst in 1:nstore
        @inbounds mixer[dst] = mix(mixer[dst], hash(mixer[src]))
    end

    rng.idx = nstore << 2
    return rng
end

### generation

# Native generation: hash the next word of the entropy store with the current
# constant, advance the constant, and wrap around at the end of the store.
function rand(rng::SeedHasher, ::SamplerType{UInt32})
    pos = rng.idx + 1
    word = @inbounds rng.mixer[pos]
    mult = rng.hash_const * 0x58f38ded # MULT_B
    # xor with the old constant, multiply by the new one
    word = (word ⊻ rng.hash_const) * mult
    rng.hash_const = mult
    rng.idx = pos == rng.len ? 0 : pos
    return word ⊻ (word >> _SH_XSHIFT)
end

# Types of at most 32 bits: truncate/reinterpret one native `UInt32` draw.
function rand(rng::SeedHasher, T::SamplerUnion(Bool, Int8, UInt8, Int16, UInt16, Int32))
    return rand(rng, UInt32) % T[]
end

# 64-bit types: two native draws, the first one providing the high 32 bits.
function rand(rng::SeedHasher, T::SamplerUnion(Int64, UInt64))
    hi = rand(rng, UInt32) % T[]
    lo = rand(rng, UInt32) % T[]
    return (hi << 32) ⊻ lo
end

# 128-bit types are delegated to `rand_generic`.
function rand(rng::SeedHasher, T::SamplerUnion(Int128, UInt128))
    return rand_generic(rng, T[])
end

rng_native_52(::SeedHasher) = UInt64
13 changes: 8 additions & 5 deletions stdlib/Random/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ using Random.DSFMT
using Random: default_rng, Sampler, SamplerRangeFast, SamplerRangeInt, SamplerRangeNDL, MT_CACHE_F, MT_CACHE_I
using Random: jump_128, jump_192, jump_128!, jump_192!, SeedHasher

import SHA
import Future # randjump

function test_uniform(xs::AbstractArray{T}) where {T<:AbstractFloat}
Expand Down Expand Up @@ -1246,11 +1245,15 @@ end
end


@testset "seed! and hash_seed" begin
@testset "seed! and hashseed!" begin
function hash_seed(seed)
ctx = SHA.SHA2_256_CTX()
Random.hash_seed(seed, ctx)
bytes2hex(SHA.digest!(ctx))
# prepare SeedHasher like in seed!(::SeedHasher, seed)
rng = SeedHasher(undef)
rng.len = 0
rng.idx = 0
rng.hash_const = 0x43b0d7e5
Random.hashseed!(rng, seed)
bytes2hex(view(reinterpret(UInt8, rng.mixer), 1:rng.idx))
end

# Test that:
Expand Down