diff --git a/src/HyperdimensionalComputing.jl b/src/HyperdimensionalComputing.jl
index ab891ea..ba099a1 100644
--- a/src/HyperdimensionalComputing.jl
+++ b/src/HyperdimensionalComputing.jl
@@ -30,10 +30,14 @@ export multiset,
     crossproduct,
     ngrams,
     graph,
-    level
+    level,
+    encodelevel,
+    decodelevel,
+    convertlevel
 
 include("inference.jl")
 export similarity,
+    δ,
     nearest_neighbor
 
 
diff --git a/src/encoding.jl b/src/encoding.jl
index 90d6c89..a7d4b26 100644
--- a/src/encoding.jl
+++ b/src/encoding.jl
@@ -499,7 +499,7 @@ Creates a set of level correlated hypervectors, where the first and last hyperve
 
 # Arguments
 - `v::HV`: Base hypervector
-- `n::Int`: Number of levels
+- `n::Int`: Number of levels (alternatively, provide a vector to be encoded)
 """
 function level(v::HV, n::Int) where {HV <: AbstractHV}
     hvs = [v]
@@ -510,4 +510,93 @@ function level(v::HV, n::Int) where {HV <: AbstractHV}
     end
     return hvs
 end
+
 level(HV::Type{<:AbstractHV}, n::Int; dims::Int = 10_000) = level(HV(dims), n)
+
+level(HVv, vals::AbstractVector) = level(HVv, length(vals))
+level(HVv, vals::UnitRange) = level(HVv, length(vals))
+
+
+"""
+    encodelevel(hvlevels::AbstractVector{<:AbstractHV}, numvalues; testbound=false)
+
+Generate an encoding function based on `level`, for encoding numerical values. It returns a function
+that gives the corresponding hypervector for a given numerical input.
+
+# Arguments
+- hvlevels::AbstractVector{<:AbstractHV}: vector of hypervectors representing the level encoding
+- numvalues: the range or vector with the corresponding numerical values
+- [testbound=false]: optional keyword argument to check whether the provided value is in bounds
+
+# Example
+```julia
+numvalues = range(0, 2pi, 100)
+hvlevels = level(BipolarHV(), 100)
+
+encoder = encodelevel(hvlevels, numvalues)
+
+encoder(pi/3)  # hypervector that best represents this numerical value
+```
+"""
+function encodelevel(hvlevels::AbstractVector{<:AbstractHV}, numvalues; testbound = false)
+    @assert length(hvlevels) == length(numvalues) "HV levels do not match numerical values"
+    # construct the encoder
+    function encoder(x::Number)
+        @assert !testbound || minimum(numvalues) ≤ x ≤ maximum(numvalues) "x not in numerical range"
+        (_, ind) = findmin(v -> abs(x - v), numvalues)
+        return hvlevels[ind]
+    end
+    return encoder
+end
+
+"""
+    encodelevel(hvlevels::AbstractVector{<:AbstractHV}, a::Number, b::Number; testbound=false)
+
+See `encodelevel`, same but provide lower (`a`) and upper (`b`) limit of the interval to be encoded.
+"""
+encodelevel(hvlevels::AbstractVector{<:AbstractHV}, a::Number, b::Number; testbound = false) = encodelevel(hvlevels, range(a, b, length(hvlevels)); testbound)
+
+encodelevel(HV, numvalues; testbound = false) = encodelevel(level(HV, length(numvalues)), numvalues; testbound)
+
+
+"""
+    decodelevel(hvlevels::AbstractVector{<:AbstractHV}, numvalues)
+
+Generate a decoding function based on `level`, for decoding numerical values. It returns a function
+that gives the numerical value for a given hypervector, based on similarity matching.
+
+# Arguments
+- hvlevels::AbstractVector{<:AbstractHV}: vector of hypervectors representing the level encoding
+- numvalues: the range or vector with the corresponding numerical values
+
+# Example
+```julia
+numvalues = range(0, 2pi, 100)
+hvlevels = level(BipolarHV(), 100)
+
+decoder = decodelevel(hvlevels, numvalues)
+
+decoder(hvlevels[17])  # value that closely matches the corresponding HV
+```
+"""
+function decodelevel(hvlevels::AbstractVector{<:AbstractHV}, numvalues)
+    @assert length(hvlevels) == length(numvalues) "HV levels do not match numerical values"
+    # construct the decoder
+    function decoder(hv::AbstractHV)
+        (_, ind) = findmax(v -> similarity(v, hv), hvlevels)
+        return numvalues[ind]
+    end
+    return decoder
+end
+
+decodelevel(hvlevels::AbstractVector{<:AbstractHV}, a::Number, b::Number) = decodelevel(hvlevels, range(a, b, length(hvlevels)))
+
+decodelevel(HV, numvalues; testbound = false) = decodelevel(level(HV, length(numvalues)), numvalues)
+
+"""
+    convertlevel(hvlevels, numvals...; kwargs...)
+
+Creates the `encoder` and `decoder` for a level encoding in one step. Keyword arguments are
+forwarded to `encodelevel` only. See `encodelevel` and `decodelevel` for their documentation.
+"""
+convertlevel(hvlevels, numvals...; kwargs...) = (encodelevel(hvlevels, numvals...; kwargs...), decodelevel(hvlevels, numvals...))
diff --git a/src/inference.jl b/src/inference.jl
index c6c14cd..2cd50d6 100644
--- a/src/inference.jl
+++ b/src/inference.jl
@@ -14,47 +14,83 @@ sim_jacc(u::AbstractVector, v::AbstractVector) = dot(u, v) / sum(ui + vi - ui *
 
 dist_hamming(u::AbstractVector, v::AbstractVector) = sum(abs(ui - vi) for (ui, vi) in zip(u, v))
 
-similarity(x::BipolarHV, y::BipolarHV) = sim_cos(x, y)
-similarity(x::TernaryHV, y::TernaryHV) = sim_cos(x, y)
-similarity(x::GradedBipolarHV, y::GradedBipolarHV) = sim_cos(x, y)
-similarity(x::RealHV, y::RealHV) = sim_cos(x, y)
-similarity(x::BinaryHV, y::BinaryHV) = sim_jacc(x, y)
-similarity(x::GradedHV, y::GradedHV) = sim_jacc(x, y)
+similarity(u::BipolarHV, v::BipolarHV) = sim_cos(u, v)
+similarity(u::TernaryHV, v::TernaryHV) = sim_cos(u, v)
+similarity(u::GradedBipolarHV, v::GradedBipolarHV) = sim_cos(u, v)
+similarity(u::RealHV, v::RealHV) = sim_cos(u, v)
+similarity(u::BinaryHV, v::BinaryHV) = sim_jacc(u, v)
+similarity(u::GradedHV, v::GradedHV) = sim_jacc(u, v)
 
 """
-    similarity(x::AbstractVector, y::AbstractVector; method::Symbol)
+    similarity(u::AbstractVector, v::AbstractVector; method::Symbol)
 
 Computes similarity between two (hyper)vectors using a `method` ∈
 `[:cosine, :jaccard, :hamming]`. When no method is given, a default
 is used (cosine for vectors that can have negative elements and Jaccard
 for those that only have positive elements).
 """
-function similarity(x::AbstractVector, y::AbstractVector; method::Symbol)
-    @assert length(x) == length(y) "Vectors have to be of the same length"
+function similarity(u::AbstractVector, v::AbstractVector; method::Symbol)
+    @assert length(u) == length(v) "Vectors have to be of the same length"
     methods = [:cosine, :jaccard, :hamming]
     @assert method ∈ methods "`method` has to be one of $methods"
     if method == :cosine
-        return sim_cos(x, y)
+        return sim_cos(u, v)
     elseif method == :jaccard
-        return sim_jacc(x, y)
+        return sim_jacc(u, v)
     elseif method == :hamming
-        return length(x) - dist_hamming(x, y)
+        return length(u) - dist_hamming(u, v)
     end
 end
 
-nearest_neighbor(x, collection; kwargs...) =
+"""
+    similarity(hvs::AbstractVector{<:AbstractHV}; [method])
+
+Computes the similarity matrix for a vector of hypervectors using
+the similarity metrics defined by the pairwise version of `similarity`.
+"""
+function similarity(hvs::AbstractVector{<:AbstractHV}; kwargs...)
+    n = length(hvs)
+    S = zeros(n, n)
+    for i in 1:n
+        for j in i:n
+            S[i, j] = S[j, i] = similarity(hvs[i], hvs[j]; kwargs...)
+        end
+    end
+    return S
+end
+
+"""
+    similarity(u::AbstractHV; [method])
+
+Create a function that computes the similarity between its argument and `u`
+using `similarity`, i.e. a function equivalent to `v -> similarity(u, v)`.
+"""
+similarity(u::AbstractHV; kwargs...) = v -> similarity(u, v; kwargs...)
+
+
+"""
+    δ(u::AbstractHV, v::AbstractHV; [method])
+    δ(u::AbstractHV; [method])
+    δ(hvs::AbstractVector{<:AbstractHV}; [method])
+
+Alias for `similarity`. See `similarity` for the main documentation.
+"""
+const δ = similarity
+
+
+nearest_neighbor(u::AbstractHV, collection; kwargs...) =
     maximum(
-        (similarity(x, xi; kwargs...), i, xi)
+        (similarity(u, xi; kwargs...), i, xi)
         for (i, xi) in enumerate(collection)
     )
 
-nearest_neighbor(x, collection::Dict; kwargs...) =
-    maximum((similarity(x, xi; kwargs...), k, xi) for (k, xi) in collection)
+nearest_neighbor(u::AbstractHV, collection::Dict; kwargs...) =
+    maximum((similarity(u, xi; kwargs...), k, xi) for (k, xi) in collection)
 
 """
-    nearest_neighbor(x, collection[, k::Int]; kwargs...)
+    nearest_neighbor(u::AbstractHV, collection[, k::Int]; kwargs...)
 
-Returns the element of `collection` that is most similar to `x`.
+Returns the element of `collection` that is most similar to `u`.
 
 Function outputs `(τ, i, xi)` with `τ` the highest similarity value,
 `i` the index (or key if `collection` is a dictionary) of the closest
@@ -64,17 +100,17 @@ for the similarity search.
 If a number `k` is given, the `k` closest neighbor are returned,
 as a sorted list of `(τ, i)`.
 """
-function nearest_neighbor(x, collection, k::Int; kwargs...)
+function nearest_neighbor(u::AbstractHV, collection, k::Int; kwargs...)
     sims = [
-        (similarity(x, xi; kwargs...), i)
+        (similarity(u, xi; kwargs...), i)
             for (i, xi) in enumerate(collection)
     ]
     return partialsort!(sims, 1:k, rev = true)
 end
 
-function nearest_neighbor(x, collection::Dict, k::Int; kwargs...)
+function nearest_neighbor(u::AbstractHV, collection::Dict, k::Int; kwargs...)
     sims = [
-        (similarity(x, xi; kwargs...), i)
+        (similarity(u, xi; kwargs...), i)
             for (i, xi) in collection
     ]
     return partialsort!(sims, 1:k, rev = true)
diff --git a/test/encoding.jl b/test/encoding.jl
index 69bf5d9..c4f76f1 100644
--- a/test/encoding.jl
+++ b/test/encoding.jl
@@ -49,4 +49,18 @@
         @test graph(hvs[s], hvs[t]; directed = true) == Bool.([1, 0, 0, 1, 0])
         @test_throws AssertionError graph(hvs[s], hvs[[1, 2, 3]])
     end
+
+    @testset "levels" begin
+        numvals = 0:0.1:2pi
+        levels = level(BinaryHV(100), numvals)
+
+        @test length(levels) == length(numvals)
+        @test eltype(levels) <: BinaryHV
+
+        encoder, decoder = convertlevel(levels, numvals)
+        hv = encoder(1.467)
+        @test hv isa BinaryHV
+        x = decoder(hv)
+        @test 1 ≤ x ≤ 2
+    end
 end
diff --git a/test/inference.jl b/test/inference.jl
index 733fd10..1d83c94 100644
--- a/test/inference.jl
+++ b/test/inference.jl
@@ -7,6 +7,7 @@
         y = BinaryHV([false, false, false, true])
 
         @test similarity(x, y) ≈ 1 / 3 ≈ sim_jacc(x.v, y.v)
+        @test similarity(x, y) == δ(x)(y)
     end
 
     @testset "GradedHV" begin
@@ -14,7 +15,7 @@
         y = GradedHV([0.9, 0.8, 0.1, 0.3])
 
         @test similarity(x, y) ≈ sim_jacc(x.v, y.v) ≈ dot(x.v, y.v) / sum(xi + yi - xi * yi for (xi, yi) in zip(x, y))
-
+        @test similarity(x, y) == δ(x)(y)
     end
 
     @testset "BipolarHV" begin
@@ -25,6 +26,7 @@
         yd = collect(y)
 
         @test similarity(x, y) ≈ sim_cos(x, y) ≈ dot(xd, yd) / norm(xd) / norm(yd)
+        @test similarity(x, y) == δ(x)(y)
     end
 
     @testset "TernaryHV" begin
@@ -35,6 +37,7 @@
         yd = collect(y)
 
         @test similarity(x, y) ≈ sim_cos(x.v, y.v) ≈ dot(xd, yd) / norm(xd) / norm(yd)
+        @test similarity(x, y) == δ(x)(y)
     end
 
     @testset "GradedBipolarHV" begin
@@ -45,6 +48,7 @@
         yd = collect(y)
 
         @test similarity(x, y) ≈ sim_cos(x.v, y.v) ≈ dot(xd, yd) / norm(xd) / norm(yd)
+        @test similarity(x, y) == δ(x)(y)
     end
 
     @testset "RealHV" begin
@@ -55,6 +59,15 @@
         yd = collect(y)
 
         @test similarity(x, y) ≈ sim_cos(x.v, y.v) ≈ dot(xd, yd) / norm(xd) / norm(yd)
+        @test similarity(x, y) == δ(x)(y)
+    end
+
+    @testset "Similarity matrix" begin
+        levels = level(RealHV(100), 10)
+        M = similarity(levels)
+        @test M isa Matrix
+        @test size(M) == (10, 10)
+        @test M ≈ M'
     end
 
     @testset "NN" begin