diff --git a/src/RareEvents.jl b/src/RareEvents.jl index 3d9cb1a..2a8d4a4 100644 --- a/src/RareEvents.jl +++ b/src/RareEvents.jl @@ -107,31 +107,16 @@ function RareEventSampler{FT}( return RareEventSampler{FT,typeof(ensemble),typeof(evolve_single_trajectory)}(args...) end -""" - score(res::RareEventSampler, trajectory) -Scores the trajectory based on the `metric` function -and the parameter `k` chosen by the user, as -`` -e^[∫k metric(trajectory) dt] -`` -""" -function score(res::RareEventSampler, trajectory) - k = res.k - dt = res.dt - integrand = map(res.metric, trajectory) - return exp(k*sum(integrand[2:end]+integrand[1:end-1])/2.0*dt) -end - """ run!(sim::RareEventSampler) Runs the rare event algorithm according to the properties of `sim`. """ function run!(sim::RareEventSampler) # Preallocation - u1 = deepcopy([sim.ensemble[k][1] for k in 1:sim.nensemble]) scores = zeros(sim.nensemble) ncopies = ones(Int, sim.nensemble) - + ids = Array(1:1:sim.nensemble) + t_start = sim.tspan[1] t_integration = sim.tspan[2] - t_start nresample = Int(div(t_integration, sim.τ, RoundUp)) @@ -144,21 +129,35 @@ function run!(sim::RareEventSampler) t2 = t_start+sim.τ i1 = 1 i2 = 1+1*nsteps_per_resample + cloned_id_set = Array(1:1:sim.nensemble) + + # dimensionality of state + d = length(sim.ensemble[1][1]) map(0:(nresample-1)) do i - # can be done per worker - if i > 0 - sample_and_rewrite_history!(sim.ensemble, ncopies, i1, rng) - perturb_trajectories!(u1, sim.ensemble, sim.nensemble, i1, sim.ϵ) - end - trajectories = integrate(sim, u1,t1,t2) - score_trajectories!(sim, scores, trajectories, i1, i2) - - + for k in 1:sim.nensemble + # "rewrite history" + # Note that if i = 0, cloned_id_set is such that no trajectory is rewritten. + if cloned_id_set[k] != k + idx_clone = cloned_id_set[k] + sim.ensemble[k][1:i1] .= sim.ensemble[idx_clone][1:i1] + end + # Perturb + # Note: We don't need to do this at i = 0, but it doesn't matter. + perturbed_ic = sim.ensemble[k][i1] + randn(rng, d)*sim.ϵ + # Integrate + trajectory = sim.evolve_single_trajectory((t1,t2))(perturbed_ic) + # Store + sim.ensemble[k][i1:i2] .= trajectory + # Score + scores[k] = score(trajectory, sim.k, sim.dt, sim.metric) + end + # Central sim.weight_norm[i+1] = mean(scores) compute_ncopies!(ncopies, scores, sim.weight_norm[i+1], sim.nensemble, rng) - + sample_ids!(cloned_id_set, ids, ncopies, rng) + # Increment resample index i = i+1 # Update start and end points of integrations @@ -169,113 +168,75 @@ function run!(sim::RareEventSampler) end end - -function integrate(sim::RareEventSampler, u_prev,t_prev,t_curr) - return pmap(sim.evolve_single_trajectory((t_prev,t_curr)),u_prev;distributed = sim.distributed) +""" + score(trajectory, k, dt, metric) +Scores the trajectory based on the `metric` function +and the parameter `k` chosen by the user, as +`` +e^[∫k metric(trajectory) dt] +`` +""" +function score(trajectory, k, dt, metric) + integrand = map(metric, trajectory) + return exp(k*sum(integrand[2:end]+integrand[1:end-1])/2.0*dt) end -function score_trajectories!(sim::RareEventSampler, scores, trajectories, idx_prev::Int,idx_current::Int) - for k in 1:sim.nensemble - sim.ensemble[k][idx_prev:idx_current] .= trajectories[k] - scores[k] = score(sim,trajectories[k]) - end - nothing -end +""" + compute_ncopies!(ncopies::Vector{Int},scores::Vector{Real}, norm::Real, nensemble::Int, rng) -function compute_ncopies!(ncopies::Array,scores::Array, norm, nensemble::Int, rng) + Computes an integer-valued array with elements equal to the number of copies of the ensemble +members needed to perform the resampling step; updates `ncopies` in place. +""" +function compute_ncopies!(ncopies::Vector{Int},scores::Vector{FT}, norm::FT, nensemble::Int, rng) where {FT <: Real} weights = scores ./ norm ncopies .= Int.(floor.(weights .+ rand(rng, nensemble))) nothing end -function sample_ids(ensemble::Vector, frequencies::Array, rng) +""" + sample_ids!(mapped_id_set::Vector{Int}, ids::Vector{Int}, frequencies::Vector{Real}, rng) + +Update `mapped_id_set` in place, by sampling from `ids` with frequency `frequencies`. + +Most of the effort is in ensuring that the returned `mapped_id_set` equals `ids` in the +locations where a member is sampled once. For example, if `ids` = [1, 2, 3, 4, 5]` and +frequencies = [2, 2, 0, 0, 1], we want mapped_id_set = [1, 2, 1, 2, 5] or [1, 2, 2, 1, 5]. +That is, the order of the returned mapped_id_set matters, because we only want to rewrite + elements of the ensemble where mapped_id_set .!= ids. + +If we decide to not rewrite trajectories as the algorithm proceeds, and just do it at the end, +this is not required, I think. +""" +function sample_ids!(mapped_id_set::Vector{Int}, ids::Vector{Int}, frequencies::Vector{Int}, rng) nensemble = length(frequencies) - ids = Array(1:1:nensemble) ncopies = sum(frequencies) # First make a list with id[k] repeated frequencies[k] times copied_id_set = shuffle!(rng, vcat([repeat([k], frequencies[k]) for k in 1:nensemble]...)) if ncopies < nensemble ids_chosen = vcat(copied_id_set, copied_id_set[1:nensemble-ncopies]) # Not quite the same as sampling with replacement else - ids_chosen = copied_id_set[1:nensemble] # should be the same as sampling w/o replacement + ids_chosen = copied_id_set[1:nensemble] # the same as sampling w/o replacement end - # be aware!! this is very sensitive. - # For example, this does not return the same distribution -# if sum(frequencies) < nensemble -# ids_chosen = sample(copied_id_set, nensemble) -# else -# ids_chosen = sample(copied_id_set, nensemble, replace = false) -# end - - # Now we have the set of ids we want to carry on with. Within this set, # we may not have nsenemble unique members; some ids appear more than once. # Determine which ids of the original set are cut, and which require ADDITIONAL copies (which must be cloned) - chosen_ncopies = counts(ids_chosen, nensemble) - ids_to_be_cut = ids[chosen_ncopies .== 0] - ids_to_be_cloned = ids[chosen_ncopies .> 1] + n_appearances = StatsBase.counts(ids_chosen, nensemble) + ids_to_be_cut = ids[n_appearances .== 0] + ids_to_be_cloned = ids[n_appearances .> 1] # Determine how many clones to make of those that need them - nclones = chosen_ncopies[ids_to_be_cloned] .- 1 + n_clones = n_appearances[ids_to_be_cloned] .- 1 # Create a list where the cloned ids appear as many times as they are cloned. # By construction, this has the same length as ids_to_be_cut. - cloned_id_set = vcat([repeat([ids_to_be_cloned[k]], nclones[k]) for k in 1:length(ids_to_be_cloned)]...) - return ids_to_be_cut, cloned_id_set -end - - -function rewrite_history!(ensemble::Vector, ids_to_be_cut::Array, cloned_id_set::Array, idx_current::Int) - for j in 1:length(ids_to_be_cut) - idx_cut = ids_to_be_cut[j] - idx_clone = cloned_id_set[j] - ensemble[idx_cut][1:idx_current] .= ensemble[idx_clone][1:idx_current] - end - nothing -end - - -function sample_and_rewrite_history!(ensemble::Vector, frequencies::Array, idx_current::Int, rng) - ids_to_be_cut, cloned_id_set = sample_ids(ensemble, frequencies, rng) - rewrite_history!(ensemble, ids_to_be_cut, cloned_id_set, idx_current) - nothing -end - - -function perturb_trajectories!(u_current::Array, ensemble::Vector, nensemble::Int, idx_current::Int, ϵ::Real) - d = length(u_current[1]) - for j in 1:nensemble - u_current[j] = ensemble[j][idx_current] + randn(d)*ϵ - end - nothing + cloned_id_set = vcat([repeat([ids_to_be_cloned[k]], n_clones[k]) for k in 1:length(ids_to_be_cloned)]...) + mapped_id_set .= ids + mapped_id_set[ids_to_be_cut] .= cloned_id_set + return mapped_id_set end include("utils.jl") include("gev_utils.jl") - -# I'm keeping this around for now; we use it in tests. -function orig_sample_and_rewrite_history!(ensemble::Vector, frequencies::Array, idx_current::Int,rng) - N = length(frequencies) - N_c = sum(frequencies) - - historical_trajectories = [[ensemble[k][1:idx_current],] for k in 1:N] - - copies = shuffle!(rng, vcat(repeat.(historical_trajectories, frequencies)...)) - - if N_c > N - for k in 1:N - ensemble[k][1:idx_current] .= copies[k] - end - else - for k in 1:N_c - ensemble[k][1:idx_current] .= copies[k] - if k+N_c <= N # start over at the beginning if sum(copies) 1] + ids_left_alone = ids[n_appearances .== 1] + # Determine how many clones to make of those that need them + n_clones = n_appearances[ids_to_be_cloned] .- 1 + @test cloned_ids[ids_left_alone] == ids[ids_left_alone] + @test sum(cloned_ids[ids_to_be_cut] .== ids[ids_to_be_cut]) == 0 + final_counts = StatsBase.counts(cloned_ids, nensemble) + @test ids_to_be_cloned == ids[final_counts .> 1] + @test final_counts[final_counts .> 1] .- 1 == n_clones end -@testset "ncopies = 1 -> no change" begin +@testset "sample_ids!: ncopies = 1 limiting case" begin rng = MersenneTwister(1234); - ensemble = [ones(nsteps)*k for k in 1:nensemble] - orig_ensemble = copy(ensemble) ncopies = Int64.(ones(nensemble)) - sample_and_rewrite_history!(ensemble, ncopies, idx_current, rng) - diff = sort(vcat(ensemble...)) .== sort(vcat(orig_ensemble...)) - @test sum(diff) .== nensemble + ids = Array(1:1:nensemble) + cloned_ids = similar(ids) + sample_ids!(cloned_ids, ids, ncopies, rng) + @test cloned_ids == ids end -@testset "ncopies" begin +@testset "ncopies unit test" begin rng= MersenneTwister(1); nensemble = 10 draw = rand(rng, nensemble) - ncopies = ones(nensemble) - scores = Array(1:1:nensemble) + ncopies = Int.(ones(nensemble)) + scores = Array(1.0:1:nensemble) expected_ncopies = Int.(floor.(scores ./mean(scores) .+ draw)) rng= MersenneTwister(1); - compute_ncopies!(ncopies, scores,mean(scores), nensemble, rng) + compute_ncopies!(ncopies, scores, mean(scores), nensemble, rng) similar = ncopies .== expected_ncopies @test sum(similar) == nensemble end diff --git a/test/test_loop.jl b/test/test_loop.jl deleted file mode 100644 index fd33143..0000000 --- a/test/test_loop.jl +++ /dev/null @@ -1,63 +0,0 @@ -using Test -using RareEvents: sample_and_rewrite_history! -using Random - -function original_loop!(ensemble, nresample, nsteps_per_resample, nensemble,rng) - map(0:(nresample-1)) do i - i1 = 1+(i)*nsteps_per_resample - i2 = 1+(i+1)*nsteps_per_resample - - trajectories = [randn(rng, i2-i1+1) for _ in 1:nensemble] - for k in 1:nensemble - ensemble[k][i1:i2] .= trajectories[k] - end - - ncopies = Int64.(round.(rand(rng,nensemble))) .+1 - sample_and_rewrite_history!(ensemble, ncopies, i2, rng) - i = i+1 - end - nothing -end - -function new_loop!(ensemble, nresample, nsteps_per_resample, nensemble,rng) - i1 = 1 - i2 = 1+1*nsteps_per_resample - ncopies = Int64.(zeros(nensemble)) - map(0:(nresample-1)) do i - # can be done per worker - if i > 0 - sample_and_rewrite_history!(ensemble, ncopies, i1, rng) - end - trajectories = [randn(rng, i2-i1+1) for _ in 1:nensemble] - for k in 1:nensemble - ensemble[k][i1:i2] .= trajectories[k] - end - - # Central - ncopies .= Int64.(round.(rand(rng,nensemble))) .+1 - # Increment resample index - i = i+1 - # Update start and end points of integrations - i1 = i2 - i2 = 1+(i+1)*nsteps_per_resample - end - nothing -end - -@testset "Comparing loops" begin - nsteps_per_resample = 1 - nresample = 100 - nensemble = 10 - u1 = zeros(nensemble) - - rng = MersenneTwister(5) - ensemble = [similar(copy(zeros(nresample+1))) for _ in 1:nensemble] - - original_loop!(ensemble, nresample, nsteps_per_resample, nensemble,rng) - - rng = MersenneTwister(5) - ensemble2 = [similar(copy(zeros(nresample+1))) for _ in 1:nensemble] - - new_loop!(ensemble2, nresample, nsteps_per_resample, nensemble,rng) - @test ensemble == ensemble2 -end