diff --git a/.gitignore b/.gitignore index b202fac..723dd62 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ /Manifest.toml -test/data \ No newline at end of file +test/data +docs/.quarto +docs/presentation_files \ No newline at end of file diff --git a/docs/Manifest.toml b/docs/Manifest.toml index 670ab53..0370f03 100644 --- a/docs/Manifest.toml +++ b/docs/Manifest.toml @@ -1785,7 +1785,7 @@ uuid = "43287f4e-b6f4-7ad1-bb20-aadabca52c3d" version = "1.2.0" [[deps.PyramidScheme]] -deps = ["Colors", "DimensionalData", "DiskArrayEngine", "DiskArrayTools", "DiskArrays", "Extents", "FillArrays", "GeoInterface", "Makie", "MakieCore", "Observables", "OffsetArrays", "Proj", "Statistics", "WGLMakie", "YAXArrayBase", "YAXArrays", "Zarr"] +deps = ["Colors", "DimensionalData", "DiskArrayEngine", "DiskArrayTools", "DiskArrays", "Extents", "FillArrays", "GeoInterface", "Makie", "MakieCore", "Observables", "OffsetArrays", "Proj", "Statistics", "YAXArrayBase", "YAXArrays", "Zarr"] path = ".." uuid = "ec211b67-1c2c-4319-878f-eaee078ee145" version = "0.1.0" diff --git a/docs/juliacon.qmd b/docs/presentation.qmd similarity index 89% rename from docs/juliacon.qmd rename to docs/presentation.qmd index 99acaff..9051d04 100644 --- a/docs/juliacon.qmd +++ b/docs/presentation.qmd @@ -1,12 +1,23 @@ --- -engine: julia title: "PyramidScheme.jl \n interactively plotting large raster data" author: "Felix Cremer, Fabian Gans" format: revealjs +engine: julia execute: echo: true --- +## Why use pyramids? + +julia> agbdiffbase = agb2020 .- agb2017 + +╭───────────────────────────────────╮ + +│ 405000×157500 YAXArray{Float32,2} │ + +- julia> savecube(agbdiffbase, tempname()*".zarr") +Progress: 1%|▍ | ETA: 3:15:43 + ## What are pyramids? ```{julia} @@ -65,11 +76,12 @@ plotpyramid(europyr) ``` + ## Compute the pyramids for your data - In Memory data ```{julia} -#| eval: false +#| eval: false using Rasters using RasterDataSources ras = Raster(WorldClim{Elevation},:elev, res="30s", lazy=true) @@ -101,13 +113,13 @@ agb2020 = replacenan.(Pyramid(joinpath(@__DIR__,"..", "test/data/ESACCI-BIOMASS- # Interactive visualisation ```{julia} -#| eval: false +#| eval: false plot(agb2020, colormap=:speed, colorscale=sqrt) ``` # Computations on Pyramids ```{julia} -#| eval: false +#| eval: false replacenan(data) = data <= 0 ? NaN32 : Float32(data) agb2020 = replacenan.(Pyramid(joinpath(@__DIR__,"..", "test/data/ESACCI-BIOMASS-L4-AGB-MERGED-100m-2020-fv4.0.zarr/"))) agb2017 = replacenan.(Pyramid(joinpath(@__DIR__,"..", "test/data/ESACCI-BIOMASS-L4-AGB-MERGED-100m-2017-fv4.0.zarr/"))) @@ -118,16 +130,19 @@ plot(agbdiff, colormap=:bukavu, colorrange=(-200, 200)) # Outlook -- pyramids in multiple dimensions +- pyramids in multiple dimensions (WIP) - Understanding more formats +- Expose data as WMTS/WMS for online usage + - Integrate DiskArrayEngine computations - Open it up to more use cases +## Example of cloud access ```{julia} -#| eval:false +#| eval: false using YAXArrays, PyramidScheme using GLMakie import PyramidScheme as PS diff --git a/src/PyramidScheme.jl b/src/PyramidScheme.jl index 5969f82..f45f79d 100644 --- a/src/PyramidScheme.jl +++ b/src/PyramidScheme.jl @@ -45,8 +45,7 @@ struct Pyramid{T,N,D,A,B<:DD.AbstractDimArray{T,N,D,A},L, Me} <: DD.AbstractDimA end function Pyramid(data::DD.AbstractDimArray; resampling_method=mean ∘ skipmissing, kwargs...) - pyrdata, pyraxs = getpyramids(resampling_method, data; kwargs...) - levels = DD.DimArray.(pyrdata, pyraxs) + levels = getpyramids(resampling_method, data; kwargs...) meta = Dict(deepcopy(DD.metadata(data))) push!(meta, "resampling_method" => "mean_skipmissing") Pyramid(data, levels, meta) @@ -206,22 +205,63 @@ Fill the pyramids generated from the `data` with the aggregation function `func` `recursive` indicates whether higher tiles are computed from lower tiles or directly from the original data. This is an optimization which for functions like median might lead to misleading results. """ -function fill_pyramids(data, outputs,func,recursive;runner=LocalRunner, kwargs...) +function fill_pyramids(data, outputs,func,recursive;runner=LocalRunner, verbose=false, outtype=:mem, kwargs...) + t = typeof(func(zeros(eltype(data), 2,2))) + n_level = compute_nlevels(size(data)) + input_axes = pyramidedaxes(data) + nonpyramiddims = DD.otherdims(data, input_axes) + @show nonpyramiddims + if length(input_axes) != 2 + throw(ArgumentError("Expected two spatial dimensions got $input_axes")) + end + verbose && println("Constructing output arrays") + spatialsize = size(data)[collect(DD.dimnum(data, input_axes))] + pyramid_sizes = [ceil.(Int, spatialsize ./ 2^i) for i in 1:n_level] + allsizes = [spatialsize...,] + sizeperm = [DD.dimnum(data, input_axes)..., DD.dimnum(data, nonpyramiddims)...] + #permute!(allsizes, sizeperm) + @show allsizes + #outputs = if outtype == :zarr + # [output_zarr(n, input_axes, t, joinpath(path, string(n))) for n in 1:n_level] + #elseif outtype == :mem + # output_arrays(pyramid_sizes, t) + #else + # throw(ArgumentError("Output type not valid got $outtype, but expected :mem or :zarr")) + #end + + verbose && println("Start computation") n_level = length(outputs) + @show typeof(data) + @show n_level + @show size.(outputs) pixel_base_size = 2^n_level pyramid_sizes = size.(outputs) + # What is tmp_sizes supposed to be? + # Does this have to be tmp_sizes = [ceil(Int,pixel_base_size / 2^i) for i in 1:n_level] + windows = Any[Base.OneTo.(size(data))...] + #pseudocode + windows[DD.dimnum(data, XDim)] = RegularWindows(1,size(data, XDim),window=pixel_base_size) + windows[DD.dimnum(data, YDim)] = RegularWindows(1,size(data, YDim),window=pixel_base_size) +@show size.(windows) + + ia = InputArray(data;windows = Tuple(windows)) + # Replace + function outwindows(i, outputs, tmpsize) + outwins = Any[Base.OneTo.(size(data))...] + outwins[DD.dimnum(outputs[i], XDim)] = RegularWindows(1,size(outputs[i], XDim),window=tmpsize) + outwins[DD.dimnum(outputs[i], YDim)] = RegularWindows(1,size(outputs[i], YDim),window=tmpsize) + Tuple(outwins) + end - ia = InputArray(data, windows = arraywindows(size(data),pixel_base_size)) - - oa = ntuple(i->create_outwindows(pyramid_sizes[i],windows = arraywindows(pyramid_sizes[i],tmp_sizes[i])),n_level) + oa = ntuple(i->create_outwindows(pyramid_sizes[i],windows = outwindows(i,outputs, tmp_sizes[i])),n_level) func = DiskArrayEngine.create_userfunction(gen_pyr,ntuple(_->eltype(first(outputs)),length(outputs));is_mutating=true,kwargs = (;recursive),args = (func,)) op = GMDWop((ia,), oa, func) lr = DiskArrayEngine.optimize_loopranges(op,5e8,tol_low=0.2,tol_high=0.05,max_order=2) - r = runner(op,lr,outputs;kwargs...) + r = runner(op,lr,getproperty.(outputs, :data);kwargs...) run(r) end @@ -254,18 +294,21 @@ Construct a list of `RegularWindows` for the size list in `s` for windows `w`. ?? """ function arraywindows(s,w) - map(s) do l + @show s + @show w + windows = map(s) do l RegularWindows(1,l,window=w) end + windows end """ - compute_nlevels(data, tilesize=1024) + compute_nlevels(data, tilesize=256) -Compute the number of levels for the aggregation based on the size of `data`. +Compute the number of levels for the aggregation based on the tuple `datasize`. """ -compute_nlevels(data, tilesize=256) = max(0,ceil(Int,log2(maximum(size(data))/tilesize))) +compute_nlevels(datasize, tilesize=256) = max(0,ceil(Int,log2(maximum(datasize)/tilesize))) function agg_axis(d,n) # TODO this might be problematic for explicitly set axes @@ -284,7 +327,7 @@ function gen_output(t,s; path=tempname()) if outsize > 100e6 # This should be zgroup instead of zcreate, could use savedataset(skelton=true) # Dummy dataset with FillArrays with the shape of the pyramidlevel - zcreate(t,s...;path,chunks = (1024,1024),fill_value=zero(t)) + zcreate(t,s...;path,chunks=ntuple(i->min(256, s[i]), length(s)),fill_value=zero(t)) else zeros(t,s...) end @@ -295,9 +338,9 @@ function DiskArrayEngine.engine(dimarr::DD.AbstractDimArray) end DiskArrayEngine.engine(pyr::Pyramid) = Pyramid(engine(parent(pyr)), engine.(pyr.levels), DD.metadata(pyr)) """ - output_arrays(pyramid_sizes) + output_arrays(pyramid_sizes, T) -Create the output arrays for the given `pyramid_sizes` +Create the output arrays for the given `pyramid_sizes` with the element type T. """ output_arrays(pyramid_sizes, T) = [gen_output(T,p) for p in pyramid_sizes] @@ -307,6 +350,9 @@ Union of Dimensions which are assumed to be in space and are therefore used in t """ SpatialDim = Union{DD.Dimensions.XDim, DD.Dimensions.YDim} +pyramidedaxes(input) = filter(x-> x isa SpatialDim, DD.dims(input)) + + """ buildpyramids(path; resampling_method=mean) Build the pyramids for the zarr dataset at `path` and write the pyramid layers into the zarr folder. @@ -375,19 +421,23 @@ Compute the data of the pyramids of a given data cube `ras`. This returns the data of the pyramids and the dimension values of the aggregated axes. """ function getpyramids(reducefunc, ras;recursive=true) - input_axes = DD.dims(ras) - n_level = compute_nlevels(ras) + input_axes = pyramidedaxes(ras) + n_level = compute_nlevels(length.(input_axes)) if iszero(n_level) @info "Array is smaller than the tilesize no pyramids are computed" [ras], [dims(ras)] end - pyramid_sizes = [ceil.(Int, size(ras) ./ 2^i) for i in 1:n_level] - pyramid_axes = [agg_axis.(input_axes,2^i) for i in 1:n_level] + pyramid_axes = [map(x-> in(x, input_axes) ? agg_axis(x, 2^l) : x , DD.dims(ras)) for l in 1:n_level] + pyramid_sizes = size.(pyramid_axes) + @show pyramid_sizes + t = typeof(reducefunc(zeros(eltype(ras), 2,2))) - outmin = output_arrays(pyramid_sizes, Float32) - fill_pyramids(ras,outmin,reducefunc,recursive; threaded=true) + outmin = output_arrays(pyramid_sizes, t) + @show size.(outmin) + levels = DD.DimArray.(outmin, pyramid_axes) + fill_pyramids(ras,levels,reducefunc,recursive; threaded=true) - outmin, pyramid_axes + levels end """ diff --git a/test/runtests.jl b/test/runtests.jl index 114521b..790d79a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -45,9 +45,9 @@ end end @testitem "Helper functions" begin using PyramidScheme: PyramidScheme as PS - @test PS.compute_nlevels(zeros(1000)) == 2 - @test PS.compute_nlevels(zeros(1000,1025)) == 3 - @test PS.compute_nlevels(zeros(10000, 8000)) == 6 + @test PS.compute_nlevels((1000,)) == 2 + @test PS.compute_nlevels((1000,1025)) == 3 + @test PS.compute_nlevels((10000, 8000)) == 6 end @testitem "ArchGDAL Loading of Geotiff Overviews" begin @@ -123,3 +123,20 @@ end end =# +@testitem "Building pyramid with additional dimension in memory" begin + # The aim of this test is to check whether we can build a pyramid from a data cube with an extra dimension. + # We will only build the pyramids on the spatial dimensions and keep the other dimensions as is. + using YAXArrays + using Zarr + using PyramidScheme + using DimensionalData + orgpath = joinpath(@__DIR__, "data", "world.zarr") + path = tempname() * ".zarr" + cp(orgpath, path) + c = Cube(path) + @time "Building world pyramid" pyr = Pyramid(c) + @test pyr isa Pyramid + @test length(dims(pyr)) == 3 + @test size(pyr.levels[end]) == (256,128,3) + +end \ No newline at end of file