From cd1d2f3a5064172a7e74be196a1ff81766c5b9fa Mon Sep 17 00:00:00 2001 From: Lazaro Alonso Date: Fri, 4 Jul 2025 14:59:58 +0200 Subject: [PATCH 1/4] wip --- projects/RbQ10/Q10_lstm.jl | 88 +++++++++++++++++++++++++++++++ src/utils/slidingwindow_splits.jl | 10 ++++ 2 files changed, 98 insertions(+) create mode 100644 projects/RbQ10/Q10_lstm.jl create mode 100644 src/utils/slidingwindow_splits.jl diff --git a/projects/RbQ10/Q10_lstm.jl b/projects/RbQ10/Q10_lstm.jl new file mode 100644 index 00000000..58f89b71 --- /dev/null +++ b/projects/RbQ10/Q10_lstm.jl @@ -0,0 +1,88 @@ +# activate the project's environment and instantiate dependencies +using Pkg +Pkg.activate("projects/RbQ10") +Pkg.develop(path=pwd()) +Pkg.instantiate() + +# start using the package +using EasyHybrid +using EasyHybrid.MLUtils +using Random +# for Plotting +using GLMakie +using AlgebraOfGraphics + +script_dir = @__DIR__ +include(joinpath(script_dir, "data", "prec_process_data.jl")) + +# Common data preprocessing +df = dfall[!, Not(:timesteps)] +ds_keyed = to_keyedArray(Float32.(df)) + +target_names = [:R_soil] +forcing_names = [:cham_temp_filled] +predictor_names = [:moisture_filled, :rgpot2] + +# Define neural network +NN = Chain(Dense(2, 15, relu), Dense(15, 15, relu), Dense(15, 1)); +NN(rand(Float32, 2, 1)) + + +# instantiate Hybrid Model +RbQ10 = RespirationRbQ10(NN, predictor_names, target_names, forcing_names, 2.5f0) # ? do different initial Q10s +# train model +out = train(RbQ10, ds_keyed, (:Q10, ); nepochs=200, batchsize=512, opt=Adam(0.01)); + +## +output_file = joinpath(@__DIR__, "output_tmp/trained_model.jld2") +all_groups = get_all_groups(output_file) + +# c_lstm = LSTMCell(4 => 6) +# ps, st = Lux.setup(rng, c_lstm) +# (y, carry), st_lstm = c_lstm(rand(Float32, 4), ps, st) + +# m_lstm = Recurrence(LSTMCell(4 => 6), return_sequence=false) + +# ? Let's use `Recurrence` to stack LSTM cells and deal with sequences and batching at the same time! + +NN_Memory = Chain( + Recurrence(LSTMCell(2 => 6), return_sequence=true), + Recurrence(LSTMCell(6 => 2), return_sequence=false), + Dense(2 => 1) +) + + + + +ps, st = Lux.setup(rng, NN_Memory) +mock_data = rand(Float32, 2, 8, 5) #! n_features, n_timesteps (window size), n_samples (batch size) +y_, st_ = NN_Memory(mock_data, ps, st) + +RbQ10_Memory = RespirationRbQ10(NN_Memory, predictor_names, target_names, forcing_names, 2.5f0) # ? do different initial Q10s + +## legacy +# ? test lossfn +ps, st = LuxCore.setup(Random.default_rng(), RbQ10_Memory) +# the Tuple `ds_p, ds_t` is later used for batching in the `dataloader`. +ds_p_f, ds_t = EasyHybrid.prepare_data(RbQ10_Memory, ds_keyed) +ds_t_nan = .!isnan.(ds_t) + +ls = EasyHybrid.lossfn(RbQ10_Memory, ds_p_f, (ds_t, ds_t_nan), ps, st, LoggingLoss()) + +ls_logs = EasyHybrid.lossfn(RbQ10_Memory, ds_p_f, (ds_t, ds_t_nan), ps, st, LoggingLoss(train_mode=false)) + + + +function split_into_sequences(xin, y_target; window_size = 8) + features = size(xin)[1] + xdata = slidingwindow(xin, size = window_size, stride = 1) + # get the target values corresponding to the sliding windows, + # elements of `ydata` correspond to the last sliding window element. + ydata = y_target[window_size:length(xdata) + window_size - 1] + + xwindowed = zeros(Float32, ws, features, length(ydata)) + for i in 1:length(ydata) + xwindowed[:, :, i] = getobs(xdata, i)' + end + return xwindowed, ydata +end \ No newline at end of file diff --git a/src/utils/slidingwindow_splits.jl b/src/utils/slidingwindow_splits.jl new file mode 100644 index 00000000..a05996f6 --- /dev/null +++ b/src/utils/slidingwindow_splits.jl @@ -0,0 +1,10 @@ +function getWdata(xin, y_target; ws = 8) + features = size(xin)[1] + xdata = slidingwindow(xin, ws, stride = 1) + ydata = y_target[ws:length(xdata)+ws-1] + xwindowed = zeros(Float32, ws, features, length(ydata)) + for i in 1:length(ydata) + xwindowed[:, :, i] = getobs(xdata, i)' + end + return xwindowed, ydata +end \ No newline at end of file From 08048ebf25afedb736a751417852b2eefc96aa5c Mon Sep 17 00:00:00 2001 From: Lazaro Alonso Date: Mon, 7 Jul 2025 13:44:32 +0200 Subject: [PATCH 2/4] wip lstm --- projects/RbQ10/Q10_lstm.jl | 74 +++++++++++++++++++++----------- src/models/Respiration_Rb_Q10.jl | 21 ++++++--- 2 files changed, 66 insertions(+), 29 deletions(-) diff --git a/projects/RbQ10/Q10_lstm.jl b/projects/RbQ10/Q10_lstm.jl index 58f89b71..80d2ebb6 100644 --- a/projects/RbQ10/Q10_lstm.jl +++ b/projects/RbQ10/Q10_lstm.jl @@ -11,6 +11,23 @@ using Random # for Plotting using GLMakie using AlgebraOfGraphics +using EasyHybrid.AxisKeys +function split_into_sequences(xin, y_target; window_size = 8) + features = size(xin, 1) + xdata = slidingwindow(xin, size = window_size, stride = 1) + # get the target values corresponding to the sliding windows, + # elements of `ydata` correspond to the last sliding window element. + ydata = y_target[window_size:length(xdata) + window_size - 1] + + xwindowed = zeros(Float32, features, window_size, length(ydata)) + for i in 1:length(ydata) + xwindowed[:, :, i] = getobs(xdata, i) + end + xwindowed = KeyedArray(xwindowed; row=xin.row, window=1:window_size, col=1:length(ydata)) + ydata = KeyedArray(Array(ydata[:,:]'); row=ds_t.row, col=1:length(ydata)) + return xwindowed, ydata +end + script_dir = @__DIR__ include(joinpath(script_dir, "data", "prec_process_data.jl")) @@ -24,8 +41,8 @@ forcing_names = [:cham_temp_filled] predictor_names = [:moisture_filled, :rgpot2] # Define neural network -NN = Chain(Dense(2, 15, relu), Dense(15, 15, relu), Dense(15, 1)); -NN(rand(Float32, 2, 1)) +NN = Chain(Dense(2, 15, Lux.relu), Dense(15, 15, Lux.relu), Dense(15, 1)) +# NN(rand(Float32, 2,1)) #! needs to be instantiated # instantiate Hybrid Model @@ -37,12 +54,6 @@ out = train(RbQ10, ds_keyed, (:Q10, ); nepochs=200, batchsize=512, opt=Adam(0.01 output_file = joinpath(@__DIR__, "output_tmp/trained_model.jld2") all_groups = get_all_groups(output_file) -# c_lstm = LSTMCell(4 => 6) -# ps, st = Lux.setup(rng, c_lstm) -# (y, carry), st_lstm = c_lstm(rand(Float32, 4), ps, st) - -# m_lstm = Recurrence(LSTMCell(4 => 6), return_sequence=false) - # ? Let's use `Recurrence` to stack LSTM cells and deal with sequences and batching at the same time! NN_Memory = Chain( @@ -51,9 +62,12 @@ NN_Memory = Chain( Dense(2 => 1) ) +# c_lstm = LSTMCell(4 => 6) +# ps, st = Lux.setup(rng, c_lstm) +# (y, carry), st_lstm = c_lstm(rand(Float32, 4), ps, st) +# m_lstm = Recurrence(LSTMCell(4 => 6), return_sequence=false) - - +rng = Random.default_rng(1234) ps, st = Lux.setup(rng, NN_Memory) mock_data = rand(Float32, 2, 8, 5) #! n_features, n_timesteps (window size), n_samples (batch size) y_, st_ = NN_Memory(mock_data, ps, st) @@ -67,22 +81,34 @@ ps, st = LuxCore.setup(Random.default_rng(), RbQ10_Memory) ds_p_f, ds_t = EasyHybrid.prepare_data(RbQ10_Memory, ds_keyed) ds_t_nan = .!isnan.(ds_t) -ls = EasyHybrid.lossfn(RbQ10_Memory, ds_p_f, (ds_t, ds_t_nan), ps, st, LoggingLoss()) +# +xwindowed, ydata = split_into_sequences(ds_p_f, ds_t; window_size = 8) +ds_wt = ydata +ds_wt_nan = .!isnan.(ds_wt) +# xdata = slidingwindow(ds_p_f, size = 8, stride = 1) +# getobs(xdata, 1) +# split_test = rand(Float32, 3, 8, 5) +using EasyHybrid.AxisKeys +# A = KeyedArray(split_test; row=ds_p_f.row, window=1:8, col=1:5) -ls_logs = EasyHybrid.lossfn(RbQ10_Memory, ds_p_f, (ds_t, ds_t_nan), ps, st, LoggingLoss(train_mode=false)) +NN_Memory = Chain( + Recurrence(LSTMCell(2 => 6), return_sequence=true), + Recurrence(LSTMCell(6 => 2), return_sequence=false), + Dense(2 => 1) +) +RbQ10_Memory = RespirationRbQ10(NN_Memory, predictor_names, target_names, forcing_names, 2.5f0) # ? do different initial Q10s +#? this sets up initial ps for the hybrid model version +rng = Random.default_rng(1234) +ps, st = Lux.setup(rng, RbQ10_Memory) -function split_into_sequences(xin, y_target; window_size = 8) - features = size(xin)[1] - xdata = slidingwindow(xin, size = window_size, stride = 1) - # get the target values corresponding to the sliding windows, - # elements of `ydata` correspond to the last sliding window element. - ydata = y_target[window_size:length(xdata) + window_size - 1] +ls = EasyHybrid.lossfn(RbQ10_Memory, xwindowed, (ds_wt, ds_wt_nan), ps, st, LoggingLoss()) + +ls_logs = EasyHybrid.lossfn(RbQ10_Memory, xwindowed, (ds_wt, ds_wt_nan), ps, st, LoggingLoss(train_mode=false)) + + +# p = A(RbQ10_Memory.predictors) +# x = Array(A(RbQ10_Memory.forcing)) # don't propagate names after this +# Rb, st = LuxCore.apply(RbQ10_Memory.NN, p, ps.ps, st.st) - xwindowed = zeros(Float32, ws, features, length(ydata)) - for i in 1:length(ydata) - xwindowed[:, :, i] = getobs(xdata, i)' - end - return xwindowed, ydata -end \ No newline at end of file diff --git a/src/models/Respiration_Rb_Q10.jl b/src/models/Respiration_Rb_Q10.jl index 16e95353..e9443275 100644 --- a/src/models/Respiration_Rb_Q10.jl +++ b/src/models/Respiration_Rb_Q10.jl @@ -51,13 +51,24 @@ ŷ (respiration rate) is computed as a function of the neural network output `R """ function (hm::RespirationRbQ10)(ds_k, ps, st::NamedTuple) p = ds_k(hm.predictors) - x = Array(ds_k(hm.forcing)) # don't propagate names after this - - Rb, st = LuxCore.apply(hm.NN, p, ps.ps, st.st) #! NN(αᵢ(t)) ≡ Rb(T(t), M(t)) + x_forcing = ds_k(hm.forcing) + x = if hasproperty(x_forcing, :window) + last_idx = last(x_forcing.window) + @show last_idx # ! don't propagate names after this, hence the `Array` conversion + return Array(x_forcing[window=last_idx]) + else + return Array(x_forcing) + end + # x = Array(x_forcing) # don't propagate names after this + @show size(x) + # x = x[:, 8 , :] + Rb, st = LuxCore.apply(hm.NN, p, ps.ps, st.st) #! NN(αᵢ(t)) ≡ Rb(T(t), M(t)) + # @show size(Rb) + # @show size(x) + # @show size(p) #TODO output name flexible - could be R_soil, heterotrophic, autotrophic, etc. R_soil = mRbQ10(Rb, ps.Q10, x, 15.0f0) # ? should 15°C be the reference temperature also an input variable? return (; R_soil), (; Rb, st) -end - +end \ No newline at end of file From 1d5e9eb9dace69f19df854036c8d587bd4752b21 Mon Sep 17 00:00:00 2001 From: lalonso Date: Tue, 15 Jul 2025 16:17:27 +0100 Subject: [PATCH 3/4] update ignore --- .gitignore | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index ae4f8f93..ccdfebf2 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,8 @@ build/ node_modules/ package-lock.json Manifest.toml -*_tmp \ No newline at end of file +*_tmp +*.pdf +*.pptx +*.nc +*.png \ No newline at end of file From c2da023cd487d20dcc47452cc9587d5a99f7d4e1 Mon Sep 17 00:00:00 2001 From: lalonso Date: Tue, 15 Jul 2025 16:36:41 +0100 Subject: [PATCH 4/4] back to normal --- src/models/Respiration_Rb_Q10.jl | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/models/Respiration_Rb_Q10.jl b/src/models/Respiration_Rb_Q10.jl index 3b242174..d67cd3bd 100644 --- a/src/models/Respiration_Rb_Q10.jl +++ b/src/models/Respiration_Rb_Q10.jl @@ -51,17 +51,18 @@ ŷ (respiration rate) is computed as a function of the neural network output `R """ function (hm::RespirationRbQ10)(ds_k, ps, st::NamedTuple) p = ds_k(hm.predictors) - x_forcing = ds_k(hm.forcing) - x = if hasproperty(x_forcing, :window) - last_idx = last(x_forcing.window) - @show last_idx # ! don't propagate names after this, hence the `Array` conversion - return Array(x_forcing[window=last_idx]) - else - return Array(x_forcing) - end + x_forcing = Array(ds_k(hm.forcing)) + x = x_forcing + # x = if hasproperty(x_forcing, :window) + # last_idx = last(x_forcing.window) + # @show last_idx # ! don't propagate names after this, hence the `Array` conversion + # return Array(x_forcing[window=last_idx]) + # else + # return Array(x_forcing) + # end - # x = Array(x_forcing) # don't propagate names after this - @show size(x) + # # x = Array(x_forcing) # don't propagate names after this + # @show size(x) # x = x[:, 8 , :] Rb, stQ10 = LuxCore.apply(hm.NN, p, ps.ps, st.st) #! NN(αᵢ(t)) ≡ Rb(T(t), M(t)) # @show size(Rb)