From cd1d2f3a5064172a7e74be196a1ff81766c5b9fa Mon Sep 17 00:00:00 2001
From: Lazaro Alonso <lazarus.alon@gmail.com>
Date: Fri, 4 Jul 2025 14:59:58 +0200
Subject: [PATCH 1/4] wip

---
 projects/RbQ10/Q10_lstm.jl        | 88 +++++++++++++++++++++++++++++++
 src/utils/slidingwindow_splits.jl | 10 ++++
 2 files changed, 98 insertions(+)
 create mode 100644 projects/RbQ10/Q10_lstm.jl
 create mode 100644 src/utils/slidingwindow_splits.jl

diff --git a/projects/RbQ10/Q10_lstm.jl b/projects/RbQ10/Q10_lstm.jl
new file mode 100644
index 00000000..58f89b71
--- /dev/null
+++ b/projects/RbQ10/Q10_lstm.jl
@@ -0,0 +1,88 @@
+# activate the project's environment and instantiate dependencies
+using Pkg
+Pkg.activate("projects/RbQ10")
+Pkg.develop(path=pwd())
+Pkg.instantiate()
+
+# start using the package
+using EasyHybrid
+using EasyHybrid.MLUtils
+using Random
+# for Plotting
+using GLMakie
+using AlgebraOfGraphics
+
+script_dir = @__DIR__
+include(joinpath(script_dir, "data", "prec_process_data.jl"))
+
+# Common data preprocessing
+df = dfall[!, Not(:timesteps)]
+ds_keyed = to_keyedArray(Float32.(df))
+
+target_names = [:R_soil]
+forcing_names = [:cham_temp_filled]
+predictor_names = [:moisture_filled, :rgpot2]
+
+# Define neural network
+NN = Chain(Dense(2, 15, relu), Dense(15, 15, relu), Dense(15, 1));
+NN(rand(Float32, 2, 1))
+
+
+# instantiate Hybrid Model
+RbQ10 = RespirationRbQ10(NN, predictor_names, target_names, forcing_names, 2.5f0) # ? do different initial Q10s
+# train model
+out = train(RbQ10, ds_keyed, (:Q10, ); nepochs=200, batchsize=512, opt=Adam(0.01));
+
+##
+output_file = joinpath(@__DIR__, "output_tmp/trained_model.jld2")
+all_groups = get_all_groups(output_file)
+
+# c_lstm = LSTMCell(4 => 6)
+# ps, st = Lux.setup(rng, c_lstm)
+# (y, carry), st_lstm = c_lstm(rand(Float32, 4), ps, st)
+
+# m_lstm = Recurrence(LSTMCell(4 => 6), return_sequence=false)
+
+# ? Let's use `Recurrence` to stack LSTM cells and deal with sequences and batching at the same time!
+
+NN_Memory = Chain(
+    Recurrence(LSTMCell(2 => 6), return_sequence=true),
+    Recurrence(LSTMCell(6 => 2), return_sequence=false),
+    Dense(2 => 1)
+)
+
+
+
+
+ps, st = Lux.setup(rng, NN_Memory)
+mock_data = rand(Float32, 2, 8, 5) #! n_features, n_timesteps (window size), n_samples (batch size)
+y_, st_ = NN_Memory(mock_data, ps, st)
+
+RbQ10_Memory = RespirationRbQ10(NN_Memory, predictor_names, target_names, forcing_names, 2.5f0) # ? do different initial Q10s
+
+## legacy
+# ? test lossfn
+ps, st = LuxCore.setup(Random.default_rng(), RbQ10_Memory)
+# the Tuple `ds_p, ds_t` is later used for batching in the `dataloader`.
+ds_p_f, ds_t = EasyHybrid.prepare_data(RbQ10_Memory, ds_keyed)
+ds_t_nan = .!isnan.(ds_t)
+
+ls = EasyHybrid.lossfn(RbQ10_Memory, ds_p_f, (ds_t, ds_t_nan), ps, st, LoggingLoss())
+
+ls_logs = EasyHybrid.lossfn(RbQ10_Memory, ds_p_f, (ds_t, ds_t_nan), ps, st, LoggingLoss(train_mode=false))
+
+
+
+function split_into_sequences(xin, y_target; window_size = 8)
+    features = size(xin)[1]
+    xdata = slidingwindow(xin, size = window_size, stride = 1)
+    # get the target values corresponding to the sliding windows,
+    # elements of `ydata` correspond to the last sliding window element.
+    ydata = y_target[window_size:length(xdata) + window_size - 1]
+
+    xwindowed = zeros(Float32, ws, features, length(ydata))
+    for i in 1:length(ydata)
+        xwindowed[:, :, i] = getobs(xdata, i)'
+    end
+    return xwindowed, ydata
+end
\ No newline at end of file
diff --git a/src/utils/slidingwindow_splits.jl b/src/utils/slidingwindow_splits.jl
new file mode 100644
index 00000000..a05996f6
--- /dev/null
+++ b/src/utils/slidingwindow_splits.jl
@@ -0,0 +1,10 @@
+function getWdata(xin, y_target; ws = 8)
+    features = size(xin)[1]
+    xdata = slidingwindow(xin, ws, stride = 1)
+    ydata = y_target[ws:length(xdata)+ws-1]
+    xwindowed = zeros(Float32, ws, features, length(ydata))
+    for i in 1:length(ydata)
+        xwindowed[:, :, i] = getobs(xdata, i)'
+    end
+    return xwindowed, ydata
+end
\ No newline at end of file

From 08048ebf25afedb736a751417852b2eefc96aa5c Mon Sep 17 00:00:00 2001
From: Lazaro Alonso <lazarus.alon@gmail.com>
Date: Mon, 7 Jul 2025 13:44:32 +0200
Subject: [PATCH 2/4] wip lstm

---
 projects/RbQ10/Q10_lstm.jl       | 74 +++++++++++++++++++++-----------
 src/models/Respiration_Rb_Q10.jl | 21 ++++++---
 2 files changed, 66 insertions(+), 29 deletions(-)

diff --git a/projects/RbQ10/Q10_lstm.jl b/projects/RbQ10/Q10_lstm.jl
index 58f89b71..80d2ebb6 100644
--- a/projects/RbQ10/Q10_lstm.jl
+++ b/projects/RbQ10/Q10_lstm.jl
@@ -11,6 +11,23 @@ using Random
 # for Plotting
 using GLMakie
 using AlgebraOfGraphics
+using EasyHybrid.AxisKeys
+function split_into_sequences(xin, y_target; window_size = 8)
+    features = size(xin, 1)
+    xdata = slidingwindow(xin, size = window_size, stride = 1)
+    # get the target values corresponding to the sliding windows,
+    # elements of `ydata` correspond to the last sliding window element.
+    ydata = y_target[window_size:length(xdata) + window_size - 1]
+
+    xwindowed = zeros(Float32, features, window_size, length(ydata))
+    for i in 1:length(ydata)
+        xwindowed[:, :, i] = getobs(xdata, i)
+    end
+    xwindowed = KeyedArray(xwindowed; row=xin.row, window=1:window_size, col=1:length(ydata))
+    ydata = KeyedArray(Array(ydata[:,:]'); row=ds_t.row, col=1:length(ydata))
+    return xwindowed, ydata
+end
+
 
 script_dir = @__DIR__
 include(joinpath(script_dir, "data", "prec_process_data.jl"))
@@ -24,8 +41,8 @@ forcing_names = [:cham_temp_filled]
 predictor_names = [:moisture_filled, :rgpot2]
 
 # Define neural network
-NN = Chain(Dense(2, 15, relu), Dense(15, 15, relu), Dense(15, 1));
-NN(rand(Float32, 2, 1))
+NN = Chain(Dense(2, 15, Lux.relu), Dense(15, 15, Lux.relu), Dense(15, 1))
+# NN(rand(Float32, 2,1)) #! needs to be instantiated
 
 
 # instantiate Hybrid Model
@@ -37,12 +54,6 @@ out = train(RbQ10, ds_keyed, (:Q10, ); nepochs=200, batchsize=512, opt=Adam(0.01
 output_file = joinpath(@__DIR__, "output_tmp/trained_model.jld2")
 all_groups = get_all_groups(output_file)
 
-# c_lstm = LSTMCell(4 => 6)
-# ps, st = Lux.setup(rng, c_lstm)
-# (y, carry), st_lstm = c_lstm(rand(Float32, 4), ps, st)
-
-# m_lstm = Recurrence(LSTMCell(4 => 6), return_sequence=false)
-
 # ? Let's use `Recurrence` to stack LSTM cells and deal with sequences and batching at the same time!
 
 NN_Memory = Chain(
@@ -51,9 +62,12 @@ NN_Memory = Chain(
     Dense(2 => 1)
 )
 
+# c_lstm = LSTMCell(4 => 6)
+# ps, st = Lux.setup(rng, c_lstm)
+# (y, carry), st_lstm = c_lstm(rand(Float32, 4), ps, st)
+# m_lstm = Recurrence(LSTMCell(4 => 6), return_sequence=false)
 
-
-
+rng = Random.default_rng(1234)
 ps, st = Lux.setup(rng, NN_Memory)
 mock_data = rand(Float32, 2, 8, 5) #! n_features, n_timesteps (window size), n_samples (batch size)
 y_, st_ = NN_Memory(mock_data, ps, st)
@@ -67,22 +81,34 @@ ps, st = LuxCore.setup(Random.default_rng(), RbQ10_Memory)
 ds_p_f, ds_t = EasyHybrid.prepare_data(RbQ10_Memory, ds_keyed)
 ds_t_nan = .!isnan.(ds_t)
 
-ls = EasyHybrid.lossfn(RbQ10_Memory, ds_p_f, (ds_t, ds_t_nan), ps, st, LoggingLoss())
+# 
+xwindowed, ydata = split_into_sequences(ds_p_f, ds_t; window_size = 8)
+ds_wt = ydata
+ds_wt_nan = .!isnan.(ds_wt)
+# xdata = slidingwindow(ds_p_f, size = 8, stride = 1)
+# getobs(xdata, 1)
+# split_test = rand(Float32, 3, 8, 5)
+using EasyHybrid.AxisKeys
+# A = KeyedArray(split_test; row=ds_p_f.row, window=1:8, col=1:5)
 
-ls_logs = EasyHybrid.lossfn(RbQ10_Memory, ds_p_f, (ds_t, ds_t_nan), ps, st, LoggingLoss(train_mode=false))
+NN_Memory = Chain(
+    Recurrence(LSTMCell(2 => 6), return_sequence=true),
+    Recurrence(LSTMCell(6 => 2), return_sequence=false),
+    Dense(2 => 1)
+)
 
+RbQ10_Memory = RespirationRbQ10(NN_Memory, predictor_names, target_names, forcing_names, 2.5f0) # ? do different initial Q10s
 
+#? this sets up initial ps for the hybrid model version
+rng = Random.default_rng(1234)
+ps, st = Lux.setup(rng, RbQ10_Memory)
 
-function split_into_sequences(xin, y_target; window_size = 8)
-    features = size(xin)[1]
-    xdata = slidingwindow(xin, size = window_size, stride = 1)
-    # get the target values corresponding to the sliding windows,
-    # elements of `ydata` correspond to the last sliding window element.
-    ydata = y_target[window_size:length(xdata) + window_size - 1]
+ls = EasyHybrid.lossfn(RbQ10_Memory, xwindowed, (ds_wt, ds_wt_nan), ps, st, LoggingLoss())
+
+ls_logs = EasyHybrid.lossfn(RbQ10_Memory, xwindowed, (ds_wt, ds_wt_nan), ps, st, LoggingLoss(train_mode=false))
+
+
+# p = A(RbQ10_Memory.predictors)
+# x = Array(A(RbQ10_Memory.forcing)) # don't propagate names after this
+# Rb, st = LuxCore.apply(RbQ10_Memory.NN, p, ps.ps, st.st)
 
-    xwindowed = zeros(Float32, ws, features, length(ydata))
-    for i in 1:length(ydata)
-        xwindowed[:, :, i] = getobs(xdata, i)'
-    end
-    return xwindowed, ydata
-end
\ No newline at end of file
diff --git a/src/models/Respiration_Rb_Q10.jl b/src/models/Respiration_Rb_Q10.jl
index 16e95353..e9443275 100644
--- a/src/models/Respiration_Rb_Q10.jl
+++ b/src/models/Respiration_Rb_Q10.jl
@@ -51,13 +51,24 @@ ŷ (respiration rate) is computed as a function of the neural network output `R
 """
 function (hm::RespirationRbQ10)(ds_k, ps, st::NamedTuple)
     p = ds_k(hm.predictors)
-    x = Array(ds_k(hm.forcing)) # don't propagate names after this
-    
-    Rb, st = LuxCore.apply(hm.NN, p, ps.ps, st.st) #! NN(αᵢ(t)) ≡ Rb(T(t), M(t))
+    x_forcing = ds_k(hm.forcing)
+    x = if hasproperty(x_forcing, :window)
+        last_idx = last(x_forcing.window)
+        @show last_idx # ! don't propagate names after this, hence the `Array` conversion
+        return Array(x_forcing[window=last_idx])
+    else
+        return Array(x_forcing)
+    end
 
+    # x = Array(x_forcing) # don't propagate names after this
+    @show size(x)
+    # x = x[:, 8 , :]
+    Rb, st = LuxCore.apply(hm.NN, p, ps.ps, st.st) #! NN(αᵢ(t)) ≡ Rb(T(t), M(t))
+    # @show size(Rb)
+    # @show size(x)
+    # @show size(p)
     #TODO output name flexible - could be R_soil, heterotrophic, autotrophic, etc.
     R_soil = mRbQ10(Rb, ps.Q10, x, 15.0f0) # ? should 15°C be the reference temperature also an input variable?
 
     return (; R_soil), (; Rb, st)
-end
-
+end
\ No newline at end of file

From 1d5e9eb9dace69f19df854036c8d587bd4752b21 Mon Sep 17 00:00:00 2001
From: lalonso <lalonso@bgc-jena.mpg.de>
Date: Tue, 15 Jul 2025 16:17:27 +0100
Subject: [PATCH 3/4] update ignore

---
 .gitignore | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index ae4f8f93..ccdfebf2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,4 +10,8 @@ build/
 node_modules/
 package-lock.json
 Manifest.toml
-*_tmp
\ No newline at end of file
+*_tmp
+*.pdf
+*.pptx
+*.nc
+*.png
\ No newline at end of file

From c2da023cd487d20dcc47452cc9587d5a99f7d4e1 Mon Sep 17 00:00:00 2001
From: lalonso <lalonso@bgc-jena.mpg.de>
Date: Tue, 15 Jul 2025 16:36:41 +0100
Subject: [PATCH 4/4] back to normal

---
 src/models/Respiration_Rb_Q10.jl | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/src/models/Respiration_Rb_Q10.jl b/src/models/Respiration_Rb_Q10.jl
index 3b242174..d67cd3bd 100644
--- a/src/models/Respiration_Rb_Q10.jl
+++ b/src/models/Respiration_Rb_Q10.jl
@@ -51,17 +51,18 @@ ŷ (respiration rate) is computed as a function of the neural network output `R
 """
 function (hm::RespirationRbQ10)(ds_k, ps, st::NamedTuple)
     p = ds_k(hm.predictors)
-    x_forcing = ds_k(hm.forcing)
-    x = if hasproperty(x_forcing, :window)
-        last_idx = last(x_forcing.window)
-        @show last_idx # ! don't propagate names after this, hence the `Array` conversion
-        return Array(x_forcing[window=last_idx])
-    else
-        return Array(x_forcing)
-    end
+    x_forcing = Array(ds_k(hm.forcing))
+    x = x_forcing
+    # x = if hasproperty(x_forcing, :window)
+    #     last_idx = last(x_forcing.window)
+    #     @show last_idx # ! don't propagate names after this, hence the `Array` conversion
+    #     return Array(x_forcing[window=last_idx])
+    # else
+    #     return Array(x_forcing)
+    # end
 
-    # x = Array(x_forcing) # don't propagate names after this
-    @show size(x)
+    # # x = Array(x_forcing) # don't propagate names after this
+    # @show size(x)
     # x = x[:, 8 , :]
     Rb, stQ10 = LuxCore.apply(hm.NN, p, ps.ps, st.st) #! NN(αᵢ(t)) ≡ Rb(T(t), M(t))
     # @show size(Rb)