diff --git a/opencog/atoms/aten/TensorEquation.cc b/opencog/atoms/aten/TensorEquation.cc index 6222011..6125db5 100644 --- a/opencog/atoms/aten/TensorEquation.cc +++ b/opencog/atoms/aten/TensorEquation.cc @@ -605,6 +605,15 @@ ATenValuePtr TensorEquation::execute( { result = apply_nonlinearity(result, Nonlinearity::THRESHOLD); } + // For Hybrid mode: apply sigmoid to squash to [0,1] if no nonlinearity + // is set, then threshold to produce binary output. The straight-through + // estimator in backward() allows gradients to flow through the threshold. + else if (_mode == ReasoningMode::HYBRID) + { + if (_nonlinearity == Nonlinearity::NONE) + result = apply_nonlinearity(result, Nonlinearity::SIGMOID); + result = apply_nonlinearity(result, Nonlinearity::THRESHOLD); + } return result; } @@ -863,6 +872,7 @@ TensorProgram::TensorProgram(const std::string& name, ReasoningMode mode) _convergence_threshold(1e-6), _learning_rate(0.01), _track_gradients(false), + _grad_clip(0.0), _forward_count(0), _backward_count(0) { @@ -1181,6 +1191,21 @@ void TensorProgram::backward(const std::string& output_name, } } +// Clip gradient vector in-place so its L2-norm does not exceed max_norm. +// No-op when max_norm <= 0. 
+static void clip_gradient_vector(std::vector<double>& g_vec, double max_norm) +{ + if (max_norm <= 0.0) return; + double norm = 0.0; + for (double v : g_vec) norm += v * v; + norm = std::sqrt(norm); + if (norm > max_norm) + { + double scale = max_norm / norm; + for (double& v : g_vec) v *= scale; + } +} + void TensorProgram::update_parameters() { for (auto& eq : _equations) @@ -1194,6 +1219,8 @@ void TensorProgram::update_parameters() auto g_vec = eq->weight_grad()->to_vector(); auto w_shape = eq->weight()->shape(); + clip_gradient_vector(g_vec, _grad_clip); + for (size_t i = 0; i < w_vec.size() && i < g_vec.size(); i++) w_vec[i] -= _learning_rate * g_vec[i]; @@ -1207,6 +1234,8 @@ void TensorProgram::update_parameters() auto g_vec = eq->bias_grad()->to_vector(); auto b_shape = eq->bias()->shape(); + clip_gradient_vector(g_vec, _grad_clip); + for (size_t i = 0; i < b_vec.size() && i < g_vec.size(); i++) b_vec[i] -= _learning_rate * g_vec[i]; @@ -1227,6 +1256,7 @@ double TensorProgram::train( size_t epochs) { double final_loss = 0.0; + _loss_history.clear(); for (size_t epoch = 0; epoch < epochs; epoch++) { @@ -1237,13 +1267,18 @@ double TensorProgram::train( // Forward pass forward_to_fixpoint(); - // Compute loss + // Compute loss with current (pre-update) parameters and record it. + // Recording before update follows standard ML convention: the logged + // value is the loss the model had entering this epoch. 
final_loss = 0.0; for (const auto& [name, target] : targets) { final_loss += compute_loss(name, target); } + // Record loss for this epoch + _loss_history.push_back(final_loss); + // Backward pass (simplified) for (const auto& [name, target] : targets) { diff --git a/opencog/atoms/aten/TensorEquation.h b/opencog/atoms/aten/TensorEquation.h index 163b0bd..47c4cf4 100644 --- a/opencog/atoms/aten/TensorEquation.h +++ b/opencog/atoms/aten/TensorEquation.h @@ -302,10 +302,14 @@ class TensorProgram // For learning double _learning_rate; bool _track_gradients; + double _grad_clip; // Gradient clipping threshold (0 = disabled) // Accumulated tensor gradients (keyed by tensor name) std::map<std::string, ATenValuePtr> _tensor_grads; + // Training history + std::vector<double> _loss_history; + // Statistics size_t _forward_count; size_t _backward_count; @@ -325,6 +329,20 @@ class TensorProgram ReasoningMode mode() const { return _mode; } void set_mode(ReasoningMode m) { _mode = m; } + /** + * Set maximum number of iterations for forward_to_fixpoint(). + */ + void set_max_iterations(size_t n) { _max_iterations = n; } + size_t max_iterations() const { return _max_iterations; } + + /** + * Set convergence threshold for forward_to_fixpoint(). + * Iteration stops when the maximum absolute change in any + * derived tensor value falls below this threshold. + */ + void set_convergence_threshold(double t) { _convergence_threshold = t; } + double convergence_threshold() const { return _convergence_threshold; } + // ======================================== // Fact Management @@ -455,6 +473,19 @@ class TensorProgram void set_track_gradients(bool t) { _track_gradients = t; } bool track_gradients() const { return _track_gradients; } + /** + * Set gradient clipping threshold. + * Gradients with L2-norm exceeding this value are scaled down. + * Set to 0 (default) to disable clipping. 
+ */ + void set_grad_clip(double clip) { _grad_clip = clip; } + double grad_clip() const { return _grad_clip; } + + /** + * Return per-epoch loss values recorded by train(). + */ + const std::vector<double>& loss_history() const { return _loss_history; } + /** * Compute loss between derived and target tensors. */ diff --git a/tests/atoms/aten/TensorLogicUTest.cxxtest b/tests/atoms/aten/TensorLogicUTest.cxxtest index 4e00832..f790741 100644 --- a/tests/atoms/aten/TensorLogicUTest.cxxtest +++ b/tests/atoms/aten/TensorLogicUTest.cxxtest @@ -1872,4 +1872,230 @@ public: // Loss should have decreased TS_ASSERT(final_loss < loss_before); } + + // ======================================== + // TensorProgram configuration accessors + // ======================================== + + void test_tensor_program_max_iterations() + { + // Verify set_max_iterations / max_iterations round-trip + TensorProgram prog("iter_test"); + TS_ASSERT_EQUALS(prog.max_iterations(), 100); // default + + prog.set_max_iterations(42); + TS_ASSERT_EQUALS(prog.max_iterations(), 42); + + // A program that doesn't converge should stop at max_iterations + ATenValuePtr v = createATenFromVector({0.0, 1.0}, {2}); + prog.add_fact("in", v); + // Two equations that alternate: each overwrites the other's output + prog.add_equation("eq1", "out1", {"in"}, "i->i"); + prog.add_equation("eq2", "out2", {"out1"}, "i->i"); + + prog.forward_to_fixpoint(); + + // Should have stopped: forward_count <= max_iterations + 1 + TS_ASSERT(prog.forward_count() <= 43); + } + + void test_tensor_program_convergence_threshold() + { + // Verify set_convergence_threshold / convergence_threshold + TensorProgram prog("conv_test"); + TS_ASSERT_DELTA(prog.convergence_threshold(), 1e-6, 1e-15); + + prog.set_convergence_threshold(0.1); + TS_ASSERT_DELTA(prog.convergence_threshold(), 0.1, 1e-15); + + // A quickly-converging program should stop early + ATenValuePtr stable = createATenFromVector({1.0, 1.0}, {2}); + prog.add_fact("in", stable); + 
prog.add_equation("eq1", "out", {"in"}, "i->i"); + + prog.forward_to_fixpoint(); + + // With a loose threshold the fixpoint should be reached quickly + TS_ASSERT(prog.forward_count() <= 3); + } + + void test_tensor_program_grad_clip_accessors() + { + // Verify set_grad_clip / grad_clip round-trip + TensorProgram prog("clip_test"); + TS_ASSERT_DELTA(prog.grad_clip(), 0.0, 1e-15); // disabled by default + + prog.set_grad_clip(1.0); + TS_ASSERT_DELTA(prog.grad_clip(), 1.0, 1e-15); + + prog.set_grad_clip(0.0); + TS_ASSERT_DELTA(prog.grad_clip(), 0.0, 1e-15); + } + + void test_tensor_program_grad_clip_limits_update() + { + // Verify that gradient clipping prevents weight from diverging when + // gradients would otherwise be very large. + TensorProgram prog("clip_limits"); + prog.set_learning_rate(1.0); // large lr → large updates without clipping + prog.set_grad_clip(0.01); // restrict gradient norm to 0.01 + + // Large input: gradient will be large without clipping + ATenValuePtr in = createATenFromVector({100.0, 100.0, 100.0}, {3}); + ATenValuePtr target = createATenFromVector({0.0, 0.0, 0.0}, {3}); + prog.add_fact("in", in); + prog.add_equation("eq1", "out", {"in"}, "i->i"); + + TensorEquationPtr eq = prog.get_equation("eq1"); + eq->set_learnable(true); + ATenValuePtr init_w = createATenFromVector({1.0, 1.0, 1.0}, {3}); + eq->set_weight(init_w); + + prog.forward(); + ATenValuePtr grad_out = ATenValueCast(prog.get_derived("out")->sub(*target)); + prog.backward("out", grad_out); + + auto w_before = eq->weight()->to_vector(); + prog.update_parameters(); + auto w_after = eq->weight()->to_vector(); + + // Each weight change should be <= lr * clip = 1.0 * 0.01 = 0.01. + // A small margin of 1e-9 covers floating-point rounding in the + // L2-norm computation and the subsequent rescaling. 
+ const double max_expected_delta = 0.01 + 1e-9; + for (size_t i = 0; i < w_before.size(); i++) + { + double delta = std::abs(w_before[i] - w_after[i]); + TS_ASSERT(delta <= max_expected_delta); + } + } + + void test_tensor_program_loss_history() + { + // Verify that train() records per-epoch losses + TensorProgram prog("hist_test"); + prog.set_learning_rate(0.01); + + ATenValuePtr in = createATenFromVector({1.0, 2.0, 3.0}, {3}); + ATenValuePtr target = createATenFromVector({2.0, 4.0, 6.0}, {3}); + prog.add_fact("in", in); + prog.add_equation("eq1", "out", {"in"}, "i->i"); + + TensorEquationPtr eq = prog.get_equation("eq1"); + eq->set_learnable(true); + eq->set_weight(createATenFromVector({0.5, 0.5, 0.5}, {3})); + + TS_ASSERT_EQUALS(prog.loss_history().size(), 0); + + std::map<std::string, ATenValuePtr> targets_map = {{"out", target}}; + prog.train({}, targets_map, 10); + + // Should have exactly one loss entry per epoch + TS_ASSERT_EQUALS(prog.loss_history().size(), 10); + + // Loss entries should all be non-negative + for (double loss : prog.loss_history()) + TS_ASSERT(loss >= 0.0); + } + + void test_tensor_program_loss_history_decreasing() + { + // Loss should generally decrease over training epochs + TensorProgram prog("hist_decr"); + prog.set_learning_rate(0.01); + + ATenValuePtr in = createATenFromVector({1.0, 2.0, 3.0}, {3}); + ATenValuePtr target = createATenFromVector({2.0, 4.0, 6.0}, {3}); + prog.add_fact("in", in); + prog.add_equation("eq1", "out", {"in"}, "i->i"); + + TensorEquationPtr eq = prog.get_equation("eq1"); + eq->set_learnable(true); + eq->set_weight(createATenFromVector({0.5, 0.5, 0.5}, {3})); + + std::map<std::string, ATenValuePtr> targets_map = {{"out", target}}; + prog.train({}, targets_map, 30); + + const auto& hist = prog.loss_history(); + TS_ASSERT(!hist.empty()); + // Last loss should be less than the first + TS_ASSERT(hist.back() < hist.front()); + } + + // ======================================== + // HYBRID reasoning mode + // ======================================== + + void 
test_hybrid_mode_output_binary() + { + // HYBRID mode should produce binary {0,1} outputs like BOOLEAN mode + TensorEquation eq("hyb_eq", "out", {"in"}, + "i->i", Nonlinearity::NONE, ReasoningMode::HYBRID); + + // Inputs with values spread around 0 (before sigmoid) + ATenValuePtr in = createATenFromVector({2.0, -2.0, 0.5, -0.5}, {4}); + ATenValuePtr out = eq.execute({in}); + + auto data = out->to_vector(); + for (double v : data) + { + // Each value must be exactly 0 or 1 + TS_ASSERT(v == 0.0 || v == 1.0); + } + } + + void test_hybrid_mode_sigmoid_then_threshold() + { + // For HYBRID + NONE nonlinearity: sigmoid is applied before threshold. + // sigmoid(2.0) ≈ 0.88 → threshold → 1 + // sigmoid(-2.0) ≈ 0.12 → threshold → 0 + TensorEquation eq("hyb_sig", "out", {"in"}, + "i->i", Nonlinearity::NONE, ReasoningMode::HYBRID); + + ATenValuePtr in = createATenFromVector({2.0, -2.0}, {2}); + ATenValuePtr out = eq.execute({in}); + + auto data = out->to_vector(); + TS_ASSERT_EQUALS(data.size(), 2); + TS_ASSERT_DELTA(data[0], 1.0, 1e-10); // sigmoid(2) > 0.5 → 1 + TS_ASSERT_DELTA(data[1], 0.0, 1e-10); // sigmoid(-2) < 0.5 → 0 + } + + void test_hybrid_mode_explicit_nonlinearity() + { + // HYBRID with an explicit nonlinearity (e.g. RELU) skips the extra + // sigmoid and applies threshold directly to the RELU output. 
+ // relu(1.0)=1.0 → threshold → 1; relu(-1.0)=0.0 → threshold → 0 + TensorEquation eq("hyb_relu", "out", {"in"}, + "i->i", Nonlinearity::RELU, ReasoningMode::HYBRID); + + ATenValuePtr in = createATenFromVector({1.0, -1.0}, {2}); + ATenValuePtr out = eq.execute({in}); + + auto data = out->to_vector(); + TS_ASSERT_EQUALS(data.size(), 2); + TS_ASSERT_DELTA(data[0], 1.0, 1e-10); // relu(1) = 1 > 0.5 → 1 + TS_ASSERT_DELTA(data[1], 0.0, 1e-10); // relu(-1) = 0 ≤ 0.5 → 0 + } + + void test_hybrid_mode_vs_boolean() + { + // For inputs that produce the same post-sigmoid values as direct + // threshold in BOOLEAN mode, both modes should give the same result + // when the pre-activation is already in [0,1]. + ATenValuePtr in = createATenFromVector({0.9, 0.1}, {2}); + + TensorEquation bool_eq("bool_eq", "out", {"in"}, + "i->i", Nonlinearity::NONE, ReasoningMode::BOOLEAN); + TensorEquation hyb_eq("hyb_eq", "out", {"in"}, + "i->i", Nonlinearity::NONE, ReasoningMode::HYBRID); + + // Both should give binary output; the actual values may differ because + // HYBRID routes through sigmoid first. Just verify both are binary. + auto bool_out = bool_eq.execute({in})->to_vector(); + auto hyb_out = hyb_eq.execute({in})->to_vector(); + + for (double v : bool_out) TS_ASSERT(v == 0.0 || v == 1.0); + for (double v : hyb_out) TS_ASSERT(v == 0.0 || v == 1.0); + } };