Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 36 additions & 1 deletion opencog/atoms/aten/TensorEquation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,15 @@ ATenValuePtr TensorEquation::execute(
{
result = apply_nonlinearity(result, Nonlinearity::THRESHOLD);
}
// For Hybrid mode: apply sigmoid to squash to [0,1] if no nonlinearity
// is set, then threshold to produce binary output. The straight-through
// estimator in backward() allows gradients to flow through the threshold.
else if (_mode == ReasoningMode::HYBRID)
{
if (_nonlinearity == Nonlinearity::NONE)
result = apply_nonlinearity(result, Nonlinearity::SIGMOID);
result = apply_nonlinearity(result, Nonlinearity::THRESHOLD);
}

return result;
}
Expand Down Expand Up @@ -863,6 +872,7 @@ TensorProgram::TensorProgram(const std::string& name, ReasoningMode mode)
_convergence_threshold(1e-6),
_learning_rate(0.01),
_track_gradients(false),
_grad_clip(0.0),
_forward_count(0),
_backward_count(0)
{
Expand Down Expand Up @@ -1181,6 +1191,21 @@ void TensorProgram::backward(const std::string& output_name,
}
}

// Clip gradient vector in-place so its L2-norm does not exceed max_norm.
// No-op when max_norm <= 0.
// Rescale the gradient vector in-place so that its L2-norm never
// exceeds max_norm. A max_norm of zero or below disables clipping
// and leaves the vector untouched.
static void clip_gradient_vector(std::vector<double>& g_vec, double max_norm)
{
	if (max_norm <= 0.0) return;

	// Accumulate the squared magnitude, then take the root once.
	double sum_sq = 0.0;
	for (size_t i = 0; i < g_vec.size(); i++)
		sum_sq += g_vec[i] * g_vec[i];

	const double norm = std::sqrt(sum_sq);
	if (norm <= max_norm) return;

	// Uniformly shrink every component so the norm lands on max_norm.
	const double scale = max_norm / norm;
	for (size_t i = 0; i < g_vec.size(); i++)
		g_vec[i] *= scale;
}

void TensorProgram::update_parameters()
{
for (auto& eq : _equations)
Expand All @@ -1194,6 +1219,8 @@ void TensorProgram::update_parameters()
auto g_vec = eq->weight_grad()->to_vector();
auto w_shape = eq->weight()->shape();

clip_gradient_vector(g_vec, _grad_clip);

for (size_t i = 0; i < w_vec.size() && i < g_vec.size(); i++)
w_vec[i] -= _learning_rate * g_vec[i];

Expand All @@ -1207,6 +1234,8 @@ void TensorProgram::update_parameters()
auto g_vec = eq->bias_grad()->to_vector();
auto b_shape = eq->bias()->shape();

clip_gradient_vector(g_vec, _grad_clip);

for (size_t i = 0; i < b_vec.size() && i < g_vec.size(); i++)
b_vec[i] -= _learning_rate * g_vec[i];

Expand All @@ -1227,6 +1256,7 @@ double TensorProgram::train(
size_t epochs)
{
double final_loss = 0.0;
_loss_history.clear();

for (size_t epoch = 0; epoch < epochs; epoch++)
{
Expand All @@ -1237,13 +1267,18 @@ double TensorProgram::train(
// Forward pass
forward_to_fixpoint();

// Compute loss
// Compute loss with current (pre-update) parameters and record it.
// Recording before update follows standard ML convention: the logged
// value is the loss the model had entering this epoch.
final_loss = 0.0;
for (const auto& [name, target] : targets)
{
final_loss += compute_loss(name, target);
}

// Record loss for this epoch
_loss_history.push_back(final_loss);

// Backward pass (simplified)
for (const auto& [name, target] : targets)
{
Expand Down
31 changes: 31 additions & 0 deletions opencog/atoms/aten/TensorEquation.h
Original file line number Diff line number Diff line change
Expand Up @@ -302,10 +302,14 @@ class TensorProgram
// For learning
double _learning_rate;
bool _track_gradients;
double _grad_clip; // Gradient clipping threshold (0 = disabled)

// Accumulated tensor gradients (keyed by tensor name)
std::map<std::string, ATenValuePtr> _tensor_grads;

// Training history
std::vector<double> _loss_history;

// Statistics
size_t _forward_count;
size_t _backward_count;
Expand All @@ -325,6 +329,20 @@ class TensorProgram
ReasoningMode mode() const { return _mode; }
void set_mode(ReasoningMode m) { _mode = m; }

/**
 * Set the maximum number of iterations allowed in forward_to_fixpoint().
 * Acts as a safety cap for programs whose equations never settle.
 */
void set_max_iterations(size_t n) { _max_iterations = n; }
size_t max_iterations() const { return _max_iterations; }

/**
 * Set convergence threshold for forward_to_fixpoint().
 * Iteration stops when the maximum absolute change in any
 * derived tensor value falls below this threshold.
 * (Default is 1e-6, initialized in the constructor.)
 */
void set_convergence_threshold(double t) { _convergence_threshold = t; }
double convergence_threshold() const { return _convergence_threshold; }

// ========================================
// Fact Management

Expand Down Expand Up @@ -455,6 +473,19 @@ class TensorProgram
void set_track_gradients(bool t) { _track_gradients = t; }
bool track_gradients() const { return _track_gradients; }

/**
 * Set gradient clipping threshold.
 * During update_parameters(), each weight and bias gradient vector
 * whose L2-norm exceeds this value is scaled down to that norm.
 * Set to 0 (default) to disable clipping.
 */
void set_grad_clip(double clip) { _grad_clip = clip; }
double grad_clip() const { return _grad_clip; }

/**
 * Return per-epoch loss values recorded by train().
 * The history is cleared at the start of each train() call, so it
 * reflects only the most recent training run.
 */
const std::vector<double>& loss_history() const { return _loss_history; }

/**
* Compute loss between derived and target tensors.
*/
Expand Down
226 changes: 226 additions & 0 deletions tests/atoms/aten/TensorLogicUTest.cxxtest
Original file line number Diff line number Diff line change
Expand Up @@ -1872,4 +1872,230 @@ public:
// Loss should have decreased
TS_ASSERT(final_loss < loss_before);
}

// ========================================
// TensorProgram configuration accessors
// ========================================

void test_tensor_program_max_iterations()
{
// Verify set_max_iterations / max_iterations round-trip
TensorProgram prog("iter_test");
TS_ASSERT_EQUALS(prog.max_iterations(), 100); // default

prog.set_max_iterations(42);
TS_ASSERT_EQUALS(prog.max_iterations(), 42);

// A program that doesn't converge should stop at max_iterations
ATenValuePtr v = createATenFromVector({0.0, 1.0}, {2});
prog.add_fact("in", v);
// Two equations that alternate: each overwrites the other's output
prog.add_equation("eq1", "out1", {"in"}, "i->i");
prog.add_equation("eq2", "out2", {"out1"}, "i->i");

prog.forward_to_fixpoint();

// Should have stopped: forward_count <= max_iterations + 1
TS_ASSERT(prog.forward_count() <= 43);
}

void test_tensor_program_convergence_threshold()
{
// Verify set_convergence_threshold / convergence_threshold
TensorProgram prog("conv_test");
TS_ASSERT_DELTA(prog.convergence_threshold(), 1e-6, 1e-15);

prog.set_convergence_threshold(0.1);
TS_ASSERT_DELTA(prog.convergence_threshold(), 0.1, 1e-15);

// A quickly-converging program should stop early
ATenValuePtr stable = createATenFromVector({1.0, 1.0}, {2});
prog.add_fact("in", stable);
prog.add_equation("eq1", "out", {"in"}, "i->i");

prog.forward_to_fixpoint();

// With a loose threshold the fixpoint should be reached quickly
TS_ASSERT(prog.forward_count() <= 3);
}

void test_tensor_program_grad_clip_accessors()
{
	// set_grad_clip / grad_clip must round-trip, with 0 meaning "off".
	TensorProgram prog("clip_test");
	TS_ASSERT_DELTA(prog.grad_clip(), 0.0, 1e-15); // off by default

	prog.set_grad_clip(1.0);
	TS_ASSERT_DELTA(prog.grad_clip(), 1.0, 1e-15);

	// Setting it back to zero turns clipping off again.
	prog.set_grad_clip(0.0);
	TS_ASSERT_DELTA(prog.grad_clip(), 0.0, 1e-15);
}

void test_tensor_program_grad_clip_limits_update()
{
// Verify that gradient clipping prevents weight from diverging when
// gradients would otherwise be very large.
TensorProgram prog("clip_limits");
prog.set_learning_rate(1.0); // large lr → large updates without clipping
prog.set_grad_clip(0.01); // restrict gradient norm to 0.01

// Large input: gradient will be large without clipping
ATenValuePtr in = createATenFromVector({100.0, 100.0, 100.0}, {3});
ATenValuePtr target = createATenFromVector({0.0, 0.0, 0.0}, {3});
prog.add_fact("in", in);
prog.add_equation("eq1", "out", {"in"}, "i->i");

TensorEquationPtr eq = prog.get_equation("eq1");
eq->set_learnable(true);
ATenValuePtr init_w = createATenFromVector({1.0, 1.0, 1.0}, {3});
eq->set_weight(init_w);

prog.forward();
ATenValuePtr grad_out = ATenValueCast(prog.get_derived("out")->sub(*target));
prog.backward("out", grad_out);

auto w_before = eq->weight()->to_vector();
prog.update_parameters();
auto w_after = eq->weight()->to_vector();

// Each weight change should be <= lr * clip = 1.0 * 0.01 = 0.01.
// A small margin of 1e-9 covers floating-point rounding in the
// L2-norm computation and the subsequent rescaling.
const double max_expected_delta = 0.01 + 1e-9;
for (size_t i = 0; i < w_before.size(); i++)
{
double delta = std::abs(w_before[i] - w_after[i]);
TS_ASSERT(delta <= max_expected_delta);
}
}

void test_tensor_program_loss_history()
{
// Verify that train() records per-epoch losses
TensorProgram prog("hist_test");
prog.set_learning_rate(0.01);

ATenValuePtr in = createATenFromVector({1.0, 2.0, 3.0}, {3});
ATenValuePtr target = createATenFromVector({2.0, 4.0, 6.0}, {3});
prog.add_fact("in", in);
prog.add_equation("eq1", "out", {"in"}, "i->i");

TensorEquationPtr eq = prog.get_equation("eq1");
eq->set_learnable(true);
eq->set_weight(createATenFromVector({0.5, 0.5, 0.5}, {3}));

TS_ASSERT_EQUALS(prog.loss_history().size(), 0);

std::map<std::string, ATenValuePtr> targets_map = {{"out", target}};
prog.train({}, targets_map, 10);

// Should have exactly one loss entry per epoch
TS_ASSERT_EQUALS(prog.loss_history().size(), 10);

// Loss entries should all be non-negative
for (double loss : prog.loss_history())
TS_ASSERT(loss >= 0.0);
}

void test_tensor_program_loss_history_decreasing()
{
// Loss should generally decrease over training epochs
TensorProgram prog("hist_decr");
prog.set_learning_rate(0.01);

ATenValuePtr in = createATenFromVector({1.0, 2.0, 3.0}, {3});
ATenValuePtr target = createATenFromVector({2.0, 4.0, 6.0}, {3});
prog.add_fact("in", in);
prog.add_equation("eq1", "out", {"in"}, "i->i");

TensorEquationPtr eq = prog.get_equation("eq1");
eq->set_learnable(true);
eq->set_weight(createATenFromVector({0.5, 0.5, 0.5}, {3}));

std::map<std::string, ATenValuePtr> targets_map = {{"out", target}};
prog.train({}, targets_map, 30);

const auto& hist = prog.loss_history();
TS_ASSERT(!hist.empty());
// Last loss should be less than the first
TS_ASSERT(hist.back() < hist.front());
}

// ========================================
// HYBRID reasoning mode
// ========================================

void test_hybrid_mode_output_binary()
{
// HYBRID mode should produce binary {0,1} outputs like BOOLEAN mode
TensorEquation eq("hyb_eq", "out", {"in"},
"i->i", Nonlinearity::NONE, ReasoningMode::HYBRID);

// Inputs with values spread around 0 (before sigmoid)
ATenValuePtr in = createATenFromVector({2.0, -2.0, 0.5, -0.5}, {4});
ATenValuePtr out = eq.execute({in});

auto data = out->to_vector();
for (double v : data)
{
// Each value must be exactly 0 or 1
TS_ASSERT(v == 0.0 || v == 1.0);
}
}

void test_hybrid_mode_sigmoid_then_threshold()
{
// For HYBRID + NONE nonlinearity: sigmoid is applied before threshold.
// sigmoid(2.0) ≈ 0.88 → threshold → 1
// sigmoid(-2.0) ≈ 0.12 → threshold → 0
TensorEquation eq("hyb_sig", "out", {"in"},
"i->i", Nonlinearity::NONE, ReasoningMode::HYBRID);

ATenValuePtr in = createATenFromVector({2.0, -2.0}, {2});
ATenValuePtr out = eq.execute({in});

auto data = out->to_vector();
TS_ASSERT_EQUALS(data.size(), 2);
TS_ASSERT_DELTA(data[0], 1.0, 1e-10); // sigmoid(2) > 0.5 → 1
TS_ASSERT_DELTA(data[1], 0.0, 1e-10); // sigmoid(-2) < 0.5 → 0
}

void test_hybrid_mode_explicit_nonlinearity()
{
// HYBRID with an explicit nonlinearity (e.g. RELU) skips the extra
// sigmoid and applies threshold directly to the RELU output.
// relu(1.0)=1.0 → threshold → 1; relu(-1.0)=0.0 → threshold → 0
TensorEquation eq("hyb_relu", "out", {"in"},
"i->i", Nonlinearity::RELU, ReasoningMode::HYBRID);

ATenValuePtr in = createATenFromVector({1.0, -1.0}, {2});
ATenValuePtr out = eq.execute({in});

auto data = out->to_vector();
TS_ASSERT_EQUALS(data.size(), 2);
TS_ASSERT_DELTA(data[0], 1.0, 1e-10); // relu(1) = 1 > 0.5 → 1
TS_ASSERT_DELTA(data[1], 0.0, 1e-10); // relu(-1) = 0 ≤ 0.5 → 0
}

void test_hybrid_mode_vs_boolean()
{
// For inputs that produce the same post-sigmoid values as direct
// threshold in BOOLEAN mode, both modes should give the same result
// when the pre-activation is already in [0,1].
ATenValuePtr in = createATenFromVector({0.9, 0.1}, {2});

TensorEquation bool_eq("bool_eq", "out", {"in"},
"i->i", Nonlinearity::NONE, ReasoningMode::BOOLEAN);
TensorEquation hyb_eq("hyb_eq", "out", {"in"},
"i->i", Nonlinearity::NONE, ReasoningMode::HYBRID);

// Both should give binary output; the actual values may differ because
// HYBRID routes through sigmoid first. Just verify both are binary.
auto bool_out = bool_eq.execute({in})->to_vector();
auto hyb_out = hyb_eq.execute({in})->to_vector();

for (double v : bool_out) TS_ASSERT(v == 0.0 || v == 1.0);
for (double v : hyb_out) TS_ASSERT(v == 0.0 || v == 1.0);
}
};