Skip to content

Commit e948920

Browse files
committed
Fix didConverge
Actually, since we don't resample the neighbors per epoch, the previous convergence logic made no practical difference. Also, it was odd to check the tolerance twice before (once inside the iterations loop and once in the epochs loop).
1 parent 4ee2878 commit e948920

File tree

2 files changed

+17
-17
lines changed

2 files changed

+17
-17
lines changed

algo/src/main/java/org/neo4j/gds/embeddings/graphsage/GraphSageModelTrainer.java

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -145,26 +145,17 @@ public ModelTrainResult train(Graph graph, HugeObjectArray<double[]> features) {
145145

146146
progressTracker.endSubTask("Prepare batches");
147147

148-
double previousLoss = Double.MAX_VALUE;
149148
boolean converged = false;
150149
var iterationLossesPerEpoch = new ArrayList<List<Double>>();
151150

152151
progressTracker.beginSubTask("Train model");
153152

154-
for (int epoch = 1; epoch <= epochs; epoch++) {
153+
for (int epoch = 1; epoch <= epochs && !converged; epoch++) {
155154
progressTracker.beginSubTask("Epoch");
156-
157-
158-
var iterationLosses = trainEpoch(batchTasks, weights);
159-
iterationLossesPerEpoch.add(iterationLosses);
160-
var newLoss = iterationLosses.get(iterationLosses.size() - 1);
161-
155+
var epochResult = trainEpoch(batchTasks, weights);
156+
iterationLossesPerEpoch.add(epochResult.losses());
157+
converged = epochResult.converged();
162158
progressTracker.endSubTask("Epoch");
163-
if (Math.abs((newLoss - previousLoss) / previousLoss) < tolerance) {
164-
converged = true;
165-
break;
166-
}
167-
previousLoss = newLoss;
168159
}
169160

170161
progressTracker.endSubTask("Train model");
@@ -203,11 +194,13 @@ private BatchTask createBatchTask(
203194
return new BatchTask(lossFunction, weights, tolerance, progressTracker);
204195
}
205196

206-
private List<Double> trainEpoch(List<BatchTask> batchTasks, List<Weights<? extends Tensor<?>>> weights) {
197+
private EpochResult trainEpoch(List<BatchTask> batchTasks, List<Weights<? extends Tensor<?>>> weights) {
207198
var updater = new AdamOptimizer(weights, learningRate);
208199

209200
int iteration = 1;
210201
var iterationLosses = new ArrayList<Double>();
202+
var converged = false;
203+
211204
for (;iteration <= maxIterations; iteration++) {
212205
progressTracker.beginSubTask("Iteration");
213206

@@ -216,7 +209,7 @@ private List<Double> trainEpoch(List<BatchTask> batchTasks, List<Weights<? exten
216209
var avgLoss = batchTasks.stream().mapToDouble(BatchTask::loss).average().orElseThrow();
217210
iterationLosses.add(avgLoss);
218211

219-
var converged = batchTasks.stream().allMatch(task -> task.converged);
212+
converged = batchTasks.stream().allMatch(task -> task.converged);
220213
if (converged) {
221214
progressTracker.endSubTask();
222215
break;
@@ -235,7 +228,14 @@ private List<Double> trainEpoch(List<BatchTask> batchTasks, List<Weights<? exten
235228
progressTracker.endSubTask("Iteration");
236229
}
237230

238-
return iterationLosses;
231+
return ImmutableEpochResult.of(converged, iterationLosses);
232+
}
233+
234+
@ValueClass
235+
interface EpochResult {
236+
boolean converged();
237+
238+
List<Double> losses();
239239
}
240240

241241
static class BatchTask implements Runnable {

doc/asciidoc/machine-learning/node-embeddings/graph-sage/graph-sage.adoc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ RETURN
278278
.Results
279279
|===
280280
| modelName | didConverge | ranEpochs | epochLosses
281-
| "exampleTrainModel" | false | 1 | [186.04946807210226]
281+
| "exampleTrainModel" | true | 1 | [186.04946807210226]
282282
|===
283283
--
284284

0 commit comments

Comments
 (0)