Skip to content

Commit f9247ed

Browse files
committed
Consider self gradient in LogisticLoss
1 parent 57fee87 commit f9247ed

File tree

2 files changed: +23 additions, -4 deletions

ml/ml-core/src/main/java/org/neo4j/gds/ml/core/functions/LogisticLoss.java

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -128,6 +128,8 @@ else if (predicted == 1.0) {
128128

129129
@Override
130130
public Tensor<?> gradient(Variable<?> parent, ComputationContext ctx) {
131+
var selfGradient = ctx.gradient(this).value();
132+
131133
if (parent == weights) {
132134
ctx.forward(predictions);
133135
var predVector = ctx.data(predictions);
@@ -141,7 +143,7 @@ public Tensor<?> gradient(Variable<?> parent, ComputationContext ctx) {
141143
for (int idx = 0; idx < numberOfExamples; idx++) {
142144
double errorPerExample = (predVector.dataAt(idx) - targetVector.dataAt(idx)) / numberOfExamples;
143145
for (int feature = 0; feature < featureCount; feature++) {
144-
gradient.addDataAt(feature, errorPerExample * featuresTensor.dataAt(idx, feature));
146+
gradient.addDataAt(feature, selfGradient * errorPerExample * featuresTensor.dataAt(idx, feature));
145147
}
146148
}
147149
return gradient;
@@ -154,13 +156,14 @@ public Tensor<?> gradient(Variable<?> parent, ComputationContext ctx) {
154156

155157
for (int idx = 0; idx < numberOfExamples; idx++) {
156158
double errorPerExample = (predVector.dataAt(idx) - targetVector.dataAt(idx));
157-
gradient.addDataAt(0, errorPerExample);
159+
gradient.addDataAt(0, selfGradient * errorPerExample);
158160
}
159161

160162
return gradient.scalarMultiplyMutate(1.0D / numberOfExamples);
161163
} else {
162-
// assume feature and target variables do not require gradient
163-
return ctx.data(parent).createWithSameDimensions();
164+
throw new IllegalStateException(
165+
"The gradient should only be computed for the bias and the weights parents, but got " + parent.render()
166+
);
164167
}
165168
}
166169

ml/ml-core/src/test/java/org/neo4j/gds/ml/core/functions/LogisticLossTest.java

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
package org.neo4j.gds.ml.core.functions;
2121

2222
import org.assertj.core.data.Offset;
23+
import org.junit.jupiter.api.Test;
2324
import org.junit.jupiter.params.ParameterizedTest;
2425
import org.junit.jupiter.params.provider.ValueSource;
2526
import org.neo4j.gds.ml.core.ComputationContext;
@@ -71,6 +72,21 @@ void logisticLossApproximatesGradient(boolean withBias) {
7172
finiteDifferenceShouldApproximateGradient(weights, loss);
7273
}
7374

75+
@Test
76+
void considerSelfGradient() {
77+
var features = Constant.matrix(new double[]{0.23, 0.52, 0.62, 0.32, 0.64, 0.71}, 2, 3);
78+
var targets = Constant.vector(new double[]{1.0, 0.0});
79+
var weights = new Weights<>(new Matrix(new double[]{0.35, 0.41, 1.0}, 1, 3));
80+
var bias = Weights.ofScalar(2.5);
81+
82+
var predictions = new Sigmoid<>(new MatrixMultiplyWithTransposedSecondOperand(features, weights));
83+
84+
var loss = new LogisticLoss(weights, bias, predictions, features, targets);
85+
var chainedLoss = new Sigmoid<>(loss);
86+
87+
finiteDifferenceShouldApproximateGradient(weights, chainedLoss);
88+
}
89+
7490
@Override
7591
public double epsilon() {
7692
return 1e-7;

0 commit comments

Comments (0)