Skip to content

Commit 13d66d4

Browse files
authored
Merge pull request #5251 from FlorentinD/configure-nr-pipeline-procs
NodeRegression pipeline Create proc
2 parents 5b5b4f7 + 232ef82 commit 13d66d4

File tree

51 files changed

+477
-253
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+477
-253
lines changed

doc-test/src/test/java/org/neo4j/gds/doc/NodeClassificationPipelineDocTest.java

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,16 @@
2323
import org.neo4j.gds.catalog.GraphProjectProc;
2424
import org.neo4j.gds.catalog.GraphStreamNodePropertiesProc;
2525
import org.neo4j.gds.functions.AsNodeFunc;
26-
import org.neo4j.gds.ml.nodemodels.pipeline.NodeClassificationPipelineAddStepProcs;
27-
import org.neo4j.gds.ml.nodemodels.pipeline.NodeClassificationPipelineAddTrainerMethodProcs;
28-
import org.neo4j.gds.ml.nodemodels.pipeline.NodeClassificationPipelineConfigureAutoTuningProc;
29-
import org.neo4j.gds.ml.nodemodels.pipeline.NodeClassificationPipelineConfigureSplitProc;
30-
import org.neo4j.gds.ml.nodemodels.pipeline.NodeClassificationPipelineCreateProc;
31-
import org.neo4j.gds.ml.nodemodels.pipeline.predict.NodeClassificationPipelineMutateProc;
32-
import org.neo4j.gds.ml.nodemodels.pipeline.predict.NodeClassificationPipelineStreamProc;
33-
import org.neo4j.gds.ml.nodemodels.pipeline.predict.NodeClassificationPipelineTrainProc;
34-
import org.neo4j.gds.ml.nodemodels.pipeline.predict.NodeClassificationPipelineWriteProc;
3526
import org.neo4j.gds.ml.pipeline.PipelineCatalog;
27+
import org.neo4j.gds.ml.pipeline.node.classification.NodeClassificationPipelineAddStepProcs;
28+
import org.neo4j.gds.ml.pipeline.node.classification.NodeClassificationPipelineAddTrainerMethodProcs;
29+
import org.neo4j.gds.ml.pipeline.node.classification.NodeClassificationPipelineConfigureAutoTuningProc;
30+
import org.neo4j.gds.ml.pipeline.node.classification.NodeClassificationPipelineConfigureSplitProc;
31+
import org.neo4j.gds.ml.pipeline.node.classification.NodeClassificationPipelineCreateProc;
32+
import org.neo4j.gds.ml.pipeline.node.classification.predict.NodeClassificationPipelineMutateProc;
33+
import org.neo4j.gds.ml.pipeline.node.classification.predict.NodeClassificationPipelineStreamProc;
34+
import org.neo4j.gds.ml.pipeline.node.classification.predict.NodeClassificationPipelineTrainProc;
35+
import org.neo4j.gds.ml.pipeline.node.classification.predict.NodeClassificationPipelineWriteProc;
3636
import org.neo4j.gds.scaling.ScalePropertiesMutateProc;
3737

3838
import java.util.List;

proc/machine-learning/src/main/java/org/neo4j/gds/ml/nodemodels/pipeline/NodeClassificationPipelineCreate.java renamed to doc-test/src/test/java/org/neo4j/gds/doc/NodeRegressionPipelineDocTest.java

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,23 +17,30 @@
1717
* You should have received a copy of the GNU General Public License
1818
* along with this program. If not, see <http://www.gnu.org/licenses/>.
1919
*/
20-
package org.neo4j.gds.ml.nodemodels.pipeline;
20+
package org.neo4j.gds.doc;
2121

22-
import org.neo4j.gds.core.StringIdentifierValidations;
22+
import org.junit.jupiter.api.AfterAll;
2323
import org.neo4j.gds.ml.pipeline.PipelineCatalog;
24-
import org.neo4j.gds.ml.pipeline.nodePipeline.classification.NodeClassificationTrainingPipeline;
24+
import org.neo4j.gds.ml.pipeline.node.regression.configure.NodeRegressionPipelineCreateProc;
2525

26-
public final class NodeClassificationPipelineCreate {
26+
import java.util.List;
2727

28-
private NodeClassificationPipelineCreate() {}
28+
class NodeRegressionPipelineDocTest extends DocTestBase {
2929

30-
public static PipelineInfoResult create(String username, String pipelineName) {
31-
StringIdentifierValidations.validateNoWhiteCharacter(pipelineName, "pipelineName");
32-
33-
var pipeline = new NodeClassificationTrainingPipeline();
30+
@AfterAll
31+
static void tearDown() {
32+
PipelineCatalog.removeAll();
33+
}
3434

35-
PipelineCatalog.set(username, pipelineName, pipeline);
35+
@Override
36+
protected List<Class<?>> procedures() {
37+
return List.of(
38+
NodeRegressionPipelineCreateProc.class
39+
);
40+
}
3641

37-
return new PipelineInfoResult(pipelineName, pipeline);
42+
@Override
43+
protected String adocFile() {
44+
return "machine-learning/node-property-prediction/noderegression-pipeline/noderegression.adoc";
3845
}
3946
}

doc-test/src/test/java/org/neo4j/gds/doc/PipelineCatalogExistsDocTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020
package org.neo4j.gds.doc;
2121

2222
import org.junit.jupiter.api.AfterEach;
23-
import org.neo4j.gds.ml.nodemodels.pipeline.NodeClassificationPipelineCreateProc;
2423
import org.neo4j.gds.ml.pipeline.PipelineCatalog;
24+
import org.neo4j.gds.ml.pipeline.node.classification.NodeClassificationPipelineCreateProc;
2525
import org.neo4j.gds.pipeline.catalog.PipelineExistsProc;
2626

2727
import java.util.List;

doc-test/src/test/java/org/neo4j/gds/doc/PipelineCatalogListDocTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121

2222
import org.junit.jupiter.api.AfterEach;
2323
import org.neo4j.gds.ml.linkmodels.pipeline.LinkPredictionPipelineCreateProc;
24-
import org.neo4j.gds.ml.nodemodels.pipeline.NodeClassificationPipelineCreateProc;
2524
import org.neo4j.gds.ml.pipeline.PipelineCatalog;
25+
import org.neo4j.gds.ml.pipeline.node.classification.NodeClassificationPipelineCreateProc;
2626
import org.neo4j.gds.pipeline.catalog.PipelineListProc;
2727

2828
import java.util.List;

proc/machine-learning/src/main/java/org/neo4j/gds/ml/nodemodels/NodeClassificationStreamResult.java renamed to doc-test/src/test/java/org/neo4j/gds/doc/syntax/NodeRegressionPipelineSyntaxTest.java

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,20 +17,21 @@
1717
* You should have received a copy of the GNU General Public License
1818
* along with this program. If not, see <http://www.gnu.org/licenses/>.
1919
*/
20-
package org.neo4j.gds.ml.nodemodels;
20+
package org.neo4j.gds.doc.syntax;
2121

2222
import java.util.List;
2323

24-
@SuppressWarnings("unused")
25-
public final class NodeClassificationStreamResult {
24+
class NodeRegressionPipelineSyntaxTest extends SyntaxTestBase {
2625

27-
public long nodeId;
28-
public long predictedClass;
29-
public List<Double> predictedProbabilities;
26+
@Override
27+
protected Iterable<SyntaxModeMeta> syntaxModes() {
28+
return List.of(
29+
SyntaxModeMeta.of(SyntaxMode.PIPELINE_CREATE)
30+
);
31+
}
3032

31-
public NodeClassificationStreamResult(long nodeId, long predictedClass, List<Double> predictedProbabilities) {
32-
this.nodeId = nodeId;
33-
this.predictedClass = predictedClass;
34-
this.predictedProbabilities = predictedProbabilities;
33+
@Override
34+
protected String adocFile() {
35+
return "machine-learning/node-property-prediction/noderegression-pipeline/noderegression.adoc";
3536
}
3637
}

doc/antora/content-nav.adoc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@
104104
**** xref:machine-learning/node-property-prediction/nodeclassification-pipelines/config/index.adoc[]
105105
**** xref:machine-learning/node-property-prediction/nodeclassification-pipelines/training/index.adoc[]
106106
**** xref:machine-learning/node-property-prediction/nodeclassification-pipelines/predict/index.adoc[]
107+
*** xref:machine-learning/node-property-prediction/noderegression-pipelines/index.adoc[]
108+
**** xref:machine-learning/node-property-prediction/noderegression-pipelines/config/index.adoc[]
107109
** xref:machine-learning/linkprediction-pipelines/index.adoc[]
108110
*** xref:machine-learning/linkprediction-pipelines/config/index.adoc[]
109111
*** xref:machine-learning/linkprediction-pipelines/training/index.adoc[]

doc/asciidoc/machine-learning/node-property-prediction/node-property-prediction.adoc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,9 @@ The Neo4j Graph Data Science library support the following node property predict
1111

1212
* Beta
1313
** <<nodeclassification-pipelines>>
14+
* Alpha
15+
** <<noderegression-pipelines>>
1416

1517
include::nodeclassification-pipeline/nodeclassification.adoc[leveloffset=+1]
18+
include::noderegression-pipeline/noderegression.adoc[leveloffset=+1]
19+

doc/asciidoc/machine-learning/node-property-prediction/nodeclassification-pipeline/config.adoc

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ YIELD
5050
| pipelineName | String | The name of the created pipeline.
5151
|===
5252

53-
include::pipelineInfoResult.adoc[]
53+
include::../pipelineInfoResult.adoc[]
5454
--
5555

5656

@@ -121,7 +121,7 @@ YIELD
121121
| procedureConfiguration | Map | The configuration of the procedure, excluding `graphName`, `nodeLabels` and `relationshipTypes`.
122122
|===
123123

124-
include::pipelineInfoResult.adoc[]
124+
include::../pipelineInfoResult.adoc[]
125125
--
126126

127127

@@ -187,7 +187,7 @@ YIELD
187187
| nodeProperties | List or String | Configuration for splitting the relationships.
188188
|===
189189

190-
include::pipelineInfoResult.adoc[]
190+
include::../pipelineInfoResult.adoc[]
191191
--
192192

193193

@@ -279,7 +279,7 @@ YIELD
279279
| testFraction | Double | 0.3 | Fraction of the graph reserved for testing. Must be in the range (0, 1). The fraction used for the training is `1 - testFraction`.
280280
|===
281281

282-
include::pipelineInfoResult.adoc[]
282+
include::../pipelineInfoResult.adoc[]
283283
--
284284

285285

@@ -366,7 +366,7 @@ YIELD
366366

367367
include::../../training-methods/logisticRegressionConfig.adoc[]
368368

369-
include::pipelineInfoResult.adoc[]
369+
include::../pipelineInfoResult.adoc[]
370370
--
371371
======
372372
@@ -401,7 +401,7 @@ YIELD
401401

402402
include::../../training-methods/randomForestConfig.adoc[]
403403

404-
include::pipelineInfoResult.adoc[]
404+
include::../pipelineInfoResult.adoc[]
405405
--
406406
======
407407
====
@@ -498,7 +498,7 @@ YIELD
498498
|===
499499

500500

501-
include::pipelineInfoResult.adoc[]
501+
include::../pipelineInfoResult.adoc[]
502502
--
503503

504504

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
[[noderegression-pipelines-config]]
2+
= Configuring the pipeline
3+
:max-trials: 10
4+
5+
This page explains how to create and configure a node regression pipeline.
6+
It consists of the following sections:
7+
8+
* <<noderegression-creating-a-pipeline, Creating a pipeline>>
9+
10+
11+
[[noderegression-creating-a-pipeline]]
12+
== Creating a pipeline
13+
14+
The first step of building a new pipeline is to create one using `gds.alpha.pipeline.nodeRegression.create`.
15+
This stores a trainable pipeline object of type `Node regression training pipeline` in the pipeline catalog.
16+
This represents a configurable pipeline that can later be invoked for training, which in turn creates a regression model.
17+
The latter is a model which is stored in the catalog with type `NodeRegression`.
18+
19+
20+
=== Syntax
21+
22+
[.pipeline-create-syntax]
23+
--
24+
.Create pipeline syntax
25+
[source, cypher, role=noplay]
26+
----
27+
CALL gds.alpha.pipeline.nodeRegression.create(
28+
pipelineName: String
29+
)
30+
YIELD
31+
name: String,
32+
nodePropertySteps: List of Map,
33+
featureProperties: List of String,
34+
splitConfig: Map,
35+
autoTuningConfig: Map,
36+
parameterSpace: List of Map
37+
----
38+
39+
.Parameters
40+
[opts="header",cols="1,1,4"]
41+
|===
42+
| Name | Type | Description
43+
| pipelineName | String | The name of the created pipeline.
44+
|===
45+
46+
include::../pipelineInfoResult.adoc[]
47+
--
48+
49+
50+
=== Example
51+
52+
[role=query-example,group=nr]
53+
--
54+
.The following will create a pipeline:
55+
[source, cypher, role=noplay]
56+
----
57+
CALL gds.alpha.pipeline.nodeRegression.create('pipe')
58+
----
59+
60+
.Results
61+
[opts="header",cols="1,1,1,1,1,1"]
62+
|===
63+
| name | nodePropertySteps | featureProperties | splitConfig | autoTuningConfig | parameterSpace
64+
| "pipe" | [] | []
65+
| {testFraction=0.3, validationFolds=3}
66+
| {maxTrials={max-trials}}
67+
| {RandomForest=[], LinearRegression=[]}
68+
|===
69+
--
70+
71+
This shows that the newly created pipeline does not contain any steps yet, and has defaults for the split and train parameters.
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
[[noderegression-pipelines]]
2+
= Node regression pipelines
3+
:max-trials: 10
4+
5+
[abstract]
6+
--
7+
This section describes Node regression pipelines in the Neo4j Graph Data Science library.
8+
--
9+
10+
11+
Node Regression is a common machine learning task applied to graphs: training models to predict node property values.
12+
Concretely, Node Regression models are used to predict the value of a node property based on other node properties.
13+
During training, the property to predict is referred to as the target property.
14+
15+
In GDS, we have Node Regression pipelines which offer an end-to-end workflow, from feature extraction to predicting node property values.
16+
The training pipelines reside in the <<pipeline-catalog-ops,pipeline catalog>>.
17+
When a training pipeline is <<nodeclassification-pipelines-train,executed>>, a regression model is created and stored in the <<model-catalog-ops,model catalog>>.
18+
19+
A training pipeline is a sequence of two phases:
20+
[upperroman]
21+
. The graph is augmented with new node properties in a series of steps.
22+
. The augmented graph is used for training a node regression model.
23+
24+
This segment is divided into the following pages:
25+
26+
* <<noderegression-pipelines-config, Configuring the pipeline>>
27+
28+
include::config.adoc[leveloffset=+1]

0 commit comments

Comments
 (0)