-
Notifications
You must be signed in to change notification settings - Fork 822
SOLR-18220 Add support for countDist in rollup for streaming expressions #4394
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| title: "Support 'countDist' (count distinct) metric in rollup for streaming expressions" | ||
| type: added | ||
| authors: | ||
| - name: khushjain | ||
| links: | ||
| - name: SOLR-18220 | ||
| url: https://issues.apache.org/jira/browse/SOLR-18220 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,7 @@ | |
| package org.apache.solr.client.solrj.io.stream.metrics; | ||
|
|
||
| import java.io.IOException; | ||
| import java.util.HashSet; | ||
| import java.util.Locale; | ||
| import org.apache.solr.client.solrj.io.Tuple; | ||
| import org.apache.solr.client.solrj.io.stream.expr.StreamExpression; | ||
|
|
@@ -29,6 +30,7 @@ public class CountDistinctMetric extends Metric { | |
| public static final String APPROX_COUNT_DISTINCT = "hll"; | ||
|
|
||
| private String columnName; | ||
| private HashSet<Object> distinctValues = new HashSet<>(); | ||
|
|
||
| public CountDistinctMetric(String columnName) { | ||
| this(columnName, false); | ||
|
|
@@ -53,6 +55,10 @@ public CountDistinctMetric(StreamExpression expression, StreamFactory factory) | |
| expression, | ||
| functionName)); | ||
| } | ||
| if (1 != expression.getParameters().size()) { | ||
| throw new IOException( | ||
| String.format(Locale.ROOT, "Invalid expression %s - unknown operands found", expression)); | ||
| } | ||
|
|
||
| init(functionName, columnName); | ||
| } | ||
|
|
@@ -66,7 +72,10 @@ private void init(String functionName, String columnName) { | |
|
|
||
| @Override | ||
| public void update(Tuple tuple) { | ||
| // Nop for now | ||
| Object value = tuple.get(columnName); | ||
| if (value != null) { | ||
| distinctValues.add(value); | ||
| } | ||
|
Comment on lines
74
to
+78
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -81,14 +90,11 @@ public String[] getColumns() { | |
|
|
||
| @Override | ||
| public Number getValue() { | ||
| // No op for now | ||
| return null; | ||
| return distinctValues.size(); | ||
| } | ||
|
|
||
| @Override | ||
| public StreamExpressionParameter toExpression(StreamFactory factory) throws IOException { | ||
| return new StreamExpression(getFunctionName()) | ||
| .withParameter(columnName) | ||
| .withParameter(Boolean.toString(outputLong)); | ||
| return new StreamExpression(getFunctionName()).withParameter(columnName); | ||
|
Comment on lines
97
to
+98
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the SQL module's map-reduce path, not the streaming expression path. This PR targets countDist in streaming expression rollup()
KhushJain marked this conversation as resolved.
|
||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The old constructor silently ignored the second parameter anyway, so no one could have depended on it. This PR fixes the serialization to match what the constructor actually accepts