Skip to content

Commit 6c12297

Browse files
authored
[FSTORE-618] Expand current java docstrings to cover new hsfs functionalities (#932)
1 parent 482c207 commit 6c12297

File tree

12 files changed

+3387
-410
lines changed

12 files changed

+3387
-410
lines changed

java/pom.xml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,9 @@
398398
<source>1.8</source>
399399
<target>1.8</target>
400400
<sourcepath>${delombok.output}</sourcepath>
401+
<sourceFileExcludes>
402+
<exclude>**/MainClass.java</exclude>
403+
</sourceFileExcludes>
401404
</configuration>
402405

403406
<executions>

java/src/main/java/com/logicalclocks/hsfs/Feature.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,23 @@ public Feature(String name, String type, String onlineType, Boolean primary, Boo
133133
this.description = description;
134134
}
135135

136+
/**
137+
* Returns true if the feature has a complex type.
138+
*
139+
* <pre>
140+
* {@code // get feature store handle
141+
* FeatureStore fs = ...;
142+
* // get feature group handle
143+
* FeatureGroup fg = ...;
144+
* // get feature
145+
* Feature ft = fg.getFeature("feature_name");
146+
* // check if the feature has a complex type.
147+
* ft.isComplex();
148+
* }
149+
* </pre>
150+
*
151+
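* @return {@code true} if the upper-cased feature type starts with one of {@code Constants.COMPLEX_FEATURE_TYPES}, {@code false} otherwise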
152+
*/
136153
@JsonIgnore
137154
public boolean isComplex() {
138155
return Constants.COMPLEX_FEATURE_TYPES.stream().anyMatch(c -> type.toUpperCase().startsWith(c));
@@ -198,6 +215,7 @@ public Filter ge(Feature value) {
198215
* @deprecated
199216
* {@code in} method is deprecated. Use {@code isin} instead.
200217
*/
218+
@Deprecated
201219
public Filter in(Collection<?> collection) {
202220
return isin(collection);
203221
}

java/src/main/java/com/logicalclocks/hsfs/FeatureGroup.java

Lines changed: 601 additions & 169 deletions
Large diffs are not rendered by default.

java/src/main/java/com/logicalclocks/hsfs/FeatureStore.java

Lines changed: 532 additions & 80 deletions
Large diffs are not rendered by default.

java/src/main/java/com/logicalclocks/hsfs/FeatureView.java

Lines changed: 1401 additions & 42 deletions
Large diffs are not rendered by default.

java/src/main/java/com/logicalclocks/hsfs/StorageConnector.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,20 @@ public abstract class StorageConnector {
7777

7878
protected StorageConnectorApi storageConnectorApi = new StorageConnectorApi();
7979

80+
/**
81+
* Reads a query or a path into a dataframe using the storage connector.
82+
*
83+
* @param query By default, the storage connector will read the table configured together with the connector, if any.
84+
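* It's possible to overwrite this by passing a SQL query here.
* NOTE(review): the implementation shown passes only dataFormat, options and path to the engine, not query; confirm where the query is applied.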
85+
* @param dataFormat When reading from object stores such as S3, HopsFS and ADLS, specify the file format to be read,
86+
* e.g. {@code csv}, {@code parquet}.
87+
* @param options Any additional key/value options to be passed to the connector.
88+
* @param path Path to be read from within the bucket of the storage connector. Not relevant for JDBC or database
89+
* based connectors such as Snowflake, JDBC or Redshift.
90+
* @return DataFrame.
91+
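* @throws FeatureStoreException If the read fails in the feature store client, presumably when Client is not connected to Hopsworks (TODO confirm).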
92+
* @throws IOException Generic IO exception.
93+
*/
8094
public Object read(String query, String dataFormat, Map<String, String> options, String path)
8195
throws FeatureStoreException, IOException {
8296
return SparkEngine.getInstance().read(this, dataFormat, options, path);

java/src/main/java/com/logicalclocks/hsfs/StreamFeatureGroup.java

Lines changed: 763 additions & 69 deletions
Large diffs are not rendered by default.

java/src/main/java/com/logicalclocks/hsfs/engine/FeatureGroupEngine.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,10 @@ public class FeatureGroupEngine {
5858
* key.
5959
* @param writeOptions Additional write options as key-value pairs, defaults to empty Map.
6060
* @return Feature Group metadata object
61-
* @throws FeatureStoreException FeatureStoreException
62-
* @throws IOException IOException
63-
* @throws ParseException ParseException
61+
* @throws FeatureStoreException In case Client is not connected to Hopsworks, unable to identify date format and/or
62+
* no commit information was found for the feature group.
63+
* @throws IOException Generic IO exception.
64+
* @throws ParseException In case it's unable to parse HUDI and/or statistics commit date string to date type.
6465
*/
6566
public FeatureGroup save(FeatureGroup featureGroup, Dataset<Row> dataset, List<String> partitionKeys,
6667
String hudiPrecombineKey, Map<String, String> writeOptions)

java/src/main/java/com/logicalclocks/hsfs/engine/SparkEngine.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -257,8 +257,8 @@ public void registerHudiTemporaryTable(HudiFeatureGroupAlias hudiFeatureGroupAli
257257
* @param writeOptions Additional write options as key-value pairs, defaults to empty Map
258258
* @param saveMode org.apache.spark.sql.saveMode: Append, Overwrite, ErrorIfExists, Ignore
259259
* @return Spark dataframe
260-
* @throws FeatureStoreException FeatureStoreException
261-
* @throws IOException IOException
260+
* @throws FeatureStoreException If Client is not connected to Hopsworks.
261+
* @throws IOException Generic IO exception.
262262
*/
263263
public Dataset<Row>[] write(TrainingDataset trainingDataset, Query query, Map<String, String> queryReadOptions,
264264
Map<String, String> writeOptions, SaveMode saveMode) throws FeatureStoreException, IOException {
@@ -561,8 +561,8 @@ public <S> StreamingQuery writeStreamDataframe(FeatureGroupBase featureGroupBase
561561
* @param featureGroupBase FeatureGroupBase Feature Group base metadata object
562562
* @param dataset Spark DataFrame or RDD.
563563
* @return Spark DataFrame.
564-
* @throws FeatureStoreException FeatureStoreException
565-
* @throws IOException IOException
564+
* @throws FeatureStoreException If Client is not connected to Hopsworks.
565+
* @throws IOException Generic IO exception.
566566
*/
567567
public Dataset<Row> encodeComplexFeatures(FeatureGroupBase featureGroupBase, Dataset<Row> dataset)
568568
throws FeatureStoreException, IOException {
@@ -584,8 +584,8 @@ public Dataset<Row> encodeComplexFeatures(FeatureGroupBase featureGroupBase, Dat
584584
* @param featureGroupBase FeatureGroupBase Feature Group base metadata object
585585
* @param dataset Spark DataFrame or RDD.
586586
* @return Spark DataFrame.
587-
* @throws FeatureStoreException FeatureStoreException
588-
* @throws IOException IOException
587+
* @throws FeatureStoreException If Client is not connected to Hopsworks.
588+
* @throws IOException Generic IO exception.
589589
*/
590590
private Dataset<Row> onlineFeatureGroupToAvro(FeatureGroupBase featureGroupBase, Dataset<Row> dataset)
591591
throws FeatureStoreException, IOException {
@@ -676,7 +676,7 @@ public String profile(Dataset<Row> df) {
676676
}
677677

678678
public void setupConnectorHadoopConf(StorageConnector storageConnector)
679-
throws FeatureStoreException, IOException {
679+
throws IOException {
680680
if (storageConnector == null) {
681681
return;
682682
}

java/src/main/java/com/logicalclocks/hsfs/engine/hudi/HudiEngine.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ private FeatureGroupCommit getLastCommitMetadata(SparkSession sparkSession, Stri
211211

212212
private Map<String, String> setupHudiWriteOpts(FeatureGroupBase featureGroup, HudiOperationType operation,
213213
Map<String, String> writeOptions)
214-
throws IOException, FeatureStoreException {
214+
throws FeatureStoreException {
215215
Map<String, String> hudiArgs = new HashMap<String, String>();
216216

217217
hudiArgs.put(HUDI_TABLE_STORAGE_TYPE, HUDI_COPY_ON_WRITE);

0 commit comments

Comments
 (0)