Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ dependencies {
nmcpAggregation(project(":isthmus"))
}

// Ensure all Spark variants are published before aggregation
tasks.named("publishAggregationToCentralPortal") {
dependsOn(":spark:publishAllVariantsToCentralPortal")
}

allprojects {
repositories { mavenCentral() }

Expand Down
25 changes: 20 additions & 5 deletions examples/substrait-spark/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,32 @@ repositories {
mavenCentral()
}

// Get the Spark variant property - determines which spark subproject to use
val sparkVariantProp = findProperty("sparkVariant")?.toString() ?: "spark40_2.13"

// Map variants to their subproject paths and versions
val variantConfig =
mapOf(
"spark34_2.12" to Triple(":spark:spark-3.4_2.12", "3.4.4", "2.12"),
"spark35_2.12" to Triple(":spark:spark-3.5_2.12", "3.5.4", "2.12"),
"spark40_2.13" to Triple(":spark:spark-4.0_2.13", "4.0.2", "2.13"),
)

val (sparkProject, sparkVersion, scalaBinary) =
variantConfig[sparkVariantProp] ?: variantConfig["spark40_2.13"]!!

dependencies {
implementation(project(":spark"))
// Depend on the specific spark variant subproject
implementation(project(sparkProject))

// For a real Spark application, these would not be required since they would be in the Spark
// server classpath
runtimeOnly(libs.spark.core)
runtimeOnly(libs.spark.hive)
// server classpath. Use direct Maven coordinates to match the spark module's variant.
runtimeOnly("org.apache.spark:spark-core_${scalaBinary}:${sparkVersion}")
runtimeOnly("org.apache.spark:spark-hive_${scalaBinary}:${sparkVersion}")
}

tasks.jar {
dependsOn(":spark:jar", ":core:jar", ":core:shadowJar")
dependsOn("$sparkProject:jar", ":core:jar", ":core:shadowJar")

isZip64 = true
exclude("META-INF/*.RSA")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan;
import org.apache.spark.sql.classic.Dataset;
import org.apache.spark.sql.classic.SparkSession;

/** Minimal Spark application */
public class SparkConsumeSubstrait implements App.Action {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package io.substrait.examples;

import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.classic.SparkSession;

/** Collection of helper fns */
public final class SparkHelper {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public void run(String arg) {
Dataset<Row> result = spark.sql(sqlQuery);
result.show();

LogicalPlan logical = result.logicalPlan();
LogicalPlan logical = result.queryExecution().logical();
System.out.println(logical);

LogicalPlan optimised = result.queryExecution().optimizedPlan();
Expand Down
38 changes: 27 additions & 11 deletions gradle/libs.versions.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,17 @@ nmcp = "1.4.4"
picocli = "4.7.7"
protobuf-plugin = "0.9.6"
protobuf = "3.25.8"
scala-library = "2.13.18"
scalatest = "3.2.19"
scalatestplus-junit5 = "3.2.19.0"
scala-2-12 = "2.12.20"
scala-2-13 = "2.13.18"
scalatest-2-12 = "3.2.19"
scalatest-2-13 = "3.2.19"
scalatestplus-junit5-2-12 = "3.2.19.0"
scalatestplus-junit5-2-13 = "3.2.19.0"
shadow = "9.3.1"
slf4j = "2.0.17"
spark = "3.4.4"
spark-3-4 = "3.4.4"
spark-3-5 = "3.5.4"
spark-4-0 = "4.0.2"
spotless = "8.2.1"
validator = "3.0.0"

Expand Down Expand Up @@ -59,15 +64,26 @@ picocli-codegen = { module = "info.picocli:picocli-codegen", version.ref = "pico
protobuf-java = { module = "com.google.protobuf:protobuf-java", version.ref = "protobuf" }
protobuf-java-util = { module = "com.google.protobuf:protobuf-java-util", version.ref = "protobuf" }
protoc = { module = "com.google.protobuf:protoc", version.ref = "protobuf" }
scala-library = { module = "org.scala-lang:scala-library", version.ref = "scala-library" }
scalatest = { module = "org.scalatest:scalatest_2.13", version.ref = "scalatest" }
scalatestplus-junit5 = { module = "org.scalatestplus:junit-5-13_2.13", version.ref = "scalatestplus-junit5" }
scala-library-2-12 = { module = "org.scala-lang:scala-library", version.ref = "scala-2-12" }
scala-library-2-13 = { module = "org.scala-lang:scala-library", version.ref = "scala-2-13" }
scalatest-2-12 = { module = "org.scalatest:scalatest_2.12", version.ref = "scalatest-2-12" }
scalatest-2-13 = { module = "org.scalatest:scalatest_2.13", version.ref = "scalatest-2-13" }
scalatestplus-junit5-2-12 = { module = "org.scalatestplus:junit-5-12_2.12", version.ref = "scalatestplus-junit5-2-12" }
scalatestplus-junit5-2-13 = { module = "org.scalatestplus:junit-5-13_2.13", version.ref = "scalatestplus-junit5-2-13" }
slf4j-api = { module = "org.slf4j:slf4j-api", version.ref = "slf4j" }
slf4j-jdk14 = { module = "org.slf4j:slf4j-jdk14", version.ref = "slf4j" }
spark-catalyst = { module = "org.apache.spark:spark-catalyst_2.13", version.ref = "spark" }
spark-core = { module = "org.apache.spark:spark-core_2.13", version.ref = "spark" }
spark-hive = { module = "org.apache.spark:spark-hive_2.13", version.ref = "spark" }
spark-sql = { module = "org.apache.spark:spark-sql_2.13", version.ref = "spark" }
spark-catalyst-3-4-2-12 = { module = "org.apache.spark:spark-catalyst_2.12", version.ref = "spark-3-4" }
spark-core-3-4-2-12 = { module = "org.apache.spark:spark-core_2.12", version.ref = "spark-3-4" }
spark-hive-3-4-2-12 = { module = "org.apache.spark:spark-hive_2.12", version.ref = "spark-3-4" }
spark-sql-3-4-2-12 = { module = "org.apache.spark:spark-sql_2.12", version.ref = "spark-3-4" }
spark-catalyst-3-5-2-12 = { module = "org.apache.spark:spark-catalyst_2.12", version.ref = "spark-3-5" }
spark-core-3-5-2-12 = { module = "org.apache.spark:spark-core_2.12", version.ref = "spark-3-5" }
spark-hive-3-5-2-12 = { module = "org.apache.spark:spark-hive_2.12", version.ref = "spark-3-5" }
spark-sql-3-5-2-12 = { module = "org.apache.spark:spark-sql_2.12", version.ref = "spark-3-5" }
spark-catalyst-4-0-2-13 = { module = "org.apache.spark:spark-catalyst_2.13", version.ref = "spark-4-0" }
spark-core-4-0-2-13 = { module = "org.apache.spark:spark-core_2.13", version.ref = "spark-4-0" }
spark-hive-4-0-2-13 = { module = "org.apache.spark:spark-hive_2.13", version.ref = "spark-4-0" }
spark-sql-4-0-2-13 = { module = "org.apache.spark:spark-sql_2.13", version.ref = "spark-4-0" }

[bundles]
jackson = [ "jackson-databind", "jackson-annotations", "jackson-datatype-jdk8", "jackson-dataformat-yaml" ]
Expand Down
3 changes: 3 additions & 0 deletions settings.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ include(
"isthmus",
"isthmus-cli",
"spark",
"spark:spark-3.4_2.12",
"spark:spark-3.5_2.12",
"spark:spark-4.0_2.13",
"examples:substrait-spark",
"examples:isthmus-api",
)
2 changes: 1 addition & 1 deletion spark/.scalafmt.conf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
runner.dialect = scala212
runner.dialect = scala213

# Version is required to make sure IntelliJ picks the right version
version = 3.8.1
Expand Down
241 changes: 241 additions & 0 deletions spark/README-MULTI-VARIANT.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
# Multi-Variant Spark/Scala Build System

This document describes how to build and publish multiple Spark/Scala variants of the substrait-spark module.

## Supported Variants

The substrait-spark module supports three build variants:

| Variant | Spark Version | Scala Version | Classifier | Subproject |
|---------|---------------|---------------|------------|------------|
| Spark 3.4 | 3.4.4 | 2.12.20 | `spark34_2.12` | `:spark:spark-3.4_2.12` |
| Spark 3.5 | 3.5.4 | 2.12.20 | `spark35_2.12` | `:spark:spark-3.5_2.12` |
| Spark 4.0 | 4.0.2 | 2.13.18 | `spark40_2.13` | `:spark:spark-4.0_2.13` |

## Architecture

The build system uses **Gradle subprojects** for each variant.

### Project Structure

```
spark/
├── build.gradle.kts # Orchestrator project
├── src/ # Shared source code
│ ├── main/
│ │ ├── scala/ # Common code for all versions
│ │ ├── spark-3.4/ # Spark 3.4 specific implementations
│ │ ├── spark-3.5/ # Spark 3.5 specific implementations
│ │ └── spark-4.0/ # Spark 4.0 specific implementations
│ └── test/
│ ├── scala/ # Common test code
│ ├── spark-3.4/ # Spark 3.4 specific tests
│ ├── spark-3.5/ # Spark 3.5 specific tests
│ └── spark-4.0/ # Spark 4.0 specific tests
├── spark-3.4_2.12/
│ └── build.gradle.kts # Spark 3.4 variant build
├── spark-3.5_2.12/
│ └── build.gradle.kts # Spark 3.5 variant build
└── spark-4.0_2.13/
└── build.gradle.kts # Spark 4.0 variant build
```

Each subproject references the shared source code in `../src/` using Gradle's source set configuration.

## Building Variants

### Build a Specific Variant

Build a specific variant using its subproject path:

```bash
# Build Spark 3.4 with Scala 2.12
./gradlew :spark:spark-3.4_2.12:build

# Build Spark 3.5 with Scala 2.12
./gradlew :spark:spark-3.5_2.12:build

# Build Spark 4.0 with Scala 2.13
./gradlew :spark:spark-4.0_2.13:build
```

### Build All Variants

To build all variants:

```bash
./gradlew :spark:build
```

## Publishing Variants

### Publish to Local Maven Repository

Publish a specific variant:

```bash
# Publish Spark 3.4 with Scala 2.12
./gradlew :spark:spark-3.4_2.12:publishToMavenLocal

# Publish Spark 3.5 with Scala 2.12
./gradlew :spark:spark-3.5_2.12:publishToMavenLocal

# Publish Spark 4.0 with Scala 2.13
./gradlew :spark:spark-4.0_2.13:publishToMavenLocal
```

### Publish All Variants

To publish all variants to your local Maven repository:

```bash
./gradlew :spark:publishAllVariants
```

Published artifacts will be available at:
```
~/.m2/repository/io/substrait/{classifier}/{version}/
```

For example:
- `~/.m2/repository/io/substrait/spark34_2.12/0.80.0/`
- `~/.m2/repository/io/substrait/spark35_2.12/0.80.0/`
- `~/.m2/repository/io/substrait/spark40_2.13/0.80.0/`

### Publish to Maven Central Portal

Publish all variants to Maven Central:

```bash
./gradlew :spark:publishAllVariantsToCentralPortal
```

Or publish a specific variant:

```bash
./gradlew :spark:spark-4.0_2.13:publishMaven-publishPublicationToNmcpRepository
```

## Using Published Artifacts

### Maven

Add the appropriate variant as a dependency in your `pom.xml`:

```xml
<!-- Spark 3.4 with Scala 2.12 -->
<dependency>
<groupId>io.substrait</groupId>
<artifactId>spark34_2.12</artifactId>
<version>0.80.0</version>
</dependency>

<!-- Spark 3.5 with Scala 2.12 -->
<dependency>
<groupId>io.substrait</groupId>
<artifactId>spark35_2.12</artifactId>
<version>0.80.0</version>
</dependency>

<!-- Spark 4.0 with Scala 2.13 -->
<dependency>
<groupId>io.substrait</groupId>
<artifactId>spark40_2.13</artifactId>
<version>0.80.0</version>
</dependency>
```

### Gradle

Add the appropriate variant as a dependency in your `build.gradle.kts`:

```kotlin
dependencies {
// Spark 3.4 with Scala 2.12
implementation("io.substrait:spark34_2.12:0.80.0")

// Spark 3.5 with Scala 2.12
implementation("io.substrait:spark35_2.12:0.80.0")

// Spark 4.0 with Scala 2.13
implementation("io.substrait:spark40_2.13:0.80.0")
}
```

## Development Workflow

### Adding Support for a New Spark Version

1. **Create a new subproject directory**:
```bash
mkdir -p spark/spark-4.1_2.13
```

2. **Copy and modify a build.gradle.kts** from an existing variant:
```bash
cp spark/spark-4.0_2.13/build.gradle.kts spark/spark-4.1_2.13/
```

3. **Update the variant configuration** in the new `build.gradle.kts`:
```kotlin
val sparkVersion = "4.1.0"
val scalaVersion = "2.13.18"
val sparkMajorMinor = "4.1"
val scalaBinary = "2.13"
val classifier = "spark41_2.13"
```

4. **Add the subproject** to `settings.gradle.kts`:
```kotlin
include(
// ... existing projects
"spark:spark-4.1_2.13",
)
```

5. **Update the orchestrator** in `spark/build.gradle.kts`:
```kotlin
tasks.register("buildAllVariants") {
dependsOn(
// ... existing variants
":spark:spark-4.1_2.13:build"
)
}
```

6. **Create version-specific source directory**:
```bash
mkdir -p spark/src/main/spark-4.1
mkdir -p spark/src/test/spark-4.1
```

7. **Add version-specific implementations** for classes with API differences

8. **Test the new variant**:
```bash
./gradlew :spark:spark-4.1_2.13:build
```

### Testing Changes Across All Variants

When making changes to common code, test all variants:

```bash
# Quick compilation test for all variants
./gradlew :spark:spark-3.4_2.12:compileScala
./gradlew :spark:spark-3.5_2.12:compileScala
./gradlew :spark:spark-4.0_2.13:compileScala

# Or run full build for all variants
./gradlew :spark:buildAllVariants
```

### Cleaning Build Artifacts

```bash
# Clean a specific variant
./gradlew :spark:spark-4.0_2.13:clean

# Clean all variants
./gradlew :spark:clean
```
Loading