Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions .github/workflows/build-run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
uses: gradle/actions/setup-gradle@v5

- name: Build with Gradle
run: ./gradlew clean build
run: ./gradlew clean build :services:tables:check

- name: Start Docker Containers
run: docker compose -f infra/recipes/docker-compose/oh-only/docker-compose.yml up -d --build
Expand All @@ -34,11 +34,20 @@ jobs:
with:
python-version: '3.12'

- name: Install uv
uses: astral-sh/setup-uv@v7
with:
enable-cache: true

- name: Run Iceberg REST Catalog Smoke Test (PyIceberg)
working-directory: integrations/python/dataloader
run: uv run python scripts/iceberg_rest_catalog_smoke.py ../../../tables-test-fixtures/tables-test-fixtures-iceberg-1.2/src/main/resources/dummy.token

- name: Install dependencies
run: pip install -r scripts/python/requirements.txt

- name: Run Integration Tests
run: python scripts/python/integration_test.py ./tables-test-fixtures/tables-test-fixtures-iceberg-1.2/src/main/resources/dummy.token

- name: Stop Docker Containers
run: docker compose -f infra/recipes/docker-compose/oh-only/docker-compose.yml down
run: docker compose -f infra/recipes/docker-compose/oh-only/docker-compose.yml down
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from __future__ import annotations

import pathlib
import sys
import uuid

import requests
from pyiceberg.catalog.rest import RestCatalog

# Database the smoke table is created in; also used as the namespace when
# listing tables through the Iceberg REST catalog.
DATABASE_ID = "d3"
# Root URL of the locally running service; used both for the direct
# /v1/databases/... HTTP calls and as the REST catalog URI.
BASE_URL = "http://localhost:8000"


def read_token(path: str) -> str:
    """Load the bearer token stored in the file at *path*.

    The file is decoded as UTF-8 and leading/trailing whitespace is removed.

    Args:
        path: Location of the token file.

    Returns:
        The stripped token text.

    Raises:
        ValueError: If nothing is left once whitespace has been stripped.
    """
    stripped = pathlib.Path(path).read_text(encoding="utf-8").strip()
    if stripped:
        return stripped
    raise ValueError(f"Token file is empty: {path}")


def create_table(token: str, table_id: str) -> None:
    """Create the smoke-test table by POSTing to the tables endpoint.

    Args:
        token: Bearer token sent in the Authorization header.
        table_id: Identifier of the table to create inside ``DATABASE_ID``.

    Raises:
        RuntimeError: If the service answers with anything other than 201.
    """
    auth_headers = {"Content-Type": "application/json", "Authorization": f"Bearer {token}"}
    # The schema is sent as a JSON document embedded in a string field, so it
    # is kept as literal text rather than as a nested dict.
    schema_text = '{"type": "struct", "fields": [{"id": 1,"required": true,"name": "id","type": "string"},{"id": 2,"required": true,"name": "name","type": "string"},{"id": 3,"required": true,"name": "ts","type": "timestamp"}]}'
    body = {
        "tableId": table_id,
        "databaseId": DATABASE_ID,
        "baseTableVersion": "INITIAL_VERSION",
        "clusterId": "LocalFSCluster",
        "schema": schema_text,
        "timePartitioning": {"columnName": "ts", "granularity": "HOUR"},
        "clustering": [{"columnName": "name"}],
        "tableProperties": {"key": "value"},
    }
    endpoint = f"{BASE_URL}/v1/databases/{DATABASE_ID}/tables/"

    reply = requests.post(endpoint, json=body, headers=auth_headers, timeout=30)
    if reply.status_code == 201:
        return
    raise RuntimeError(f"Failed to create smoke table: {reply.status_code} {reply.text}")


def drop_table(token: str, table_id: str) -> None:
    """Delete the smoke-test table, tolerating one that is already gone.

    Args:
        token: Bearer token sent in the Authorization header.
        table_id: Identifier of the table to delete inside ``DATABASE_ID``.

    Raises:
        RuntimeError: If the service answers with a status other than 204 or 404.
    """
    reply = requests.delete(
        f"{BASE_URL}/v1/databases/{DATABASE_ID}/tables/{table_id}",
        headers={"Content-Type": "application/json", "Authorization": f"Bearer {token}"},
        timeout=30,
    )
    # 204 means deleted; 404 is accepted so cleanup succeeds when the table
    # no longer exists.
    if reply.status_code in (204, 404):
        return
    raise RuntimeError(f"Failed to drop smoke table: {reply.status_code} {reply.text}")


def run_smoke(token: str) -> None:
    """Exercise the Iceberg REST catalog against a freshly created table.

    A uniquely named table is created via ``create_table``; then
    ``table_exists``, ``list_tables`` and ``load_table`` are checked through a
    ``RestCatalog`` client. The table is dropped afterwards whether or not
    the checks pass.

    Args:
        token: Bearer token used for both the setup calls and the catalog.

    Raises:
        AssertionError: If any of the three catalog checks fails.
    """
    unique_suffix = uuid.uuid4().hex[:8]
    table_id = f"iceberg_rest_smoke_{unique_suffix}"
    table_identifier = (DATABASE_ID, table_id)

    create_table(token, table_id)
    try:
        catalog = RestCatalog(name="openhouse", uri=BASE_URL, token=token, warehouse="openhouse")

        if not catalog.table_exists(table_identifier):
            raise AssertionError(f"table_exists returned false for {table_identifier}")

        listed = catalog.list_tables((DATABASE_ID,))
        if table_identifier not in listed:
            raise AssertionError(f"list_tables missing {table_identifier}; got: {listed}")

        if catalog.load_table(table_identifier) is None:
            raise AssertionError(f"load_table returned None for {table_identifier}")
    finally:
        # Always clean up so repeated runs do not accumulate smoke tables.
        drop_table(token, table_id)


def main() -> None:
    """Command-line entry point: read the token file and run the smoke test.

    Expects exactly one positional argument, the path to the token file.

    Raises:
        SystemExit: With a usage message when the argument count is wrong.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        raise SystemExit("Usage: uv run python scripts/iceberg_rest_catalog_smoke.py <token_file>")

    run_smoke(read_token(cli_args[0]))
    print("Iceberg REST catalog smoke test passed")


# Run the smoke test only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
193 changes: 193 additions & 0 deletions services/tables/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,92 @@ openApi {
apiDocsUrl.set("http://localhost:8000/v3/api-docs")
}

// Vendored copy of the Iceberg REST catalog OpenAPI spec, kept under the repository root.
def canonicalIcebergRestSpec = "${rootDir}/spec/iceberg-rest-catalog-open-api.yaml"
// Upstream git tag the vendored spec is compared against.
def icebergRestSpecVersion = "apache-iceberg-1.10.0"
// Raw URL of the upstream spec at that tag.
def icebergRestSpecUrl = "https://raw.githubusercontent.com/apache/iceberg/refs/tags/${icebergRestSpecVersion}/open-api/rest-catalog-open-api.yaml"
// openapi-generator CLI version and the build-directory location its jar is expected at.
def openApiCliVersion = "7.12.0"
def openApiCliDir = "${buildDir}/openapi-cli"
def openApiCliJar = "${openApiCliDir}/openapi-generator-cli-${openApiCliVersion}.jar"
// Output directory for the generated Spring interfaces; wiped before every codegen run.
def generatedIcebergRestDir = "${buildDir}/generated/openapi/iceberg-rest-server"

/**
* The Iceberg library has its own hand-written classes (ConfigResponse, LoadTableResponse, etc.)
* that predate the REST OpenAPI spec. These library classes use builders, custom Jackson
* serializers, and kebab-case — they don't follow JavaBean conventions. The OpenAPI codegen
* would normally generate a separate set of standard POJO model classes for the same types.
*
* Rather than maintaining adapters between two representations of the same wire format, we tell
* the codegen to skip model generation (models=false) and reference the Iceberg library classes
* directly in the generated interfaces. This is the same approach used by Apache Polaris
* (see polaris/api/iceberg-service/build.gradle.kts).
*
* Every schema in the spec needs an explicit mapping; otherwise the generated code references
* a class that was never generated. Types that appear in the 1.10.0 spec but do not exist in our
* 1.5.2 fork are mapped to Object, because the generated default-501 methods need something to
* compile against. These Object mappings can be removed once OpenHouse upgrades to an Iceberg
* release that ships those classes (noted here as 1.7+; TODO confirm, since the types are also
* described as added in 1.10+).
*/
// importMappings controls the `import` statement in generated files.
// typeMappings controls the inline type name in method signatures and @Schema annotations.
// Both are needed due to an openapi-generator bug — setting one does not update the other.
// Spec schema name -> Java class to import in generated files, serialized as the
// comma-separated "Schema=fqcn" string that openapi-generator's --import-mappings expects.
def icebergImportMappings = [
  // Iceberg REST responses
  "CatalogConfig=org.apache.iceberg.rest.responses.ConfigResponse",
  "CommitTableResponse=org.apache.iceberg.rest.responses.LoadTableResponse",
  "CreateNamespaceResponse=org.apache.iceberg.rest.responses.CreateNamespaceResponse",
  "ErrorModel=org.apache.iceberg.rest.responses.ErrorResponse",
  "GetNamespaceResponse=org.apache.iceberg.rest.responses.GetNamespaceResponse",
  "IcebergErrorResponse=org.apache.iceberg.rest.responses.ErrorResponse",
  "ListNamespacesResponse=org.apache.iceberg.rest.responses.ListNamespacesResponse",
  "ListTablesResponse=org.apache.iceberg.rest.responses.ListTablesResponse",
  "LoadTableResult=org.apache.iceberg.rest.responses.LoadTableResponse",
  // View results have their own response class; mapping them to LoadTableResponse
  // would give the view endpoints a table-shaped return type.
  "LoadViewResult=org.apache.iceberg.rest.responses.LoadViewResponse",
  "OAuthError=org.apache.iceberg.rest.responses.ErrorResponse",
  "OAuthErrorResponse=org.apache.iceberg.rest.responses.OAuthErrorResponse",
  "OAuthTokenResponse=org.apache.iceberg.rest.responses.OAuthTokenResponse",
  "UpdateNamespacePropertiesResponse=org.apache.iceberg.rest.responses.UpdateNamespacePropertiesResponse",
  // Iceberg REST requests
  "CreateNamespaceRequest=org.apache.iceberg.rest.requests.CreateNamespaceRequest",
  "CreateTableRequest=org.apache.iceberg.rest.requests.CreateTableRequest",
  "CreateViewRequest=org.apache.iceberg.rest.requests.CreateViewRequest",
  "CommitTransactionRequest=org.apache.iceberg.rest.requests.CommitTransactionRequest",
  "RegisterTableRequest=org.apache.iceberg.rest.requests.RegisterTableRequest",
  "RenameTableRequest=org.apache.iceberg.rest.requests.RenameTableRequest",
  "ReportMetricsRequest=org.apache.iceberg.rest.requests.ReportMetricsRequest",
  "UpdateNamespacePropertiesRequest=org.apache.iceberg.rest.requests.UpdateNamespacePropertiesRequest",
  // Types not in Iceberg 1.5.2 — mapped to Object so unimplemented default methods compile
  "CommitTableRequest=java.lang.Object",
  "CommitViewRequest=java.lang.Object",
  "FetchPlanningResult=java.lang.Object",
  "FetchScanTasksRequest=java.lang.Object",
  "FetchScanTasksResult=java.lang.Object",
  "LoadCredentialsResponse=java.lang.Object",
  "PlanTableScanRequest=java.lang.Object",
  "PlanTableScanResult=java.lang.Object",
  // OAuth2 model type suppressed by models=false
  "TokenType=java.lang.String",
].join(",")

// Spec schema name -> inline Java type used in signatures and @Schema annotations
// (--type-mappings). Only schemas whose Java class name differs from the schema name,
// or that are replaced by Object/String, are listed; every entry here must agree with
// the corresponding entry in icebergImportMappings.
def icebergTypeMappings = [
  "CatalogConfig=org.apache.iceberg.rest.responses.ConfigResponse",
  "CommitTableResponse=org.apache.iceberg.rest.responses.LoadTableResponse",
  "ErrorModel=org.apache.iceberg.rest.responses.ErrorResponse",
  "IcebergErrorResponse=org.apache.iceberg.rest.responses.ErrorResponse",
  "LoadTableResult=org.apache.iceberg.rest.responses.LoadTableResponse",
  // Kept in step with the import mapping: views use LoadViewResponse, not LoadTableResponse.
  "LoadViewResult=org.apache.iceberg.rest.responses.LoadViewResponse",
  "OAuthError=org.apache.iceberg.rest.responses.ErrorResponse",
  // Types not in Iceberg 1.5.2 — must also be in typeMappings for @Schema annotations
  "CommitTableRequest=java.lang.Object",
  "CommitViewRequest=java.lang.Object",
  "FetchPlanningResult=java.lang.Object",
  "FetchScanTasksRequest=java.lang.Object",
  "FetchScanTasksResult=java.lang.Object",
  "LoadCredentialsResponse=java.lang.Object",
  "PlanTableScanRequest=java.lang.Object",
  "PlanTableScanResult=java.lang.Object",
  "TokenType=java.lang.String",
].join(",")


dependencies {
compileOnly "io.opentelemetry.instrumentation:opentelemetry-instrumentation-annotations:${otel_annotations_version}"
api project(':services:common')
Expand All @@ -41,10 +127,117 @@ dependencies {
api 'org.springframework.security:spring-security-config:5.7.2'
api 'org.springframework.boot:spring-boot-starter-webflux:2.7.8'
implementation 'com.cronutils:cron-utils:9.2.0'
implementation 'org.openapitools:jackson-databind-nullable:0.2.1'
testImplementation 'org.junit.jupiter:junit-jupiter-engine:' + junit_version
testImplementation 'org.springframework.security:spring-security-test:5.7.3'
testImplementation(testFixtures(project(':services:common')))
testImplementation (project(':tables-test-fixtures:tables-test-fixtures_2.12')) {
exclude group: 'com.linkedin.iceberg'
}
}

// Runs the shared jar_download.sh helper from :client:common with the target
// directory and CLI version; the openapi-generator jar is declared as this task's output.
tasks.register('icebergRestDownloadCodegenCli', Exec) {
  def downloadScript = "${project(':client:common').projectDir}/jar_download.sh"
  outputs.file(openApiCliJar)
  commandLine(["sh", downloadScript, "-o", openApiCliDir, "-v", openApiCliVersion])
}

// Validates the vendored Iceberg REST spec with the openapi-generator CLI's
// `validate` command. Depends on the CLI jar having been downloaded.
tasks.register('icebergRestValidateSpec', Exec) { validateTask ->
  validateTask.dependsOn(tasks.named('icebergRestDownloadCodegenCli'))
  validateTask.inputs.file(canonicalIcebergRestSpec)
  validateTask.inputs.file(openApiCliJar)
  validateTask.commandLine('java', '-jar', openApiCliJar, 'validate', '-i', canonicalIcebergRestSpec)
}

// Generates Spring API interfaces (no models, no supporting files) from the vendored
// Iceberg REST spec, then patches the generated sources so they compile against the
// Iceberg 1.5.2 classes.
tasks.register('icebergRestCodegen', Exec) {
  dependsOn tasks.named('icebergRestValidateSpec')

  inputs.file(canonicalIcebergRestSpec)
  inputs.file(openApiCliJar)
  outputs.dir(generatedIcebergRestDir)

  // Start from an empty output directory so files from earlier runs never linger.
  doFirst {
    delete generatedIcebergRestDir
  }

  def generatorArgs = [
    "generate",
    "-g", "spring",
    "-i", canonicalIcebergRestSpec,
    "-o", generatedIcebergRestDir,
    "--api-package", "com.linkedin.openhouse.tables.generated.iceberg.api",
    "--model-package", "com.linkedin.openhouse.tables.generated.iceberg.model",
    "--import-mappings", icebergImportMappings,
    "--type-mappings", icebergTypeMappings,
    "--global-property", "apis,models=false,apiTests=false,apiDocs=false,modelTests=false,modelDocs=false,supportingFiles=",
    "--additional-properties", "interfaceOnly=true,useTags=true,skipDefaultInterface=false,hideGenerationTimestamp=true,useSpringBoot3=false",
  ]
  commandLine(["java", "-jar", openApiCliJar] + generatorArgs)

  // Post-processing for Iceberg 1.5.2 compatibility.
  //
  // Why: the import/type mappings cover return types and @Schema annotations, but with
  // models=false the generator does not apply typeMappings to @RequestBody parameter
  // types. That leaves bare references to request types that are absent from Iceberg
  // 1.5.2, so the generated code would not compile.
  //
  // How: in every generated .java file, each listed type name is replaced by Object
  // wherever it is followed by whitespace and a word character, i.e. the
  // "TypeName paramName" shape of a declaration. The lookahead is meant to leave
  // import statements, comments and string literals alone.
  //
  // Removable once OpenHouse moves to an Iceberg version that ships these types
  // (1.7+ per the original note). Track via: https://github.com/linkedin/openhouse/issues/XXX
  doLast {
    def missingRequestTypes = [
      'CommitTableRequest',
      'CommitViewRequest',
      'FetchScanTasksRequest',
      'PlanTableScanRequest',
    ]
    fileTree(generatedIcebergRestDir).matching { include '**/*.java' }.each { File javaFile ->
      String before = javaFile.text
      String after = missingRequestTypes.inject(before) { String acc, String typeName ->
        acc.replaceAll("\\b${typeName}\\b(?=\\s+\\w)", 'Object')
      }
      // Only rewrite files that actually changed.
      if (after != before) {
        javaFile.text = after
      }
    }
  }
}

// Compile the generated Iceberg REST interfaces together with the hand-written sources,
// and make sure code generation has run first.
tasks.named('compileJava').configure { javaCompile ->
  javaCompile.dependsOn(tasks.named('icebergRestCodegen'))
  javaCompile.source("${generatedIcebergRestDir}/src/main/java")
}

// Fetches the upstream spec for the pinned tag into the build directory.
// Skipped when a copy for that tag is already present.
tasks.register('icebergRestDownloadUpstreamSpec') {
  description = 'Download upstream Iceberg REST spec for sync verification'

  File downloadedSpec = new File("${buildDir}/iceberg-rest-spec-upstream-${icebergRestSpecVersion}.yaml")
  outputs.file(downloadedSpec)
  onlyIf { !downloadedSpec.exists() }

  doLast {
    downloadedSpec.parentFile.mkdirs()
    // The whole response is read into memory before being written to the file.
    new URL(icebergRestSpecUrl).withInputStream { stream ->
      downloadedSpec.bytes = stream.bytes
    }
  }
}

// Fails the build when the vendored spec's text differs from the downloaded upstream
// copy; on success writes a marker file that serves as the task's output.
tasks.register('icebergRestVerifySpecSync') {
  description = 'Verify vendored Iceberg REST spec matches upstream tag'
  dependsOn tasks.named('icebergRestDownloadUpstreamSpec')

  def upstreamCopy = new File("${buildDir}/iceberg-rest-spec-upstream-${icebergRestSpecVersion}.yaml")
  def verifiedMarker = "${buildDir}/iceberg-rest-spec-sync-verified"

  inputs.file(canonicalIcebergRestSpec)
  inputs.file(upstreamCopy)
  outputs.file(verifiedMarker)

  doLast {
    if (file(canonicalIcebergRestSpec).text == upstreamCopy.text) {
      file(verifiedMarker).text = "OK"
      return
    }
    throw new GradleException(
      "Vendored spec ${canonicalIcebergRestSpec} does not match upstream ${icebergRestSpecUrl}. " +
      "Run: curl -o spec/iceberg-rest-catalog-open-api.yaml ${icebergRestSpecUrl}")
  }
}

// `check` also validates the vendored spec and verifies it matches upstream.
tasks.named('check').configure {
  dependsOn(
    tasks.named('icebergRestValidateSpec'),
    tasks.named('icebergRestVerifySpecSync'))
}
Loading
Loading