Skip to content

Commit 4239a20

Browse files
committed
feat(sqllogictest): support dynamic catalog configuration per engine
This PR implements issue #1780 by allowing each engine in the sqllogictest framework to configure its own catalog.

Changes:
- Remove global [catalog] section from schedule parsing
- Each engine now creates its own catalog based on engine-specific config
- DataFusionEngine reads 'catalog_type' and 'catalog_properties' from config
- Default catalog type is 'memory' with a temp warehouse for testing
- Support for all catalog types via iceberg-catalog-loader (rest, glue, hms, sql, s3tables)

Example configuration:

```toml
[engines]
df = { type = "datafusion", catalog_type = "rest", catalog_properties = { uri = "http://localhost:8181" } }
```

Closes #1780
1 parent b7ba2e8 commit 4239a20

File tree

6 files changed

+148
-24
lines changed

6 files changed

+148
-24
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ http = "1.2"
8181
iceberg = { version = "0.8.0", path = "./crates/iceberg" }
8282
iceberg-catalog-glue = { version = "0.8.0", path = "./crates/catalog/glue" }
8383
iceberg-catalog-hms = { version = "0.8.0", path = "./crates/catalog/hms" }
84+
iceberg-catalog-loader = { version = "0.8.0", path = "./crates/catalog/loader" }
8485
iceberg-catalog-rest = { version = "0.8.0", path = "./crates/catalog/rest" }
8586
iceberg-catalog-s3tables = { version = "0.8.0", path = "./crates/catalog/s3tables" }
8687
iceberg-catalog-sql = { version = "0.8.0", path = "./crates/catalog/sql" }

crates/sqllogictest/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ datafusion-sqllogictest = { workspace = true }
3232
enum-ordinalize = { workspace = true }
3333
env_logger = { workspace = true }
3434
iceberg = { workspace = true }
35+
iceberg-catalog-loader = { workspace = true }
3536
iceberg-datafusion = { workspace = true }
3637
indicatif = { workspace = true }
3738
log = { workspace = true }

crates/sqllogictest/src/engine/datafusion.rs

Lines changed: 101 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,21 @@ use std::collections::HashMap;
1919
use std::path::{Path, PathBuf};
2020
use std::sync::Arc;
2121

22-
use datafusion::catalog::CatalogProvider;
2322
use datafusion::prelude::{SessionConfig, SessionContext};
2423
use datafusion_sqllogictest::DataFusion;
2524
use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder};
2625
use iceberg::spec::{NestedField, PrimitiveType, Schema, Transform, Type, UnboundPartitionSpec};
2726
use iceberg::{Catalog, CatalogBuilder, NamespaceIdent, TableCreation};
27+
use iceberg_catalog_loader::CatalogLoader;
2828
use iceberg_datafusion::IcebergCatalogProvider;
2929
use indicatif::ProgressBar;
3030
use toml::Table as TomlTable;
3131

3232
use crate::engine::{EngineRunner, run_slt_with_runner};
3333
use crate::error::Result;
3434

35+
const DEFAULT_CATALOG_TYPE: &str = "memory";
36+
3537
pub struct DataFusionEngine {
3638
test_data_path: PathBuf,
3739
session_context: SessionContext,
@@ -59,49 +61,126 @@ impl EngineRunner for DataFusionEngine {
5961
}
6062

6163
impl DataFusionEngine {
64+
/// Create a new DataFusion engine with catalog configuration from the TOML config.
65+
///
66+
/// # Configuration
67+
///
68+
/// The engine reads catalog configuration from the TOML config:
69+
/// - `catalog_type`: The type of catalog to use (e.g., "memory", "rest"). Defaults to "memory".
70+
/// - `catalog_properties`: Additional properties for the catalog (optional).
71+
///
72+
/// # Example configuration
73+
///
74+
/// ```toml
75+
/// [engines]
76+
/// df = { type = "datafusion", catalog_type = "rest", catalog_properties = { uri = "http://localhost:8181" } }
77+
/// ```
6278
pub async fn new(config: TomlTable) -> Result<Self> {
79+
let catalog = Self::create_catalog(&config).await?;
80+
6381
let session_config = SessionConfig::new()
6482
.with_target_partitions(4)
6583
.with_information_schema(true);
6684
let ctx = SessionContext::new_with_config(session_config);
67-
ctx.register_catalog("default", Self::create_catalog(&config).await?);
85+
86+
// Create test namespace and tables in the catalog
87+
Self::setup_test_data(&catalog).await?;
88+
89+
// Register the catalog with DataFusion
90+
let catalog_provider = IcebergCatalogProvider::try_new(catalog)
91+
.await
92+
.map_err(|e| {
93+
crate::error::Error(anyhow::anyhow!("Failed to create catalog provider: {e}"))
94+
})?;
95+
ctx.register_catalog("default", Arc::new(catalog_provider));
6896

6997
Ok(Self {
7098
test_data_path: PathBuf::from("testdata"),
7199
session_context: ctx,
72100
})
73101
}
74102

75-
async fn create_catalog(_: &TomlTable) -> anyhow::Result<Arc<dyn CatalogProvider>> {
76-
// TODO: support dynamic catalog configuration
77-
// See: https://github.com/apache/iceberg-rust/issues/1780
78-
let catalog = MemoryCatalogBuilder::default()
79-
.load(
80-
"memory",
81-
HashMap::from([(
103+
/// Create a catalog from the engine configuration.
104+
///
105+
/// Supported catalog types:
106+
/// - "memory": In-memory catalog (default), useful for testing
107+
/// - "rest": REST catalog
108+
/// - "glue": AWS Glue catalog
109+
/// - "hms": Hive Metastore catalog
110+
/// - "s3tables": S3 Tables catalog
111+
/// - "sql": SQL catalog
112+
async fn create_catalog(config: &TomlTable) -> Result<Arc<dyn Catalog>> {
113+
let catalog_type = config
114+
.get("catalog_type")
115+
.and_then(|v| v.as_str())
116+
.unwrap_or(DEFAULT_CATALOG_TYPE);
117+
118+
let catalog_properties: HashMap<String, String> = config
119+
.get("catalog_properties")
120+
.and_then(|v| v.as_table())
121+
.map(|t| {
122+
t.iter()
123+
.filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string())))
124+
.collect()
125+
})
126+
.unwrap_or_default();
127+
128+
if catalog_type == "memory" {
129+
// Memory catalog is built-in to iceberg crate, not in catalog-loader
130+
// Ensure warehouse is set for memory catalog
131+
let mut props = catalog_properties;
132+
if !props.contains_key(MEMORY_CATALOG_WAREHOUSE) {
133+
// Use a temp directory as default warehouse for testing
134+
props.insert(
82135
MEMORY_CATALOG_WAREHOUSE.to_string(),
83-
"memory://".to_string(),
84-
)]),
85-
)
86-
.await?;
136+
std::env::temp_dir()
137+
.join("iceberg-sqllogictest")
138+
.to_string_lossy()
139+
.to_string(),
140+
);
141+
}
142+
let catalog = MemoryCatalogBuilder::default()
143+
.load("default", props)
144+
.await
145+
.map_err(|e| {
146+
crate::error::Error(anyhow::anyhow!("Failed to load memory catalog: {e}"))
147+
})?;
148+
Ok(Arc::new(catalog))
149+
} else {
150+
// Use catalog-loader for other catalog types
151+
let catalog = CatalogLoader::from(catalog_type)
152+
.load("default".to_string(), catalog_properties)
153+
.await
154+
.map_err(|e| crate::error::Error(anyhow::anyhow!("Failed to load catalog: {e}")))?;
155+
Ok(catalog)
156+
}
157+
}
87158

159+
/// Set up the test namespace and tables in the catalog.
160+
async fn setup_test_data(catalog: &Arc<dyn Catalog>) -> anyhow::Result<()> {
88161
// Create a test namespace for INSERT INTO tests
89162
let namespace = NamespaceIdent::new("default".to_string());
90-
catalog.create_namespace(&namespace, HashMap::new()).await?;
91163

92-
// Create test tables
93-
Self::create_unpartitioned_table(&catalog, &namespace).await?;
94-
Self::create_partitioned_table(&catalog, &namespace).await?;
164+
// Try to create the namespace, ignore if it already exists
165+
if catalog
166+
.create_namespace(&namespace, HashMap::new())
167+
.await
168+
.is_err()
169+
{
170+
// Namespace might already exist, that's ok
171+
}
95172

96-
Ok(Arc::new(
97-
IcebergCatalogProvider::try_new(Arc::new(catalog)).await?,
98-
))
173+
// Create test tables (ignore errors if they already exist)
174+
let _ = Self::create_unpartitioned_table(catalog, &namespace).await;
175+
let _ = Self::create_partitioned_table(catalog, &namespace).await;
176+
177+
Ok(())
99178
}
100179

101180
/// Create an unpartitioned test table with id and name columns
102181
/// TODO: this can be removed when we support CREATE TABLE
103182
async fn create_unpartitioned_table(
104-
catalog: &impl Catalog,
183+
catalog: &Arc<dyn Catalog>,
105184
namespace: &NamespaceIdent,
106185
) -> anyhow::Result<()> {
107186
let schema = Schema::builder()
@@ -128,7 +207,7 @@ impl DataFusionEngine {
128207
/// Partitioned by category using identity transform
129208
/// TODO: this can be removed when we support CREATE TABLE
130209
async fn create_partitioned_table(
131-
catalog: &impl Catalog,
210+
catalog: &Arc<dyn Catalog>,
132211
namespace: &NamespaceIdent,
133212
) -> anyhow::Result<()> {
134213
let schema = Schema::builder()

crates/sqllogictest/src/engine/mod.rs

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ pub trait EngineRunner: Send {
3333
async fn run_slt_file(&mut self, path: &Path) -> Result<()>;
3434
}
3535

36+
/// Load an engine runner based on the engine type and configuration.
37+
/// Each engine is responsible for creating its own catalog based on the
38+
/// `catalog_type` and `catalog_properties` fields in the config.
3639
pub async fn load_engine_runner(
3740
engine_type: &str,
3841
cfg: TomlTable,
@@ -80,14 +83,26 @@ mod tests {
8083
}
8184

8285
#[tokio::test]
83-
async fn test_load_datafusion() {
86+
async fn test_load_datafusion_default_catalog() {
8487
let input = r#"
8588
[engines]
8689
df = { type = "datafusion" }
8790
"#;
8891
let tbl = toml::from_str(input).unwrap();
8992
let result = load_engine_runner(TYPE_DATAFUSION, tbl).await;
9093

91-
assert!(result.is_ok());
94+
assert!(result.is_ok(), "Failed to load engine: {:?}", result.err());
95+
}
96+
97+
#[tokio::test]
98+
async fn test_load_datafusion_with_memory_catalog() {
99+
let input = r#"
100+
[engines]
101+
df = { type = "datafusion", catalog_type = "memory" }
102+
"#;
103+
let tbl = toml::from_str(input).unwrap();
104+
let result = load_engine_runner(TYPE_DATAFUSION, tbl).await;
105+
106+
assert!(result.is_ok(), "Failed to load engine: {:?}", result.err());
92107
}
93108
}

crates/sqllogictest/src/schedule.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,4 +204,31 @@ mod tests {
204204

205205
assert!(result.is_err());
206206
}
207+
208+
#[tokio::test]
209+
async fn test_parse_engines_with_catalog_config() {
210+
let toml_content = r#"
211+
[engines]
212+
df = { type = "datafusion", catalog_type = "memory" }
213+
"#;
214+
215+
let table: TomlTable = toml::from_str(toml_content).unwrap();
216+
let result = Schedule::parse_engines(&table).await;
217+
218+
assert!(result.is_ok());
219+
}
220+
221+
#[tokio::test]
222+
async fn test_parse_engines_default_catalog() {
223+
let toml_content = r#"
224+
[engines]
225+
df = { type = "datafusion" }
226+
"#;
227+
228+
let table: TomlTable = toml::from_str(toml_content).unwrap();
229+
let result = Schedule::parse_engines(&table).await;
230+
231+
// Should default to memory catalog
232+
assert!(result.is_ok());
233+
}
207234
}

0 commit comments

Comments
 (0)