Skip to content

Commit e8a0829

Browse files
batmnnnpaleolimbotalamb
authored
Allow Logical expression ScalarVariable to represent an extension type or metadata (apache#18243)
## Which issue does this PR close? - Closes apache#18230 ## Rationale for this change Add richer variable metadata by switching Expr::ScalarVariable to store an Arrow Field, allowing planners to retain nullability and metadata when handling @var expressions. ## What changes are included in this PR? This PR updates ScalarVariable to use FieldRef so it can represent extension types and metadata in logical expressions. ## Are these changes tested? Yes ## Are there any user-facing changes? When planning queries with variables (e.g., @foo), the resulting logical expressions carry full field metadata instead of only a data type. This can affect downstream components that inspect nullability or custom metadata. --------- Co-authored-by: Dewey Dunnington <dewey@dunnington.ca> Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
1 parent d85595e commit e8a0829

File tree

5 files changed

+60
-22
lines changed

5 files changed

+60
-22
lines changed

datafusion/expr/src/expr.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,7 @@ pub enum Expr {
316316
/// A named reference to a qualified field in a schema.
317317
Column(Column),
318318
/// A named reference to a variable in a registry.
319-
ScalarVariable(DataType, Vec<String>),
319+
ScalarVariable(FieldRef, Vec<String>),
320320
/// A constant value along with associated [`FieldMetadata`].
321321
Literal(ScalarValue, Option<FieldMetadata>),
322322
/// A binary expression such as "age > 21"
@@ -2529,8 +2529,8 @@ impl HashNode for Expr {
25292529
Expr::Column(column) => {
25302530
column.hash(state);
25312531
}
2532-
Expr::ScalarVariable(data_type, name) => {
2533-
data_type.hash(state);
2532+
Expr::ScalarVariable(field, name) => {
2533+
field.hash(state);
25342534
name.hash(state);
25352535
}
25362536
Expr::Literal(scalar_value, _) => {

datafusion/expr/src/expr_schema.rs

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ impl ExprSchemable for Expr {
121121
Expr::Negative(expr) => expr.get_type(schema),
122122
Expr::Column(c) => Ok(schema.data_type(c)?.clone()),
123123
Expr::OuterReferenceColumn(field, _) => Ok(field.data_type().clone()),
124-
Expr::ScalarVariable(ty, _) => Ok(ty.clone()),
124+
Expr::ScalarVariable(field, _) => Ok(field.data_type().clone()),
125125
Expr::Literal(l, _) => Ok(l.data_type()),
126126
Expr::Case(case) => {
127127
for (_, then_expr) in &case.when_then_expr {
@@ -365,12 +365,8 @@ impl ExprSchemable for Expr {
365365
window_function,
366366
)
367367
.map(|(_, nullable)| nullable),
368-
Expr::Placeholder(Placeholder { id: _, field }) => {
369-
Ok(field.as_ref().map(|f| f.is_nullable()).unwrap_or(true))
370-
}
371-
Expr::ScalarVariable(_, _) | Expr::TryCast { .. } | Expr::Unnest(_) => {
372-
Ok(true)
373-
}
368+
Expr::ScalarVariable(field, _) => Ok(field.is_nullable()),
369+
Expr::TryCast { .. } | Expr::Unnest(_) | Expr::Placeholder(_) => Ok(true),
374370
Expr::IsNull(_)
375371
| Expr::IsNotNull(_)
376372
| Expr::IsTrue(_)
@@ -503,9 +499,7 @@ impl ExprSchemable for Expr {
503499
Expr::OuterReferenceColumn(field, _) => {
504500
Ok(Arc::clone(field).renamed(&schema_name))
505501
}
506-
Expr::ScalarVariable(ty, _) => {
507-
Ok(Arc::new(Field::new(&schema_name, ty.clone(), true)))
508-
}
502+
Expr::ScalarVariable(field, _) => Ok(Arc::clone(field).renamed(&schema_name)),
509503
Expr::Literal(l, metadata) => Ok(Arc::new(
510504
Field::new(&schema_name, l.data_type(), l.is_null())
511505
.with_field_metadata_opt(metadata.as_ref()),
@@ -1206,4 +1200,21 @@ mod tests {
12061200
Ok(&self.field)
12071201
}
12081202
}
1203+
1204+
#[test]
1205+
fn test_scalar_variable() {
1206+
let mut meta = HashMap::new();
1207+
meta.insert("bar".to_string(), "buzz".to_string());
1208+
let meta = FieldMetadata::from(meta);
1209+
1210+
let field = Field::new("foo", DataType::Int32, true);
1211+
let field = meta.add_to_field(field);
1212+
let field = Arc::new(field);
1213+
1214+
let expr = Expr::ScalarVariable(field, vec!["foo".to_string()]);
1215+
1216+
let schema = MockExprSchema::new();
1217+
1218+
assert_eq!(meta, expr.metadata(&schema).unwrap());
1219+
}
12091220
}

datafusion/expr/src/planner.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ use crate::{
2727
AggregateUDF, Expr, GetFieldAccess, ScalarUDF, SortExpr, TableSource, WindowFrame,
2828
WindowFunctionDefinition, WindowUDF,
2929
};
30-
use arrow::datatypes::{DataType, Field, SchemaRef};
30+
use arrow::datatypes::{DataType, Field, FieldRef, SchemaRef};
31+
use datafusion_common::datatype::DataTypeExt;
3132
use datafusion_common::{
3233
DFSchema, Result, TableReference, config::ConfigOptions,
3334
file_options::file_type::FileType, not_impl_err,
@@ -113,6 +114,17 @@ pub trait ContextProvider {
113114
/// A user defined variable is typically accessed via `@var_name`
114115
fn get_variable_type(&self, variable_names: &[String]) -> Option<DataType>;
115116

117+
/// Return metadata about a system/user-defined variable, if any.
118+
///
119+
/// By default, this wraps [`Self::get_variable_type`] in an Arrow [`Field`]
120+
/// with nullable set to `true` and no metadata. Implementations that can
121+
/// provide richer information (such as nullability or extension metadata)
122+
/// should override this method.
123+
fn get_variable_field(&self, variable_names: &[String]) -> Option<FieldRef> {
124+
self.get_variable_type(variable_names)
125+
.map(|data_type| data_type.into_nullable_field_ref())
126+
}
127+
116128
/// Return overall configuration options
117129
fn options(&self) -> &ConfigOptions;
118130

datafusion/sql/src/expr/identifier.rs

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
// under the License.
1717

1818
use arrow::datatypes::FieldRef;
19+
use datafusion_common::datatype::DataTypeExt;
1920
use datafusion_common::{
2021
assert_or_internal_err, exec_datafusion_err, internal_err, not_impl_err,
2122
plan_datafusion_err, plan_err, Column, DFSchema, Result, Span, TableReference,
@@ -39,13 +40,18 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
3940
if id.value.starts_with('@') {
4041
// TODO: figure out if ScalarVariables should be insensitive.
4142
let var_names = vec![id.value];
42-
let ty = self
43+
let field = self
4344
.context_provider
44-
.get_variable_type(&var_names)
45+
.get_variable_field(&var_names)
46+
.or_else(|| {
47+
self.context_provider
48+
.get_variable_type(&var_names)
49+
.map(|ty| ty.into_nullable_field_ref())
50+
})
4551
.ok_or_else(|| {
4652
plan_datafusion_err!("variable {var_names:?} has no type information")
4753
})?;
48-
Ok(Expr::ScalarVariable(ty, var_names))
54+
Ok(Expr::ScalarVariable(field, var_names))
4955
} else {
5056
// Don't use `col()` here because it will try to
5157
// interpret names with '.' as if they were
@@ -111,13 +117,18 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
111117
.into_iter()
112118
.map(|id| self.ident_normalizer.normalize(id))
113119
.collect();
114-
let ty = self
120+
let field = self
115121
.context_provider
116-
.get_variable_type(&var_names)
122+
.get_variable_field(&var_names)
123+
.or_else(|| {
124+
self.context_provider
125+
.get_variable_type(&var_names)
126+
.map(|ty| ty.into_nullable_field_ref())
127+
})
117128
.ok_or_else(|| {
118129
exec_datafusion_err!("variable {var_names:?} has no type information")
119130
})?;
120-
Ok(Expr::ScalarVariable(ty, var_names))
131+
Ok(Expr::ScalarVariable(field, var_names))
121132
} else {
122133
let ids = ids
123134
.into_iter()

datafusion/sql/src/unparser/expr.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1783,6 +1783,7 @@ mod tests {
17831783
use arrow::array::{LargeListArray, ListArray};
17841784
use arrow::datatypes::{DataType::Int8, Field, Int32Type, Schema, TimeUnit};
17851785
use ast::ObjectName;
1786+
use datafusion_common::datatype::DataTypeExt;
17861787
use datafusion_common::{Spans, TableReference};
17871788
use datafusion_expr::expr::WildcardOptions;
17881789
use datafusion_expr::{
@@ -2169,12 +2170,15 @@ mod tests {
21692170
r#"TRY_CAST(a AS INTEGER UNSIGNED)"#,
21702171
),
21712172
(
2172-
Expr::ScalarVariable(Int8, vec![String::from("@a")]),
2173+
Expr::ScalarVariable(
2174+
Int8.into_nullable_field_ref(),
2175+
vec![String::from("@a")],
2176+
),
21732177
r#"@a"#,
21742178
),
21752179
(
21762180
Expr::ScalarVariable(
2177-
Int8,
2181+
Int8.into_nullable_field_ref(),
21782182
vec![String::from("@root"), String::from("foo")],
21792183
),
21802184
r#"@root.foo"#,

0 commit comments

Comments
 (0)