From e2b183947abc8fe171ba83898102c1245b223db1 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 13 May 2025 19:29:56 +0000 Subject: [PATCH 01/38] Add semantic fuzz testing suite with quickcheck Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/Cargo.toml | 1 + mongosql/src/ast/mod.rs | 2 + mongosql/src/ast/semantic_fuzz_test.rs | 371 +++++++++++++++++++++++++ 3 files changed, 374 insertions(+) create mode 100644 mongosql/src/ast/semantic_fuzz_test.rs diff --git a/mongosql/Cargo.toml b/mongosql/Cargo.toml index db6e19bb8..fa61eba9a 100644 --- a/mongosql/Cargo.toml +++ b/mongosql/Cargo.toml @@ -33,6 +33,7 @@ edit-distance = "2.1.0" usererrordisplay-impl = { path = "../usererrordisplay-impl" } derive-new = "0.5.9" base64 = { workspace = true } +mongodb = { workspace = true, features = ["sync"] } [target.'cfg(unix)'.dev-dependencies] criterion = "0.3" diff --git a/mongosql/src/ast/mod.rs b/mongosql/src/ast/mod.rs index fb66b81df..81a1e1b73 100644 --- a/mongosql/src/ast/mod.rs +++ b/mongosql/src/ast/mod.rs @@ -8,3 +8,5 @@ pub use definitions::*; mod pretty_print_fuzz_test; #[cfg(test)] mod pretty_print_test; +#[cfg(test)] +mod semantic_fuzz_test; diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs new file mode 100644 index 000000000..98437fa7c --- /dev/null +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -0,0 +1,371 @@ +#[cfg(test)] +mod tests { + use crate::{ + ast::{ + definitions::*, + pretty_print::PrettyPrint, + }, + build_catalog_from_catalog_schema, + catalog::Catalog, + json_schema::Schema as JsonSchema, + options::{ExcludeNamespacesOption, SqlOptions}, + translate_sql, SchemaCheckingMode, + }; + use lazy_static::lazy_static; + use mongodb::{bson, sync::Client}; + use quickcheck::{Arbitrary, Gen, TestResult}; + use std::collections::BTreeMap; + + const TEST_DB: &str = "test_db"; + const ALL_TYPES_COLLECTION: &str = "all_types"; + const 
RELATED_DATA_COLLECTION: &str = "related_data"; + + const INT_FIELD: &str = "int_field"; + const LONG_FIELD: &str = "long_field"; + const DOUBLE_FIELD: &str = "double_field"; + const DECIMAL_FIELD: &str = "decimal_field"; + const STRING_FIELD: &str = "string_field"; + const BOOL_FIELD: &str = "bool_field"; + const DATE_FIELD: &str = "date_field"; + const OBJECT_FIELD: &str = "object_field"; + const ARRAY_FIELD: &str = "array_field"; + const NULL_FIELD: &str = "null_field"; + const ID_FIELD: &str = "id"; + const ALL_TYPES_ID_FIELD: &str = "all_types_id"; + const DESCRIPTION_FIELD: &str = "description"; + + fn make_query_semantic(query: &mut Query) { + match query { + Query::Select(select) => make_select_query_semantic(select), + Query::Set(set) => { + make_query_semantic(set.left.as_mut()); + make_query_semantic(set.right.as_mut()); + }, + Query::With(with) => { + make_query_semantic(&mut with.body); + for query in &mut with.queries { + make_query_semantic(&mut query.query); + } + }, + } + } + + fn make_select_query_semantic(query: &mut SelectQuery) { + if query.from_clause.is_some() { + let collection = if bool::arbitrary(&mut Gen::new(0)) { + ALL_TYPES_COLLECTION + } else { + RELATED_DATA_COLLECTION + }; + + query.from_clause = Some(Datasource::Collection(CollectionSource { + database: Some(TEST_DB.to_string()), + collection: collection.to_string(), + alias: None, + })); + } + + if let SelectBody::Standard(exprs) = &mut query.select_clause.body { + for expr in exprs { + match expr { + SelectExpression::Star => {}, + SelectExpression::Substar(substar) => { + substar.datasource = if bool::arbitrary(&mut Gen::new(0)) { + ALL_TYPES_COLLECTION.to_string() + } else { + RELATED_DATA_COLLECTION.to_string() + }; + }, + SelectExpression::Expression(opt_aliased) => { + match opt_aliased { + OptionallyAliasedExpr::Aliased(aliased) => { + make_expression_semantic(&mut aliased.expr); + }, + OptionallyAliasedExpr::Unaliased(expr) => { + make_expression_semantic(expr); + }, + } + 
}, + } + } + } + + if let Some(expr) = &mut query.where_clause { + make_expression_semantic(expr); + } + + if let Some(group_by) = &mut query.group_by_clause { + for key in &mut group_by.keys { + match key { + OptionallyAliasedExpr::Aliased(aliased) => { + make_expression_semantic(&mut aliased.expr); + }, + OptionallyAliasedExpr::Unaliased(expr) => { + make_expression_semantic(expr); + }, + } + } + + for agg in &mut group_by.aggregations { + make_expression_semantic(&mut agg.expr); + } + } + + if let Some(expr) = &mut query.having_clause { + make_expression_semantic(expr); + } + + if let Some(order_by) = &mut query.order_by_clause { + for sort_spec in &mut order_by.sort_specs { + if let SortKey::Simple(expr) = &mut sort_spec.key { + make_expression_semantic(expr); + } + } + } + + if query.limit.is_some() { + query.limit = Some(10); // Use a reasonable limit + } + + if query.offset.is_some() { + query.offset = Some(0); // Use a reasonable offset + } + } + + fn make_expression_semantic(expr: &mut Expression) { + match expr { + Expression::Identifier(_) => { + let collection = if bool::arbitrary(&mut Gen::new(0)) { + ALL_TYPES_COLLECTION + } else { + RELATED_DATA_COLLECTION + }; + + let field = match collection { + ALL_TYPES_COLLECTION => { + let fields = [ + INT_FIELD, LONG_FIELD, DOUBLE_FIELD, DECIMAL_FIELD, + STRING_FIELD, BOOL_FIELD, DATE_FIELD, OBJECT_FIELD, + ARRAY_FIELD, NULL_FIELD + ]; + fields[usize::arbitrary(&mut Gen::new(0)) % fields.len()] + }, + _ => { + let fields = [ID_FIELD, ALL_TYPES_ID_FIELD, DESCRIPTION_FIELD]; + fields[usize::arbitrary(&mut Gen::new(0)) % fields.len()] + } + }; + + *expr = Expression::Identifier(field.to_string()); + }, + Expression::Binary(binary) => { + make_expression_semantic(&mut binary.left); + make_expression_semantic(&mut binary.right); + + binary.op = match usize::arbitrary(&mut Gen::new(0)) % 3 { + 0 => BinaryOp::Add, + 1 => BinaryOp::And, + _ => BinaryOp::Or, + }; + }, + Expression::Unary(unary) => { + 
make_expression_semantic(&mut unary.expr); + unary.op = UnaryOp::Not; // Only use Not as it's definitely supported + }, + Expression::Function(func) => { + if let FunctionArguments::Args(args) = &mut func.args { + for arg in args { + make_expression_semantic(arg); + } + } + + func.function = FunctionName::Count; + }, + Expression::Cast(cast) => { + make_expression_semantic(&mut cast.expr); + + cast.to = Type::Int32; + }, + Expression::Case(case) => { + if let Some(expr) = &mut case.expr { + make_expression_semantic(expr); + } + + for branch in &mut case.when_branch { + make_expression_semantic(&mut branch.when); + make_expression_semantic(&mut branch.then); + } + + if let Some(expr) = &mut case.else_branch { + make_expression_semantic(expr); + } + }, + Expression::Literal(lit) => { + *lit = match usize::arbitrary(&mut Gen::new(0)) % 4 { + 0 => Literal::Integer(42), + 1 => Literal::Double(std::f64::consts::PI), + 2 => Literal::Boolean(true), + _ => Literal::Null, + }; + }, + _ => { + *expr = Expression::Identifier(INT_FIELD.to_string()); + } + } + } + + #[test] + fn prop_semantic_queries_translate() { + fn property(mut query: Query) -> TestResult { + make_query_semantic(&mut query); + + let sql = match query.pretty_print() { + Err(_) => return TestResult::discard(), + Ok(sql) => sql, + }; + + let sql_options = SqlOptions { + schema_checking_mode: SchemaCheckingMode::Strict, + exclude_namespaces: ExcludeNamespacesOption::IncludeNamespaces, + allow_order_by_missing_columns: false, + }; + + let result = translate_sql(TEST_DB, &sql, &TEST_CATALOG, sql_options); + + TestResult::from_bool(result.is_ok()) + } + + quickcheck::QuickCheck::new() + .gen(Gen::new(0)) + .quickcheck(property as fn(Query) -> TestResult); + } + + lazy_static! 
{ + static ref MONGODB_URI: String = format!( + "mongodb://localhost:{}", + std::env::var("MDB_TEST_LOCAL_PORT").unwrap_or_else(|_| "27017".to_string()) + ); + } + + fn get_mongodb_client() -> Option { + Client::with_uri_str(&*MONGODB_URI).ok() + } + + #[test] + fn prop_aggregation_pipelines_run() { + // Skip test if MongoDB connection fails + let _client = match get_mongodb_client() { + Some(client) => client, + None => { + println!("Skipping test: MongoDB connection failed"); + return; + } + }; + + fn property(mut query: Query) -> TestResult { + make_query_semantic(&mut query); + + let client = match get_mongodb_client() { + Some(client) => client, + None => return TestResult::discard(), // Skip if no MongoDB connection + }; + + let sql = match query.pretty_print() { + Err(_) => return TestResult::discard(), + Ok(sql) => sql, + }; + + let sql_options = SqlOptions { + schema_checking_mode: SchemaCheckingMode::Strict, + exclude_namespaces: ExcludeNamespacesOption::IncludeNamespaces, + allow_order_by_missing_columns: false, + }; + + let translation = match translate_sql(TEST_DB, &sql, &TEST_CATALOG, sql_options) { + Ok(t) => t, + Err(_) => return TestResult::discard(), // Skip if translation fails + }; + + let target_db = translation.target_db; + let target_collection = translation.target_collection.unwrap_or_else(|| "unknown".to_string()); + + let pipeline_docs = match translation.pipeline { + bson::Bson::Array(array) => { + let mut docs = Vec::new(); + for value in array { + if let bson::Bson::Document(doc) = value { + docs.push(doc); + } else { + return TestResult::discard(); // Not a valid pipeline + } + } + docs + }, + _ => return TestResult::discard(), // Not a valid pipeline + }; + + let result = client + .database(&target_db) + .collection::(&target_collection) + .aggregate(pipeline_docs) + .run(); + + TestResult::from_bool(result.is_ok()) + } + + quickcheck::QuickCheck::new() + .gen(Gen::new(0)) + .quickcheck(property as fn(Query) -> TestResult); + } + + 
lazy_static! { + static ref TEST_CATALOG: Catalog = { + let mut catalog_schema: BTreeMap> = BTreeMap::new(); + let mut db_schema: BTreeMap = BTreeMap::new(); + + db_schema.insert( + "all_types".to_string(), + serde_json::from_str(r#"{ + "bsonType": "object", + "properties": { + "int_field": { "bsonType": "int" }, + "long_field": { "bsonType": "long" }, + "double_field": { "bsonType": "double" }, + "decimal_field": { "bsonType": "decimal" }, + "string_field": { "bsonType": "string" }, + "bool_field": { "bsonType": "bool" }, + "date_field": { "bsonType": "date" }, + "object_field": { + "bsonType": "object", + "properties": { + "nested_field": { "bsonType": "string" } + } + }, + "array_field": { + "bsonType": "array", + "items": { "bsonType": "int" } + }, + "null_field": { "bsonType": "null" } + }, + "additionalProperties": false + }"#).unwrap(), + ); + + db_schema.insert( + "related_data".to_string(), + serde_json::from_str(r#"{ + "bsonType": "object", + "properties": { + "id": { "bsonType": "int" }, + "all_types_id": { "bsonType": "int" }, + "description": { "bsonType": "string" } + }, + "additionalProperties": false + }"#).unwrap(), + ); + + catalog_schema.insert("test_db".to_string(), db_schema); + build_catalog_from_catalog_schema(catalog_schema).unwrap() + }; + } +} From 9829e29c12678c4699924055b4e52d101af6f523 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 13 May 2025 19:58:27 +0000 Subject: [PATCH 02/38] Update semantic fuzz tests to ensure query validity Co-Authored-By: matthew.chiaravalloti@mongodb.com --- Cargo.lock | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.lock b/Cargo.lock index 23f472730..596d3270c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2464,6 +2464,7 @@ dependencies = [ "lalrpop-util", "lazy_static", "linked-hash-map", + "mongodb", "mongosql-datastructures", "pprof 0.4.5", "quickcheck", From c13d8835c87b6eebb69b85003fe7d99d51cb46e2 Mon Sep 17 00:00:00 2001 From: Devin 
AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 13 May 2025 21:40:03 +0000 Subject: [PATCH 03/38] Enhance make_expression_semantic to ensure type compatibility Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 833 ++++++++++++++++++++++--- 1 file changed, 761 insertions(+), 72 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index 98437fa7c..f6447708b 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -19,73 +19,236 @@ mod tests { const TEST_DB: &str = "test_db"; const ALL_TYPES_COLLECTION: &str = "all_types"; const RELATED_DATA_COLLECTION: &str = "related_data"; + const NUMERIC_COLLECTION: &str = "numeric_data"; + const ARRAY_COLLECTION: &str = "array_data"; - const INT_FIELD: &str = "int_field"; - const LONG_FIELD: &str = "long_field"; - const DOUBLE_FIELD: &str = "double_field"; - const DECIMAL_FIELD: &str = "decimal_field"; - const STRING_FIELD: &str = "string_field"; - const BOOL_FIELD: &str = "bool_field"; - const DATE_FIELD: &str = "date_field"; - const OBJECT_FIELD: &str = "object_field"; - const ARRAY_FIELD: &str = "array_field"; - const NULL_FIELD: &str = "null_field"; - const ID_FIELD: &str = "id"; - const ALL_TYPES_ID_FIELD: &str = "all_types_id"; - const DESCRIPTION_FIELD: &str = "description"; + const INT_FIELD: &str = "int_field"; // Int32 + const LONG_FIELD: &str = "long_field"; // Int64 + const DOUBLE_FIELD: &str = "double_field"; // Double + const DECIMAL_FIELD: &str = "decimal_field"; // Decimal128 + const NEGATIVE_INT_FIELD: &str = "neg_int_field"; // Int32 (negative) + const ZERO_INT_FIELD: &str = "zero_int_field"; // Int32 (zero) + + const STRING_FIELD: &str = "string_field"; // String + const EMPTY_STRING_FIELD: &str = "empty_string_field"; // String (empty) + const DESCRIPTION_FIELD: &str = "description"; // String + + const BOOL_FIELD: &str = "bool_field"; // 
Boolean + const TRUE_FIELD: &str = "true_field"; // Boolean (true) + const FALSE_FIELD: &str = "false_field"; // Boolean (false) + + const DATE_FIELD: &str = "date_field"; // Date + const TIMESTAMP_FIELD: &str = "timestamp_field"; // Timestamp + const TIME_FIELD: &str = "time_field"; // Time + + const OBJECT_FIELD: &str = "object_field"; // Document + const NESTED_OBJECT_FIELD: &str = "nested_object_field"; // Document with nested fields + const ARRAY_FIELD: &str = "array_field"; // Array of Int32 + const STRING_ARRAY_FIELD: &str = "string_array_field"; // Array of String + const MIXED_ARRAY_FIELD: &str = "mixed_array_field"; // Array of mixed types + + const NULL_FIELD: &str = "null_field"; // Null + const OBJECTID_FIELD: &str = "objectid_field"; // ObjectId + const ID_FIELD: &str = "id"; // Int32 (for related_data) + const ALL_TYPES_ID_FIELD: &str = "all_types_id"; // Int32 (foreign key) + + fn field_type(field_name: &str) -> Type { + match field_name { + INT_FIELD | NEGATIVE_INT_FIELD | ZERO_INT_FIELD => Type::Int32, + LONG_FIELD => Type::Int64, + DOUBLE_FIELD => Type::Double, + DECIMAL_FIELD => Type::Decimal128, + + STRING_FIELD | EMPTY_STRING_FIELD | DESCRIPTION_FIELD => Type::String, + + BOOL_FIELD | TRUE_FIELD | FALSE_FIELD => Type::Boolean, + + DATE_FIELD => Type::Date, + TIMESTAMP_FIELD => Type::Timestamp, + TIME_FIELD => Type::Time, + + OBJECT_FIELD | NESTED_OBJECT_FIELD => Type::Document, + ARRAY_FIELD | STRING_ARRAY_FIELD | MIXED_ARRAY_FIELD => Type::Array, + + NULL_FIELD => Type::Null, + OBJECTID_FIELD => Type::ObjectId, + ID_FIELD | ALL_TYPES_ID_FIELD => Type::Int32, + + _ => Type::String, + } + } + + #[allow(dead_code)] + fn is_numeric_field(field_name: &str) -> bool { + matches!(field_type(field_name), + Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128) + } + + #[allow(dead_code)] + fn is_boolean_field(field_name: &str) -> bool { + field_type(field_name) == Type::Boolean + } + + #[allow(dead_code)] + fn is_string_field(field_name: &str) 
-> bool { + field_type(field_name) == Type::String + } fn make_query_semantic(query: &mut Query) { match query { - Query::Select(select) => make_select_query_semantic(select), + Query::Select(select) => { + if select.from_clause.is_none() { + let collection = if bool::arbitrary(&mut Gen::new(0)) { + ALL_TYPES_COLLECTION + } else { + RELATED_DATA_COLLECTION + }; + + select.from_clause = Some(Datasource::Collection(CollectionSource { + database: Some(TEST_DB.to_string()), + collection: collection.to_string(), + alias: None, + })); + } + make_select_query_semantic(select); + }, Query::Set(set) => { make_query_semantic(set.left.as_mut()); make_query_semantic(set.right.as_mut()); + + match set.op { + SetOperator::Union | SetOperator::UnionAll => { + set.op = SetOperator::Union; + }, + } }, Query::With(with) => { + if with.queries.is_empty() { + with.queries.push(NamedQuery { + name: format!("cte_{}", usize::arbitrary(&mut Gen::new(0)) % 100), + query: Query::Select(SelectQuery { + select_clause: SelectClause { + set_quantifier: SetQuantifier::All, + body: SelectBody::Standard(vec![ + SelectExpression::Expression(OptionallyAliasedExpr::Unaliased( + make_numeric_expression() + )) + ]), + }, + from_clause: Some(Datasource::Collection(CollectionSource { + database: Some(TEST_DB.to_string()), + collection: ALL_TYPES_COLLECTION.to_string(), + alias: None, + })), + where_clause: None, + group_by_clause: None, + having_clause: None, + order_by_clause: None, + limit: None, + offset: None, + }), + }); + } + + if let Query::Select(select) = &mut *with.body { + if select.from_clause.is_none() { + let collection = if bool::arbitrary(&mut Gen::new(0)) { + ALL_TYPES_COLLECTION + } else { + RELATED_DATA_COLLECTION + }; + + select.from_clause = Some(Datasource::Collection(CollectionSource { + database: Some(TEST_DB.to_string()), + collection: collection.to_string(), + alias: None, + })); + } + } + make_query_semantic(&mut with.body); + for query in &mut with.queries { 
make_query_semantic(&mut query.query); + + if query.name.is_empty() { + query.name = format!("cte_{}", usize::arbitrary(&mut Gen::new(0)) % 100); + } } }, } } fn make_select_query_semantic(query: &mut SelectQuery) { - if query.from_clause.is_some() { - let collection = if bool::arbitrary(&mut Gen::new(0)) { - ALL_TYPES_COLLECTION - } else { - RELATED_DATA_COLLECTION - }; - - query.from_clause = Some(Datasource::Collection(CollectionSource { - database: Some(TEST_DB.to_string()), - collection: collection.to_string(), - alias: None, - })); - } + let collection = if bool::arbitrary(&mut Gen::new(0)) { + ALL_TYPES_COLLECTION + } else { + RELATED_DATA_COLLECTION + }; + + query.from_clause = Some(Datasource::Collection(CollectionSource { + database: Some(TEST_DB.to_string()), + collection: collection.to_string(), + alias: None, + })); - if let SelectBody::Standard(exprs) = &mut query.select_clause.body { - for expr in exprs { - match expr { - SelectExpression::Star => {}, - SelectExpression::Substar(substar) => { - substar.datasource = if bool::arbitrary(&mut Gen::new(0)) { - ALL_TYPES_COLLECTION.to_string() - } else { - RELATED_DATA_COLLECTION.to_string() - }; - }, - SelectExpression::Expression(opt_aliased) => { - match opt_aliased { - OptionallyAliasedExpr::Aliased(aliased) => { - make_expression_semantic(&mut aliased.expr); - }, - OptionallyAliasedExpr::Unaliased(expr) => { - make_expression_semantic(expr); - }, + match &mut query.select_clause.body { + SelectBody::Standard(exprs) => { + if exprs.is_empty() { + exprs.push(SelectExpression::Expression(OptionallyAliasedExpr::Unaliased( + make_numeric_expression() + ))); + } + + for expr in exprs { + match expr { + SelectExpression::Star => {}, + SelectExpression::Substar(substar) => { + if substar.datasource.is_empty() || + !substar.datasource.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') { + substar.datasource = match usize::arbitrary(&mut Gen::new(0)) % 4 { + 0 => ALL_TYPES_COLLECTION.to_string(), + 1 => 
RELATED_DATA_COLLECTION.to_string(), + 2 => NUMERIC_COLLECTION.to_string(), + _ => ARRAY_COLLECTION.to_string(), + }; + } + }, + SelectExpression::Expression(opt_aliased) => { + match opt_aliased { + OptionallyAliasedExpr::Aliased(aliased) => { + make_expression_semantic(&mut aliased.expr); + }, + OptionallyAliasedExpr::Unaliased(expr) => { + make_expression_semantic(expr); + }, + } + }, + } + } + }, + SelectBody::Values(values) => { + if values.is_empty() { + values.push(SelectValuesExpression::Expression(make_numeric_expression())); + } + + for value in values { + match value { + SelectValuesExpression::Expression(expr) => { + make_expression_semantic(expr); + }, + SelectValuesExpression::Substar(substar) => { + if substar.datasource.is_empty() || + !substar.datasource.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') { + substar.datasource = match usize::arbitrary(&mut Gen::new(0)) % 4 { + 0 => ALL_TYPES_COLLECTION.to_string(), + 1 => RELATED_DATA_COLLECTION.to_string(), + 2 => NUMERIC_COLLECTION.to_string(), + _ => ARRAY_COLLECTION.to_string(), + }; + } } - }, + } } } } @@ -132,27 +295,115 @@ mod tests { } } + // Generate a numeric expression (Int32, Int64, Double, Decimal128) + fn make_numeric_expression() -> Expression { + match usize::arbitrary(&mut Gen::new(0)) % 8 { + 0 => Expression::Identifier(INT_FIELD.to_string()), + 1 => Expression::Identifier(LONG_FIELD.to_string()), + 2 => Expression::Identifier(DOUBLE_FIELD.to_string()), + 3 => Expression::Identifier(DECIMAL_FIELD.to_string()), + 4 => Expression::Literal(Literal::Integer(42)), + 5 => Expression::Literal(Literal::Integer(-10)), + 6 => Expression::Literal(Literal::Long(1000000)), + _ => Expression::Literal(Literal::Double(std::f64::consts::PI)), + } + } + + fn make_boolean_expression() -> Expression { + match usize::arbitrary(&mut Gen::new(0)) % 4 { + 0 => Expression::Identifier(BOOL_FIELD.to_string()), + 1 => Expression::Identifier(TRUE_FIELD.to_string()), + 2 => 
Expression::Identifier(FALSE_FIELD.to_string()), + _ => Expression::Literal(Literal::Boolean(bool::arbitrary(&mut Gen::new(0)))), + } + } + + fn make_string_expression() -> Expression { + match usize::arbitrary(&mut Gen::new(0)) % 3 { + 0 => Expression::Identifier(STRING_FIELD.to_string()), + 1 => Expression::Identifier(EMPTY_STRING_FIELD.to_string()), + _ => Expression::Identifier(DESCRIPTION_FIELD.to_string()), + } + } + + fn make_array_expression() -> Expression { + match usize::arbitrary(&mut Gen::new(0)) % 3 { + 0 => Expression::Identifier(ARRAY_FIELD.to_string()), + 1 => Expression::Identifier(STRING_ARRAY_FIELD.to_string()), + _ => Expression::Identifier(MIXED_ARRAY_FIELD.to_string()), + } + } + + fn make_date_expression() -> Expression { + match usize::arbitrary(&mut Gen::new(0)) % 3 { + 0 => Expression::Identifier(DATE_FIELD.to_string()), + 1 => Expression::Identifier(TIMESTAMP_FIELD.to_string()), + _ => Expression::Identifier(TIME_FIELD.to_string()), + } + } + + #[allow(dead_code)] + fn make_object_expression() -> Expression { + match usize::arbitrary(&mut Gen::new(0)) % 2 { + 0 => Expression::Identifier(OBJECT_FIELD.to_string()), + _ => Expression::Identifier(NESTED_OBJECT_FIELD.to_string()), + } + } + + #[allow(dead_code)] + fn make_comparison_expression() -> Expression { + let left = make_numeric_expression(); + let right = make_numeric_expression(); + + let comp_op = match usize::arbitrary(&mut Gen::new(0)) % 6 { + 0 => ComparisonOp::Eq, + 1 => ComparisonOp::Neq, + 2 => ComparisonOp::Lt, + 3 => ComparisonOp::Lte, + 4 => ComparisonOp::Gt, + _ => ComparisonOp::Gte, + }; + + Expression::Binary(BinaryExpr { + left: Box::new(left), + op: BinaryOp::Comparison(comp_op), + right: Box::new(right), + }) + } + fn make_expression_semantic(expr: &mut Expression) { match expr { Expression::Identifier(_) => { - let collection = if bool::arbitrary(&mut Gen::new(0)) { - ALL_TYPES_COLLECTION - } else { - RELATED_DATA_COLLECTION + let collection = match 
usize::arbitrary(&mut Gen::new(0)) % 4 { + 0 => ALL_TYPES_COLLECTION, + 1 => RELATED_DATA_COLLECTION, + 2 => NUMERIC_COLLECTION, + _ => ARRAY_COLLECTION, }; let field = match collection { ALL_TYPES_COLLECTION => { let fields = [ INT_FIELD, LONG_FIELD, DOUBLE_FIELD, DECIMAL_FIELD, - STRING_FIELD, BOOL_FIELD, DATE_FIELD, OBJECT_FIELD, - ARRAY_FIELD, NULL_FIELD + NEGATIVE_INT_FIELD, ZERO_INT_FIELD, STRING_FIELD, + EMPTY_STRING_FIELD, BOOL_FIELD, TRUE_FIELD, FALSE_FIELD, + DATE_FIELD, TIMESTAMP_FIELD, TIME_FIELD, OBJECT_FIELD, + NESTED_OBJECT_FIELD, ARRAY_FIELD, STRING_ARRAY_FIELD, + MIXED_ARRAY_FIELD, NULL_FIELD, OBJECTID_FIELD ]; fields[usize::arbitrary(&mut Gen::new(0)) % fields.len()] }, - _ => { + RELATED_DATA_COLLECTION => { let fields = [ID_FIELD, ALL_TYPES_ID_FIELD, DESCRIPTION_FIELD]; fields[usize::arbitrary(&mut Gen::new(0)) % fields.len()] + }, + NUMERIC_COLLECTION => { + let fields = ["id", "int_value", "long_value", "double_value", "decimal_value", "calculated_field"]; + fields[usize::arbitrary(&mut Gen::new(0)) % fields.len()] + }, + _ => { // ARRAY_COLLECTION + let fields = ["id", "int_array", "string_array", "object_array", "nested_array"]; + fields[usize::arbitrary(&mut Gen::new(0)) % fields.len()] } }; @@ -162,61 +413,415 @@ mod tests { make_expression_semantic(&mut binary.left); make_expression_semantic(&mut binary.right); - binary.op = match usize::arbitrary(&mut Gen::new(0)) % 3 { + // Generate a more diverse set of binary operations + let op = match usize::arbitrary(&mut Gen::new(0)) % 8 { 0 => BinaryOp::Add, - 1 => BinaryOp::And, - _ => BinaryOp::Or, + 1 => BinaryOp::Sub, + 2 => BinaryOp::Mul, + 3 => BinaryOp::Div, + 4 => BinaryOp::And, + 5 => BinaryOp::Or, + 6 => BinaryOp::Concat, + _ => BinaryOp::Comparison(ComparisonOp::Eq), }; + + binary.op = op; + + match op { + BinaryOp::Add | BinaryOp::Sub | BinaryOp::Mul | BinaryOp::Div => { + // Ensure numeric operands for arithmetic operations + *binary.left = Box::new(make_numeric_expression()); + 
*binary.right = Box::new(make_numeric_expression()); + }, + BinaryOp::And | BinaryOp::Or => { + // Ensure boolean operands for logical operations + *binary.left = Box::new(make_boolean_expression()); + *binary.right = Box::new(make_boolean_expression()); + }, + BinaryOp::Concat => { + *binary.left = Box::new(make_string_expression()); + *binary.right = Box::new(make_string_expression()); + }, + BinaryOp::In | BinaryOp::NotIn => { + *binary.right = Box::new(make_array_expression()); + *binary.left = Box::new(make_numeric_expression()); + }, + BinaryOp::Comparison(comp_op) => { + let left_type = expression_type(&binary.left); + let right_type = expression_type(&binary.right); + + if !are_types_compatible(left_type, right_type) { + match comp_op { + ComparisonOp::Eq | ComparisonOp::Neq => { + *binary.left = Box::new(make_numeric_expression()); + *binary.right = Box::new(make_numeric_expression()); + }, + ComparisonOp::Lt | ComparisonOp::Lte | + ComparisonOp::Gt | ComparisonOp::Gte => { + *binary.left = Box::new(make_numeric_expression()); + *binary.right = Box::new(make_numeric_expression()); + } + } + } + } + } }, Expression::Unary(unary) => { make_expression_semantic(&mut unary.expr); - unary.op = UnaryOp::Not; // Only use Not as it's definitely supported - }, - Expression::Function(func) => { - if let FunctionArguments::Args(args) = &mut func.args { - for arg in args { - make_expression_semantic(arg); - } - } - func.function = FunctionName::Count; + let op = match usize::arbitrary(&mut Gen::new(0)) % 3 { + 0 => UnaryOp::Not, + 1 => UnaryOp::Neg, + _ => UnaryOp::Pos, + }; + + unary.op = op; + + match op { + UnaryOp::Not => { + *unary.expr = Box::new(make_boolean_expression()); + }, + UnaryOp::Neg | UnaryOp::Pos => { + *unary.expr = Box::new(make_numeric_expression()); + }, + } }, Expression::Cast(cast) => { make_expression_semantic(&mut cast.expr); - cast.to = Type::Int32; + let source_type = expression_type(&cast.expr); + + cast.to = match source_type { + 
Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128 => { + match usize::arbitrary(&mut Gen::new(0)) % 4 { + 0 => Type::Int32, + 1 => Type::Int64, + 2 => Type::Double, + _ => Type::Decimal128, + } + }, + Type::String => { + Type::Int32 + }, + Type::Boolean => { + Type::Int32 + }, + _ => { + Type::Int32 + } + }; }, Expression::Case(case) => { if let Some(expr) = &mut case.expr { make_expression_semantic(expr); } + if case.when_branch.is_empty() { + case.when_branch.push(WhenBranch { + when: Box::new(make_boolean_expression()), + then: Box::new(make_numeric_expression()), + }); + } + for branch in &mut case.when_branch { - make_expression_semantic(&mut branch.when); + *branch.when = make_boolean_expression(); + make_expression_semantic(&mut branch.then); } if let Some(expr) = &mut case.else_branch { make_expression_semantic(expr); + + if !case.when_branch.is_empty() { + let then_type = expression_type(&case.when_branch[0].then); + let else_type = expression_type(expr); + + if !are_types_compatible(then_type, else_type) { + match then_type { + Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128 => { + *expr = Box::new(make_numeric_expression()); + }, + Type::Boolean => { + *expr = Box::new(make_boolean_expression()); + }, + Type::String => { + *expr = Box::new(make_string_expression()); + }, + _ => { + *expr = Box::new(make_numeric_expression()); + } + } + } + } } }, Expression::Literal(lit) => { - *lit = match usize::arbitrary(&mut Gen::new(0)) % 4 { + *lit = match usize::arbitrary(&mut Gen::new(0)) % 6 { 0 => Literal::Integer(42), - 1 => Literal::Double(std::f64::consts::PI), - 2 => Literal::Boolean(true), + 1 => Literal::Integer(-10), + 2 => Literal::Long(1000000), + 3 => Literal::Double(std::f64::consts::PI), + 4 => Literal::Boolean(bool::arbitrary(&mut Gen::new(0))), _ => Literal::Null, }; }, + Expression::Array(array) => { + if array.is_empty() { + array.push(make_numeric_expression()); + } + + for elem in &mut *array { + 
make_expression_semantic(elem); + } + + if !array.is_empty() { + let first_type = expression_type(&array[0]); + for elem in array.iter_mut().skip(1) { + let elem_type = expression_type(elem); + if !are_types_compatible(first_type, elem_type) { + match first_type { + Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128 => { + *elem = make_numeric_expression(); + }, + Type::Boolean => { + *elem = make_boolean_expression(); + }, + Type::String => { + *elem = make_string_expression(); + }, + _ => { + *elem = make_numeric_expression(); + } + } + } + } + } + }, + Expression::StringConstructor(_str_constructor) => { + *expr = make_string_expression(); + }, + Expression::Function(func) => { + if let FunctionArguments::Args(args) = &mut func.args { + for arg in &mut *args { + make_expression_semantic(arg); + } + + if !args.is_empty() { + match func.function { + FunctionName::Split | FunctionName::LTrim | FunctionName::RTrim => { + args[0] = make_string_expression(); + }, + FunctionName::Sum | FunctionName::Avg | FunctionName::Min | FunctionName::Max => { + args[0] = make_numeric_expression(); + }, + _ => { + args[0] = make_numeric_expression(); + } + } + } + } + }, + Expression::TypeAssertion(type_assertion) => { + make_expression_semantic(&mut type_assertion.expr); + }, + Expression::Between(between) => { + make_expression_semantic(&mut between.arg); + make_expression_semantic(&mut between.min); + make_expression_semantic(&mut between.max); + + *between.arg = make_numeric_expression(); + *between.min = make_numeric_expression(); + *between.max = make_numeric_expression(); + }, + Expression::Tuple(_) => { + *expr = make_numeric_expression(); + }, + Expression::Trim(trim) => { + make_expression_semantic(&mut trim.arg); + *trim.arg = make_string_expression(); + }, + Expression::Is(is_expr) => { + make_expression_semantic(&mut is_expr.expr); + + match is_expr.target_type { + TypeOrMissing::Missing => { + }, + TypeOrMissing::Number => { + is_expr.expr = 
Box::new(make_numeric_expression()); + }, + TypeOrMissing::Type(typ) => { + match typ { + Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128 => { + is_expr.expr = Box::new(make_numeric_expression()); + }, + Type::String => { + is_expr.expr = Box::new(make_string_expression()); + }, + Type::Boolean => { + is_expr.expr = Box::new(make_boolean_expression()); + }, + Type::Date | Type::Timestamp | Type::Time => { + is_expr.expr = Box::new(make_date_expression()); + }, + Type::Array => { + is_expr.expr = Box::new(make_array_expression()); + }, + _ => { + } + } + } + } + }, + Expression::Extract(extract) => { + make_expression_semantic(&mut extract.arg); + *extract.arg = make_date_expression(); + + extract.extract_spec = match usize::arbitrary(&mut Gen::new(0)) % 7 { + 0 => DatePart::Year, + 1 => DatePart::Month, + 2 => DatePart::Day, + 3 => DatePart::Hour, + 4 => DatePart::Minute, + 5 => DatePart::Second, + _ => DatePart::Millisecond, + }; + }, + Expression::Subpath(subpath) => { + make_expression_semantic(&mut subpath.expr); + + if !matches!(*subpath.expr, Expression::Identifier(_) | Expression::Document(_)) { + *subpath.expr = Expression::Identifier(INT_FIELD.to_string()); + } + + if subpath.subpath.is_empty() || !subpath.subpath.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') { + subpath.subpath = INT_FIELD.to_string(); + } + }, _ => { *expr = Expression::Identifier(INT_FIELD.to_string()); } } } + + + fn expression_type(expr: &Expression) -> Type { + match expr { + Expression::Identifier(name) => field_type(name), + Expression::Literal(lit) => match lit { + Literal::Integer(_) => Type::Int32, + Literal::Long(_) => Type::Int64, + Literal::Double(_) => Type::Double, + Literal::Boolean(_) => Type::Boolean, + Literal::Null => Type::Null, + }, + Expression::Binary(binary) => { + match binary.op { + BinaryOp::Add | BinaryOp::Sub | BinaryOp::Mul | BinaryOp::Div => { + let left_type = expression_type(&binary.left); + let right_type = 
expression_type(&binary.right); + + if left_type == Type::Decimal128 || right_type == Type::Decimal128 { + Type::Decimal128 + } else if left_type == Type::Double || right_type == Type::Double { + Type::Double + } else if left_type == Type::Int64 || right_type == Type::Int64 { + Type::Int64 + } else { + Type::Int32 + } + }, + BinaryOp::And | BinaryOp::Or => Type::Boolean, + BinaryOp::Comparison(_) => Type::Boolean, + _ => Type::String, // Default for other operations + } + }, + Expression::Unary(unary) => { + match unary.op { + UnaryOp::Not => Type::Boolean, + UnaryOp::Neg => expression_type(&unary.expr), + UnaryOp::Pos => expression_type(&unary.expr), + } + }, + Expression::Cast(cast) => cast.to, + _ => Type::String, // Default for other expression types + } + } + + fn are_types_compatible(type1: Type, type2: Type) -> bool { + if type1 == type2 { + return true; + } + + let is_type1_numeric = matches!(type1, Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128); + let is_type2_numeric = matches!(type2, Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128); + + if is_type1_numeric && is_type2_numeric { + return true; + } + + + false + } + + #[allow(dead_code)] + fn ensure_numeric_expression(expr: &mut Expression) { + if !matches!(expression_type(expr), Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128) { + *expr = match usize::arbitrary(&mut Gen::new(0)) % 4 { + 0 => Expression::Identifier(INT_FIELD.to_string()), + 1 => Expression::Identifier(LONG_FIELD.to_string()), + 2 => Expression::Identifier(DOUBLE_FIELD.to_string()), + _ => Expression::Literal(Literal::Integer(42)), + }; + } + } + + #[allow(dead_code)] + fn ensure_boolean_expression(expr: &mut Expression) { + if expression_type(expr) != Type::Boolean { + *expr = match usize::arbitrary(&mut Gen::new(0)) % 3 { + 0 => Expression::Identifier(BOOL_FIELD.to_string()), + 1 => Expression::Literal(Literal::Boolean(bool::arbitrary(&mut Gen::new(0)))), + _ => { + Expression::Binary(BinaryExpr { + 
left: Box::new(Expression::Identifier(INT_FIELD.to_string())), + op: BinaryOp::Comparison(ComparisonOp::Eq), + right: Box::new(Expression::Literal(Literal::Integer(42))), + }) + } + }; + } + } + + fn contains_invalid_select_query(query: &Query) -> bool { + match query { + Query::Select(select) => { + select.from_clause.is_none() && matches!(select.select_clause.body, SelectBody::Values(_)) + }, + Query::Set(set) => { + contains_invalid_select_query(&set.left) || contains_invalid_select_query(&set.right) + }, + Query::With(with) => { + if contains_invalid_select_query(&with.body) { + return true; + } + + for named_query in &with.queries { + if contains_invalid_select_query(&named_query.query) { + return true; + } + } + false + } + } + } #[test] fn prop_semantic_queries_translate() { fn property(mut query: Query) -> TestResult { + if contains_invalid_select_query(&query) { + return TestResult::discard(); + } + make_query_semantic(&mut query); let sql = match query.pretty_print() { @@ -263,6 +868,10 @@ mod tests { }; fn property(mut query: Query) -> TestResult { + if contains_invalid_select_query(&query) { + return TestResult::discard(); + } + make_query_semantic(&mut query); let client = match get_mongodb_client() { @@ -332,20 +941,48 @@ mod tests { "long_field": { "bsonType": "long" }, "double_field": { "bsonType": "double" }, "decimal_field": { "bsonType": "decimal" }, + "neg_int_field": { "bsonType": "int" }, + "zero_int_field": { "bsonType": "int" }, "string_field": { "bsonType": "string" }, + "empty_string_field": { "bsonType": "string" }, "bool_field": { "bsonType": "bool" }, + "true_field": { "bsonType": "bool" }, + "false_field": { "bsonType": "bool" }, "date_field": { "bsonType": "date" }, + "timestamp_field": { "bsonType": "timestamp" }, + "time_field": { "bsonType": "timestamp" }, "object_field": { "bsonType": "object", "properties": { "nested_field": { "bsonType": "string" } } }, + "nested_object_field": { + "bsonType": "object", + "properties": { + 
"nested_int": { "bsonType": "int" }, + "nested_string": { "bsonType": "string" }, + "nested_object": { + "bsonType": "object", + "properties": { + "deeply_nested": { "bsonType": "bool" } + } + } + } + }, "array_field": { "bsonType": "array", "items": { "bsonType": "int" } }, - "null_field": { "bsonType": "null" } + "string_array_field": { + "bsonType": "array", + "items": { "bsonType": "string" } + }, + "mixed_array_field": { + "bsonType": "array" + }, + "null_field": { "bsonType": "null" }, + "objectid_field": { "bsonType": "objectId" } }, "additionalProperties": false }"#).unwrap(), @@ -364,6 +1001,58 @@ mod tests { }"#).unwrap(), ); + db_schema.insert( + "numeric_data".to_string(), + serde_json::from_str(r#"{ + "bsonType": "object", + "properties": { + "id": { "bsonType": "int" }, + "int_value": { "bsonType": "int" }, + "long_value": { "bsonType": "long" }, + "double_value": { "bsonType": "double" }, + "decimal_value": { "bsonType": "decimal" }, + "calculated_field": { "bsonType": "double" } + }, + "additionalProperties": false + }"#).unwrap(), + ); + + db_schema.insert( + "array_data".to_string(), + serde_json::from_str(r#"{ + "bsonType": "object", + "properties": { + "id": { "bsonType": "int" }, + "int_array": { + "bsonType": "array", + "items": { "bsonType": "int" } + }, + "string_array": { + "bsonType": "array", + "items": { "bsonType": "string" } + }, + "object_array": { + "bsonType": "array", + "items": { + "bsonType": "object", + "properties": { + "key": { "bsonType": "string" }, + "value": { "bsonType": "int" } + } + } + }, + "nested_array": { + "bsonType": "array", + "items": { + "bsonType": "array", + "items": { "bsonType": "int" } + } + } + }, + "additionalProperties": false + }"#).unwrap(), + ); + catalog_schema.insert("test_db".to_string(), db_schema); build_catalog_from_catalog_schema(catalog_schema).unwrap() }; From ba3d9c1265b1e4ecf83d3dde3a5384a79f7b476e Mon Sep 17 00:00:00 2001 From: Devin AI 
<158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 13 May 2025 21:46:57 +0000 Subject: [PATCH 04/38] Fix Box::new() type errors in semantic_fuzz_test.rs Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 28 +++++++++++++------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index f6447708b..6c6e1f9b1 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -430,21 +430,21 @@ mod tests { match op { BinaryOp::Add | BinaryOp::Sub | BinaryOp::Mul | BinaryOp::Div => { // Ensure numeric operands for arithmetic operations - *binary.left = Box::new(make_numeric_expression()); - *binary.right = Box::new(make_numeric_expression()); + *binary.left = make_numeric_expression(); + *binary.right = make_numeric_expression(); }, BinaryOp::And | BinaryOp::Or => { // Ensure boolean operands for logical operations - *binary.left = Box::new(make_boolean_expression()); - *binary.right = Box::new(make_boolean_expression()); + *binary.left = make_boolean_expression(); + *binary.right = make_boolean_expression(); }, BinaryOp::Concat => { - *binary.left = Box::new(make_string_expression()); - *binary.right = Box::new(make_string_expression()); + *binary.left = make_string_expression(); + *binary.right = make_string_expression(); }, BinaryOp::In | BinaryOp::NotIn => { - *binary.right = Box::new(make_array_expression()); - *binary.left = Box::new(make_numeric_expression()); + *binary.right = make_array_expression(); + *binary.left = make_numeric_expression(); }, BinaryOp::Comparison(comp_op) => { let left_type = expression_type(&binary.left); @@ -453,13 +453,13 @@ mod tests { if !are_types_compatible(left_type, right_type) { match comp_op { ComparisonOp::Eq | ComparisonOp::Neq => { - *binary.left = Box::new(make_numeric_expression()); - *binary.right = 
Box::new(make_numeric_expression()); + *binary.left = make_numeric_expression(); + *binary.right = make_numeric_expression(); }, ComparisonOp::Lt | ComparisonOp::Lte | ComparisonOp::Gt | ComparisonOp::Gte => { - *binary.left = Box::new(make_numeric_expression()); - *binary.right = Box::new(make_numeric_expression()); + *binary.left = make_numeric_expression(); + *binary.right = make_numeric_expression(); } } } @@ -479,10 +479,10 @@ mod tests { match op { UnaryOp::Not => { - *unary.expr = Box::new(make_boolean_expression()); + *unary.expr = make_boolean_expression(); }, UnaryOp::Neg | UnaryOp::Pos => { - *unary.expr = Box::new(make_numeric_expression()); + *unary.expr = make_numeric_expression(); }, } }, From 7849d65201d7ed742734b4741268f970a255620e Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 14 May 2025 18:17:44 +0000 Subject: [PATCH 05/38] Update ORDER BY clause handling to replace SortKey::Positional with SortKey::Simple Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 47 ++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index 6c6e1f9b1..79ac12ad5 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -279,9 +279,52 @@ mod tests { } if let Some(order_by) = &mut query.order_by_clause { + let mut valid_identifiers = Vec::new(); + match &query.select_clause.body { + SelectBody::Standard(exprs) => { + for expr in exprs { + match expr { + SelectExpression::Expression(opt_aliased) => { + match opt_aliased { + OptionallyAliasedExpr::Aliased(aliased) => { + valid_identifiers.push(Expression::Identifier(aliased.alias.clone())); + }, + OptionallyAliasedExpr::Unaliased(expr) => { + if let Expression::Identifier(id) = expr { + valid_identifiers.push(Expression::Identifier(id.clone())); + } + }, + } + 
}, + _ => {}, // Skip Star and Substar expressions + } + } + }, + SelectBody::Values(values) => { + for value in values { + if let SelectValuesExpression::Expression(expr) = value { + if let Expression::Identifier(id) = expr { + valid_identifiers.push(Expression::Identifier(id.clone())); + } + } + } + } + } + + if valid_identifiers.is_empty() { + valid_identifiers.push(Expression::Identifier("_id".to_string())); + } + for sort_spec in &mut order_by.sort_specs { - if let SortKey::Simple(expr) = &mut sort_spec.key { - make_expression_semantic(expr); + match &mut sort_spec.key { + SortKey::Simple(expr) => { + make_expression_semantic(expr); + }, + SortKey::Positional(pos) => { + let idx = (*pos as usize) % valid_identifiers.len(); + let identifier = valid_identifiers[idx].clone(); + sort_spec.key = SortKey::Simple(identifier); + } } } } From 00da8531a400ed408d0df22b52dbda313c70f5e8 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 14 May 2025 19:54:01 +0000 Subject: [PATCH 06/38] Implement SemanticVisitor for semantic fuzz testing Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 328 ++++++++++++++++++++++++- 1 file changed, 318 insertions(+), 10 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index 79ac12ad5..124f2ae68 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -3,6 +3,7 @@ mod tests { use crate::{ ast::{ definitions::*, + definitions::visitor::Visitor, pretty_print::PrettyPrint, }, build_catalog_from_catalog_schema, @@ -94,6 +95,7 @@ mod tests { field_type(field_name) == Type::String } + #[allow(dead_code)] fn make_query_semantic(query: &mut Query) { match query { Query::Select(select) => { @@ -179,6 +181,7 @@ mod tests { } } + #[allow(dead_code)] fn make_select_query_semantic(query: &mut SelectQuery) { let collection = if bool::arbitrary(&mut 
Gen::new(0)) { ALL_TYPES_COLLECTION @@ -302,10 +305,8 @@ mod tests { }, SelectBody::Values(values) => { for value in values { - if let SelectValuesExpression::Expression(expr) = value { - if let Expression::Identifier(id) = expr { - valid_identifiers.push(Expression::Identifier(id.clone())); - } + if let SelectValuesExpression::Expression(Expression::Identifier(id)) = value { + valid_identifiers.push(Expression::Identifier(id.clone())); } } } @@ -414,6 +415,7 @@ mod tests { }) } + #[allow(dead_code)] fn make_expression_semantic(expr: &mut Expression) { match expr { Expression::Identifier(_) => { @@ -775,18 +777,80 @@ mod tests { }, BinaryOp::And | BinaryOp::Or => Type::Boolean, BinaryOp::Comparison(_) => Type::Boolean, - _ => Type::String, // Default for other operations + BinaryOp::In | BinaryOp::NotIn => Type::Boolean, + BinaryOp::Concat => Type::String, } }, Expression::Unary(unary) => { match unary.op { UnaryOp::Not => Type::Boolean, - UnaryOp::Neg => expression_type(&unary.expr), - UnaryOp::Pos => expression_type(&unary.expr), + UnaryOp::Neg | UnaryOp::Pos => expression_type(&unary.expr), } }, Expression::Cast(cast) => cast.to, - _ => Type::String, // Default for other expression types + Expression::Between(_) => Type::Boolean, + Expression::Case(case) => case.else_branch.as_ref() + .map_or_else( + || case.when_branch.first().map_or(Type::Null, |wb| expression_type(&wb.then)), + |else_expr| expression_type(else_expr) + ), + Expression::Function(func) => match func.function { + // Aggregation functions + FunctionName::Sum | FunctionName::Avg | FunctionName::Min | FunctionName::Max => Type::Double, + FunctionName::Count => Type::Int64, + FunctionName::AddToSet | FunctionName::AddToArray => Type::Array, + FunctionName::First | FunctionName::Last => Type::String, // Depends on the argument type + + // String functions + FunctionName::Substring => Type::String, + FunctionName::Lower | FunctionName::Upper => Type::String, + FunctionName::LTrim | FunctionName::RTrim 
=> Type::String, + FunctionName::Replace => Type::String, + + // Date functions + FunctionName::DateAdd | FunctionName::DateDiff | FunctionName::DateTrunc => Type::Date, + FunctionName::CurrentTimestamp => Type::Date, + FunctionName::Year | FunctionName::Month | FunctionName::Week => Type::Int32, + FunctionName::DayOfWeek | FunctionName::DayOfMonth | FunctionName::DayOfYear => Type::Int32, + FunctionName::Hour | FunctionName::Minute | FunctionName::Second | FunctionName::Millisecond => Type::Int32, + + // Numeric functions + FunctionName::Abs | FunctionName::Ceil | FunctionName::Floor | FunctionName::Round => Type::Double, + FunctionName::Log | FunctionName::Log10 | FunctionName::Sqrt => Type::Double, + FunctionName::Pow => Type::Double, + FunctionName::Mod => Type::Int32, + + // Other functions + FunctionName::Coalesce => Type::String, // Depends on arguments + FunctionName::NullIf => Type::String, // Depends on arguments + FunctionName::Size => Type::Int32, + + _ => Type::String, // Default for other functions + }, + Expression::Array(_) => Type::Array, + Expression::Document(_) => Type::Document, + Expression::Access(access) => { + let parent_type = expression_type(&access.expr); + if parent_type == Type::Document { + Type::String // Field access from a document, assuming String for simplicity + } else if parent_type == Type::Array { + Type::Int32 // Array access assumes numeric index + } else { + Type::String // Default case + } + }, + Expression::Subquery(_) => Type::Array, + Expression::Exists(_) => Type::Boolean, + Expression::SubqueryComparison(_) => Type::Boolean, + Expression::Subpath(_) => Type::String, + Expression::Is(_) => Type::Boolean, + Expression::Like(_) => Type::Boolean, + Expression::StringConstructor(_) => Type::String, + Expression::Tuple(_) => Type::Array, + Expression::TypeAssertion(type_assertion) => type_assertion.target_type, + Expression::Trim(_) => Type::String, + Expression::DateFunction(_) => Type::Date, + Expression::Extract(_) => 
Type::Int32, } } @@ -806,6 +870,248 @@ mod tests { false } + struct SemanticVisitor { + target_type: Option, + } + + impl SemanticVisitor { + fn visit_select_query(&mut self, node: SelectQuery) -> SelectQuery { + let select_clause = node.select_clause.walk(self); + + let from_clause = Some(Datasource::Collection(CollectionSource { + database: Some(TEST_DB.to_string()), + collection: ALL_TYPES_COLLECTION.to_string(), + alias: None, + })); + + let old_target_type = self.target_type; + self.target_type = Some(Type::Boolean); + let where_clause = node.where_clause.map(|wc| wc.walk(self)); + self.target_type = old_target_type; + + let group_by_clause = node.group_by_clause.map(|gbc| gbc.walk(self)); + + let old_target_type = self.target_type; + self.target_type = Some(Type::Boolean); + let having_clause = node.having_clause.map(|hc| hc.walk(self)); + self.target_type = old_target_type; + + let order_by_clause = node.order_by_clause.map(|obc| obc.walk(self)); + + let limit = node.limit.map(|_| 10); + let offset = node.offset.map(|_| 0); + + SelectQuery { + select_clause, + from_clause, + where_clause, + group_by_clause, + having_clause, + order_by_clause, + limit, + offset, + } + } + + fn determine_child_target_type(&self, node: &Expression) -> Option { + match node { + Expression::Binary(binary) => { + match binary.op { + BinaryOp::Add | BinaryOp::Sub | BinaryOp::Mul | BinaryOp::Div => { + Some(Type::Double) + }, + BinaryOp::And | BinaryOp::Or => { + Some(Type::Boolean) + }, + BinaryOp::Comparison(_) => { + None + }, + BinaryOp::In | BinaryOp::NotIn => { + None + }, + BinaryOp::Concat => { + Some(Type::String) + }, + } + }, + Expression::Unary(unary) => { + match unary.op { + UnaryOp::Not => Some(Type::Boolean), + UnaryOp::Neg | UnaryOp::Pos => Some(Type::Double), + } + }, + Expression::Function(func) => { + match func.function { + // Aggregation functions + FunctionName::Sum | FunctionName::Avg | FunctionName::Min | FunctionName::Max => Some(Type::Double), + 
FunctionName::Count => None, // Count can take any type + FunctionName::AddToSet | FunctionName::AddToArray => None, // Can add any type to arrays + + // String functions + FunctionName::Substring | FunctionName::Lower | FunctionName::Upper => Some(Type::String), + FunctionName::LTrim | FunctionName::RTrim => Some(Type::String), + FunctionName::Replace => Some(Type::String), + + // Date functions + FunctionName::DateAdd | FunctionName::DateDiff | FunctionName::DateTrunc => Some(Type::Date), + FunctionName::CurrentTimestamp => Some(Type::Date), + + // Numeric functions + FunctionName::Abs | FunctionName::Ceil | FunctionName::Floor | FunctionName::Round => Some(Type::Double), + FunctionName::Log | FunctionName::Log10 | FunctionName::Sqrt => Some(Type::Double), + FunctionName::Pow => Some(Type::Double), + + // Other functions + FunctionName::Coalesce | FunctionName::NullIf => None, + FunctionName::Size => None, + + _ => None, // Default for other functions + } + }, + Expression::Case(_case) => { + Some(Type::Boolean) + }, + Expression::Between(_) => { + None + }, + Expression::Is(_) | Expression::Like(_) | Expression::Exists(_) => { + None + }, + _ => None, // Default for other expression types + } + } + + #[allow(dead_code)] + fn visit_expression(&mut self, node: Expression) -> Expression { + if self.target_type.is_none() { + return node.walk(self); + } + + let node_type = expression_type(&node); + let target_type = self.target_type.unwrap(); + + let node = if node_type != target_type && !are_types_compatible(node_type, target_type) { + match target_type { + Type::Boolean => make_boolean_expression(), + Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128 => make_numeric_expression(), + Type::String => make_string_expression(), + Type::Array => make_array_expression(), + Type::Date | Type::Datetime | Type::Timestamp => make_date_expression(), + Type::Document => make_object_expression(), + _ => node, // Keep the original node for other types + } + } else { + 
node + }; + + let child_target_type = self.determine_child_target_type(&node); + + let old_target_type = self.target_type; + self.target_type = child_target_type; + let new_node = node.walk(self); + self.target_type = old_target_type; + + new_node + } + } + + impl visitor::Visitor for SemanticVisitor { + fn visit_query(&mut self, node: Query) -> Query { + match node { + Query::Select(select_query) => { + Query::Select(self.visit_select_query(select_query)) + }, + Query::Set(set_query) => { + let old_target_type = self.target_type; + self.target_type = None; // Clear target_type when walking set operations + let walked = Query::Set(set_query.walk(self)); + self.target_type = old_target_type; + walked + }, + Query::With(with_query) => { + let old_target_type = self.target_type; + self.target_type = None; // Clear target_type when walking with queries + let walked = Query::With(with_query.walk(self)); + self.target_type = old_target_type; + walked + }, + } + } + + #[allow(dead_code)] + fn visit_expression(&mut self, node: Expression) -> Expression { + let mut expr = node.clone(); + self.visit_expression_custom(&mut expr); + expr + } + + fn visit_sort_key(&mut self, node: SortKey) -> SortKey { + match node { + SortKey::Positional(_) => { + SortKey::Simple(Expression::Identifier(INT_FIELD.to_string())) + }, + _ => node.walk(self), + } + } + } + + impl SemanticVisitor { + fn visit_expression_custom(&mut self, node: &mut Expression) { + if self.target_type.is_none() { + return; + } + + let node_type = expression_type(node); + let target_type = self.target_type.unwrap(); + + if node_type != target_type && !are_types_compatible(node_type, target_type) { + *node = match target_type { + Type::Boolean => make_boolean_expression(), + Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128 => make_numeric_expression(), + Type::String => make_string_expression(), + Type::Array => make_array_expression(), + Type::Date | Type::Datetime | Type::Timestamp => 
make_date_expression(), + Type::Document => make_object_expression(), + _ => node.clone(), // Keep the original node for other types + }; + } + + let child_target_type = self.determine_child_target_type(node); + + let old_target_type = self.target_type; + self.target_type = child_target_type; + + match node { + Expression::Binary(bin) => { + self.visit_expression_custom(&mut bin.left); + self.visit_expression_custom(&mut bin.right); + }, + Expression::Unary(un) => { + self.visit_expression_custom(&mut un.expr); + }, + Expression::Function(func) => { + if let FunctionArguments::Args(args) = &mut func.args { + for arg in args { + self.visit_expression_custom(arg); + } + } + }, + Expression::Case(case) => { + for branch in &mut case.when_branch { + self.visit_expression_custom(&mut branch.when); + self.visit_expression_custom(&mut branch.then); + } + if let Some(else_branch) = &mut case.else_branch { + self.visit_expression_custom(else_branch); + } + }, + _ => {} + } + + self.target_type = old_target_type; + } + } + #[allow(dead_code)] fn ensure_numeric_expression(expr: &mut Expression) { if !matches!(expression_type(expr), Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128) { @@ -865,7 +1171,8 @@ mod tests { return TestResult::discard(); } - make_query_semantic(&mut query); + let mut v = SemanticVisitor { target_type: None }; + query = v.visit_query(query); let sql = match query.pretty_print() { Err(_) => return TestResult::discard(), @@ -915,7 +1222,8 @@ mod tests { return TestResult::discard(); } - make_query_semantic(&mut query); + let mut v = SemanticVisitor { target_type: None }; + query = v.visit_query(query); let client = match get_mongodb_client() { Some(client) => client, From fce2a9db1dc144023f36820ce207a046c0719c6a Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 14 May 2025 20:28:33 +0000 Subject: [PATCH 07/38] Remove unused functions marked as #[allow(dead_code)] Co-Authored-By: 
matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 587 +------------------------ 1 file changed, 1 insertion(+), 586 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index 124f2ae68..d5d015f06 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -79,265 +79,11 @@ mod tests { } } - #[allow(dead_code)] - fn is_numeric_field(field_name: &str) -> bool { - matches!(field_type(field_name), - Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128) - } - - #[allow(dead_code)] - fn is_boolean_field(field_name: &str) -> bool { - field_type(field_name) == Type::Boolean - } - - #[allow(dead_code)] - fn is_string_field(field_name: &str) -> bool { - field_type(field_name) == Type::String - } - #[allow(dead_code)] - fn make_query_semantic(query: &mut Query) { - match query { - Query::Select(select) => { - if select.from_clause.is_none() { - let collection = if bool::arbitrary(&mut Gen::new(0)) { - ALL_TYPES_COLLECTION - } else { - RELATED_DATA_COLLECTION - }; - - select.from_clause = Some(Datasource::Collection(CollectionSource { - database: Some(TEST_DB.to_string()), - collection: collection.to_string(), - alias: None, - })); - } - make_select_query_semantic(select); - }, - Query::Set(set) => { - make_query_semantic(set.left.as_mut()); - make_query_semantic(set.right.as_mut()); - - match set.op { - SetOperator::Union | SetOperator::UnionAll => { - set.op = SetOperator::Union; - }, - } - }, - Query::With(with) => { - if with.queries.is_empty() { - with.queries.push(NamedQuery { - name: format!("cte_{}", usize::arbitrary(&mut Gen::new(0)) % 100), - query: Query::Select(SelectQuery { - select_clause: SelectClause { - set_quantifier: SetQuantifier::All, - body: SelectBody::Standard(vec![ - SelectExpression::Expression(OptionallyAliasedExpr::Unaliased( - make_numeric_expression() - )) - ]), - }, - from_clause: 
Some(Datasource::Collection(CollectionSource { - database: Some(TEST_DB.to_string()), - collection: ALL_TYPES_COLLECTION.to_string(), - alias: None, - })), - where_clause: None, - group_by_clause: None, - having_clause: None, - order_by_clause: None, - limit: None, - offset: None, - }), - }); - } - - if let Query::Select(select) = &mut *with.body { - if select.from_clause.is_none() { - let collection = if bool::arbitrary(&mut Gen::new(0)) { - ALL_TYPES_COLLECTION - } else { - RELATED_DATA_COLLECTION - }; - - select.from_clause = Some(Datasource::Collection(CollectionSource { - database: Some(TEST_DB.to_string()), - collection: collection.to_string(), - alias: None, - })); - } - } - - make_query_semantic(&mut with.body); - - for query in &mut with.queries { - make_query_semantic(&mut query.query); - - if query.name.is_empty() { - query.name = format!("cte_{}", usize::arbitrary(&mut Gen::new(0)) % 100); - } - } - }, - } - } - #[allow(dead_code)] - fn make_select_query_semantic(query: &mut SelectQuery) { - let collection = if bool::arbitrary(&mut Gen::new(0)) { - ALL_TYPES_COLLECTION - } else { - RELATED_DATA_COLLECTION - }; - - query.from_clause = Some(Datasource::Collection(CollectionSource { - database: Some(TEST_DB.to_string()), - collection: collection.to_string(), - alias: None, - })); - match &mut query.select_clause.body { - SelectBody::Standard(exprs) => { - if exprs.is_empty() { - exprs.push(SelectExpression::Expression(OptionallyAliasedExpr::Unaliased( - make_numeric_expression() - ))); - } - - for expr in exprs { - match expr { - SelectExpression::Star => {}, - SelectExpression::Substar(substar) => { - if substar.datasource.is_empty() || - !substar.datasource.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') { - substar.datasource = match usize::arbitrary(&mut Gen::new(0)) % 4 { - 0 => ALL_TYPES_COLLECTION.to_string(), - 1 => RELATED_DATA_COLLECTION.to_string(), - 2 => NUMERIC_COLLECTION.to_string(), - _ => ARRAY_COLLECTION.to_string(), - }; - } - }, 
- SelectExpression::Expression(opt_aliased) => { - match opt_aliased { - OptionallyAliasedExpr::Aliased(aliased) => { - make_expression_semantic(&mut aliased.expr); - }, - OptionallyAliasedExpr::Unaliased(expr) => { - make_expression_semantic(expr); - }, - } - }, - } - } - }, - SelectBody::Values(values) => { - if values.is_empty() { - values.push(SelectValuesExpression::Expression(make_numeric_expression())); - } - - for value in values { - match value { - SelectValuesExpression::Expression(expr) => { - make_expression_semantic(expr); - }, - SelectValuesExpression::Substar(substar) => { - if substar.datasource.is_empty() || - !substar.datasource.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') { - substar.datasource = match usize::arbitrary(&mut Gen::new(0)) % 4 { - 0 => ALL_TYPES_COLLECTION.to_string(), - 1 => RELATED_DATA_COLLECTION.to_string(), - 2 => NUMERIC_COLLECTION.to_string(), - _ => ARRAY_COLLECTION.to_string(), - }; - } - } - } - } - } - } - - if let Some(expr) = &mut query.where_clause { - make_expression_semantic(expr); - } - if let Some(group_by) = &mut query.group_by_clause { - for key in &mut group_by.keys { - match key { - OptionallyAliasedExpr::Aliased(aliased) => { - make_expression_semantic(&mut aliased.expr); - }, - OptionallyAliasedExpr::Unaliased(expr) => { - make_expression_semantic(expr); - }, - } - } - - for agg in &mut group_by.aggregations { - make_expression_semantic(&mut agg.expr); - } - } - if let Some(expr) = &mut query.having_clause { - make_expression_semantic(expr); - } - - if let Some(order_by) = &mut query.order_by_clause { - let mut valid_identifiers = Vec::new(); - match &query.select_clause.body { - SelectBody::Standard(exprs) => { - for expr in exprs { - match expr { - SelectExpression::Expression(opt_aliased) => { - match opt_aliased { - OptionallyAliasedExpr::Aliased(aliased) => { - valid_identifiers.push(Expression::Identifier(aliased.alias.clone())); - }, - OptionallyAliasedExpr::Unaliased(expr) => { - if let 
Expression::Identifier(id) = expr { - valid_identifiers.push(Expression::Identifier(id.clone())); - } - }, - } - }, - _ => {}, // Skip Star and Substar expressions - } - } - }, - SelectBody::Values(values) => { - for value in values { - if let SelectValuesExpression::Expression(Expression::Identifier(id)) = value { - valid_identifiers.push(Expression::Identifier(id.clone())); - } - } - } - } - - if valid_identifiers.is_empty() { - valid_identifiers.push(Expression::Identifier("_id".to_string())); - } - - for sort_spec in &mut order_by.sort_specs { - match &mut sort_spec.key { - SortKey::Simple(expr) => { - make_expression_semantic(expr); - }, - SortKey::Positional(pos) => { - let idx = (*pos as usize) % valid_identifiers.len(); - let identifier = valid_identifiers[idx].clone(); - sort_spec.key = SortKey::Simple(identifier); - } - } - } - } - - if query.limit.is_some() { - query.limit = Some(10); // Use a reasonable limit - } - - if query.offset.is_some() { - query.offset = Some(0); // Use a reasonable offset - } - } // Generate a numeric expression (Int32, Int64, Double, Decimal128) fn make_numeric_expression() -> Expression { @@ -415,338 +161,7 @@ mod tests { }) } - #[allow(dead_code)] - fn make_expression_semantic(expr: &mut Expression) { - match expr { - Expression::Identifier(_) => { - let collection = match usize::arbitrary(&mut Gen::new(0)) % 4 { - 0 => ALL_TYPES_COLLECTION, - 1 => RELATED_DATA_COLLECTION, - 2 => NUMERIC_COLLECTION, - _ => ARRAY_COLLECTION, - }; - - let field = match collection { - ALL_TYPES_COLLECTION => { - let fields = [ - INT_FIELD, LONG_FIELD, DOUBLE_FIELD, DECIMAL_FIELD, - NEGATIVE_INT_FIELD, ZERO_INT_FIELD, STRING_FIELD, - EMPTY_STRING_FIELD, BOOL_FIELD, TRUE_FIELD, FALSE_FIELD, - DATE_FIELD, TIMESTAMP_FIELD, TIME_FIELD, OBJECT_FIELD, - NESTED_OBJECT_FIELD, ARRAY_FIELD, STRING_ARRAY_FIELD, - MIXED_ARRAY_FIELD, NULL_FIELD, OBJECTID_FIELD - ]; - fields[usize::arbitrary(&mut Gen::new(0)) % fields.len()] - }, - RELATED_DATA_COLLECTION => { 
- let fields = [ID_FIELD, ALL_TYPES_ID_FIELD, DESCRIPTION_FIELD]; - fields[usize::arbitrary(&mut Gen::new(0)) % fields.len()] - }, - NUMERIC_COLLECTION => { - let fields = ["id", "int_value", "long_value", "double_value", "decimal_value", "calculated_field"]; - fields[usize::arbitrary(&mut Gen::new(0)) % fields.len()] - }, - _ => { // ARRAY_COLLECTION - let fields = ["id", "int_array", "string_array", "object_array", "nested_array"]; - fields[usize::arbitrary(&mut Gen::new(0)) % fields.len()] - } - }; - - *expr = Expression::Identifier(field.to_string()); - }, - Expression::Binary(binary) => { - make_expression_semantic(&mut binary.left); - make_expression_semantic(&mut binary.right); - - // Generate a more diverse set of binary operations - let op = match usize::arbitrary(&mut Gen::new(0)) % 8 { - 0 => BinaryOp::Add, - 1 => BinaryOp::Sub, - 2 => BinaryOp::Mul, - 3 => BinaryOp::Div, - 4 => BinaryOp::And, - 5 => BinaryOp::Or, - 6 => BinaryOp::Concat, - _ => BinaryOp::Comparison(ComparisonOp::Eq), - }; - - binary.op = op; - - match op { - BinaryOp::Add | BinaryOp::Sub | BinaryOp::Mul | BinaryOp::Div => { - // Ensure numeric operands for arithmetic operations - *binary.left = make_numeric_expression(); - *binary.right = make_numeric_expression(); - }, - BinaryOp::And | BinaryOp::Or => { - // Ensure boolean operands for logical operations - *binary.left = make_boolean_expression(); - *binary.right = make_boolean_expression(); - }, - BinaryOp::Concat => { - *binary.left = make_string_expression(); - *binary.right = make_string_expression(); - }, - BinaryOp::In | BinaryOp::NotIn => { - *binary.right = make_array_expression(); - *binary.left = make_numeric_expression(); - }, - BinaryOp::Comparison(comp_op) => { - let left_type = expression_type(&binary.left); - let right_type = expression_type(&binary.right); - - if !are_types_compatible(left_type, right_type) { - match comp_op { - ComparisonOp::Eq | ComparisonOp::Neq => { - *binary.left = make_numeric_expression(); - 
*binary.right = make_numeric_expression(); - }, - ComparisonOp::Lt | ComparisonOp::Lte | - ComparisonOp::Gt | ComparisonOp::Gte => { - *binary.left = make_numeric_expression(); - *binary.right = make_numeric_expression(); - } - } - } - } - } - }, - Expression::Unary(unary) => { - make_expression_semantic(&mut unary.expr); - - let op = match usize::arbitrary(&mut Gen::new(0)) % 3 { - 0 => UnaryOp::Not, - 1 => UnaryOp::Neg, - _ => UnaryOp::Pos, - }; - - unary.op = op; - - match op { - UnaryOp::Not => { - *unary.expr = make_boolean_expression(); - }, - UnaryOp::Neg | UnaryOp::Pos => { - *unary.expr = make_numeric_expression(); - }, - } - }, - Expression::Cast(cast) => { - make_expression_semantic(&mut cast.expr); - - let source_type = expression_type(&cast.expr); - - cast.to = match source_type { - Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128 => { - match usize::arbitrary(&mut Gen::new(0)) % 4 { - 0 => Type::Int32, - 1 => Type::Int64, - 2 => Type::Double, - _ => Type::Decimal128, - } - }, - Type::String => { - Type::Int32 - }, - Type::Boolean => { - Type::Int32 - }, - _ => { - Type::Int32 - } - }; - }, - Expression::Case(case) => { - if let Some(expr) = &mut case.expr { - make_expression_semantic(expr); - } - - if case.when_branch.is_empty() { - case.when_branch.push(WhenBranch { - when: Box::new(make_boolean_expression()), - then: Box::new(make_numeric_expression()), - }); - } - - for branch in &mut case.when_branch { - *branch.when = make_boolean_expression(); - - make_expression_semantic(&mut branch.then); - } - - if let Some(expr) = &mut case.else_branch { - make_expression_semantic(expr); - - if !case.when_branch.is_empty() { - let then_type = expression_type(&case.when_branch[0].then); - let else_type = expression_type(expr); - - if !are_types_compatible(then_type, else_type) { - match then_type { - Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128 => { - *expr = Box::new(make_numeric_expression()); - }, - Type::Boolean => { - *expr = 
Box::new(make_boolean_expression()); - }, - Type::String => { - *expr = Box::new(make_string_expression()); - }, - _ => { - *expr = Box::new(make_numeric_expression()); - } - } - } - } - } - }, - Expression::Literal(lit) => { - *lit = match usize::arbitrary(&mut Gen::new(0)) % 6 { - 0 => Literal::Integer(42), - 1 => Literal::Integer(-10), - 2 => Literal::Long(1000000), - 3 => Literal::Double(std::f64::consts::PI), - 4 => Literal::Boolean(bool::arbitrary(&mut Gen::new(0))), - _ => Literal::Null, - }; - }, - Expression::Array(array) => { - if array.is_empty() { - array.push(make_numeric_expression()); - } - - for elem in &mut *array { - make_expression_semantic(elem); - } - - if !array.is_empty() { - let first_type = expression_type(&array[0]); - for elem in array.iter_mut().skip(1) { - let elem_type = expression_type(elem); - if !are_types_compatible(first_type, elem_type) { - match first_type { - Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128 => { - *elem = make_numeric_expression(); - }, - Type::Boolean => { - *elem = make_boolean_expression(); - }, - Type::String => { - *elem = make_string_expression(); - }, - _ => { - *elem = make_numeric_expression(); - } - } - } - } - } - }, - Expression::StringConstructor(_str_constructor) => { - *expr = make_string_expression(); - }, - Expression::Function(func) => { - if let FunctionArguments::Args(args) = &mut func.args { - for arg in &mut *args { - make_expression_semantic(arg); - } - - if !args.is_empty() { - match func.function { - FunctionName::Split | FunctionName::LTrim | FunctionName::RTrim => { - args[0] = make_string_expression(); - }, - FunctionName::Sum | FunctionName::Avg | FunctionName::Min | FunctionName::Max => { - args[0] = make_numeric_expression(); - }, - _ => { - args[0] = make_numeric_expression(); - } - } - } - } - }, - Expression::TypeAssertion(type_assertion) => { - make_expression_semantic(&mut type_assertion.expr); - }, - Expression::Between(between) => { - 
make_expression_semantic(&mut between.arg); - make_expression_semantic(&mut between.min); - make_expression_semantic(&mut between.max); - - *between.arg = make_numeric_expression(); - *between.min = make_numeric_expression(); - *between.max = make_numeric_expression(); - }, - Expression::Tuple(_) => { - *expr = make_numeric_expression(); - }, - Expression::Trim(trim) => { - make_expression_semantic(&mut trim.arg); - *trim.arg = make_string_expression(); - }, - Expression::Is(is_expr) => { - make_expression_semantic(&mut is_expr.expr); - - match is_expr.target_type { - TypeOrMissing::Missing => { - }, - TypeOrMissing::Number => { - is_expr.expr = Box::new(make_numeric_expression()); - }, - TypeOrMissing::Type(typ) => { - match typ { - Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128 => { - is_expr.expr = Box::new(make_numeric_expression()); - }, - Type::String => { - is_expr.expr = Box::new(make_string_expression()); - }, - Type::Boolean => { - is_expr.expr = Box::new(make_boolean_expression()); - }, - Type::Date | Type::Timestamp | Type::Time => { - is_expr.expr = Box::new(make_date_expression()); - }, - Type::Array => { - is_expr.expr = Box::new(make_array_expression()); - }, - _ => { - } - } - } - } - }, - Expression::Extract(extract) => { - make_expression_semantic(&mut extract.arg); - *extract.arg = make_date_expression(); - - extract.extract_spec = match usize::arbitrary(&mut Gen::new(0)) % 7 { - 0 => DatePart::Year, - 1 => DatePart::Month, - 2 => DatePart::Day, - 3 => DatePart::Hour, - 4 => DatePart::Minute, - 5 => DatePart::Second, - _ => DatePart::Millisecond, - }; - }, - Expression::Subpath(subpath) => { - make_expression_semantic(&mut subpath.expr); - - if !matches!(*subpath.expr, Expression::Identifier(_) | Expression::Document(_)) { - *subpath.expr = Expression::Identifier(INT_FIELD.to_string()); - } - - if subpath.subpath.is_empty() || !subpath.subpath.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') { - subpath.subpath = 
INT_FIELD.to_string(); - } - }, - _ => { - *expr = Expression::Identifier(INT_FIELD.to_string()); - } - } - } + fn expression_type(expr: &Expression) -> Type { From 0e1c8a58b1900790bf402b630b0f82f5de8972e6 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 14 May 2025 21:15:43 +0000 Subject: [PATCH 08/38] Improve semantic visitor implementation: replace wildcard with explicit Expression variants and ensure Tuple expressions are properly handled Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 289 +++++++++++++++++++------ 1 file changed, 227 insertions(+), 62 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index d5d015f06..d48e12b39 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -87,7 +87,7 @@ mod tests { // Generate a numeric expression (Int32, Int64, Double, Decimal128) fn make_numeric_expression() -> Expression { - match usize::arbitrary(&mut Gen::new(0)) % 8 { + match usize::arbitrary(&mut Gen::new(0)) % 9 { 0 => Expression::Identifier(INT_FIELD.to_string()), 1 => Expression::Identifier(LONG_FIELD.to_string()), 2 => Expression::Identifier(DOUBLE_FIELD.to_string()), @@ -95,48 +95,177 @@ mod tests { 4 => Expression::Literal(Literal::Integer(42)), 5 => Expression::Literal(Literal::Integer(-10)), 6 => Expression::Literal(Literal::Long(1000000)), - _ => Expression::Literal(Literal::Double(std::f64::consts::PI)), + 7 => Expression::Literal(Literal::Double(std::f64::consts::PI)), + 8 => { + let left = make_numeric_expression(); + let right = make_numeric_expression(); + let op = match usize::arbitrary(&mut Gen::new(0)) % 4 { + 0 => BinaryOp::Add, + 1 => BinaryOp::Sub, + 2 => BinaryOp::Mul, + _ => BinaryOp::Div, + }; + Expression::Binary(BinaryExpr { + left: Box::new(left), + op, + right: Box::new(right), + }) + } } } fn make_boolean_expression() -> 
Expression { - match usize::arbitrary(&mut Gen::new(0)) % 4 { + match usize::arbitrary(&mut Gen::new(0)) % 7 { 0 => Expression::Identifier(BOOL_FIELD.to_string()), 1 => Expression::Identifier(TRUE_FIELD.to_string()), 2 => Expression::Identifier(FALSE_FIELD.to_string()), - _ => Expression::Literal(Literal::Boolean(bool::arbitrary(&mut Gen::new(0)))), + 3 => Expression::Literal(Literal::Boolean(bool::arbitrary(&mut Gen::new(0)))), + 4 => { + let left = make_numeric_expression(); + let right = make_numeric_expression(); + let comp_op = match usize::arbitrary(&mut Gen::new(0)) % 6 { + 0 => ComparisonOp::Eq, + 1 => ComparisonOp::Neq, + 2 => ComparisonOp::Lt, + 3 => ComparisonOp::Lte, + 4 => ComparisonOp::Gt, + _ => ComparisonOp::Gte, + }; + Expression::Binary(BinaryExpr { + left: Box::new(left), + op: BinaryOp::Comparison(comp_op), + right: Box::new(right), + }) + }, + 5 => { + let left = make_boolean_expression(); + let right = make_boolean_expression(); + let op = if bool::arbitrary(&mut Gen::new(0)) { + BinaryOp::And + } else { + BinaryOp::Or + }; + Expression::Binary(BinaryExpr { + left: Box::new(left), + op, + right: Box::new(right), + }) + }, + _ => { + let expr = make_boolean_expression(); + Expression::Unary(UnaryExpr { + op: UnaryOp::Not, + expr: Box::new(expr), + }) + } } } fn make_string_expression() -> Expression { - match usize::arbitrary(&mut Gen::new(0)) % 3 { + match usize::arbitrary(&mut Gen::new(0)) % 5 { 0 => Expression::Identifier(STRING_FIELD.to_string()), 1 => Expression::Identifier(EMPTY_STRING_FIELD.to_string()), - _ => Expression::Identifier(DESCRIPTION_FIELD.to_string()), + 2 => Expression::Identifier(DESCRIPTION_FIELD.to_string()), + 3 => { + // String concatenation + let left = make_string_expression(); + let right = make_string_expression(); + Expression::Binary(BinaryExpr { + left: Box::new(left), + op: BinaryOp::Concat, + right: Box::new(right), + }) + }, + _ => { + // String constructor + let parts = vec![ + 
StringConstructorPart::String("Hello ".to_string()), + StringConstructorPart::Expression(Box::new(make_string_expression())), + StringConstructorPart::String("!".to_string()), + ]; + Expression::StringConstructor(StringConstructor { parts }) + } } } fn make_array_expression() -> Expression { - match usize::arbitrary(&mut Gen::new(0)) % 3 { + match usize::arbitrary(&mut Gen::new(0)) % 5 { 0 => Expression::Identifier(ARRAY_FIELD.to_string()), 1 => Expression::Identifier(STRING_ARRAY_FIELD.to_string()), - _ => Expression::Identifier(MIXED_ARRAY_FIELD.to_string()), + 2 => Expression::Identifier(MIXED_ARRAY_FIELD.to_string()), + 3 => { + let mut elements = Vec::new(); + let size = (usize::arbitrary(&mut Gen::new(0)) % 3) + 1; // 1-3 elements + for _ in 0..size { + elements.push(make_numeric_expression()); + } + Expression::Array(elements) + }, + _ => { + let mut elements = Vec::new(); + let size = (usize::arbitrary(&mut Gen::new(0)) % 3) + 1; // 1-3 elements + for _ in 0..size { + elements.push(make_string_expression()); + } + Expression::Array(elements) + } } } fn make_date_expression() -> Expression { - match usize::arbitrary(&mut Gen::new(0)) % 3 { + match usize::arbitrary(&mut Gen::new(0)) % 5 { 0 => Expression::Identifier(DATE_FIELD.to_string()), 1 => Expression::Identifier(TIMESTAMP_FIELD.to_string()), - _ => Expression::Identifier(TIME_FIELD.to_string()), + 2 => Expression::Identifier(TIME_FIELD.to_string()), + 3 => { + // Date function + Expression::DateFunction(DateFunction { + function: match usize::arbitrary(&mut Gen::new(0)) % 3 { + 0 => DateFunctionName::CurrentDate, + 1 => DateFunctionName::CurrentTimestamp, + _ => DateFunctionName::CurrentTime, + } + }) + }, + _ => { + Expression::Extract(ExtractExpr { + extract_spec: match usize::arbitrary(&mut Gen::new(0)) % 7 { + 0 => DatePart::Year, + 1 => DatePart::Month, + 2 => DatePart::Day, + 3 => DatePart::Hour, + 4 => DatePart::Minute, + 5 => DatePart::Second, + _ => DatePart::Millisecond, + }, + arg: 
Box::new(Expression::Identifier(DATE_FIELD.to_string())), + }) + } } } - #[allow(dead_code)] fn make_object_expression() -> Expression { - match usize::arbitrary(&mut Gen::new(0)) % 2 { + match usize::arbitrary(&mut Gen::new(0)) % 4 { 0 => Expression::Identifier(OBJECT_FIELD.to_string()), - _ => Expression::Identifier(NESTED_OBJECT_FIELD.to_string()), + 1 => Expression::Identifier(NESTED_OBJECT_FIELD.to_string()), + 2 => { + let mut fields = BTreeMap::new(); + fields.insert("id".to_string(), make_numeric_expression()); + fields.insert("name".to_string(), make_string_expression()); + fields.insert("active".to_string(), make_boolean_expression()); + Expression::Document(fields) + }, + _ => { + let mut fields = BTreeMap::new(); + fields.insert("id".to_string(), make_numeric_expression()); + + let mut nested_fields = BTreeMap::new(); + nested_fields.insert("nested_id".to_string(), make_numeric_expression()); + nested_fields.insert("nested_name".to_string(), make_string_expression()); + + fields.insert("metadata".to_string(), Expression::Document(nested_fields)); + Expression::Document(fields) + } } } @@ -392,42 +521,24 @@ mod tests { Expression::Is(_) | Expression::Like(_) | Expression::Exists(_) => { None }, - _ => None, // Default for other expression types + Expression::Array(_) => None, + Expression::Document(_) => None, + Expression::Access(_) => None, + Expression::Subquery(_) => None, + Expression::SubqueryComparison(_) => None, + Expression::Subpath(_) => None, + Expression::StringConstructor(_) => None, + Expression::TypeAssertion(_) => None, + Expression::Trim(_) => None, + Expression::DateFunction(_) => None, + Expression::Extract(_) => None, + Expression::Identifier(_) => None, + Expression::Literal(_) => None, + Expression::Tuple(_) => None, } } - #[allow(dead_code)] - fn visit_expression(&mut self, node: Expression) -> Expression { - if self.target_type.is_none() { - return node.walk(self); - } - - let node_type = expression_type(&node); - let target_type 
= self.target_type.unwrap(); - - let node = if node_type != target_type && !are_types_compatible(node_type, target_type) { - match target_type { - Type::Boolean => make_boolean_expression(), - Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128 => make_numeric_expression(), - Type::String => make_string_expression(), - Type::Array => make_array_expression(), - Type::Date | Type::Datetime | Type::Timestamp => make_date_expression(), - Type::Document => make_object_expression(), - _ => node, // Keep the original node for other types - } - } else { - node - }; - - let child_target_type = self.determine_child_target_type(&node); - - let old_target_type = self.target_type; - self.target_type = child_target_type; - let new_node = node.walk(self); - self.target_type = old_target_type; - - new_node - } + } impl visitor::Visitor for SemanticVisitor { @@ -453,7 +564,6 @@ mod tests { } } - #[allow(dead_code)] fn visit_expression(&mut self, node: Expression) -> Expression { let mut expr = node.clone(); self.visit_expression_custom(&mut expr); @@ -472,23 +582,25 @@ mod tests { impl SemanticVisitor { fn visit_expression_custom(&mut self, node: &mut Expression) { - if self.target_type.is_none() { + if let Expression::Tuple(_) = node { + *node = make_numeric_expression(); return; } - let node_type = expression_type(node); - let target_type = self.target_type.unwrap(); - - if node_type != target_type && !are_types_compatible(node_type, target_type) { - *node = match target_type { - Type::Boolean => make_boolean_expression(), - Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128 => make_numeric_expression(), - Type::String => make_string_expression(), - Type::Array => make_array_expression(), - Type::Date | Type::Datetime | Type::Timestamp => make_date_expression(), - Type::Document => make_object_expression(), - _ => node.clone(), // Keep the original node for other types - }; + if let Some(target_type) = self.target_type { + let node_type = expression_type(node); + 
+ if node_type != target_type && !are_types_compatible(node_type, target_type) { + *node = match target_type { + Type::Boolean => make_boolean_expression(), + Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128 => make_numeric_expression(), + Type::String => make_string_expression(), + Type::Array => make_array_expression(), + Type::Date | Type::Datetime | Type::Timestamp => make_date_expression(), + Type::Document => make_object_expression(), + _ => node.clone(), // Keep the original node for other types + }; + } } let child_target_type = self.determine_child_target_type(node); @@ -520,7 +632,60 @@ mod tests { self.visit_expression_custom(else_branch); } }, - _ => {} + Expression::Array(array) => { + for elem in array { + self.visit_expression_custom(elem); + } + }, + Expression::Document(doc) => { + for (_, value) in doc { + self.visit_expression_custom(value); + } + }, + Expression::Access(access) => { + self.visit_expression_custom(&mut access.expr); + }, + Expression::Subquery(subquery) => { + }, + Expression::Exists(exists) => { + }, + Expression::SubqueryComparison(comp) => { + }, + Expression::Subpath(subpath) => { + self.visit_expression_custom(&mut subpath.expr); + }, + Expression::Is(is_expr) => { + self.visit_expression_custom(&mut is_expr.expr); + }, + Expression::Like(like) => { + self.visit_expression_custom(&mut like.expr); + self.visit_expression_custom(&mut like.pattern); + }, + Expression::StringConstructor(str_constructor) => { + for part in &mut str_constructor.parts { + if let StringConstructorPart::Expression(expr) = part { + self.visit_expression_custom(expr); + } + } + }, + Expression::TypeAssertion(type_assertion) => { + self.visit_expression_custom(&mut type_assertion.expr); + }, + Expression::Between(between) => { + self.visit_expression_custom(&mut between.arg); + self.visit_expression_custom(&mut between.min); + self.visit_expression_custom(&mut between.max); + }, + Expression::Trim(trim) => { + 
self.visit_expression_custom(&mut trim.arg); + }, + Expression::DateFunction(date_func) => { + }, + Expression::Extract(extract) => { + self.visit_expression_custom(&mut extract.arg); + }, + Expression::Identifier(_) | Expression::Literal(_) => { + }, } self.target_type = old_target_type; From a5d2a6b5383fa607588af099a0e2cef62a1881cb Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 14 May 2025 21:21:20 +0000 Subject: [PATCH 09/38] Fix type mismatches in semantic_fuzz_test.rs Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 72 ++++++++++++-------------- 1 file changed, 34 insertions(+), 38 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index d48e12b39..bf402eca6 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -96,7 +96,7 @@ mod tests { 5 => Expression::Literal(Literal::Integer(-10)), 6 => Expression::Literal(Literal::Long(1000000)), 7 => Expression::Literal(Literal::Double(std::f64::consts::PI)), - 8 => { + _ => { let left = make_numeric_expression(); let right = make_numeric_expression(); let op = match usize::arbitrary(&mut Gen::new(0)) % 4 { @@ -177,13 +177,8 @@ mod tests { }) }, _ => { - // String constructor - let parts = vec![ - StringConstructorPart::String("Hello ".to_string()), - StringConstructorPart::Expression(Box::new(make_string_expression())), - StringConstructorPart::String("!".to_string()), - ]; - Expression::StringConstructor(StringConstructor { parts }) + // String constructor - simplified to use String directly + Expression::StringConstructor(format!("Hello {}!", STRING_FIELD)) } } } @@ -219,24 +214,29 @@ mod tests { 2 => Expression::Identifier(TIME_FIELD.to_string()), 3 => { // Date function - Expression::DateFunction(DateFunction { + Expression::DateFunction(DateFunctionExpr { function: match usize::arbitrary(&mut Gen::new(0)) 
% 3 { - 0 => DateFunctionName::CurrentDate, - 1 => DateFunctionName::CurrentTimestamp, - _ => DateFunctionName::CurrentTime, - } + 0 => DateFunctionName::Add, + 1 => DateFunctionName::Diff, + _ => DateFunctionName::Trunc, + }, + part: DatePart::Day, + args: vec![ + Box::new(Expression::Identifier(DATE_FIELD.to_string())), + Box::new(Expression::Literal(Literal::Integer(1))), + ] }) }, _ => { Expression::Extract(ExtractExpr { - extract_spec: match usize::arbitrary(&mut Gen::new(0)) % 7 { + field: match usize::arbitrary(&mut Gen::new(0)) % 6 { 0 => DatePart::Year, - 1 => DatePart::Month, - 2 => DatePart::Day, - 3 => DatePart::Hour, - 4 => DatePart::Minute, - 5 => DatePart::Second, - _ => DatePart::Millisecond, + 1 => DatePart::Quarter, + 2 => DatePart::Month, + 3 => DatePart::Week, + 4 => DatePart::Day, + 5 => DatePart::Hour, + _ => DatePart::Minute, }, arg: Box::new(Expression::Identifier(DATE_FIELD.to_string())), }) @@ -249,21 +249,21 @@ mod tests { 0 => Expression::Identifier(OBJECT_FIELD.to_string()), 1 => Expression::Identifier(NESTED_OBJECT_FIELD.to_string()), 2 => { - let mut fields = BTreeMap::new(); - fields.insert("id".to_string(), make_numeric_expression()); - fields.insert("name".to_string(), make_string_expression()); - fields.insert("active".to_string(), make_boolean_expression()); + let mut fields = Vec::new(); + fields.push(DocumentPair { key: "id".to_string(), value: make_numeric_expression() }); + fields.push(DocumentPair { key: "name".to_string(), value: make_string_expression() }); + fields.push(DocumentPair { key: "active".to_string(), value: make_boolean_expression() }); Expression::Document(fields) }, _ => { - let mut fields = BTreeMap::new(); - fields.insert("id".to_string(), make_numeric_expression()); + let mut fields = Vec::new(); + fields.push(DocumentPair { key: "id".to_string(), value: make_numeric_expression() }); - let mut nested_fields = BTreeMap::new(); - nested_fields.insert("nested_id".to_string(), make_numeric_expression()); - 
nested_fields.insert("nested_name".to_string(), make_string_expression()); + let mut nested_fields = Vec::new(); + nested_fields.push(DocumentPair { key: "nested_id".to_string(), value: make_numeric_expression() }); + nested_fields.push(DocumentPair { key: "nested_name".to_string(), value: make_string_expression() }); - fields.insert("metadata".to_string(), Expression::Document(nested_fields)); + fields.push(DocumentPair { key: "metadata".to_string(), value: Expression::Document(nested_fields) }); Expression::Document(fields) } } @@ -535,6 +535,7 @@ mod tests { Expression::Identifier(_) => None, Expression::Literal(_) => None, Expression::Tuple(_) => None, + Expression::Cast(_) => None, } } @@ -638,8 +639,8 @@ mod tests { } }, Expression::Document(doc) => { - for (_, value) in doc { - self.visit_expression_custom(value); + for pair in doc { + self.visit_expression_custom(&mut pair.value); } }, Expression::Access(access) => { @@ -661,12 +662,7 @@ mod tests { self.visit_expression_custom(&mut like.expr); self.visit_expression_custom(&mut like.pattern); }, - Expression::StringConstructor(str_constructor) => { - for part in &mut str_constructor.parts { - if let StringConstructorPart::Expression(expr) = part { - self.visit_expression_custom(expr); - } - } + Expression::StringConstructor(_) => { }, Expression::TypeAssertion(type_assertion) => { self.visit_expression_custom(&mut type_assertion.expr); From 04dac59e71fd8b92bd1050758e72ecb9465a33bf Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 14 May 2025 21:22:43 +0000 Subject: [PATCH 10/38] Fix field names and add missing pattern matches in semantic_fuzz_test.rs Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index bf402eca6..33afc81a0 100644 --- 
a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -220,16 +220,16 @@ mod tests { 1 => DateFunctionName::Diff, _ => DateFunctionName::Trunc, }, - part: DatePart::Day, + date_part: DatePart::Day, args: vec![ - Box::new(Expression::Identifier(DATE_FIELD.to_string())), - Box::new(Expression::Literal(Literal::Integer(1))), + Expression::Identifier(DATE_FIELD.to_string()), + Expression::Literal(Literal::Integer(1)), ] }) }, _ => { Expression::Extract(ExtractExpr { - field: match usize::arbitrary(&mut Gen::new(0)) % 6 { + extract_spec: match usize::arbitrary(&mut Gen::new(0)) % 6 { 0 => DatePart::Year, 1 => DatePart::Quarter, 2 => DatePart::Month, @@ -682,6 +682,14 @@ mod tests { }, Expression::Identifier(_) | Expression::Literal(_) => { }, + Expression::Cast(cast) => { + self.visit_expression_custom(&mut cast.expr); + }, + Expression::Tuple(tuple) => { + for expr in tuple { + self.visit_expression_custom(expr); + } + }, } self.target_type = old_target_type; From c92ac866e267edcc9f610d2ea1cff15ed1d8a2c0 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Wed, 14 May 2025 21:25:06 +0000 Subject: [PATCH 11/38] Fix clippy warnings in semantic_fuzz_test.rs Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 35 +++++++++++++++----------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index 33afc81a0..aeb732b96 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -19,8 +19,11 @@ mod tests { const TEST_DB: &str = "test_db"; const ALL_TYPES_COLLECTION: &str = "all_types"; + #[allow(dead_code)] const RELATED_DATA_COLLECTION: &str = "related_data"; + #[allow(dead_code)] const NUMERIC_COLLECTION: &str = "numeric_data"; + #[allow(dead_code)] const ARRAY_COLLECTION: &str = "array_data"; const INT_FIELD: &str 
= "int_field"; // Int32 @@ -249,21 +252,23 @@ mod tests { 0 => Expression::Identifier(OBJECT_FIELD.to_string()), 1 => Expression::Identifier(NESTED_OBJECT_FIELD.to_string()), 2 => { - let mut fields = Vec::new(); - fields.push(DocumentPair { key: "id".to_string(), value: make_numeric_expression() }); - fields.push(DocumentPair { key: "name".to_string(), value: make_string_expression() }); - fields.push(DocumentPair { key: "active".to_string(), value: make_boolean_expression() }); + let fields = vec![ + DocumentPair { key: "id".to_string(), value: make_numeric_expression() }, + DocumentPair { key: "name".to_string(), value: make_string_expression() }, + DocumentPair { key: "active".to_string(), value: make_boolean_expression() } + ]; Expression::Document(fields) }, _ => { - let mut fields = Vec::new(); - fields.push(DocumentPair { key: "id".to_string(), value: make_numeric_expression() }); - - let mut nested_fields = Vec::new(); - nested_fields.push(DocumentPair { key: "nested_id".to_string(), value: make_numeric_expression() }); - nested_fields.push(DocumentPair { key: "nested_name".to_string(), value: make_string_expression() }); + let nested_fields = vec![ + DocumentPair { key: "nested_id".to_string(), value: make_numeric_expression() }, + DocumentPair { key: "nested_name".to_string(), value: make_string_expression() } + ]; - fields.push(DocumentPair { key: "metadata".to_string(), value: Expression::Document(nested_fields) }); + let fields = vec![ + DocumentPair { key: "id".to_string(), value: make_numeric_expression() }, + DocumentPair { key: "metadata".to_string(), value: Expression::Document(nested_fields) } + ]; Expression::Document(fields) } } @@ -646,11 +651,11 @@ mod tests { Expression::Access(access) => { self.visit_expression_custom(&mut access.expr); }, - Expression::Subquery(subquery) => { + Expression::Subquery(_) => { }, - Expression::Exists(exists) => { + Expression::Exists(_) => { }, - Expression::SubqueryComparison(comp) => { + 
Expression::SubqueryComparison(_) => { }, Expression::Subpath(subpath) => { self.visit_expression_custom(&mut subpath.expr); @@ -675,7 +680,7 @@ mod tests { Expression::Trim(trim) => { self.visit_expression_custom(&mut trim.arg); }, - Expression::DateFunction(date_func) => { + Expression::DateFunction(_) => { }, Expression::Extract(extract) => { self.visit_expression_custom(&mut extract.arg); From 47738a2ed21932aa1fcf43eb50dd236d173ff344 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 15 May 2025 18:09:37 +0000 Subject: [PATCH 12/38] Remove dead code functions ensure_numeric_expression and ensure_boolean_expression Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 626 +++++++++++++------------ 1 file changed, 320 insertions(+), 306 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index aeb732b96..d9d5d9b4c 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -1,11 +1,7 @@ #[cfg(test)] mod tests { use crate::{ - ast::{ - definitions::*, - definitions::visitor::Visitor, - pretty_print::PrettyPrint, - }, + ast::{definitions::visitor::Visitor, definitions::*, pretty_print::PrettyPrint}, build_catalog_from_catalog_schema, catalog::Catalog, json_schema::Schema as JsonSchema, @@ -26,67 +22,61 @@ mod tests { #[allow(dead_code)] const ARRAY_COLLECTION: &str = "array_data"; - const INT_FIELD: &str = "int_field"; // Int32 - const LONG_FIELD: &str = "long_field"; // Int64 - const DOUBLE_FIELD: &str = "double_field"; // Double - const DECIMAL_FIELD: &str = "decimal_field"; // Decimal128 - const NEGATIVE_INT_FIELD: &str = "neg_int_field"; // Int32 (negative) - const ZERO_INT_FIELD: &str = "zero_int_field"; // Int32 (zero) - - const STRING_FIELD: &str = "string_field"; // String + const INT_FIELD: &str = "int_field"; // Int32 + const LONG_FIELD: &str = "long_field"; // 
Int64 + const DOUBLE_FIELD: &str = "double_field"; // Double + const DECIMAL_FIELD: &str = "decimal_field"; // Decimal128 + const NEGATIVE_INT_FIELD: &str = "neg_int_field"; // Int32 (negative) + const ZERO_INT_FIELD: &str = "zero_int_field"; // Int32 (zero) + + const STRING_FIELD: &str = "string_field"; // String const EMPTY_STRING_FIELD: &str = "empty_string_field"; // String (empty) const DESCRIPTION_FIELD: &str = "description"; // String - - const BOOL_FIELD: &str = "bool_field"; // Boolean - const TRUE_FIELD: &str = "true_field"; // Boolean (true) - const FALSE_FIELD: &str = "false_field"; // Boolean (false) - - const DATE_FIELD: &str = "date_field"; // Date + + const BOOL_FIELD: &str = "bool_field"; // Boolean + const TRUE_FIELD: &str = "true_field"; // Boolean (true) + const FALSE_FIELD: &str = "false_field"; // Boolean (false) + + const DATE_FIELD: &str = "date_field"; // Date const TIMESTAMP_FIELD: &str = "timestamp_field"; // Timestamp - const TIME_FIELD: &str = "time_field"; // Time - - const OBJECT_FIELD: &str = "object_field"; // Document + const TIME_FIELD: &str = "time_field"; // Time + + const OBJECT_FIELD: &str = "object_field"; // Document const NESTED_OBJECT_FIELD: &str = "nested_object_field"; // Document with nested fields - const ARRAY_FIELD: &str = "array_field"; // Array of Int32 + const ARRAY_FIELD: &str = "array_field"; // Array of Int32 const STRING_ARRAY_FIELD: &str = "string_array_field"; // Array of String - const MIXED_ARRAY_FIELD: &str = "mixed_array_field"; // Array of mixed types - - const NULL_FIELD: &str = "null_field"; // Null + const MIXED_ARRAY_FIELD: &str = "mixed_array_field"; // Array of mixed types + + const NULL_FIELD: &str = "null_field"; // Null const OBJECTID_FIELD: &str = "objectid_field"; // ObjectId - const ID_FIELD: &str = "id"; // Int32 (for related_data) + const ID_FIELD: &str = "id"; // Int32 (for related_data) const ALL_TYPES_ID_FIELD: &str = "all_types_id"; // Int32 (foreign key) - + fn field_type(field_name: 
&str) -> Type { match field_name { INT_FIELD | NEGATIVE_INT_FIELD | ZERO_INT_FIELD => Type::Int32, LONG_FIELD => Type::Int64, DOUBLE_FIELD => Type::Double, DECIMAL_FIELD => Type::Decimal128, - + STRING_FIELD | EMPTY_STRING_FIELD | DESCRIPTION_FIELD => Type::String, - + BOOL_FIELD | TRUE_FIELD | FALSE_FIELD => Type::Boolean, - + DATE_FIELD => Type::Date, TIMESTAMP_FIELD => Type::Timestamp, TIME_FIELD => Type::Time, - + OBJECT_FIELD | NESTED_OBJECT_FIELD => Type::Document, ARRAY_FIELD | STRING_ARRAY_FIELD | MIXED_ARRAY_FIELD => Type::Array, - + NULL_FIELD => Type::Null, OBJECTID_FIELD => Type::ObjectId, ID_FIELD | ALL_TYPES_ID_FIELD => Type::Int32, - + _ => Type::String, } } - - - - - - // Generate a numeric expression (Int32, Int64, Double, Decimal128) fn make_numeric_expression() -> Expression { @@ -116,30 +106,14 @@ mod tests { } } } - + fn make_boolean_expression() -> Expression { match usize::arbitrary(&mut Gen::new(0)) % 7 { 0 => Expression::Identifier(BOOL_FIELD.to_string()), 1 => Expression::Identifier(TRUE_FIELD.to_string()), 2 => Expression::Identifier(FALSE_FIELD.to_string()), 3 => Expression::Literal(Literal::Boolean(bool::arbitrary(&mut Gen::new(0)))), - 4 => { - let left = make_numeric_expression(); - let right = make_numeric_expression(); - let comp_op = match usize::arbitrary(&mut Gen::new(0)) % 6 { - 0 => ComparisonOp::Eq, - 1 => ComparisonOp::Neq, - 2 => ComparisonOp::Lt, - 3 => ComparisonOp::Lte, - 4 => ComparisonOp::Gt, - _ => ComparisonOp::Gte, - }; - Expression::Binary(BinaryExpr { - left: Box::new(left), - op: BinaryOp::Comparison(comp_op), - right: Box::new(right), - }) - }, + 4 => make_comparison_expression(), 5 => { let left = make_boolean_expression(); let right = make_boolean_expression(); @@ -153,7 +127,7 @@ mod tests { op, right: Box::new(right), }) - }, + } _ => { let expr = make_boolean_expression(); Expression::Unary(UnaryExpr { @@ -163,7 +137,7 @@ mod tests { } } } - + fn make_string_expression() -> Expression { match 
usize::arbitrary(&mut Gen::new(0)) % 5 { 0 => Expression::Identifier(STRING_FIELD.to_string()), @@ -178,14 +152,14 @@ mod tests { op: BinaryOp::Concat, right: Box::new(right), }) - }, + } _ => { // String constructor - simplified to use String directly Expression::StringConstructor(format!("Hello {}!", STRING_FIELD)) } } } - + fn make_array_expression() -> Expression { match usize::arbitrary(&mut Gen::new(0)) % 5 { 0 => Expression::Identifier(ARRAY_FIELD.to_string()), @@ -198,7 +172,7 @@ mod tests { elements.push(make_numeric_expression()); } Expression::Array(elements) - }, + } _ => { let mut elements = Vec::new(); let size = (usize::arbitrary(&mut Gen::new(0)) % 3) + 1; // 1-3 elements @@ -209,7 +183,7 @@ mod tests { } } } - + fn make_date_expression() -> Expression { match usize::arbitrary(&mut Gen::new(0)) % 5 { 0 => Expression::Identifier(DATE_FIELD.to_string()), @@ -227,58 +201,76 @@ mod tests { args: vec![ Expression::Identifier(DATE_FIELD.to_string()), Expression::Literal(Literal::Integer(1)), - ] - }) - }, - _ => { - Expression::Extract(ExtractExpr { - extract_spec: match usize::arbitrary(&mut Gen::new(0)) % 6 { - 0 => DatePart::Year, - 1 => DatePart::Quarter, - 2 => DatePart::Month, - 3 => DatePart::Week, - 4 => DatePart::Day, - 5 => DatePart::Hour, - _ => DatePart::Minute, - }, - arg: Box::new(Expression::Identifier(DATE_FIELD.to_string())), + ], }) } + _ => Expression::Extract(ExtractExpr { + extract_spec: match usize::arbitrary(&mut Gen::new(0)) % 6 { + 0 => DatePart::Year, + 1 => DatePart::Quarter, + 2 => DatePart::Month, + 3 => DatePart::Week, + 4 => DatePart::Day, + 5 => DatePart::Hour, + _ => DatePart::Minute, + }, + arg: Box::new(Expression::Identifier(DATE_FIELD.to_string())), + }), } } - + fn make_object_expression() -> Expression { match usize::arbitrary(&mut Gen::new(0)) % 4 { 0 => Expression::Identifier(OBJECT_FIELD.to_string()), 1 => Expression::Identifier(NESTED_OBJECT_FIELD.to_string()), 2 => { let fields = vec![ - DocumentPair { key: 
"id".to_string(), value: make_numeric_expression() }, - DocumentPair { key: "name".to_string(), value: make_string_expression() }, - DocumentPair { key: "active".to_string(), value: make_boolean_expression() } + DocumentPair { + key: "id".to_string(), + value: make_numeric_expression(), + }, + DocumentPair { + key: "name".to_string(), + value: make_string_expression(), + }, + DocumentPair { + key: "active".to_string(), + value: make_boolean_expression(), + }, ]; Expression::Document(fields) - }, + } _ => { let nested_fields = vec![ - DocumentPair { key: "nested_id".to_string(), value: make_numeric_expression() }, - DocumentPair { key: "nested_name".to_string(), value: make_string_expression() } + DocumentPair { + key: "nested_id".to_string(), + value: make_numeric_expression(), + }, + DocumentPair { + key: "nested_name".to_string(), + value: make_string_expression(), + }, ]; - + let fields = vec![ - DocumentPair { key: "id".to_string(), value: make_numeric_expression() }, - DocumentPair { key: "metadata".to_string(), value: Expression::Document(nested_fields) } + DocumentPair { + key: "id".to_string(), + value: make_numeric_expression(), + }, + DocumentPair { + key: "metadata".to_string(), + value: Expression::Document(nested_fields), + }, ]; Expression::Document(fields) } } } - - #[allow(dead_code)] + fn make_comparison_expression() -> Expression { let left = make_numeric_expression(); let right = make_numeric_expression(); - + let comp_op = match usize::arbitrary(&mut Gen::new(0)) % 6 { 0 => ComparisonOp::Eq, 1 => ComparisonOp::Neq, @@ -287,17 +279,14 @@ mod tests { 4 => ComparisonOp::Gt, _ => ComparisonOp::Gte, }; - + Expression::Binary(BinaryExpr { left: Box::new(left), op: BinaryOp::Comparison(comp_op), right: Box::new(right), }) } - - - fn expression_type(expr: &Expression) -> Type { match expr { Expression::Identifier(name) => field_type(name), @@ -308,72 +297,83 @@ mod tests { Literal::Boolean(_) => Type::Boolean, Literal::Null => Type::Null, }, - 
Expression::Binary(binary) => { - match binary.op { - BinaryOp::Add | BinaryOp::Sub | BinaryOp::Mul | BinaryOp::Div => { - let left_type = expression_type(&binary.left); - let right_type = expression_type(&binary.right); - - if left_type == Type::Decimal128 || right_type == Type::Decimal128 { - Type::Decimal128 - } else if left_type == Type::Double || right_type == Type::Double { - Type::Double - } else if left_type == Type::Int64 || right_type == Type::Int64 { - Type::Int64 - } else { - Type::Int32 - } - }, - BinaryOp::And | BinaryOp::Or => Type::Boolean, - BinaryOp::Comparison(_) => Type::Boolean, - BinaryOp::In | BinaryOp::NotIn => Type::Boolean, - BinaryOp::Concat => Type::String, + Expression::Binary(binary) => match binary.op { + BinaryOp::Add | BinaryOp::Sub | BinaryOp::Mul | BinaryOp::Div => { + let left_type = expression_type(&binary.left); + let right_type = expression_type(&binary.right); + + if left_type == Type::Decimal128 || right_type == Type::Decimal128 { + Type::Decimal128 + } else if left_type == Type::Double || right_type == Type::Double { + Type::Double + } else if left_type == Type::Int64 || right_type == Type::Int64 { + Type::Int64 + } else { + Type::Int32 + } } + BinaryOp::And | BinaryOp::Or => Type::Boolean, + BinaryOp::Comparison(_) => Type::Boolean, + BinaryOp::In | BinaryOp::NotIn => Type::Boolean, + BinaryOp::Concat => Type::String, }, - Expression::Unary(unary) => { - match unary.op { - UnaryOp::Not => Type::Boolean, - UnaryOp::Neg | UnaryOp::Pos => expression_type(&unary.expr), - } + Expression::Unary(unary) => match unary.op { + UnaryOp::Not => Type::Boolean, + UnaryOp::Neg | UnaryOp::Pos => expression_type(&unary.expr), }, Expression::Cast(cast) => cast.to, Expression::Between(_) => Type::Boolean, - Expression::Case(case) => case.else_branch.as_ref() - .map_or_else( - || case.when_branch.first().map_or(Type::Null, |wb| expression_type(&wb.then)), - |else_expr| expression_type(else_expr) - ), + Expression::Case(case) => 
case.else_branch.as_ref().map_or_else( + || { + case.when_branch + .first() + .map_or(Type::Null, |wb| expression_type(&wb.then)) + }, + |else_expr| expression_type(else_expr), + ), Expression::Function(func) => match func.function { // Aggregation functions - FunctionName::Sum | FunctionName::Avg | FunctionName::Min | FunctionName::Max => Type::Double, + FunctionName::Sum | FunctionName::Avg | FunctionName::Min | FunctionName::Max => { + Type::Double + } FunctionName::Count => Type::Int64, FunctionName::AddToSet | FunctionName::AddToArray => Type::Array, FunctionName::First | FunctionName::Last => Type::String, // Depends on the argument type - + // String functions FunctionName::Substring => Type::String, FunctionName::Lower | FunctionName::Upper => Type::String, FunctionName::LTrim | FunctionName::RTrim => Type::String, FunctionName::Replace => Type::String, - + // Date functions - FunctionName::DateAdd | FunctionName::DateDiff | FunctionName::DateTrunc => Type::Date, + FunctionName::DateAdd | FunctionName::DateDiff | FunctionName::DateTrunc => { + Type::Date + } FunctionName::CurrentTimestamp => Type::Date, FunctionName::Year | FunctionName::Month | FunctionName::Week => Type::Int32, - FunctionName::DayOfWeek | FunctionName::DayOfMonth | FunctionName::DayOfYear => Type::Int32, - FunctionName::Hour | FunctionName::Minute | FunctionName::Second | FunctionName::Millisecond => Type::Int32, - + FunctionName::DayOfWeek | FunctionName::DayOfMonth | FunctionName::DayOfYear => { + Type::Int32 + } + FunctionName::Hour + | FunctionName::Minute + | FunctionName::Second + | FunctionName::Millisecond => Type::Int32, + // Numeric functions - FunctionName::Abs | FunctionName::Ceil | FunctionName::Floor | FunctionName::Round => Type::Double, + FunctionName::Abs + | FunctionName::Ceil + | FunctionName::Floor + | FunctionName::Round => Type::Double, FunctionName::Log | FunctionName::Log10 | FunctionName::Sqrt => Type::Double, FunctionName::Pow => Type::Double, FunctionName::Mod 
=> Type::Int32, - + // Other functions FunctionName::Coalesce => Type::String, // Depends on arguments FunctionName::NullIf => Type::String, // Depends on arguments FunctionName::Size => Type::Int32, - + _ => Type::String, // Default for other functions }, Expression::Array(_) => Type::Array, @@ -387,7 +387,7 @@ mod tests { } else { Type::String // Default case } - }, + } Expression::Subquery(_) => Type::Array, Expression::Exists(_) => Type::Boolean, Expression::SubqueryComparison(_) => Type::Boolean, @@ -402,30 +402,63 @@ mod tests { Expression::Extract(_) => Type::Int32, } } - + fn are_types_compatible(type1: Type, type2: Type) -> bool { if type1 == type2 { return true; } - - let is_type1_numeric = matches!(type1, Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128); - let is_type2_numeric = matches!(type2, Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128); - + + let is_type1_numeric = matches!( + type1, + Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128 + ); + let is_type2_numeric = matches!( + type2, + Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128 + ); + if is_type1_numeric && is_type2_numeric { return true; } - - + false } - + struct SemanticVisitor { target_type: Option, + select_fields: Vec, } - + impl SemanticVisitor { fn visit_select_query(&mut self, node: SelectQuery) -> SelectQuery { + self.select_fields.clear(); + let select_clause = node.select_clause.walk(self); + + match &select_clause.body { + SelectBody::Standard(exprs) => { + for expr in exprs { + match expr { + SelectExpression::Expression(OptionallyAliasedExpr::Aliased(aliased)) => { + self.select_fields.push(aliased.alias.clone()); + }, + SelectExpression::Expression(OptionallyAliasedExpr::Unaliased(Expression::Identifier(ident))) => { + self.select_fields.push(ident.clone()); + }, + _ => { + self.select_fields.push(INT_FIELD.to_string()); + } + } + } + }, + SelectBody::Values(_) => { + self.select_fields.push(INT_FIELD.to_string()); + } + } + + if 
self.select_fields.is_empty() { + self.select_fields.push(INT_FIELD.to_string()); + } let from_clause = Some(Datasource::Collection(CollectionSource { database: Some(TEST_DB.to_string()), @@ -461,71 +494,65 @@ mod tests { offset, } } - + fn determine_child_target_type(&self, node: &Expression) -> Option { match node { - Expression::Binary(binary) => { - match binary.op { - BinaryOp::Add | BinaryOp::Sub | BinaryOp::Mul | BinaryOp::Div => { - Some(Type::Double) - }, - BinaryOp::And | BinaryOp::Or => { - Some(Type::Boolean) - }, - BinaryOp::Comparison(_) => { - None - }, - BinaryOp::In | BinaryOp::NotIn => { - None - }, - BinaryOp::Concat => { - Some(Type::String) - }, + Expression::Binary(binary) => match binary.op { + BinaryOp::Add | BinaryOp::Sub | BinaryOp::Mul | BinaryOp::Div => { + Some(Type::Double) } + BinaryOp::And | BinaryOp::Or => Some(Type::Boolean), + BinaryOp::Comparison(_) => None, + BinaryOp::In | BinaryOp::NotIn => None, + BinaryOp::Concat => Some(Type::String), }, - Expression::Unary(unary) => { - match unary.op { - UnaryOp::Not => Some(Type::Boolean), - UnaryOp::Neg | UnaryOp::Pos => Some(Type::Double), - } + Expression::Unary(unary) => match unary.op { + UnaryOp::Not => Some(Type::Boolean), + UnaryOp::Neg | UnaryOp::Pos => Some(Type::Double), }, Expression::Function(func) => { match func.function { // Aggregation functions - FunctionName::Sum | FunctionName::Avg | FunctionName::Min | FunctionName::Max => Some(Type::Double), + FunctionName::Sum + | FunctionName::Avg + | FunctionName::Min + | FunctionName::Max => Some(Type::Double), FunctionName::Count => None, // Count can take any type FunctionName::AddToSet | FunctionName::AddToArray => None, // Can add any type to arrays - + // String functions - FunctionName::Substring | FunctionName::Lower | FunctionName::Upper => Some(Type::String), + FunctionName::Substring | FunctionName::Lower | FunctionName::Upper => { + Some(Type::String) + } FunctionName::LTrim | FunctionName::RTrim => 
Some(Type::String), FunctionName::Replace => Some(Type::String), - + // Date functions - FunctionName::DateAdd | FunctionName::DateDiff | FunctionName::DateTrunc => Some(Type::Date), + FunctionName::DateAdd + | FunctionName::DateDiff + | FunctionName::DateTrunc => Some(Type::Date), FunctionName::CurrentTimestamp => Some(Type::Date), - + // Numeric functions - FunctionName::Abs | FunctionName::Ceil | FunctionName::Floor | FunctionName::Round => Some(Type::Double), - FunctionName::Log | FunctionName::Log10 | FunctionName::Sqrt => Some(Type::Double), + FunctionName::Abs + | FunctionName::Ceil + | FunctionName::Floor + | FunctionName::Round => Some(Type::Double), + FunctionName::Log | FunctionName::Log10 | FunctionName::Sqrt => { + Some(Type::Double) + } FunctionName::Pow => Some(Type::Double), - + // Other functions FunctionName::Coalesce | FunctionName::NullIf => None, FunctionName::Size => None, - + _ => None, // Default for other functions } - }, - Expression::Case(_case) => { - Some(Type::Boolean) - }, - Expression::Between(_) => { - None - }, - Expression::Is(_) | Expression::Like(_) | Expression::Exists(_) => { - None - }, + } + Expression::Case(_case) => Some(Type::Boolean), + Expression::Between(_) => None, + Expression::Is(_) | Expression::Like(_) | Expression::Exists(_) => None, Expression::Array(_) => None, Expression::Document(_) => None, Expression::Access(_) => None, @@ -543,63 +570,66 @@ mod tests { Expression::Cast(_) => None, } } - - } - + impl visitor::Visitor for SemanticVisitor { fn visit_query(&mut self, node: Query) -> Query { match node { - Query::Select(select_query) => { - Query::Select(self.visit_select_query(select_query)) - }, + Query::Select(select_query) => Query::Select(self.visit_select_query(select_query)), Query::Set(set_query) => { let old_target_type = self.target_type; self.target_type = None; // Clear target_type when walking set operations let walked = Query::Set(set_query.walk(self)); self.target_type = old_target_type; walked - 
}, + } Query::With(with_query) => { let old_target_type = self.target_type; self.target_type = None; // Clear target_type when walking with queries let walked = Query::With(with_query.walk(self)); self.target_type = old_target_type; walked - }, + } } } - + fn visit_expression(&mut self, node: Expression) -> Expression { let mut expr = node.clone(); self.visit_expression_custom(&mut expr); expr } - + fn visit_sort_key(&mut self, node: SortKey) -> SortKey { match node { SortKey::Positional(_) => { - SortKey::Simple(Expression::Identifier(INT_FIELD.to_string())) - }, + if !self.select_fields.is_empty() { + let idx = usize::arbitrary(&mut Gen::new(0)) % self.select_fields.len(); + SortKey::Simple(Expression::Identifier(self.select_fields[idx].clone())) + } else { + SortKey::Simple(Expression::Identifier(INT_FIELD.to_string())) + } + } _ => node.walk(self), } } } - + impl SemanticVisitor { fn visit_expression_custom(&mut self, node: &mut Expression) { if let Expression::Tuple(_) = node { *node = make_numeric_expression(); return; } - + if let Some(target_type) = self.target_type { let node_type = expression_type(node); - + if node_type != target_type && !are_types_compatible(node_type, target_type) { *node = match target_type { Type::Boolean => make_boolean_expression(), - Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128 => make_numeric_expression(), + Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128 => { + make_numeric_expression() + } Type::String => make_string_expression(), Type::Array => make_array_expression(), Type::Date | Type::Datetime | Type::Timestamp => make_date_expression(), @@ -608,27 +638,27 @@ mod tests { }; } } - + let child_target_type = self.determine_child_target_type(node); - + let old_target_type = self.target_type; self.target_type = child_target_type; - + match node { Expression::Binary(bin) => { self.visit_expression_custom(&mut bin.left); self.visit_expression_custom(&mut bin.right); - }, + } Expression::Unary(un) => { 
self.visit_expression_custom(&mut un.expr); - }, + } Expression::Function(func) => { if let FunctionArguments::Args(args) = &mut func.args { for arg in args { self.visit_expression_custom(arg); } } - }, + } Expression::Case(case) => { for branch in &mut case.when_branch { self.visit_expression_custom(&mut branch.when); @@ -637,112 +667,81 @@ mod tests { if let Some(else_branch) = &mut case.else_branch { self.visit_expression_custom(else_branch); } - }, + } Expression::Array(array) => { for elem in array { self.visit_expression_custom(elem); } - }, + } Expression::Document(doc) => { for pair in doc { self.visit_expression_custom(&mut pair.value); } - }, + } Expression::Access(access) => { self.visit_expression_custom(&mut access.expr); - }, - Expression::Subquery(_) => { - }, - Expression::Exists(_) => { - }, - Expression::SubqueryComparison(_) => { - }, + } + Expression::Subquery(_) => {} + Expression::Exists(_) => {} + Expression::SubqueryComparison(_) => {} Expression::Subpath(subpath) => { self.visit_expression_custom(&mut subpath.expr); - }, + } Expression::Is(is_expr) => { self.visit_expression_custom(&mut is_expr.expr); - }, + } Expression::Like(like) => { self.visit_expression_custom(&mut like.expr); self.visit_expression_custom(&mut like.pattern); - }, - Expression::StringConstructor(_) => { - }, + } + Expression::StringConstructor(_) => {} Expression::TypeAssertion(type_assertion) => { self.visit_expression_custom(&mut type_assertion.expr); - }, + } Expression::Between(between) => { self.visit_expression_custom(&mut between.arg); self.visit_expression_custom(&mut between.min); self.visit_expression_custom(&mut between.max); - }, + } Expression::Trim(trim) => { self.visit_expression_custom(&mut trim.arg); - }, - Expression::DateFunction(_) => { - }, + } + Expression::DateFunction(_) => {} Expression::Extract(extract) => { self.visit_expression_custom(&mut extract.arg); - }, - Expression::Identifier(_) | Expression::Literal(_) => { - }, + } + 
Expression::Identifier(_) | Expression::Literal(_) => {} Expression::Cast(cast) => { self.visit_expression_custom(&mut cast.expr); - }, + } Expression::Tuple(tuple) => { for expr in tuple { self.visit_expression_custom(expr); } - }, + } } - + self.target_type = old_target_type; } } - - #[allow(dead_code)] - fn ensure_numeric_expression(expr: &mut Expression) { - if !matches!(expression_type(expr), Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128) { - *expr = match usize::arbitrary(&mut Gen::new(0)) % 4 { - 0 => Expression::Identifier(INT_FIELD.to_string()), - 1 => Expression::Identifier(LONG_FIELD.to_string()), - 2 => Expression::Identifier(DOUBLE_FIELD.to_string()), - _ => Expression::Literal(Literal::Integer(42)), - }; - } - } - - #[allow(dead_code)] - fn ensure_boolean_expression(expr: &mut Expression) { - if expression_type(expr) != Type::Boolean { - *expr = match usize::arbitrary(&mut Gen::new(0)) % 3 { - 0 => Expression::Identifier(BOOL_FIELD.to_string()), - 1 => Expression::Literal(Literal::Boolean(bool::arbitrary(&mut Gen::new(0)))), - _ => { - Expression::Binary(BinaryExpr { - left: Box::new(Expression::Identifier(INT_FIELD.to_string())), - op: BinaryOp::Comparison(ComparisonOp::Eq), - right: Box::new(Expression::Literal(Literal::Integer(42))), - }) - } - }; - } - } + + fn contains_invalid_select_query(query: &Query) -> bool { match query { Query::Select(select) => { - select.from_clause.is_none() && matches!(select.select_clause.body, SelectBody::Values(_)) - }, + select.from_clause.is_none() + && matches!(select.select_clause.body, SelectBody::Values(_)) + } Query::Set(set) => { - contains_invalid_select_query(&set.left) || contains_invalid_select_query(&set.right) - }, + contains_invalid_select_query(&set.left) + || contains_invalid_select_query(&set.right) + } Query::With(with) => { if contains_invalid_select_query(&with.body) { return true; } - + for named_query in &with.queries { if contains_invalid_select_query(&named_query.query) { 
return true; @@ -759,26 +758,26 @@ mod tests { if contains_invalid_select_query(&query) { return TestResult::discard(); } - - let mut v = SemanticVisitor { target_type: None }; + + let mut v = SemanticVisitor { target_type: None, select_fields: Vec::new() }; query = v.visit_query(query); - + let sql = match query.pretty_print() { Err(_) => return TestResult::discard(), Ok(sql) => sql, }; - + let sql_options = SqlOptions { schema_checking_mode: SchemaCheckingMode::Strict, exclude_namespaces: ExcludeNamespacesOption::IncludeNamespaces, allow_order_by_missing_columns: false, }; - + let result = translate_sql(TEST_DB, &sql, &TEST_CATALOG, sql_options); - + TestResult::from_bool(result.is_ok()) } - + quickcheck::QuickCheck::new() .gen(Gen::new(0)) .quickcheck(property as fn(Query) -> TestResult); @@ -805,39 +804,41 @@ mod tests { return; } }; - + fn property(mut query: Query) -> TestResult { if contains_invalid_select_query(&query) { return TestResult::discard(); } - - let mut v = SemanticVisitor { target_type: None }; + + let mut v = SemanticVisitor { target_type: None, select_fields: Vec::new() }; query = v.visit_query(query); - + let client = match get_mongodb_client() { Some(client) => client, None => return TestResult::discard(), // Skip if no MongoDB connection }; - + let sql = match query.pretty_print() { Err(_) => return TestResult::discard(), Ok(sql) => sql, }; - + let sql_options = SqlOptions { schema_checking_mode: SchemaCheckingMode::Strict, exclude_namespaces: ExcludeNamespacesOption::IncludeNamespaces, allow_order_by_missing_columns: false, }; - + let translation = match translate_sql(TEST_DB, &sql, &TEST_CATALOG, sql_options) { Ok(t) => t, Err(_) => return TestResult::discard(), // Skip if translation fails }; - + let target_db = translation.target_db; - let target_collection = translation.target_collection.unwrap_or_else(|| "unknown".to_string()); - + let target_collection = translation + .target_collection + .unwrap_or_else(|| "unknown".to_string()); + 
let pipeline_docs = match translation.pipeline { bson::Bson::Array(array) => { let mut docs = Vec::new(); @@ -849,19 +850,19 @@ mod tests { } } docs - }, + } _ => return TestResult::discard(), // Not a valid pipeline }; - + let result = client .database(&target_db) .collection::(&target_collection) .aggregate(pipeline_docs) .run(); - + TestResult::from_bool(result.is_ok()) } - + quickcheck::QuickCheck::new() .gen(Gen::new(0)) .quickcheck(property as fn(Query) -> TestResult); @@ -869,12 +870,14 @@ mod tests { lazy_static! { static ref TEST_CATALOG: Catalog = { - let mut catalog_schema: BTreeMap> = BTreeMap::new(); + let mut catalog_schema: BTreeMap> = + BTreeMap::new(); let mut db_schema: BTreeMap = BTreeMap::new(); - + db_schema.insert( "all_types".to_string(), - serde_json::from_str(r#"{ + serde_json::from_str( + r#"{ "bsonType": "object", "properties": { "int_field": { "bsonType": "int" }, @@ -925,12 +928,15 @@ mod tests { "objectid_field": { "bsonType": "objectId" } }, "additionalProperties": false - }"#).unwrap(), + }"#, + ) + .unwrap(), ); - + db_schema.insert( "related_data".to_string(), - serde_json::from_str(r#"{ + serde_json::from_str( + r#"{ "bsonType": "object", "properties": { "id": { "bsonType": "int" }, @@ -938,12 +944,15 @@ mod tests { "description": { "bsonType": "string" } }, "additionalProperties": false - }"#).unwrap(), + }"#, + ) + .unwrap(), ); - + db_schema.insert( "numeric_data".to_string(), - serde_json::from_str(r#"{ + serde_json::from_str( + r#"{ "bsonType": "object", "properties": { "id": { "bsonType": "int" }, @@ -954,12 +963,15 @@ mod tests { "calculated_field": { "bsonType": "double" } }, "additionalProperties": false - }"#).unwrap(), + }"#, + ) + .unwrap(), ); - + db_schema.insert( "array_data".to_string(), - serde_json::from_str(r#"{ + serde_json::from_str( + r#"{ "bsonType": "object", "properties": { "id": { "bsonType": "int" }, @@ -990,9 +1002,11 @@ mod tests { } }, "additionalProperties": false - }"#).unwrap(), + }"#, + ) + 
.unwrap(), ); - + catalog_schema.insert("test_db".to_string(), db_schema); build_catalog_from_catalog_schema(catalog_schema).unwrap() }; From 3fb736899d895f088211e4917ceebb7c62616d3a Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 15 May 2025 18:20:52 +0000 Subject: [PATCH 13/38] Fix duplicate case in make_array_expression function Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index d9d5d9b4c..228ddada2 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -175,9 +175,13 @@ mod tests { } _ => { let mut elements = Vec::new(); - let size = (usize::arbitrary(&mut Gen::new(0)) % 3) + 1; // 1-3 elements - for _ in 0..size { - elements.push(make_string_expression()); + let size = (usize::arbitrary(&mut Gen::new(0)) % 4) + 2; // 2-5 elements + for i in 0..size { + match i % 3 { + 0 => elements.push(make_numeric_expression()), + 1 => elements.push(make_string_expression()), + _ => elements.push(make_boolean_expression()), + } } Expression::Array(elements) } From ef80a1103aad0cc6460ef564e4d8f6411ea81930 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 15 May 2025 18:30:37 +0000 Subject: [PATCH 14/38] Fix formatting issues in semantic_fuzz_test.rs Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 32 ++++++++++++++++---------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index 228ddada2..275a2335d 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -436,30 +436,34 @@ mod tests { impl SemanticVisitor { fn 
visit_select_query(&mut self, node: SelectQuery) -> SelectQuery { self.select_fields.clear(); - + let select_clause = node.select_clause.walk(self); - + match &select_clause.body { SelectBody::Standard(exprs) => { for expr in exprs { match expr { - SelectExpression::Expression(OptionallyAliasedExpr::Aliased(aliased)) => { + SelectExpression::Expression(OptionallyAliasedExpr::Aliased( + aliased, + )) => { self.select_fields.push(aliased.alias.clone()); - }, - SelectExpression::Expression(OptionallyAliasedExpr::Unaliased(Expression::Identifier(ident))) => { + } + SelectExpression::Expression(OptionallyAliasedExpr::Unaliased( + Expression::Identifier(ident), + )) => { self.select_fields.push(ident.clone()); - }, + } _ => { self.select_fields.push(INT_FIELD.to_string()); } } } - }, + } SelectBody::Values(_) => { self.select_fields.push(INT_FIELD.to_string()); } } - + if self.select_fields.is_empty() { self.select_fields.push(INT_FIELD.to_string()); } @@ -729,8 +733,6 @@ mod tests { } } - - fn contains_invalid_select_query(query: &Query) -> bool { match query { Query::Select(select) => { @@ -763,7 +765,10 @@ mod tests { return TestResult::discard(); } - let mut v = SemanticVisitor { target_type: None, select_fields: Vec::new() }; + let mut v = SemanticVisitor { + target_type: None, + select_fields: Vec::new(), + }; query = v.visit_query(query); let sql = match query.pretty_print() { @@ -814,7 +819,10 @@ mod tests { return TestResult::discard(); } - let mut v = SemanticVisitor { target_type: None, select_fields: Vec::new() }; + let mut v = SemanticVisitor { + target_type: None, + select_fields: Vec::new(), + }; query = v.visit_query(query); let client = match get_mongodb_client() { From fa02fb1d73cab02ec6f14bc0b03fa9fa98342e70 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 15 May 2025 19:02:43 +0000 Subject: [PATCH 15/38] Fix DatePart::Quarter issue and update evergreen.yml with MongoDB orchestration 
Co-Authored-By: matthew.chiaravalloti@mongodb.com --- evergreen.yml | 4 ++ mongosql/src/ast/semantic_fuzz_test.rs | 58 ++++++++++++++------------ 2 files changed, 35 insertions(+), 27 deletions(-) diff --git a/evergreen.yml b/evergreen.yml index 97e0196a7..1ee29da4a 100644 --- a/evergreen.yml +++ b/evergreen.yml @@ -966,6 +966,10 @@ tasks: - name: test-rust-fuzz tags: ["fuzz"] commands: + - func: "bootstrap mongo-orchestration" + vars: + MONGODB_VERSION: 6.0 + TOPOLOGY: server - func: "install rust toolchain" - func: "run rust tests" retry_on_failure: true diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index 275a2335d..b16e34582 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -209,13 +209,12 @@ mod tests { }) } _ => Expression::Extract(ExtractExpr { - extract_spec: match usize::arbitrary(&mut Gen::new(0)) % 6 { + extract_spec: match usize::arbitrary(&mut Gen::new(0)) % 5 { 0 => DatePart::Year, - 1 => DatePart::Quarter, - 2 => DatePart::Month, - 3 => DatePart::Week, - 4 => DatePart::Day, - 5 => DatePart::Hour, + 1 => DatePart::Month, + 2 => DatePart::Week, + 3 => DatePart::Day, + 4 => DatePart::Hour, _ => DatePart::Minute, }, arg: Box::new(Expression::Identifier(DATE_FIELD.to_string())), @@ -628,10 +627,17 @@ mod tests { *node = make_numeric_expression(); return; } - + + // if let Expression::Extract(extract) = node { + // if matches!(extract.extract_spec, DatePart::Quarter) { + // *node = make_date_expression(); + // return; + // } + // } + if let Some(target_type) = self.target_type { let node_type = expression_type(node); - + if node_type != target_type && !are_types_compatible(node_type, target_type) { *node = match target_type { Type::Boolean => make_boolean_expression(), @@ -644,14 +650,15 @@ mod tests { Type::Document => make_object_expression(), _ => node.clone(), // Keep the original node for other types }; + + return; } } - + let child_target_type = 
self.determine_child_target_type(node); - let old_target_type = self.target_type; self.target_type = child_target_type; - + match node { Expression::Binary(bin) => { self.visit_expression_custom(&mut bin.left); @@ -722,13 +729,10 @@ mod tests { Expression::Cast(cast) => { self.visit_expression_custom(&mut cast.expr); } - Expression::Tuple(tuple) => { - for expr in tuple { - self.visit_expression_custom(expr); - } + Expression::Tuple(_) => { } } - + self.target_type = old_target_type; } } @@ -906,13 +910,13 @@ mod tests { "date_field": { "bsonType": "date" }, "timestamp_field": { "bsonType": "timestamp" }, "time_field": { "bsonType": "timestamp" }, - "object_field": { + "object_field": { "bsonType": "object", "properties": { "nested_field": { "bsonType": "string" } } }, - "nested_object_field": { + "nested_object_field": { "bsonType": "object", "properties": { "nested_int": { "bsonType": "int" }, @@ -925,15 +929,15 @@ mod tests { } } }, - "array_field": { + "array_field": { "bsonType": "array", "items": { "bsonType": "int" } }, - "string_array_field": { + "string_array_field": { "bsonType": "array", "items": { "bsonType": "string" } }, - "mixed_array_field": { + "mixed_array_field": { "bsonType": "array" }, "null_field": { "bsonType": "null" }, @@ -987,17 +991,17 @@ mod tests { "bsonType": "object", "properties": { "id": { "bsonType": "int" }, - "int_array": { + "int_array": { "bsonType": "array", "items": { "bsonType": "int" } }, - "string_array": { + "string_array": { "bsonType": "array", "items": { "bsonType": "string" } }, - "object_array": { + "object_array": { "bsonType": "array", - "items": { + "items": { "bsonType": "object", "properties": { "key": { "bsonType": "string" }, @@ -1005,9 +1009,9 @@ mod tests { } } }, - "nested_array": { + "nested_array": { "bsonType": "array", - "items": { + "items": { "bsonType": "array", "items": { "bsonType": "int" } } From 4684e96ad4932c31943a24c2318984b916fb401f Mon Sep 17 00:00:00 2001 From: Devin AI 
<158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 15 May 2025 19:11:56 +0000 Subject: [PATCH 16/38] Fix formatting issues in semantic_fuzz_test.rs Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index b16e34582..1d7ab5be6 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -627,17 +627,17 @@ mod tests { *node = make_numeric_expression(); return; } - + // if let Expression::Extract(extract) = node { // if matches!(extract.extract_spec, DatePart::Quarter) { // *node = make_date_expression(); // return; // } // } - + if let Some(target_type) = self.target_type { let node_type = expression_type(node); - + if node_type != target_type && !are_types_compatible(node_type, target_type) { *node = match target_type { Type::Boolean => make_boolean_expression(), @@ -650,15 +650,15 @@ mod tests { Type::Document => make_object_expression(), _ => node.clone(), // Keep the original node for other types }; - + return; } } - + let child_target_type = self.determine_child_target_type(node); let old_target_type = self.target_type; self.target_type = child_target_type; - + match node { Expression::Binary(bin) => { self.visit_expression_custom(&mut bin.left); @@ -729,10 +729,9 @@ mod tests { Expression::Cast(cast) => { self.visit_expression_custom(&mut cast.expr); } - Expression::Tuple(_) => { - } + Expression::Tuple(_) => {} } - + self.target_type = old_target_type; } } From 9bdc4eb9e576fc156905aa1a3a5e431c8b62cb4d Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 15 May 2025 19:35:41 +0000 Subject: [PATCH 17/38] Remove test-rust-fuzz task from windows, macos, and macos-arm64 buildvariants Co-Authored-By: matthew.chiaravalloti@mongodb.com --- 
evergreen.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/evergreen.yml b/evergreen.yml index 1ee29da4a..6daa2607b 100644 --- a/evergreen.yml +++ b/evergreen.yml @@ -1427,7 +1427,6 @@ buildvariants: - name: compile - name: test-pipeline-generate - name: test-rust - - name: test-rust-fuzz - name: test-go - name: macos-arm64 @@ -1439,7 +1438,6 @@ buildvariants: - name: compile - name: test-pipeline-generate - name: test-rust - - name: test-rust-fuzz - name: test-go - name: amazon2-arm64 @@ -1463,7 +1461,6 @@ buildvariants: - name: compile - name: test-pipeline-generate - name: test-rust - - name: test-rust-fuzz - name: test-go - name: benchmark From fd7c9b8abdf2b2b2e10f7f931112949f096548e5 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 15 May 2025 20:20:11 +0000 Subject: [PATCH 18/38] Improve semantic validity of fuzz tests by fixing GroupByClause handling and SubpathExpr types Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 182 +++++++++++++++++++++---- 1 file changed, 159 insertions(+), 23 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index 1d7ab5be6..8ee480d0d 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -78,6 +78,36 @@ mod tests { } } + fn get_field_for_type(target_type: Type) -> String { + match target_type { + Type::Int32 => INT_FIELD.to_string(), + Type::Int64 => LONG_FIELD.to_string(), + Type::Double => DOUBLE_FIELD.to_string(), + Type::Decimal128 => DECIMAL_FIELD.to_string(), + Type::String => STRING_FIELD.to_string(), + Type::Boolean => BOOL_FIELD.to_string(), + Type::Date => DATE_FIELD.to_string(), + Type::Timestamp => TIMESTAMP_FIELD.to_string(), + Type::Document => OBJECT_FIELD.to_string(), + Type::Array => ARRAY_FIELD.to_string(), + _ => INT_FIELD.to_string(), // Default to int for other types + } + } + + fn 
replace_invalid_expression(target_type: Type) -> Expression { + match target_type { + Type::Boolean => make_boolean_expression(), + Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128 => { + make_numeric_expression() + } + Type::String => make_string_expression(), + Type::Array => make_array_expression(), + Type::Date | Type::Datetime | Type::Timestamp => make_date_expression(), + Type::Document => make_object_expression(), + _ => Expression::Identifier(INT_FIELD.to_string()), // Default for other types + } + } + // Generate a numeric expression (Int32, Int64, Double, Decimal128) fn make_numeric_expression() -> Expression { match usize::arbitrary(&mut Gen::new(0)) % 9 { @@ -478,7 +508,36 @@ mod tests { let where_clause = node.where_clause.map(|wc| wc.walk(self)); self.target_type = old_target_type; - let group_by_clause = node.group_by_clause.map(|gbc| gbc.walk(self)); + let group_by_clause = node.group_by_clause.map(|gbc| { + let keys = gbc.keys.into_iter().map(|key| { + match key { + OptionallyAliasedExpr::Unaliased(expr) => { + let field_name = if let Some(target_type) = self.target_type { + get_field_for_type(target_type) + } else { + INT_FIELD.to_string() + }; + OptionallyAliasedExpr::Unaliased(Expression::Identifier(field_name)) + }, + OptionallyAliasedExpr::Aliased(aliased) => { + let field_name = if let Some(target_type) = self.target_type { + get_field_for_type(target_type) + } else { + INT_FIELD.to_string() + }; + OptionallyAliasedExpr::Aliased(AliasedExpr { + expr: Expression::Identifier(field_name), + alias: aliased.alias, + }) + } + } + }).collect(); + + GroupByClause { + keys, + aggregations: Vec::new(), + } + }); let old_target_type = self.target_type; self.target_type = Some(Type::Boolean); @@ -619,38 +678,97 @@ mod tests { _ => node.walk(self), } } + + } impl SemanticVisitor { fn visit_expression_custom(&mut self, node: &mut Expression) { - if let Expression::Tuple(_) = node { - *node = make_numeric_expression(); - return; + match node { + 
Expression::Tuple(_) => { + if let Some(target_type) = self.target_type { + *node = replace_invalid_expression(target_type); + } else { + *node = make_numeric_expression(); + } + return; + } + Expression::Binary(bin) if matches!(bin.op, BinaryOp::In | BinaryOp::NotIn) => { + if let Some(target_type) = self.target_type { + *node = replace_invalid_expression(target_type); + } else { + *node = make_boolean_expression(); + } + return; + } + Expression::Subpath(_) => { + if VALID_SUBPATHS.is_empty() { + if let Some(target_type) = self.target_type { + *node = replace_invalid_expression(target_type); + } else { + *node = make_numeric_expression(); + } + } else { + let idx = usize::arbitrary(&mut Gen::new(0)) % VALID_SUBPATHS.len(); + *node = Expression::Subpath(VALID_SUBPATHS[idx].clone()); + } + return; + } + Expression::Identifier(ident) => { + if let Some(target_type) = self.target_type { + *ident = get_field_for_type(target_type); + } + return; + } + // Handle aggregate functions + Expression::Function(func) => { + // Check if this is an aggregate function + let is_aggregate = matches!( + func.function, + FunctionName::AddToArray + | FunctionName::AddToSet + | FunctionName::Avg + | FunctionName::Count + | FunctionName::First + | FunctionName::Last + | FunctionName::Max + | FunctionName::MergeDocuments + | FunctionName::Min + | FunctionName::StddevPop + | FunctionName::StddevSamp + | FunctionName::Sum + ); + + if is_aggregate { + // Determine appropriate field type for the function + let field_type = match func.function { + FunctionName::Sum + | FunctionName::Avg + | FunctionName::Min + | FunctionName::Max => Type::Double, + FunctionName::Count => Type::Int32, + FunctionName::AddToArray | FunctionName::AddToSet => Type::Array, + FunctionName::First + | FunctionName::Last + | FunctionName::MergeDocuments => Type::String, + FunctionName::StddevPop | FunctionName::StddevSamp => Type::Double, + _ => Type::Int32, + }; + + func.args = 
FunctionArguments::Args(vec![Expression::Identifier( + get_field_for_type(field_type), + )]); + return; + } + } + _ => {} } - // if let Expression::Extract(extract) = node { - // if matches!(extract.extract_spec, DatePart::Quarter) { - // *node = make_date_expression(); - // return; - // } - // } - if let Some(target_type) = self.target_type { let node_type = expression_type(node); if node_type != target_type && !are_types_compatible(node_type, target_type) { - *node = match target_type { - Type::Boolean => make_boolean_expression(), - Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128 => { - make_numeric_expression() - } - Type::String => make_string_expression(), - Type::Array => make_array_expression(), - Type::Date | Type::Datetime | Type::Timestamp => make_date_expression(), - Type::Document => make_object_expression(), - _ => node.clone(), // Keep the original node for other types - }; - + *node = replace_invalid_expression(target_type); return; } } @@ -796,6 +914,24 @@ mod tests { } lazy_static! 
{ + static ref VALID_SUBPATHS: Vec<SubpathExpr> = vec![ + SubpathExpr { + expr: Box::new(Expression::Identifier(NESTED_OBJECT_FIELD.to_string())), + subpath: "nested_int".to_string(), + }, + SubpathExpr { + expr: Box::new(Expression::Identifier(NESTED_OBJECT_FIELD.to_string())), + subpath: "nested_string".to_string(), + }, + SubpathExpr { + expr: Box::new(Expression::Identifier(NESTED_OBJECT_FIELD.to_string())), + subpath: "nested_object.deeply_nested".to_string(), + }, + SubpathExpr { + expr: Box::new(Expression::Identifier(OBJECT_FIELD.to_string())), + subpath: "nested_field".to_string(), + }, + ]; static ref MONGODB_URI: String = format!( "mongodb://localhost:{}", std::env::var("MDB_TEST_LOCAL_PORT").unwrap_or_else(|_| "27017".to_string()) From 149136b70176089bcb6ed03babf6f63cba361fda Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 16 May 2025 13:59:46 +0000 Subject: [PATCH 19/38] Fix formatting issues in semantic_fuzz_test.rs Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index 8ee480d0d..84422cb67 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -509,8 +509,10 @@ mod tests { self.target_type = old_target_type; let group_by_clause = node.group_by_clause.map(|gbc| { - let keys = gbc.keys.into_iter().map(|key| { - match key { + let keys = gbc + .keys + .into_iter() + .map(|key| match key { OptionallyAliasedExpr::Unaliased(expr) => { let field_name = if let Some(target_type) = self.target_type { get_field_for_type(target_type) @@ -518,7 +520,7 @@ mod tests { INT_FIELD.to_string() }; OptionallyAliasedExpr::Unaliased(Expression::Identifier(field_name)) - }, + } OptionallyAliasedExpr::Aliased(aliased) => { let field_name = if let Some(target_type) =
self.target_type { get_field_for_type(target_type) @@ -530,9 +532,9 @@ mod tests { alias: aliased.alias, }) } - } - }).collect(); - + }) + .collect(); + GroupByClause { keys, aggregations: Vec::new(), @@ -678,8 +680,6 @@ mod tests { _ => node.walk(self), } } - - } impl SemanticVisitor { From 8c444e8a3a8f8080aaacbe5d76111f0d01a591d1 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 16 May 2025 14:12:17 +0000 Subject: [PATCH 20/38] Update visitor to handle SubstarExpr in SELECT clauses Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 54 ++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index 84422cb67..848588b46 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -680,6 +680,60 @@ mod tests { _ => node.walk(self), } } + + fn visit_select_clause(&mut self, node: SelectClause) -> SelectClause { + let body = match node.body { + SelectBody::Standard(exprs) => { + let mut has_substar = false; + let mut new_exprs = Vec::new(); + + for expr in exprs { + match expr { + SelectExpression::Substar(_) => { + if !has_substar { + has_substar = true; + new_exprs.push(SelectExpression::Substar(SubstarExpr { + datasource: ALL_TYPES_COLLECTION.to_string(), + })); + } + } + _ => { + new_exprs.push(expr.walk(self)); + } + } + } + + SelectBody::Standard(new_exprs) + } + SelectBody::Values(values) => { + let mut has_substar = false; + let mut new_values = Vec::new(); + + for value in values { + match value { + SelectValuesExpression::Substar(_) => { + if !has_substar { + has_substar = true; + new_values.push(SelectValuesExpression::Substar(SubstarExpr { + datasource: ALL_TYPES_COLLECTION.to_string(), + })); + } + } + _ => { + new_values.push(value.walk(self)); + } + } + } + + SelectBody::Values(new_values) + } + }; + + SelectClause 
{ + set_quantifier: node.set_quantifier, + body, + } + } } impl SemanticVisitor { From 4e0da703991fe6f5203189f0c03df35ca5076a47 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 16 May 2025 14:27:22 +0000 Subject: [PATCH 21/38] Update visitor to handle SelectExpression::Star in SELECT clauses Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index 848588b46..077466c44 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -685,7 +685,9 @@ mod tests { let body = match node.body { SelectBody::Standard(exprs) => { let mut has_substar = false; + let mut has_star = false; let mut new_exprs = Vec::new(); + let mut non_star_exprs = Vec::new(); for expr in exprs { match expr { @@ -697,13 +699,26 @@ mod tests { })); } } + SelectExpression::Star => { + has_star = true; + } _ => { - new_exprs.push(expr.walk(self)); + let processed_expr = expr.walk(self); + non_star_exprs.push(processed_expr); } } } - SelectBody::Standard(new_exprs) + if has_star || (usize::arbitrary(&mut Gen::new(0)) % 10) < 2 { + SelectBody::Standard(vec![SelectExpression::Star]) + } else { + if has_substar { + new_exprs.extend(non_star_exprs); + SelectBody::Standard(new_exprs) + } else { + SelectBody::Standard(non_star_exprs) + } + } } SelectBody::Values(values) => { let mut has_substar = false; From dec6648882a656c41df0d5936095d3eca025a088 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 16 May 2025 14:37:17 +0000 Subject: [PATCH 22/38] Replace unused variable with underscore in semantic_fuzz_test.rs Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 2 +- 1 file changed, 1 insertion(+), 
1 deletion(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index 077466c44..0d37a8c95 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -513,7 +513,7 @@ mod tests { .keys .into_iter() .map(|key| match key { - OptionallyAliasedExpr::Unaliased(expr) => { + OptionallyAliasedExpr::Unaliased(_) => { let field_name = if let Some(target_type) = self.target_type { get_field_for_type(target_type) } else { From d011a63b2f3f3b142df392a94661fec8ac728166 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 16 May 2025 14:49:52 +0000 Subject: [PATCH 23/38] Update visit_sort_key method to handle empty and non-empty select_fields Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index 0d37a8c95..2d8f1011c 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -668,16 +668,17 @@ mod tests { } fn visit_sort_key(&mut self, node: SortKey) -> SortKey { + if self.select_fields.is_empty() { + return SortKey::Simple(Expression::Identifier(INT_FIELD.to_string())); + } + + let idx = usize::arbitrary(&mut Gen::new(0)) % self.select_fields.len(); + match node { - SortKey::Positional(_) => { - if !self.select_fields.is_empty() { - let idx = usize::arbitrary(&mut Gen::new(0)) % self.select_fields.len(); - SortKey::Simple(Expression::Identifier(self.select_fields[idx].clone())) - } else { - SortKey::Simple(Expression::Identifier(INT_FIELD.to_string())) - } + SortKey::Positional(_) => SortKey::Positional(idx as u32 + 1), + SortKey::Simple(_) => { + SortKey::Simple(Expression::Identifier(self.select_fields[idx].clone())) } - _ => node.walk(self), } } From 
4285fd02a571a26f5eab97937f70cbbc040b4ec3 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 16 May 2025 14:54:54 +0000 Subject: [PATCH 24/38] Update line 469 to use visit_select_clause method Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index 2d8f1011c..fb791fa80 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -466,7 +466,7 @@ mod tests { fn visit_select_query(&mut self, node: SelectQuery) -> SelectQuery { self.select_fields.clear(); - let select_clause = node.select_clause.walk(self); + let select_clause = self.visit_select_clause(node.select_clause); match &select_clause.body { SelectBody::Standard(exprs) => { From 82dbe00c222190367e18a4e09fa9dd33419da63c Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 16 May 2025 15:01:33 +0000 Subject: [PATCH 25/38] Update WHERE and HAVING clauses to use visit_expression instead of walk Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index fb791fa80..b5a09d040 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -505,7 +505,7 @@ mod tests { let old_target_type = self.target_type; self.target_type = Some(Type::Boolean); - let where_clause = node.where_clause.map(|wc| wc.walk(self)); + let where_clause = node.where_clause.map(|wc| self.visit_expression(wc)); self.target_type = old_target_type; let group_by_clause = node.group_by_clause.map(|gbc| { @@ -543,7 +543,7 @@ mod tests { let old_target_type = self.target_type; 
self.target_type = Some(Type::Boolean); - let having_clause = node.having_clause.map(|hc| hc.walk(self)); + let having_clause = node.having_clause.map(|hc| self.visit_expression(hc)); self.target_type = old_target_type; let order_by_clause = node.order_by_clause.map(|obc| obc.walk(self)); From 67337d755a352e47d6e5f455acf43761e49edec1 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 16 May 2025 15:12:20 +0000 Subject: [PATCH 26/38] Update wildcard case to not add INT_FIELD to select_fields Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index b5a09d040..197bc0d9f 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -482,9 +482,7 @@ mod tests { )) => { self.select_fields.push(ident.clone()); } - _ => { - self.select_fields.push(INT_FIELD.to_string()); - } + _ => {} } } } From 4647ebb1e2028758eafd899023b7797a3c303798 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 16 May 2025 15:18:56 +0000 Subject: [PATCH 27/38] Simplify logic for generating * expressions in SELECT clauses Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index 197bc0d9f..551c3b2d4 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -684,7 +684,6 @@ mod tests { let body = match node.body { SelectBody::Standard(exprs) => { let mut has_substar = false; - let mut has_star = false; let mut new_exprs = Vec::new(); let mut non_star_exprs = Vec::new(); @@ -698,9 +697,7 @@ mod tests { })); } } - 
SelectExpression::Star => { - has_star = true; - } + SelectExpression::Star => {} _ => { let processed_expr = expr.walk(self); non_star_exprs.push(processed_expr); @@ -708,7 +705,7 @@ mod tests { } } - if has_star || (usize::arbitrary(&mut Gen::new(0)) % 10) < 2 { + if (usize::arbitrary(&mut Gen::new(0)) % 10) < 2 { SelectBody::Standard(vec![SelectExpression::Star]) } else { if has_substar { From 7a96e85623144390702e95da0ba19f5daaaf1a63 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 16 May 2025 15:37:31 +0000 Subject: [PATCH 28/38] Update SelectBody::Values to ensure Document expressions Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 30 ++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index 551c3b2d4..50b0c9378 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -486,8 +486,19 @@ mod tests { } } } - SelectBody::Values(_) => { - self.select_fields.push(INT_FIELD.to_string()); + SelectBody::Values(values) => { + for value in values { + if let SelectValuesExpression::Expression(Expression::Document(doc_pairs)) = + &value + { + for pair in doc_pairs { + self.select_fields.push(pair.key.clone()); + } + } + } + if self.select_fields.is_empty() { + self.select_fields.push(INT_FIELD.to_string()); + } } } @@ -719,6 +730,7 @@ mod tests { SelectBody::Values(values) => { let mut has_substar = false; let mut new_values = Vec::new(); + let mut doc_exprs = Vec::new(); for value in values { match value { @@ -730,12 +742,22 @@ mod tests { })); } } - _ => { - new_values.push(value.walk(self)); + SelectValuesExpression::Expression(expr) => { + let key = crate::ast::pretty_print_fuzz_test::fuzz_test::arbitrary_identifier(&mut quickcheck::Gen::new(0)); + doc_exprs.push(DocumentPair { + key, + value: 
expr.walk(self), + }); } } } + if !doc_exprs.is_empty() { + new_values.push(SelectValuesExpression::Expression(Expression::Document( + doc_exprs, + ))); + } + SelectBody::Values(new_values) } }; From 5e42a3233deeb59308ffb0a416f7965b6a597dae Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 16 May 2025 15:46:01 +0000 Subject: [PATCH 29/38] Remove unnecessary code forcing select_fields to be non-empty Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index 50b0c9378..ebab21b40 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -496,16 +496,9 @@ mod tests { } } } - if self.select_fields.is_empty() { - self.select_fields.push(INT_FIELD.to_string()); - } } } - if self.select_fields.is_empty() { - self.select_fields.push(INT_FIELD.to_string()); - } - let from_clause = Some(Datasource::Collection(CollectionSource { database: Some(TEST_DB.to_string()), collection: ALL_TYPES_COLLECTION.to_string(), From aca537574bdd384d0878fe417625831c5d0d31d3 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 16 May 2025 16:13:57 +0000 Subject: [PATCH 30/38] Make arbitrary_identifier function public for use in semantic_fuzz_test.rs Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/pretty_print_fuzz_test.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mongosql/src/ast/pretty_print_fuzz_test.rs b/mongosql/src/ast/pretty_print_fuzz_test.rs index fa77e028f..5ad0ab091 100644 --- a/mongosql/src/ast/pretty_print_fuzz_test.rs +++ b/mongosql/src/ast/pretty_print_fuzz_test.rs @@ -70,7 +70,7 @@ Reparsed AST: } } -mod arbitrary { +pub mod arbitrary { use crate::ast::definitions::*; use 
quickcheck::{Arbitrary, Gen}; use rand::{rng, Rng}; @@ -106,13 +106,13 @@ mod arbitrary { /// Return an arbitrary String without null characters. /// /// These Strings can be used for aliases, identifiers, or literals. - fn arbitrary_string(_: &mut Gen) -> String { + pub fn arbitrary_string(_: &mut Gen) -> String { arbitrary_string_with_max_len(rand_len(1, 20) as usize) } /// Return an arbitrary Option<T>, using the provided Fn to /// construct the value if the chosen variant is Some. - fn arbitrary_optional<T, F>(g: &mut Gen, f: F) -> Option<T> + pub fn arbitrary_optional<T, F>(g: &mut Gen, f: F) -> Option<T> where F: Fn(&mut Gen) -> T, { @@ -127,7 +127,7 @@ mod arbitrary { /// it just uses arbitrary_string, but this allows us to fine tune /// easily if we decide to use different rules for identifiers from /// strings. - fn arbitrary_identifier(g: &mut Gen) -> String { + pub fn arbitrary_identifier(g: &mut Gen) -> String { arbitrary_string(g) } From 520d6daede0814dff81fa80a39532cdf19c23c2e Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 16 May 2025 16:28:43 +0000 Subject: [PATCH 31/38] Update import path for arbitrary_identifier in semantic_fuzz_test.rs Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index ebab21b40..c232f3297 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -736,7 +736,7 @@ mod tests { } } SelectValuesExpression::Expression(expr) => { - let key = crate::ast::pretty_print_fuzz_test::fuzz_test::arbitrary_identifier(&mut quickcheck::Gen::new(0)); + let key = crate::ast::pretty_print_fuzz_test::arbitrary::arbitrary_identifier(&mut quickcheck::Gen::new(0)); doc_exprs.push(DocumentPair { key, value: expr.walk(self), From 7d7cead26b5fd6758c82b476be56deb171ac8246 Mon Sep
17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 16 May 2025 17:42:20 +0000 Subject: [PATCH 32/38] Fix type mismatches and formatting issues in semantic_fuzz_test.rs Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 82 ++++++++++---------------- 1 file changed, 31 insertions(+), 51 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index c232f3297..6fd2b25f9 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -702,22 +702,32 @@ mod tests { } } SelectExpression::Star => {} - _ => { - let processed_expr = expr.walk(self); - non_star_exprs.push(processed_expr); + SelectExpression::Expression(expr) => { + if let OptionallyAliasedExpr::Unaliased(e) = expr { + let processed_expr = self.visit_expression(e); + non_star_exprs.push(SelectExpression::Expression( + OptionallyAliasedExpr::Unaliased(processed_expr), + )); + } else if let OptionallyAliasedExpr::Aliased(aliased) = expr { + let processed_expr = self.visit_expression(aliased.expr); + non_star_exprs.push(SelectExpression::Expression( + OptionallyAliasedExpr::Aliased(AliasedExpr { + expr: processed_expr, + alias: aliased.alias.clone(), + }), + )); + } } } } if (usize::arbitrary(&mut Gen::new(0)) % 10) < 2 { SelectBody::Standard(vec![SelectExpression::Star]) + } else if has_substar { + new_exprs.extend(non_star_exprs); + SelectBody::Standard(new_exprs) } else { - if has_substar { - new_exprs.extend(non_star_exprs); - SelectBody::Standard(new_exprs) - } else { - SelectBody::Standard(non_star_exprs) - } + SelectBody::Standard(non_star_exprs) } } SelectBody::Values(values) => { @@ -739,7 +749,7 @@ mod tests { let key = crate::ast::pretty_print_fuzz_test::arbitrary::arbitrary_identifier(&mut quickcheck::Gen::new(0)); doc_exprs.push(DocumentPair { key, - value: expr.walk(self), + value: self.visit_expression(expr), }); } } 
@@ -782,16 +792,8 @@ mod tests { return; } Expression::Subpath(_) => { - if VALID_SUBPATHS.is_empty() { - if let Some(target_type) = self.target_type { - *node = replace_invalid_expression(target_type); - } else { - *node = make_numeric_expression(); - } - } else { - let idx = usize::arbitrary(&mut Gen::new(0)) % VALID_SUBPATHS.len(); - *node = Expression::Subpath(VALID_SUBPATHS[idx].clone()); - } + let idx = usize::arbitrary(&mut Gen::new(0)) % VALID_SUBPATHS.len(); + *node = Expression::Subpath(VALID_SUBPATHS[idx].clone()); return; } Expression::Identifier(ident) => { @@ -934,36 +936,11 @@ mod tests { } } - fn contains_invalid_select_query(query: &Query) -> bool { - match query { - Query::Select(select) => { - select.from_clause.is_none() - && matches!(select.select_clause.body, SelectBody::Values(_)) - } - Query::Set(set) => { - contains_invalid_select_query(&set.left) - || contains_invalid_select_query(&set.right) - } - Query::With(with) => { - if contains_invalid_select_query(&with.body) { - return true; - } - - for named_query in &with.queries { - if contains_invalid_select_query(&named_query.query) { - return true; - } - } - false - } - } - } - #[test] fn prop_semantic_queries_translate() { fn property(mut query: Query) -> TestResult { - if contains_invalid_select_query(&query) { - return TestResult::discard(); + if matches!(query, Query::With(_)) { + query = Query::Select(SelectQuery::arbitrary(&mut Gen::new(0))); } let mut v = SemanticVisitor { @@ -1004,8 +981,11 @@ mod tests { subpath: "nested_string".to_string(), }, SubpathExpr { - expr: Box::new(Expression::Identifier(NESTED_OBJECT_FIELD.to_string())), - subpath: "nested_object.deeply_nested".to_string(), + expr: Box::new(Expression::Subpath(SubpathExpr { + expr: Box::new(Expression::Identifier(NESTED_OBJECT_FIELD.to_string())), + subpath: "nested_object".to_string(), + })), + subpath: "deeply_nested".to_string(), }, SubpathExpr { expr: Box::new(Expression::Identifier(OBJECT_FIELD.to_string())), @@ 
-1034,8 +1014,8 @@ mod tests { }; fn property(mut query: Query) -> TestResult { - if contains_invalid_select_query(&query) { - return TestResult::discard(); + if matches!(query, Query::With(_)) { + query = Query::Select(SelectQuery::arbitrary(&mut Gen::new(0))); } let mut v = SemanticVisitor { From 6330e5d3828ecf1c94e3d5ceb82286924829c9e1 Mon Sep 17 00:00:00 2001 From: Matthew Chiaravalloti Date: Fri, 16 May 2025 14:19:11 -0400 Subject: [PATCH 33/38] Remove unused catalog info --- mongosql/src/ast/semantic_fuzz_test.rs | 80 -------------------------- 1 file changed, 80 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index 6fd2b25f9..3218fc838 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -15,12 +15,6 @@ mod tests { const TEST_DB: &str = "test_db"; const ALL_TYPES_COLLECTION: &str = "all_types"; - #[allow(dead_code)] - const RELATED_DATA_COLLECTION: &str = "related_data"; - #[allow(dead_code)] - const NUMERIC_COLLECTION: &str = "numeric_data"; - #[allow(dead_code)] - const ARRAY_COLLECTION: &str = "array_data"; const INT_FIELD: &str = "int_field"; // Int32 const LONG_FIELD: &str = "long_field"; // Int64 @@ -1144,80 +1138,6 @@ mod tests { .unwrap(), ); - db_schema.insert( - "related_data".to_string(), - serde_json::from_str( - r#"{ - "bsonType": "object", - "properties": { - "id": { "bsonType": "int" }, - "all_types_id": { "bsonType": "int" }, - "description": { "bsonType": "string" } - }, - "additionalProperties": false - }"#, - ) - .unwrap(), - ); - - db_schema.insert( - "numeric_data".to_string(), - serde_json::from_str( - r#"{ - "bsonType": "object", - "properties": { - "id": { "bsonType": "int" }, - "int_value": { "bsonType": "int" }, - "long_value": { "bsonType": "long" }, - "double_value": { "bsonType": "double" }, - "decimal_value": { "bsonType": "decimal" }, - "calculated_field": { "bsonType": "double" } - }, - "additionalProperties": false - }"#, 
- ) - .unwrap(), - ); - - db_schema.insert( - "array_data".to_string(), - serde_json::from_str( - r#"{ - "bsonType": "object", - "properties": { - "id": { "bsonType": "int" }, - "int_array": { - "bsonType": "array", - "items": { "bsonType": "int" } - }, - "string_array": { - "bsonType": "array", - "items": { "bsonType": "string" } - }, - "object_array": { - "bsonType": "array", - "items": { - "bsonType": "object", - "properties": { - "key": { "bsonType": "string" }, - "value": { "bsonType": "int" } - } - } - }, - "nested_array": { - "bsonType": "array", - "items": { - "bsonType": "array", - "items": { "bsonType": "int" } - } - } - }, - "additionalProperties": false - }"#, - ) - .unwrap(), - ); - catalog_schema.insert("test_db".to_string(), db_schema); build_catalog_from_catalog_schema(catalog_schema).unwrap() }; From 1a7cd3c791c683c0065a66b5c477047470fd5838 Mon Sep 17 00:00:00 2001 From: Matthew Chiaravalloti Date: Fri, 16 May 2025 14:59:06 -0400 Subject: [PATCH 34/38] Clean up names and make_*_expression functions --- mongosql/src/ast/semantic_fuzz_test.rs | 209 +++++++++++++++---------- 1 file changed, 122 insertions(+), 87 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index 3218fc838..d5feee515 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -1,5 +1,6 @@ #[cfg(test)] mod tests { + use crate::ast::pretty_print_fuzz_test::arbitrary::{arbitrary_identifier, arbitrary_string}; use crate::{ ast::{definitions::visitor::Visitor, definitions::*, pretty_print::PrettyPrint}, build_catalog_from_catalog_schema, @@ -20,23 +21,21 @@ mod tests { const LONG_FIELD: &str = "long_field"; // Int64 const DOUBLE_FIELD: &str = "double_field"; // Double const DECIMAL_FIELD: &str = "decimal_field"; // Decimal128 - const NEGATIVE_INT_FIELD: &str = "neg_int_field"; // Int32 (negative) - const ZERO_INT_FIELD: &str = "zero_int_field"; // Int32 (zero) const STRING_FIELD: &str = 
"string_field"; // String - const EMPTY_STRING_FIELD: &str = "empty_string_field"; // String (empty) - const DESCRIPTION_FIELD: &str = "description"; // String const BOOL_FIELD: &str = "bool_field"; // Boolean - const TRUE_FIELD: &str = "true_field"; // Boolean (true) - const FALSE_FIELD: &str = "false_field"; // Boolean (false) const DATE_FIELD: &str = "date_field"; // Date const TIMESTAMP_FIELD: &str = "timestamp_field"; // Timestamp - const TIME_FIELD: &str = "time_field"; // Time const OBJECT_FIELD: &str = "object_field"; // Document + const NESTED_FIELD: &str = "nested_field"; // Nested string field const NESTED_OBJECT_FIELD: &str = "nested_object_field"; // Document with nested fields + const NESTED_INT: &str = "nested_int"; // Nested int + const NESTED_STRING: &str = "nested_string"; // Nested string + const NESTED_OBJECT: &str = "nested_object"; // Nested object + const DEEPLY_NESTED: &str = "deeply_nested"; // Deeply nested int field const ARRAY_FIELD: &str = "array_field"; // Array of Int32 const STRING_ARRAY_FIELD: &str = "string_array_field"; // Array of String const MIXED_ARRAY_FIELD: &str = "mixed_array_field"; // Array of mixed types @@ -44,31 +43,29 @@ mod tests { const NULL_FIELD: &str = "null_field"; // Null const OBJECTID_FIELD: &str = "objectid_field"; // ObjectId const ID_FIELD: &str = "id"; // Int32 (for related_data) - const ALL_TYPES_ID_FIELD: &str = "all_types_id"; // Int32 (foreign key) fn field_type(field_name: &str) -> Type { match field_name { - INT_FIELD | NEGATIVE_INT_FIELD | ZERO_INT_FIELD => Type::Int32, + INT_FIELD | NESTED_INT | DEEPLY_NESTED => Type::Int32, LONG_FIELD => Type::Int64, DOUBLE_FIELD => Type::Double, DECIMAL_FIELD => Type::Decimal128, - STRING_FIELD | EMPTY_STRING_FIELD | DESCRIPTION_FIELD => Type::String, + STRING_FIELD | NESTED_STRING => Type::String, - BOOL_FIELD | TRUE_FIELD | FALSE_FIELD => Type::Boolean, + BOOL_FIELD => Type::Boolean, DATE_FIELD => Type::Date, TIMESTAMP_FIELD => Type::Timestamp, - TIME_FIELD => 
Type::Time, - OBJECT_FIELD | NESTED_OBJECT_FIELD => Type::Document, + OBJECT_FIELD | NESTED_OBJECT_FIELD | NESTED_OBJECT => Type::Document, ARRAY_FIELD | STRING_ARRAY_FIELD | MIXED_ARRAY_FIELD => Type::Array, NULL_FIELD => Type::Null, OBJECTID_FIELD => Type::ObjectId, - ID_FIELD | ALL_TYPES_ID_FIELD => Type::Int32, + ID_FIELD => Type::Int32, - _ => Type::String, + _ => Type::Null, } } @@ -84,7 +81,8 @@ mod tests { Type::Timestamp => TIMESTAMP_FIELD.to_string(), Type::Document => OBJECT_FIELD.to_string(), Type::Array => ARRAY_FIELD.to_string(), - _ => INT_FIELD.to_string(), // Default to int for other types + Type::ObjectId => OBJECTID_FIELD.to_string(), + _ => NULL_FIELD.to_string(), // Default to null for other types } } @@ -96,7 +94,7 @@ mod tests { } Type::String => make_string_expression(), Type::Array => make_array_expression(), - Type::Date | Type::Datetime | Type::Timestamp => make_date_expression(), + Type::Date | Type::Datetime => make_date_expression(), Type::Document => make_object_expression(), _ => Expression::Identifier(INT_FIELD.to_string()), // Default for other types } @@ -104,16 +102,28 @@ mod tests { // Generate a numeric expression (Int32, Int64, Double, Decimal128) fn make_numeric_expression() -> Expression { - match usize::arbitrary(&mut Gen::new(0)) % 9 { + match usize::arbitrary(&mut Gen::new(0)) % 12 { 0 => Expression::Identifier(INT_FIELD.to_string()), 1 => Expression::Identifier(LONG_FIELD.to_string()), 2 => Expression::Identifier(DOUBLE_FIELD.to_string()), 3 => Expression::Identifier(DECIMAL_FIELD.to_string()), - 4 => Expression::Literal(Literal::Integer(42)), - 5 => Expression::Literal(Literal::Integer(-10)), - 6 => Expression::Literal(Literal::Long(1000000)), - 7 => Expression::Literal(Literal::Double(std::f64::consts::PI)), - _ => { + 4 => Expression::Literal(Literal::Integer(i32::arbitrary(&mut Gen::new(0)))), + 5 => Expression::Literal(Literal::Integer(-(u16::arbitrary(&mut Gen::new(0)) as i32))), + 6 => 
Expression::Literal(Literal::Long(i64::arbitrary(&mut Gen::new(0)))), + 7 => Expression::Literal(Literal::Double(f64::arbitrary(&mut Gen::new(0)))), + 8 => { + let arg = make_numeric_expression(); + let op = if bool::arbitrary(&mut Gen::new(0)) { + UnaryOp::Pos + } else { + UnaryOp::Neg + }; + Expression::Unary(UnaryExpr { + op, + expr: Box::new(arg), + }) + } + 9 => { let left = make_numeric_expression(); let right = make_numeric_expression(); let op = match usize::arbitrary(&mut Gen::new(0)) % 4 { @@ -128,17 +138,41 @@ mod tests { right: Box::new(right), }) } + 10 => { + let arg = make_numeric_expression(); + Expression::Function(FunctionExpr { + function: match usize::arbitrary(&mut Gen::new(0)) % 6 { + 0 => FunctionName::Abs, + 1 => FunctionName::Ceil, + 2 => FunctionName::Floor, + 3 => FunctionName::Round, + 4 => FunctionName::Sqrt, + _ => FunctionName::Log10, + }, + args: FunctionArguments::Args(vec![arg]), + set_quantifier: None, + }) + } + _ => Expression::Extract(ExtractExpr { + extract_spec: match usize::arbitrary(&mut Gen::new(0)) % 5 { + 0 => DatePart::Year, + 1 => DatePart::Month, + 2 => DatePart::Week, + 3 => DatePart::Day, + 4 => DatePart::Hour, + _ => DatePart::Minute, + }, + arg: Box::new(Expression::Identifier(DATE_FIELD.to_string())), + }), } } fn make_boolean_expression() -> Expression { - match usize::arbitrary(&mut Gen::new(0)) % 7 { - 0 => Expression::Identifier(BOOL_FIELD.to_string()), - 1 => Expression::Identifier(TRUE_FIELD.to_string()), - 2 => Expression::Identifier(FALSE_FIELD.to_string()), - 3 => Expression::Literal(Literal::Boolean(bool::arbitrary(&mut Gen::new(0)))), - 4 => make_comparison_expression(), - 5 => { + match usize::arbitrary(&mut Gen::new(0)) % 10 { + 0..=4 => Expression::Identifier(BOOL_FIELD.to_string()), + 5 | 6 => Expression::Literal(Literal::Boolean(bool::arbitrary(&mut Gen::new(0)))), + 7 => make_comparison_expression(), + 8 => { let left = make_boolean_expression(); let right = make_boolean_expression(); let op = if 
bool::arbitrary(&mut Gen::new(0)) { @@ -164,9 +198,7 @@ mod tests { fn make_string_expression() -> Expression { match usize::arbitrary(&mut Gen::new(0)) % 5 { - 0 => Expression::Identifier(STRING_FIELD.to_string()), - 1 => Expression::Identifier(EMPTY_STRING_FIELD.to_string()), - 2 => Expression::Identifier(DESCRIPTION_FIELD.to_string()), + 0..=2 => Expression::Identifier(STRING_FIELD.to_string()), 3 => { // String concatenation let left = make_string_expression(); @@ -179,7 +211,7 @@ mod tests { } _ => { // String constructor - simplified to use String directly - Expression::StringConstructor(format!("Hello {}!", STRING_FIELD)) + Expression::StringConstructor(arbitrary_string(&mut Gen::new(0))) } } } @@ -214,35 +246,26 @@ mod tests { fn make_date_expression() -> Expression { match usize::arbitrary(&mut Gen::new(0)) % 5 { - 0 => Expression::Identifier(DATE_FIELD.to_string()), - 1 => Expression::Identifier(TIMESTAMP_FIELD.to_string()), - 2 => Expression::Identifier(TIME_FIELD.to_string()), + 0..=2 => Expression::Identifier(DATE_FIELD.to_string()), 3 => { - // Date function + // DATEADD function Expression::DateFunction(DateFunctionExpr { - function: match usize::arbitrary(&mut Gen::new(0)) % 3 { - 0 => DateFunctionName::Add, - 1 => DateFunctionName::Diff, - _ => DateFunctionName::Trunc, - }, + function: DateFunctionName::Add, date_part: DatePart::Day, args: vec![ - Expression::Identifier(DATE_FIELD.to_string()), Expression::Literal(Literal::Integer(1)), + Expression::Identifier(DATE_FIELD.to_string()), ], }) } - _ => Expression::Extract(ExtractExpr { - extract_spec: match usize::arbitrary(&mut Gen::new(0)) % 5 { - 0 => DatePart::Year, - 1 => DatePart::Month, - 2 => DatePart::Week, - 3 => DatePart::Day, - 4 => DatePart::Hour, - _ => DatePart::Minute, - }, - arg: Box::new(Expression::Identifier(DATE_FIELD.to_string())), - }), + _ => { + // DATETRUNC function + Expression::DateFunction(DateFunctionExpr { + function: DateFunctionName::Trunc, + date_part: DatePart::Year, 
+ args: vec![Expression::Identifier(DATE_FIELD.to_string())], + }) + } } } @@ -295,8 +318,11 @@ mod tests { } fn make_comparison_expression() -> Expression { - let left = make_numeric_expression(); - let right = make_numeric_expression(); + let (left, right) = match usize::arbitrary(&mut Gen::new(0)) % 3 { + 0 => (make_numeric_expression(), make_numeric_expression()), + 1 => (make_string_expression(), make_string_expression()), + _ => (make_boolean_expression(), make_boolean_expression()), + }; let comp_op = match usize::arbitrary(&mut Gen::new(0)) % 6 { 0 => ComparisonOp::Eq, @@ -350,6 +376,8 @@ mod tests { }, Expression::Cast(cast) => cast.to, Expression::Between(_) => Type::Boolean, + // expression_type only returns one type but Case is polymorphic so this is actually + // inaccurate. We tolerate this for now. Expression::Case(case) => case.else_branch.as_ref().map_or_else( || { case.when_branch @@ -360,11 +388,15 @@ mod tests { ), Expression::Function(func) => match func.function { // Aggregation functions - FunctionName::Sum | FunctionName::Avg | FunctionName::Min | FunctionName::Max => { - Type::Double - } + FunctionName::Sum + | FunctionName::Avg + | FunctionName::Min + | FunctionName::Max + | FunctionName::StddevPop + | FunctionName::StddevSamp => Type::Double, FunctionName::Count => Type::Int64, FunctionName::AddToSet | FunctionName::AddToArray => Type::Array, + FunctionName::MergeDocuments => Type::Document, FunctionName::First | FunctionName::Last => Type::String, // Depends on the argument type // String functions @@ -374,10 +406,9 @@ mod tests { FunctionName::Replace => Type::String, // Date functions - FunctionName::DateAdd | FunctionName::DateDiff | FunctionName::DateTrunc => { - Type::Date - } + FunctionName::DateAdd | FunctionName::DateTrunc => Type::Date, FunctionName::CurrentTimestamp => Type::Date, + FunctionName::DateDiff => Type::Int64, FunctionName::Year | FunctionName::Month | FunctionName::Week => Type::Int32, FunctionName::DayOfWeek | 
FunctionName::DayOfMonth | FunctionName::DayOfYear => { Type::Int32 @@ -395,13 +426,22 @@ mod tests { FunctionName::Log | FunctionName::Log10 | FunctionName::Sqrt => Type::Double, FunctionName::Pow => Type::Double, FunctionName::Mod => Type::Int32, + FunctionName::BitLength | FunctionName::OctetLength | FunctionName::CharLength => { + Type::Int32 + } + FunctionName::Degrees + | FunctionName::Radians + | FunctionName::Cos + | FunctionName::Sin + | FunctionName::Tan => Type::Double, + FunctionName::Position => Type::Int32, // Other functions - FunctionName::Coalesce => Type::String, // Depends on arguments - FunctionName::NullIf => Type::String, // Depends on arguments + FunctionName::Coalesce => Type::Null, // Depends on arguments + FunctionName::NullIf => Type::Null, // Depends on arguments FunctionName::Size => Type::Int32, - - _ => Type::String, // Default for other functions + FunctionName::Slice => Type::Array, + FunctionName::Split => Type::String, }, Expression::Array(_) => Type::Array, Expression::Document(_) => Type::Document, @@ -418,7 +458,7 @@ mod tests { Expression::Subquery(_) => Type::Array, Expression::Exists(_) => Type::Boolean, Expression::SubqueryComparison(_) => Type::Boolean, - Expression::Subpath(_) => Type::String, + Expression::Subpath(subpath) => field_type(&subpath.subpath), Expression::Is(_) => Type::Boolean, Expression::Like(_) => Type::Boolean, Expression::StringConstructor(_) => Type::String, @@ -740,7 +780,7 @@ mod tests { } } SelectValuesExpression::Expression(expr) => { - let key = crate::ast::pretty_print_fuzz_test::arbitrary::arbitrary_identifier(&mut quickcheck::Gen::new(0)); + let key = arbitrary_identifier(&mut quickcheck::Gen::new(0)); doc_exprs.push(DocumentPair { key, value: self.visit_expression(expr), @@ -933,6 +973,7 @@ mod tests { #[test] fn prop_semantic_queries_translate() { fn property(mut query: Query) -> TestResult { + // Replace With queries for now if matches!(query, Query::With(_)) { query = 
Query::Select(SelectQuery::arbitrary(&mut Gen::new(0))); } @@ -968,22 +1009,22 @@ mod tests { static ref VALID_SUBPATHS: Vec = vec![ SubpathExpr { expr: Box::new(Expression::Identifier(NESTED_OBJECT_FIELD.to_string())), - subpath: "nested_int".to_string(), + subpath: NESTED_INT.to_string(), }, SubpathExpr { expr: Box::new(Expression::Identifier(NESTED_OBJECT_FIELD.to_string())), - subpath: "nested_string".to_string(), + subpath: NESTED_STRING.to_string(), }, SubpathExpr { expr: Box::new(Expression::Subpath(SubpathExpr { expr: Box::new(Expression::Identifier(NESTED_OBJECT_FIELD.to_string())), - subpath: "nested_object".to_string(), + subpath: NESTED_OBJECT.to_string(), })), - subpath: "deeply_nested".to_string(), + subpath: DEEPLY_NESTED.to_string(), }, SubpathExpr { expr: Box::new(Expression::Identifier(OBJECT_FIELD.to_string())), - subpath: "nested_field".to_string(), + subpath: NESTED_FIELD.to_string(), }, ]; static ref MONGODB_URI: String = format!( @@ -998,16 +1039,8 @@ mod tests { #[test] fn prop_aggregation_pipelines_run() { - // Skip test if MongoDB connection fails - let _client = match get_mongodb_client() { - Some(client) => client, - None => { - println!("Skipping test: MongoDB connection failed"); - return; - } - }; - fn property(mut query: Query) -> TestResult { + // Replace With queries for now if matches!(query, Query::With(_)) { query = Query::Select(SelectQuery::arbitrary(&mut Gen::new(0))); } @@ -1092,13 +1125,9 @@ mod tests { "neg_int_field": { "bsonType": "int" }, "zero_int_field": { "bsonType": "int" }, "string_field": { "bsonType": "string" }, - "empty_string_field": { "bsonType": "string" }, "bool_field": { "bsonType": "bool" }, - "true_field": { "bsonType": "bool" }, - "false_field": { "bsonType": "bool" }, "date_field": { "bsonType": "date" }, "timestamp_field": { "bsonType": "timestamp" }, - "time_field": { "bsonType": "timestamp" }, "object_field": { "bsonType": "object", "properties": { @@ -1128,6 +1157,12 @@ mod tests { }, 
"mixed_array_field": { "bsonType": "array" + "items": { + "anyOf": [ + { "bsonType": "string" }, + { "bsonType": "int" } + ] + } }, "null_field": { "bsonType": "null" }, "objectid_field": { "bsonType": "objectId" } From d51d1c604731f46290d7402db0c1a6fd43c22978 Mon Sep 17 00:00:00 2001 From: Matthew Chiaravalloti Date: Fri, 16 May 2025 16:22:43 -0400 Subject: [PATCH 35/38] Massive rewrite --- mongosql/src/ast/semantic_fuzz_test.rs | 1068 +++++++----------------- 1 file changed, 320 insertions(+), 748 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index d5feee515..98b06dcb9 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -1,8 +1,12 @@ #[cfg(test)] mod tests { - use crate::ast::pretty_print_fuzz_test::arbitrary::{arbitrary_identifier, arbitrary_string}; + use crate::ast::pretty_print_fuzz_test::arbitrary::arbitrary_optional; use crate::{ - ast::{definitions::visitor::Visitor, definitions::*, pretty_print::PrettyPrint}, + ast::{ + definitions::*, + pretty_print::PrettyPrint, + pretty_print_fuzz_test::arbitrary::{arbitrary_identifier, arbitrary_string}, + }, build_catalog_from_catalog_schema, catalog::Catalog, json_schema::Schema as JsonSchema, @@ -42,76 +46,47 @@ mod tests { const NULL_FIELD: &str = "null_field"; // Null const OBJECTID_FIELD: &str = "objectid_field"; // ObjectId - const ID_FIELD: &str = "id"; // Int32 (for related_data) - fn field_type(field_name: &str) -> Type { - match field_name { - INT_FIELD | NESTED_INT | DEEPLY_NESTED => Type::Int32, - LONG_FIELD => Type::Int64, - DOUBLE_FIELD => Type::Double, - DECIMAL_FIELD => Type::Decimal128, - - STRING_FIELD | NESTED_STRING => Type::String, - - BOOL_FIELD => Type::Boolean, - - DATE_FIELD => Type::Date, - TIMESTAMP_FIELD => Type::Timestamp, - - OBJECT_FIELD | NESTED_OBJECT_FIELD | NESTED_OBJECT => Type::Document, - ARRAY_FIELD | STRING_ARRAY_FIELD | MIXED_ARRAY_FIELD => Type::Array, - - NULL_FIELD => 
Type::Null, - OBJECTID_FIELD => Type::ObjectId, - ID_FIELD => Type::Int32, - - _ => Type::Null, - } - } - - fn get_field_for_type(target_type: Type) -> String { - match target_type { - Type::Int32 => INT_FIELD.to_string(), - Type::Int64 => LONG_FIELD.to_string(), - Type::Double => DOUBLE_FIELD.to_string(), - Type::Decimal128 => DECIMAL_FIELD.to_string(), - Type::String => STRING_FIELD.to_string(), - Type::Boolean => BOOL_FIELD.to_string(), - Type::Date => DATE_FIELD.to_string(), - Type::Timestamp => TIMESTAMP_FIELD.to_string(), - Type::Document => OBJECT_FIELD.to_string(), - Type::Array => ARRAY_FIELD.to_string(), - Type::ObjectId => OBJECTID_FIELD.to_string(), - _ => NULL_FIELD.to_string(), // Default to null for other types - } - } - - fn replace_invalid_expression(target_type: Type) -> Expression { - match target_type { - Type::Boolean => make_boolean_expression(), - Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128 => { - make_numeric_expression() - } - Type::String => make_string_expression(), - Type::Array => make_array_expression(), - Type::Date | Type::Datetime => make_date_expression(), - Type::Document => make_object_expression(), - _ => Expression::Identifier(INT_FIELD.to_string()), // Default for other types - } + lazy_static! 
{ + static ref NESTED_INT_SUBPATH: Expression = Expression::Subpath(SubpathExpr { + expr: Box::new(Expression::Identifier(NESTED_OBJECT_FIELD.to_string())), + subpath: NESTED_INT.to_string(), + }); + static ref NESTED_STRING_SUBPATH: Expression = Expression::Subpath(SubpathExpr { + expr: Box::new(Expression::Identifier(NESTED_OBJECT_FIELD.to_string())), + subpath: NESTED_STRING.to_string(), + }); + static ref ALT_NESTED_STRING_SUBPATH: Expression = Expression::Subpath(SubpathExpr { + expr: Box::new(Expression::Identifier(OBJECT_FIELD.to_string())), + subpath: NESTED_FIELD.to_string(), + }); + static ref DEEPLY_NESTED_INT_SUBPATH: Expression = Expression::Subpath(SubpathExpr { + expr: Box::new(Expression::Subpath(SubpathExpr { + expr: Box::new(Expression::Identifier(NESTED_OBJECT_FIELD.to_string())), + subpath: NESTED_OBJECT.to_string(), + })), + subpath: DEEPLY_NESTED.to_string(), + }); + static ref MONGODB_URI: String = format!( + "mongodb://localhost:{}", + std::env::var("MDB_TEST_LOCAL_PORT").unwrap_or_else(|_| "27017".to_string()) + ); } // Generate a numeric expression (Int32, Int64, Double, Decimal128) fn make_numeric_expression() -> Expression { - match usize::arbitrary(&mut Gen::new(0)) % 12 { + match usize::arbitrary(&mut Gen::new(0)) % 17 { 0 => Expression::Identifier(INT_FIELD.to_string()), 1 => Expression::Identifier(LONG_FIELD.to_string()), 2 => Expression::Identifier(DOUBLE_FIELD.to_string()), 3 => Expression::Identifier(DECIMAL_FIELD.to_string()), - 4 => Expression::Literal(Literal::Integer(i32::arbitrary(&mut Gen::new(0)))), - 5 => Expression::Literal(Literal::Integer(-(u16::arbitrary(&mut Gen::new(0)) as i32))), - 6 => Expression::Literal(Literal::Long(i64::arbitrary(&mut Gen::new(0)))), - 7 => Expression::Literal(Literal::Double(f64::arbitrary(&mut Gen::new(0)))), - 8 => { + 4 => NESTED_INT_SUBPATH.clone(), + 5 => DEEPLY_NESTED_INT_SUBPATH.clone(), + 6 => Expression::Literal(Literal::Integer(i32::arbitrary(&mut Gen::new(0)))), + 7 => 
Expression::Literal(Literal::Integer(-(u16::arbitrary(&mut Gen::new(0)) as i32))), + 8 => Expression::Literal(Literal::Long(i64::arbitrary(&mut Gen::new(0)))), + 9 => Expression::Literal(Literal::Double(f64::arbitrary(&mut Gen::new(0)))), + 10 => { let arg = make_numeric_expression(); let op = if bool::arbitrary(&mut Gen::new(0)) { UnaryOp::Pos @@ -123,7 +98,7 @@ mod tests { expr: Box::new(arg), }) } - 9 => { + 11 => { let left = make_numeric_expression(); let right = make_numeric_expression(); let op = match usize::arbitrary(&mut Gen::new(0)) % 4 { @@ -138,7 +113,7 @@ mod tests { right: Box::new(right), }) } - 10 => { + 12 => { let arg = make_numeric_expression(); Expression::Function(FunctionExpr { function: match usize::arbitrary(&mut Gen::new(0)) % 6 { @@ -153,7 +128,19 @@ mod tests { set_quantifier: None, }) } - _ => Expression::Extract(ExtractExpr { + 13 => { + let arg = make_string_expression(); + Expression::Function(FunctionExpr { + function: match usize::arbitrary(&mut Gen::new(0)) % 3 { + 0 => FunctionName::BitLength, + 1 => FunctionName::OctetLength, + _ => FunctionName::CharLength, + }, + args: FunctionArguments::Args(vec![arg]), + set_quantifier: None, + }) + } + 14 => Expression::Extract(ExtractExpr { extract_spec: match usize::arbitrary(&mut Gen::new(0)) % 5 { 0 => DatePart::Year, 1 => DatePart::Month, @@ -164,6 +151,67 @@ mod tests { }, arg: Box::new(Expression::Identifier(DATE_FIELD.to_string())), }), + 15 => { + let arg_name = match usize::arbitrary(&mut Gen::new(0)) % 15 { + 0 => INT_FIELD, + 1 => LONG_FIELD, + 2 => DOUBLE_FIELD, + 3 => DECIMAL_FIELD, + 4 => STRING_FIELD, + 5 => BOOL_FIELD, + 6 => DATE_FIELD, + 7 => TIMESTAMP_FIELD, + 8 => OBJECT_FIELD, + 9 => NESTED_OBJECT_FIELD, + 10 => ARRAY_FIELD, + 11 => STRING_ARRAY_FIELD, + 12 => MIXED_ARRAY_FIELD, + 13 => NULL_FIELD, + 14 => OBJECTID_FIELD, + _ => "star", + }; + + let args = if arg_name == "star" { + FunctionArguments::Star + } else { + 
FunctionArguments::Args(vec![Expression::Identifier(arg_name.to_string())]) + }; + + Expression::Function(FunctionExpr { + function: FunctionName::Count, + args, + set_quantifier: if bool::arbitrary(&mut Gen::new(0)) { + Some(SetQuantifier::Distinct) + } else { + None + }, + }) + } + _ => { + let arg_name = match usize::arbitrary(&mut Gen::new(0)) % 4 { + 0 => INT_FIELD, + 1 => LONG_FIELD, + 2 => DOUBLE_FIELD, + _ => DECIMAL_FIELD, + }; + let arg = Expression::Identifier(arg_name.to_string()); + Expression::Function(FunctionExpr { + function: match usize::arbitrary(&mut Gen::new(0)) % 6 { + 0 => FunctionName::Avg, + 1 => FunctionName::Min, + 2 => FunctionName::Max, + 3 => FunctionName::StddevPop, + 4 => FunctionName::StddevSamp, + _ => FunctionName::Sum, + }, + args: FunctionArguments::Args(vec![arg]), + set_quantifier: if bool::arbitrary(&mut Gen::new(0)) { + Some(SetQuantifier::Distinct) + } else { + None + }, + }) + } } } @@ -196,9 +244,34 @@ mod tests { } } + fn make_comparison_expression() -> Expression { + let (left, right) = match usize::arbitrary(&mut Gen::new(0)) % 3 { + 0 => (make_numeric_expression(), make_numeric_expression()), + 1 => (make_string_expression(), make_string_expression()), + _ => (make_boolean_expression(), make_boolean_expression()), + }; + + let comp_op = match usize::arbitrary(&mut Gen::new(0)) % 6 { + 0 => ComparisonOp::Eq, + 1 => ComparisonOp::Neq, + 2 => ComparisonOp::Lt, + 3 => ComparisonOp::Lte, + 4 => ComparisonOp::Gt, + _ => ComparisonOp::Gte, + }; + + Expression::Binary(BinaryExpr { + left: Box::new(left), + op: BinaryOp::Comparison(comp_op), + right: Box::new(right), + }) + } + fn make_string_expression() -> Expression { - match usize::arbitrary(&mut Gen::new(0)) % 5 { - 0..=2 => Expression::Identifier(STRING_FIELD.to_string()), + match usize::arbitrary(&mut Gen::new(0)) % 8 { + 0 => Expression::Identifier(STRING_FIELD.to_string()), + 1 => NESTED_STRING_SUBPATH.clone(), + 2 => ALT_NESTED_STRING_SUBPATH.clone(), 3 => { // 
String concatenation let left = make_string_expression(); @@ -209,6 +282,29 @@ mod tests { right: Box::new(right), }) } + 4 => Expression::Function(FunctionExpr { + function: if bool::arbitrary(&mut Gen::new(0)) { + FunctionName::Lower + } else { + FunctionName::Upper + }, + args: FunctionArguments::Args(vec![make_string_expression()]), + set_quantifier: None, + }), + 5 => Expression::Trim(TrimExpr { + trim_spec: TrimSpec::arbitrary(&mut Gen::new(0)), + trim_chars: Box::new(Expression::StringConstructor(" ".to_string())), + arg: Box::new(make_string_expression()), + }), + 6 => Expression::Function(FunctionExpr { + function: FunctionName::Substring, + args: FunctionArguments::Args(vec![ + make_string_expression(), + make_numeric_expression(), + make_numeric_expression(), + ]), + set_quantifier: None, + }), _ => { // String constructor - simplified to use String directly Expression::StringConstructor(arbitrary_string(&mut Gen::new(0))) @@ -217,7 +313,7 @@ mod tests { } fn make_array_expression() -> Expression { - match usize::arbitrary(&mut Gen::new(0)) % 5 { + match usize::arbitrary(&mut Gen::new(0)) % 6 { 0 => Expression::Identifier(ARRAY_FIELD.to_string()), 1 => Expression::Identifier(STRING_ARRAY_FIELD.to_string()), 2 => Expression::Identifier(MIXED_ARRAY_FIELD.to_string()), @@ -229,7 +325,7 @@ mod tests { } Expression::Array(elements) } - _ => { + 4 => { let mut elements = Vec::new(); let size = (usize::arbitrary(&mut Gen::new(0)) % 4) + 2; // 2-5 elements for i in 0..size { @@ -241,6 +337,27 @@ mod tests { } Expression::Array(elements) } + _ => { + let arg_name = match usize::arbitrary(&mut Gen::new(0)) % 4 { + 0 => INT_FIELD, + 1 => LONG_FIELD, + 2 => DOUBLE_FIELD, + _ => DECIMAL_FIELD, + }; + let arg = Expression::Identifier(arg_name.to_string()); + Expression::Function(FunctionExpr { + function: match usize::arbitrary(&mut Gen::new(0)) % 2 { + 0 => FunctionName::AddToArray, + _ => FunctionName::AddToSet, + }, + args: FunctionArguments::Args(vec![arg]), + 
set_quantifier: if bool::arbitrary(&mut Gen::new(0)) { + Some(SetQuantifier::Distinct) + } else { + None + }, + }) + } } } @@ -270,7 +387,7 @@ mod tests { } fn make_object_expression() -> Expression { - match usize::arbitrary(&mut Gen::new(0)) % 4 { + match usize::arbitrary(&mut Gen::new(0)) % 5 { 0 => Expression::Identifier(OBJECT_FIELD.to_string()), 1 => Expression::Identifier(NESTED_OBJECT_FIELD.to_string()), 2 => { @@ -290,7 +407,7 @@ mod tests { ]; Expression::Document(fields) } - _ => { + 3 => { let nested_fields = vec![ DocumentPair { key: "nested_id".to_string(), @@ -314,677 +431,171 @@ mod tests { ]; Expression::Document(fields) } - } - } - - fn make_comparison_expression() -> Expression { - let (left, right) = match usize::arbitrary(&mut Gen::new(0)) % 3 { - 0 => (make_numeric_expression(), make_numeric_expression()), - 1 => (make_string_expression(), make_string_expression()), - _ => (make_boolean_expression(), make_boolean_expression()), - }; - - let comp_op = match usize::arbitrary(&mut Gen::new(0)) % 6 { - 0 => ComparisonOp::Eq, - 1 => ComparisonOp::Neq, - 2 => ComparisonOp::Lt, - 3 => ComparisonOp::Lte, - 4 => ComparisonOp::Gt, - _ => ComparisonOp::Gte, - }; - - Expression::Binary(BinaryExpr { - left: Box::new(left), - op: BinaryOp::Comparison(comp_op), - right: Box::new(right), - }) - } - - fn expression_type(expr: &Expression) -> Type { - match expr { - Expression::Identifier(name) => field_type(name), - Expression::Literal(lit) => match lit { - Literal::Integer(_) => Type::Int32, - Literal::Long(_) => Type::Int64, - Literal::Double(_) => Type::Double, - Literal::Boolean(_) => Type::Boolean, - Literal::Null => Type::Null, - }, - Expression::Binary(binary) => match binary.op { - BinaryOp::Add | BinaryOp::Sub | BinaryOp::Mul | BinaryOp::Div => { - let left_type = expression_type(&binary.left); - let right_type = expression_type(&binary.right); - - if left_type == Type::Decimal128 || right_type == Type::Decimal128 { - Type::Decimal128 - } else if 
left_type == Type::Double || right_type == Type::Double { - Type::Double - } else if left_type == Type::Int64 || right_type == Type::Int64 { - Type::Int64 + _ => { + let arg_name = match usize::arbitrary(&mut Gen::new(0)) % 2 { + 0 => OBJECT_FIELD, + _ => NESTED_OBJECT_FIELD, + }; + let arg = Expression::Identifier(arg_name.to_string()); + Expression::Function(FunctionExpr { + function: FunctionName::MergeDocuments, + args: FunctionArguments::Args(vec![arg]), + set_quantifier: if bool::arbitrary(&mut Gen::new(0)) { + Some(SetQuantifier::Distinct) } else { - Type::Int32 - } - } - BinaryOp::And | BinaryOp::Or => Type::Boolean, - BinaryOp::Comparison(_) => Type::Boolean, - BinaryOp::In | BinaryOp::NotIn => Type::Boolean, - BinaryOp::Concat => Type::String, - }, - Expression::Unary(unary) => match unary.op { - UnaryOp::Not => Type::Boolean, - UnaryOp::Neg | UnaryOp::Pos => expression_type(&unary.expr), - }, - Expression::Cast(cast) => cast.to, - Expression::Between(_) => Type::Boolean, - // expression_type only returns one type but Case is polymorphic so this is actually - // inaccurate. We tolerate this for now. 
- Expression::Case(case) => case.else_branch.as_ref().map_or_else( - || { - case.when_branch - .first() - .map_or(Type::Null, |wb| expression_type(&wb.then)) - }, - |else_expr| expression_type(else_expr), - ), - Expression::Function(func) => match func.function { - // Aggregation functions - FunctionName::Sum - | FunctionName::Avg - | FunctionName::Min - | FunctionName::Max - | FunctionName::StddevPop - | FunctionName::StddevSamp => Type::Double, - FunctionName::Count => Type::Int64, - FunctionName::AddToSet | FunctionName::AddToArray => Type::Array, - FunctionName::MergeDocuments => Type::Document, - FunctionName::First | FunctionName::Last => Type::String, // Depends on the argument type - - // String functions - FunctionName::Substring => Type::String, - FunctionName::Lower | FunctionName::Upper => Type::String, - FunctionName::LTrim | FunctionName::RTrim => Type::String, - FunctionName::Replace => Type::String, - - // Date functions - FunctionName::DateAdd | FunctionName::DateTrunc => Type::Date, - FunctionName::CurrentTimestamp => Type::Date, - FunctionName::DateDiff => Type::Int64, - FunctionName::Year | FunctionName::Month | FunctionName::Week => Type::Int32, - FunctionName::DayOfWeek | FunctionName::DayOfMonth | FunctionName::DayOfYear => { - Type::Int32 - } - FunctionName::Hour - | FunctionName::Minute - | FunctionName::Second - | FunctionName::Millisecond => Type::Int32, - - // Numeric functions - FunctionName::Abs - | FunctionName::Ceil - | FunctionName::Floor - | FunctionName::Round => Type::Double, - FunctionName::Log | FunctionName::Log10 | FunctionName::Sqrt => Type::Double, - FunctionName::Pow => Type::Double, - FunctionName::Mod => Type::Int32, - FunctionName::BitLength | FunctionName::OctetLength | FunctionName::CharLength => { - Type::Int32 - } - FunctionName::Degrees - | FunctionName::Radians - | FunctionName::Cos - | FunctionName::Sin - | FunctionName::Tan => Type::Double, - FunctionName::Position => Type::Int32, - - // Other functions - 
FunctionName::Coalesce => Type::Null, // Depends on arguments - FunctionName::NullIf => Type::Null, // Depends on arguments - FunctionName::Size => Type::Int32, - FunctionName::Slice => Type::Array, - FunctionName::Split => Type::String, - }, - Expression::Array(_) => Type::Array, - Expression::Document(_) => Type::Document, - Expression::Access(access) => { - let parent_type = expression_type(&access.expr); - if parent_type == Type::Document { - Type::String // Field access from a document, assuming String for simplicity - } else if parent_type == Type::Array { - Type::Int32 // Array access assumes numeric index - } else { - Type::String // Default case - } + None + }, + }) } - Expression::Subquery(_) => Type::Array, - Expression::Exists(_) => Type::Boolean, - Expression::SubqueryComparison(_) => Type::Boolean, - Expression::Subpath(subpath) => field_type(&subpath.subpath), - Expression::Is(_) => Type::Boolean, - Expression::Like(_) => Type::Boolean, - Expression::StringConstructor(_) => Type::String, - Expression::Tuple(_) => Type::Array, - Expression::TypeAssertion(type_assertion) => type_assertion.target_type, - Expression::Trim(_) => Type::String, - Expression::DateFunction(_) => Type::Date, - Expression::Extract(_) => Type::Int32, - } - } - - fn are_types_compatible(type1: Type, type2: Type) -> bool { - if type1 == type2 { - return true; } - - let is_type1_numeric = matches!( - type1, - Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128 - ); - let is_type2_numeric = matches!( - type2, - Type::Int32 | Type::Int64 | Type::Double | Type::Decimal128 - ); - - if is_type1_numeric && is_type2_numeric { - return true; - } - - false - } - - struct SemanticVisitor { - target_type: Option, - select_fields: Vec, } - impl SemanticVisitor { - fn visit_select_query(&mut self, node: SelectQuery) -> SelectQuery { - self.select_fields.clear(); - - let select_clause = self.visit_select_clause(node.select_clause); - - match &select_clause.body { - 
SelectBody::Standard(exprs) => { - for expr in exprs { - match expr { - SelectExpression::Expression(OptionallyAliasedExpr::Aliased( - aliased, - )) => { - self.select_fields.push(aliased.alias.clone()); - } - SelectExpression::Expression(OptionallyAliasedExpr::Unaliased( - Expression::Identifier(ident), - )) => { - self.select_fields.push(ident.clone()); - } - _ => {} - } - } - } - SelectBody::Values(values) => { - for value in values { - if let SelectValuesExpression::Expression(Expression::Document(doc_pairs)) = - &value - { - for pair in doc_pairs { - self.select_fields.push(pair.key.clone()); - } - } - } - } - } + fn generate_arbitrary_semantically_valid_query() -> Query { + let (select_clause, select_fields) = generate_arbitrary_semantically_valid_select_clause(); - let from_clause = Some(Datasource::Collection(CollectionSource { - database: Some(TEST_DB.to_string()), - collection: ALL_TYPES_COLLECTION.to_string(), - alias: None, - })); + let from_clause = Some(Datasource::Collection(CollectionSource { + database: Some(TEST_DB.to_string()), + collection: ALL_TYPES_COLLECTION.to_string(), + alias: None, + })); - let old_target_type = self.target_type; - self.target_type = Some(Type::Boolean); - let where_clause = node.where_clause.map(|wc| self.visit_expression(wc)); - self.target_type = old_target_type; + let where_clause = weighted_arbitrary_optional(make_boolean_expression); - let group_by_clause = node.group_by_clause.map(|gbc| { - let keys = gbc - .keys - .into_iter() - .map(|key| match key { - OptionallyAliasedExpr::Unaliased(_) => { - let field_name = if let Some(target_type) = self.target_type { - get_field_for_type(target_type) - } else { - INT_FIELD.to_string() - }; - OptionallyAliasedExpr::Unaliased(Expression::Identifier(field_name)) - } - OptionallyAliasedExpr::Aliased(aliased) => { - let field_name = if let Some(target_type) = self.target_type { - get_field_for_type(target_type) - } else { - INT_FIELD.to_string() - }; - 
OptionallyAliasedExpr::Aliased(AliasedExpr { - expr: Expression::Identifier(field_name), - alias: aliased.alias, - }) - } - }) - .collect(); + // let group_by_clause = + // weighted_arbitrary_optional(generate_arbitrary_semantically_valid_group_by_clause); - GroupByClause { - keys, - aggregations: Vec::new(), - } - }); + let having_clause = weighted_arbitrary_optional(make_boolean_expression); - let old_target_type = self.target_type; - self.target_type = Some(Type::Boolean); - let having_clause = node.having_clause.map(|hc| self.visit_expression(hc)); - self.target_type = old_target_type; + let order_by_clause = weighted_arbitrary_optional(|| { + generate_arbitrary_semantically_valid_order_by_clause(select_fields.clone()) + }); - let order_by_clause = node.order_by_clause.map(|obc| obc.walk(self)); + let limit = arbitrary_optional(&mut Gen::new(0), |g| 1 + u32::arbitrary(g) % 10); + let offset = arbitrary_optional(&mut Gen::new(0), |g| 1 + u32::arbitrary(g) % 10); - let limit = node.limit.map(|_| 10); - let offset = node.offset.map(|_| 0); + Query::Select(SelectQuery { + select_clause, + from_clause, + where_clause, + group_by_clause: None, + having_clause, + order_by_clause, + limit, + offset, + }) + } - SelectQuery { - select_clause, - from_clause, - where_clause, - group_by_clause, - having_clause, - order_by_clause, - limit, - offset, - } - } + fn generate_arbitrary_semantically_valid_select_clause() -> (SelectClause, Vec) { + let set_quantifier = SetQuantifier::arbitrary(&mut Gen::new(0)); - fn determine_child_target_type(&self, node: &Expression) -> Option { - match node { - Expression::Binary(binary) => match binary.op { - BinaryOp::Add | BinaryOp::Sub | BinaryOp::Mul | BinaryOp::Div => { - Some(Type::Double) - } - BinaryOp::And | BinaryOp::Or => Some(Type::Boolean), - BinaryOp::Comparison(_) => None, - BinaryOp::In | BinaryOp::NotIn => None, - BinaryOp::Concat => Some(Type::String), - }, - Expression::Unary(unary) => match unary.op { - UnaryOp::Not => 
Some(Type::Boolean), - UnaryOp::Neg | UnaryOp::Pos => Some(Type::Double), + // 20% of the time, return a simple SELECT * + if usize::arbitrary(&mut Gen::new(0)) % 10 < 2 { + return ( + SelectClause { + set_quantifier, + body: SelectBody::Standard(vec![SelectExpression::Star]), }, - Expression::Function(func) => { - match func.function { - // Aggregation functions - FunctionName::Sum - | FunctionName::Avg - | FunctionName::Min - | FunctionName::Max => Some(Type::Double), - FunctionName::Count => None, // Count can take any type - FunctionName::AddToSet | FunctionName::AddToArray => None, // Can add any type to arrays - - // String functions - FunctionName::Substring | FunctionName::Lower | FunctionName::Upper => { - Some(Type::String) - } - FunctionName::LTrim | FunctionName::RTrim => Some(Type::String), - FunctionName::Replace => Some(Type::String), + vec![], + ); + } - // Date functions - FunctionName::DateAdd - | FunctionName::DateDiff - | FunctionName::DateTrunc => Some(Type::Date), - FunctionName::CurrentTimestamp => Some(Type::Date), + let num_exprs = 1 + usize::arbitrary(&mut Gen::new(0)) % 10; - // Numeric functions - FunctionName::Abs - | FunctionName::Ceil - | FunctionName::Floor - | FunctionName::Round => Some(Type::Double), - FunctionName::Log | FunctionName::Log10 | FunctionName::Sqrt => { - Some(Type::Double) - } - FunctionName::Pow => Some(Type::Double), + let mut select_exprs = vec![]; + let mut select_fields = vec![]; - // Other functions - FunctionName::Coalesce | FunctionName::NullIf => None, - FunctionName::Size => None, + for _ in 0..num_exprs { + let expr = match usize::arbitrary(&mut Gen::new(0)) % 6 { + 0 => make_numeric_expression(), + 1 => make_boolean_expression(), + 2 => make_string_expression(), + 3 => make_array_expression(), + 4 => make_date_expression(), + _ => make_object_expression(), + }; - _ => None, // Default for other functions - } + let optionally_aliased_expr = if bool::arbitrary(&mut Gen::new(0)) { + if let 
Expression::Identifier(i) = expr.clone() { + select_fields.push(i); } - Expression::Case(_case) => Some(Type::Boolean), - Expression::Between(_) => None, - Expression::Is(_) | Expression::Like(_) | Expression::Exists(_) => None, - Expression::Array(_) => None, - Expression::Document(_) => None, - Expression::Access(_) => None, - Expression::Subquery(_) => None, - Expression::SubqueryComparison(_) => None, - Expression::Subpath(_) => None, - Expression::StringConstructor(_) => None, - Expression::TypeAssertion(_) => None, - Expression::Trim(_) => None, - Expression::DateFunction(_) => None, - Expression::Extract(_) => None, - Expression::Identifier(_) => None, - Expression::Literal(_) => None, - Expression::Tuple(_) => None, - Expression::Cast(_) => None, - } - } - } + OptionallyAliasedExpr::Unaliased(expr) + } else { + let alias = arbitrary_identifier(&mut Gen::new(0)); + select_fields.push(alias.clone()); + OptionallyAliasedExpr::Aliased(AliasedExpr { expr, alias }) + }; - impl visitor::Visitor for SemanticVisitor { - fn visit_query(&mut self, node: Query) -> Query { - match node { - Query::Select(select_query) => Query::Select(self.visit_select_query(select_query)), - Query::Set(set_query) => { - let old_target_type = self.target_type; - self.target_type = None; // Clear target_type when walking set operations - let walked = Query::Set(set_query.walk(self)); - self.target_type = old_target_type; - walked - } - Query::With(with_query) => { - let old_target_type = self.target_type; - self.target_type = None; // Clear target_type when walking with queries - let walked = Query::With(with_query.walk(self)); - self.target_type = old_target_type; - walked - } - } + select_exprs.push(SelectExpression::Expression(optionally_aliased_expr)); } - fn visit_expression(&mut self, node: Expression) -> Expression { - let mut expr = node.clone(); - self.visit_expression_custom(&mut expr); - expr - } + ( + SelectClause { + set_quantifier, + body: SelectBody::Standard(select_exprs), 
+ }, + select_fields, + ) + } - fn visit_sort_key(&mut self, node: SortKey) -> SortKey { - if self.select_fields.is_empty() { - return SortKey::Simple(Expression::Identifier(INT_FIELD.to_string())); - } + #[allow(dead_code)] + fn generate_arbitrary_semantically_valid_group_by_clause() -> GroupByClause { + // Skipping this for now since GROUP BY changes what fields are available in the SELECT + // clause, which is a bit too complicated for this skunkworks project. + todo!() + } - let idx = usize::arbitrary(&mut Gen::new(0)) % self.select_fields.len(); + fn generate_arbitrary_semantically_valid_order_by_clause( + select_fields: Vec, + ) -> OrderByClause { + let num_sort_specs = ((1 + usize::arbitrary(&mut Gen::new(0)) % select_fields.len()) as f64 + / 2f64) + .ceil() as i32; + + let mut sort_specs = vec![]; + for _ in 0..num_sort_specs { + let idx = usize::arbitrary(&mut Gen::new(0)) % select_fields.len(); + let key = if bool::arbitrary(&mut Gen::new(0)) { + SortKey::Positional(idx as u32) + } else { + SortKey::Simple(Expression::Identifier(select_fields[idx].clone())) + }; + let direction = SortDirection::arbitrary(&mut Gen::new(0)); - match node { - SortKey::Positional(_) => SortKey::Positional(idx as u32 + 1), - SortKey::Simple(_) => { - SortKey::Simple(Expression::Identifier(self.select_fields[idx].clone())) - } - } + sort_specs.push(SortSpec { key, direction }) } - fn visit_select_clause(&mut self, node: SelectClause) -> SelectClause { - let body = match node.body { - SelectBody::Standard(exprs) => { - let mut has_substar = false; - let mut new_exprs = Vec::new(); - let mut non_star_exprs = Vec::new(); - - for expr in exprs { - match expr { - SelectExpression::Substar(_) => { - if !has_substar { - has_substar = true; - new_exprs.push(SelectExpression::Substar(SubstarExpr { - datasource: ALL_TYPES_COLLECTION.to_string(), - })); - } - } - SelectExpression::Star => {} - SelectExpression::Expression(expr) => { - if let OptionallyAliasedExpr::Unaliased(e) = expr { - 
let processed_expr = self.visit_expression(e); - non_star_exprs.push(SelectExpression::Expression( - OptionallyAliasedExpr::Unaliased(processed_expr), - )); - } else if let OptionallyAliasedExpr::Aliased(aliased) = expr { - let processed_expr = self.visit_expression(aliased.expr); - non_star_exprs.push(SelectExpression::Expression( - OptionallyAliasedExpr::Aliased(AliasedExpr { - expr: processed_expr, - alias: aliased.alias.clone(), - }), - )); - } - } - } - } - - if (usize::arbitrary(&mut Gen::new(0)) % 10) < 2 { - SelectBody::Standard(vec![SelectExpression::Star]) - } else if has_substar { - new_exprs.extend(non_star_exprs); - SelectBody::Standard(new_exprs) - } else { - SelectBody::Standard(non_star_exprs) - } - } - SelectBody::Values(values) => { - let mut has_substar = false; - let mut new_values = Vec::new(); - let mut doc_exprs = Vec::new(); - - for value in values { - match value { - SelectValuesExpression::Substar(_) => { - if !has_substar { - has_substar = true; - new_values.push(SelectValuesExpression::Substar(SubstarExpr { - datasource: ALL_TYPES_COLLECTION.to_string(), - })); - } - } - SelectValuesExpression::Expression(expr) => { - let key = arbitrary_identifier(&mut quickcheck::Gen::new(0)); - doc_exprs.push(DocumentPair { - key, - value: self.visit_expression(expr), - }); - } - } - } - - if !doc_exprs.is_empty() { - new_values.push(SelectValuesExpression::Expression(Expression::Document( - doc_exprs, - ))); - } - - SelectBody::Values(new_values) - } - }; + OrderByClause { sort_specs } + } - SelectClause { - set_quantifier: node.set_quantifier, - body, - } + /// Return an arbitrary Option, using the provided Fn to + /// construct the value if the chosen variant is Some. This + /// function is weighted to return Some 3 out of 4 times. 
+ pub fn weighted_arbitrary_optional(f: F) -> Option + where + F: Fn() -> T, + { + match usize::arbitrary(&mut Gen::new(0)) % 4 { + 0 => None, + _ => Some(f()), } } - impl SemanticVisitor { - fn visit_expression_custom(&mut self, node: &mut Expression) { - match node { - Expression::Tuple(_) => { - if let Some(target_type) = self.target_type { - *node = replace_invalid_expression(target_type); - } else { - *node = make_numeric_expression(); - } - return; - } - Expression::Binary(bin) if matches!(bin.op, BinaryOp::In | BinaryOp::NotIn) => { - if let Some(target_type) = self.target_type { - *node = replace_invalid_expression(target_type); - } else { - *node = make_boolean_expression(); - } - return; - } - Expression::Subpath(_) => { - let idx = usize::arbitrary(&mut Gen::new(0)) % VALID_SUBPATHS.len(); - *node = Expression::Subpath(VALID_SUBPATHS[idx].clone()); - return; - } - Expression::Identifier(ident) => { - if let Some(target_type) = self.target_type { - *ident = get_field_for_type(target_type); - } - return; - } - // Handle aggregate functions - Expression::Function(func) => { - // Check if this is an aggregate function - let is_aggregate = matches!( - func.function, - FunctionName::AddToArray - | FunctionName::AddToSet - | FunctionName::Avg - | FunctionName::Count - | FunctionName::First - | FunctionName::Last - | FunctionName::Max - | FunctionName::MergeDocuments - | FunctionName::Min - | FunctionName::StddevPop - | FunctionName::StddevSamp - | FunctionName::Sum - ); - - if is_aggregate { - // Determine appropriate field type for the function - let field_type = match func.function { - FunctionName::Sum - | FunctionName::Avg - | FunctionName::Min - | FunctionName::Max => Type::Double, - FunctionName::Count => Type::Int32, - FunctionName::AddToArray | FunctionName::AddToSet => Type::Array, - FunctionName::First - | FunctionName::Last - | FunctionName::MergeDocuments => Type::String, - FunctionName::StddevPop | FunctionName::StddevSamp => Type::Double, - _ => 
Type::Int32, - }; - - func.args = FunctionArguments::Args(vec![Expression::Identifier( - get_field_for_type(field_type), - )]); - return; - } - } - _ => {} - } - - if let Some(target_type) = self.target_type { - let node_type = expression_type(node); - - if node_type != target_type && !are_types_compatible(node_type, target_type) { - *node = replace_invalid_expression(target_type); - return; - } - } - - let child_target_type = self.determine_child_target_type(node); - let old_target_type = self.target_type; - self.target_type = child_target_type; - - match node { - Expression::Binary(bin) => { - self.visit_expression_custom(&mut bin.left); - self.visit_expression_custom(&mut bin.right); - } - Expression::Unary(un) => { - self.visit_expression_custom(&mut un.expr); - } - Expression::Function(func) => { - if let FunctionArguments::Args(args) = &mut func.args { - for arg in args { - self.visit_expression_custom(arg); - } - } - } - Expression::Case(case) => { - for branch in &mut case.when_branch { - self.visit_expression_custom(&mut branch.when); - self.visit_expression_custom(&mut branch.then); - } - if let Some(else_branch) = &mut case.else_branch { - self.visit_expression_custom(else_branch); - } - } - Expression::Array(array) => { - for elem in array { - self.visit_expression_custom(elem); - } - } - Expression::Document(doc) => { - for pair in doc { - self.visit_expression_custom(&mut pair.value); - } - } - Expression::Access(access) => { - self.visit_expression_custom(&mut access.expr); - } - Expression::Subquery(_) => {} - Expression::Exists(_) => {} - Expression::SubqueryComparison(_) => {} - Expression::Subpath(subpath) => { - self.visit_expression_custom(&mut subpath.expr); - } - Expression::Is(is_expr) => { - self.visit_expression_custom(&mut is_expr.expr); - } - Expression::Like(like) => { - self.visit_expression_custom(&mut like.expr); - self.visit_expression_custom(&mut like.pattern); - } - Expression::StringConstructor(_) => {} - 
Expression::TypeAssertion(type_assertion) => { - self.visit_expression_custom(&mut type_assertion.expr); - } - Expression::Between(between) => { - self.visit_expression_custom(&mut between.arg); - self.visit_expression_custom(&mut between.min); - self.visit_expression_custom(&mut between.max); - } - Expression::Trim(trim) => { - self.visit_expression_custom(&mut trim.arg); - } - Expression::DateFunction(_) => {} - Expression::Extract(extract) => { - self.visit_expression_custom(&mut extract.arg); - } - Expression::Identifier(_) | Expression::Literal(_) => {} - Expression::Cast(cast) => { - self.visit_expression_custom(&mut cast.expr); - } - Expression::Tuple(_) => {} - } + #[derive(PartialEq, Debug, Clone)] + struct SemanticallyValidQuery { + query: Query, + } - self.target_type = old_target_type; + impl Arbitrary for SemanticallyValidQuery { + fn arbitrary(_g: &mut Gen) -> Self { + let query = generate_arbitrary_semantically_valid_query(); + SemanticallyValidQuery { query } } } #[test] fn prop_semantic_queries_translate() { - fn property(mut query: Query) -> TestResult { - // Replace With queries for now - if matches!(query, Query::With(_)) { - query = Query::Select(SelectQuery::arbitrary(&mut Gen::new(0))); - } - - let mut v = SemanticVisitor { - target_type: None, - select_fields: Vec::new(), - }; - query = v.visit_query(query); - - let sql = match query.pretty_print() { + fn property(query: SemanticallyValidQuery) -> TestResult { + let sql = match query.query.pretty_print() { Err(_) => return TestResult::discard(), Ok(sql) => sql, }; @@ -1002,35 +613,7 @@ mod tests { quickcheck::QuickCheck::new() .gen(Gen::new(0)) - .quickcheck(property as fn(Query) -> TestResult); - } - - lazy_static! 
{ - static ref VALID_SUBPATHS: Vec = vec![ - SubpathExpr { - expr: Box::new(Expression::Identifier(NESTED_OBJECT_FIELD.to_string())), - subpath: NESTED_INT.to_string(), - }, - SubpathExpr { - expr: Box::new(Expression::Identifier(NESTED_OBJECT_FIELD.to_string())), - subpath: NESTED_STRING.to_string(), - }, - SubpathExpr { - expr: Box::new(Expression::Subpath(SubpathExpr { - expr: Box::new(Expression::Identifier(NESTED_OBJECT_FIELD.to_string())), - subpath: NESTED_OBJECT.to_string(), - })), - subpath: DEEPLY_NESTED.to_string(), - }, - SubpathExpr { - expr: Box::new(Expression::Identifier(OBJECT_FIELD.to_string())), - subpath: NESTED_FIELD.to_string(), - }, - ]; - static ref MONGODB_URI: String = format!( - "mongodb://localhost:{}", - std::env::var("MDB_TEST_LOCAL_PORT").unwrap_or_else(|_| "27017".to_string()) - ); + .quickcheck(property as fn(SemanticallyValidQuery) -> TestResult); } fn get_mongodb_client() -> Option { @@ -1039,24 +622,13 @@ mod tests { #[test] fn prop_aggregation_pipelines_run() { - fn property(mut query: Query) -> TestResult { - // Replace With queries for now - if matches!(query, Query::With(_)) { - query = Query::Select(SelectQuery::arbitrary(&mut Gen::new(0))); - } - - let mut v = SemanticVisitor { - target_type: None, - select_fields: Vec::new(), - }; - query = v.visit_query(query); - + fn property(query: SemanticallyValidQuery) -> TestResult { let client = match get_mongodb_client() { Some(client) => client, None => return TestResult::discard(), // Skip if no MongoDB connection }; - let sql = match query.pretty_print() { + let sql = match query.query.pretty_print() { Err(_) => return TestResult::discard(), Ok(sql) => sql, }; @@ -1103,7 +675,7 @@ mod tests { quickcheck::QuickCheck::new() .gen(Gen::new(0)) - .quickcheck(property as fn(Query) -> TestResult); + .quickcheck(property as fn(SemanticallyValidQuery) -> TestResult); } lazy_static! 
{ From aa29b51e35db38552f88b48ea3a336e5555839fe Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 16 May 2025 20:44:17 +0000 Subject: [PATCH 36/38] Add explanatory comments to functions in semantic_fuzz_test.rs Co-Authored-By: matthew.chiaravalloti@mongodb.com --- mongosql/src/ast/semantic_fuzz_test.rs | 190 ++++++++++++++++++++++++- 1 file changed, 189 insertions(+), 1 deletion(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index 98b06dcb9..704d76f3e 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -73,7 +73,6 @@ mod tests { ); } - // Generate a numeric expression (Int32, Int64, Double, Decimal128) fn make_numeric_expression() -> Expression { match usize::arbitrary(&mut Gen::new(0)) % 17 { 0 => Expression::Identifier(INT_FIELD.to_string()), @@ -386,6 +385,7 @@ mod tests { } } + /// - Nested documents with metadata containing nested fields fn make_object_expression() -> Expression { match usize::arbitrary(&mut Gen::new(0)) % 5 { 0 => Expression::Identifier(OBJECT_FIELD.to_string()), @@ -450,6 +450,7 @@ mod tests { } } + /// - A FROM clause targeting the all_types collection fn generate_arbitrary_semantically_valid_query() -> Query { let (select_clause, select_fields) = generate_arbitrary_semantically_valid_select_clause(); @@ -485,6 +486,7 @@ mod tests { }) } + /// Returns both the SELECT clause and a vector of field names that can be referenced fn generate_arbitrary_semantically_valid_select_clause() -> (SelectClause, Vec) { let set_quantifier = SetQuantifier::arbitrary(&mut Gen::new(0)); @@ -586,14 +588,106 @@ mod tests { } impl Arbitrary for SemanticallyValidQuery { + /// Implements the Arbitrary trait for SemanticallyValidQuery. + /// + /// This function allows QuickCheck to generate arbitrary semantically valid + /// queries for property testing. 
It delegates to generate_arbitrary_semantically_valid_query + /// to create queries that are guaranteed to be both syntactically and semantically valid. + /// + /// @param _g - QuickCheck generator (unused as we use a fixed seed) + /// @return A SemanticallyValidQuery instance containing a valid query + /// This function allows QuickCheck to generate arbitrary semantically valid + /// queries for property testing. It delegates to generate_arbitrary_semantically_valid_query + /// to create queries that are guaranteed to be both syntactically and semantically valid. + /// + /// @param _g - QuickCheck generator (unused as we use a fixed seed) + /// @return A SemanticallyValidQuery instance containing a valid query fn arbitrary(_g: &mut Gen) -> Self { let query = generate_arbitrary_semantically_valid_query(); SemanticallyValidQuery { query } } } + /// Tests that semantically valid queries can be successfully translated to MongoDB pipelines. + /// + /// This QuickCheck property test verifies that: + /// 1. Arbitrary semantically valid queries can be pretty-printed to SQL strings + /// 2. The SQL strings can be successfully translated back to MongoDB pipelines + /// + /// This test ensures the first property from the requirements: semantically valid + /// queries "compile" via the translate_sql function without errors. + /// Tests that generated aggregation pipelines can be executed against MongoDB without errors. + /// + /// This QuickCheck property test verifies that: + /// 1. Arbitrary semantically valid queries can be pretty-printed to SQL strings + /// 2. The SQL strings can be successfully translated to MongoDB pipelines + /// 3. The resulting pipelines can be executed against a MongoDB instance without errors + /// + /// This test ensures the second property from the requirements: the aggregation + /// pipelines from the translations run against mongod without error. + /// + /// The test is skipped if MongoDB is unavailable or if translation fails. 
+ /// Tests that semantically valid queries can be successfully translated to MongoDB pipelines. + /// + /// This QuickCheck property test verifies that: + /// 1. Arbitrary semantically valid queries can be pretty-printed to SQL strings + /// 2. The SQL strings can be successfully translated back to MongoDB pipelines + /// + /// This test ensures the first property from the requirements: semantically valid + /// queries "compile" via the translate_sql function without errors. + /// Tests that generated aggregation pipelines can be executed against MongoDB without errors. + /// + /// This QuickCheck property test verifies that: + /// 1. Arbitrary semantically valid queries can be pretty-printed to SQL strings + /// 2. The SQL strings can be successfully translated to MongoDB pipelines + /// 3. The resulting pipelines can be executed against a MongoDB instance without errors + /// + /// This test ensures the second property from the requirements: the aggregation + /// pipelines from the translations run against mongod without error. + /// + /// The test is skipped if MongoDB is unavailable or if translation fails. #[test] fn prop_semantic_queries_translate() { + /// Inner property function for QuickCheck testing. + /// + /// This function: + /// 1. Pretty-prints the query to SQL + /// 2. Translates the SQL to a MongoDB pipeline + /// 3. Verifies the translation succeeds + /// + /// @param query - A semantically valid query to test + /// @return TestResult - Success if translation succeeds, discarded otherwise + /// Inner property function for QuickCheck testing. + /// + /// This function: + /// 1. Connects to MongoDB (skips test if unavailable) + /// 2. Pretty-prints the query to SQL + /// 3. Translates the SQL to a MongoDB pipeline + /// 4. Executes the pipeline against MongoDB + /// 5. 
Verifies execution succeeds + /// + /// @param query - A semantically valid query to test + /// @return TestResult - Success if pipeline executes without error, discarded otherwise + /// Inner property function for QuickCheck testing. + /// + /// This function: + /// 1. Pretty-prints the query to SQL + /// 2. Translates the SQL to a MongoDB pipeline + /// 3. Verifies the translation succeeds + /// + /// @param query - A semantically valid query to test + /// @return TestResult - Success if translation succeeds, discarded otherwise + /// Inner property function for QuickCheck testing. + /// + /// This function: + /// 1. Connects to MongoDB (skips test if unavailable) + /// 2. Pretty-prints the query to SQL + /// 3. Translates the SQL to a MongoDB pipeline + /// 4. Executes the pipeline against MongoDB + /// 5. Verifies execution succeeds + /// + /// @param query - A semantically valid query to test + /// @return TestResult - Success if pipeline executes without error, discarded otherwise fn property(query: SemanticallyValidQuery) -> TestResult { let sql = match query.query.pretty_print() { Err(_) => return TestResult::discard(), @@ -616,12 +710,106 @@ mod tests { .quickcheck(property as fn(SemanticallyValidQuery) -> TestResult); } + /// Creates a MongoDB client connection using the configured URI. + /// + /// This helper function attempts to establish a connection to a MongoDB instance + /// using the MONGODB_URI defined in the lazy_static block. It returns None if + /// the connection cannot be established, allowing tests to gracefully skip + /// when MongoDB is unavailable. + /// + /// @return Option - MongoDB client if connection succeeds, None otherwise + /// Creates a MongoDB client connection using the configured URI. + /// + /// This helper function attempts to establish a connection to a MongoDB instance + /// using the MONGODB_URI defined in the lazy_static block. 
It returns None if + /// the connection cannot be established, allowing tests to gracefully skip + /// when MongoDB is unavailable. + /// + /// @return Option - MongoDB client if connection succeeds, None otherwise fn get_mongodb_client() -> Option { Client::with_uri_str(&*MONGODB_URI).ok() } + /// Tests that semantically valid queries can be successfully translated to MongoDB pipelines. + /// + /// This QuickCheck property test verifies that: + /// 1. Arbitrary semantically valid queries can be pretty-printed to SQL strings + /// 2. The SQL strings can be successfully translated back to MongoDB pipelines + /// + /// This test ensures the first property from the requirements: semantically valid + /// queries "compile" via the translate_sql function without errors. + /// Tests that generated aggregation pipelines can be executed against MongoDB without errors. + /// + /// This QuickCheck property test verifies that: + /// 1. Arbitrary semantically valid queries can be pretty-printed to SQL strings + /// 2. The SQL strings can be successfully translated to MongoDB pipelines + /// 3. The resulting pipelines can be executed against a MongoDB instance without errors + /// + /// This test ensures the second property from the requirements: the aggregation + /// pipelines from the translations run against mongod without error. + /// + /// The test is skipped if MongoDB is unavailable or if translation fails. + /// Tests that semantically valid queries can be successfully translated to MongoDB pipelines. + /// + /// This QuickCheck property test verifies that: + /// 1. Arbitrary semantically valid queries can be pretty-printed to SQL strings + /// 2. The SQL strings can be successfully translated back to MongoDB pipelines + /// + /// This test ensures the first property from the requirements: semantically valid + /// queries "compile" via the translate_sql function without errors. 
+ /// Tests that generated aggregation pipelines can be executed against MongoDB without errors. + /// + /// This QuickCheck property test verifies that: + /// 1. Arbitrary semantically valid queries can be pretty-printed to SQL strings + /// 2. The SQL strings can be successfully translated to MongoDB pipelines + /// 3. The resulting pipelines can be executed against a MongoDB instance without errors + /// + /// This test ensures the second property from the requirements: the aggregation + /// pipelines from the translations run against mongod without error. + /// + /// The test is skipped if MongoDB is unavailable or if translation fails. #[test] fn prop_aggregation_pipelines_run() { + /// Inner property function for QuickCheck testing. + /// + /// This function: + /// 1. Pretty-prints the query to SQL + /// 2. Translates the SQL to a MongoDB pipeline + /// 3. Verifies the translation succeeds + /// + /// @param query - A semantically valid query to test + /// @return TestResult - Success if translation succeeds, discarded otherwise + /// Inner property function for QuickCheck testing. + /// + /// This function: + /// 1. Connects to MongoDB (skips test if unavailable) + /// 2. Pretty-prints the query to SQL + /// 3. Translates the SQL to a MongoDB pipeline + /// 4. Executes the pipeline against MongoDB + /// 5. Verifies execution succeeds + /// + /// @param query - A semantically valid query to test + /// @return TestResult - Success if pipeline executes without error, discarded otherwise + /// Inner property function for QuickCheck testing. + /// + /// This function: + /// 1. Pretty-prints the query to SQL + /// 2. Translates the SQL to a MongoDB pipeline + /// 3. Verifies the translation succeeds + /// + /// @param query - A semantically valid query to test + /// @return TestResult - Success if translation succeeds, discarded otherwise + /// Inner property function for QuickCheck testing. + /// + /// This function: + /// 1. 
Connects to MongoDB (skips test if unavailable) + /// 2. Pretty-prints the query to SQL + /// 3. Translates the SQL to a MongoDB pipeline + /// 4. Executes the pipeline against MongoDB + /// 5. Verifies execution succeeds + /// + /// @param query - A semantically valid query to test + /// @return TestResult - Success if pipeline executes without error, discarded otherwise fn property(query: SemanticallyValidQuery) -> TestResult { let client = match get_mongodb_client() { Some(client) => client, From 7baa5ebfc898c91b7f57e3c635235e8563ac0d78 Mon Sep 17 00:00:00 2001 From: Matthew Chiaravalloti Date: Fri, 16 May 2025 16:57:45 -0400 Subject: [PATCH 37/38] Fix Devin's bad comments --- mongosql/src/ast/semantic_fuzz_test.rs | 226 ++++--------------------- 1 file changed, 37 insertions(+), 189 deletions(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index 704d76f3e..707abf932 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -73,6 +73,7 @@ mod tests { ); } + /// Create an arbitrary numeric expression, guaranteed to be internally semantically valid. fn make_numeric_expression() -> Expression { match usize::arbitrary(&mut Gen::new(0)) % 17 { 0 => Expression::Identifier(INT_FIELD.to_string()), @@ -214,6 +215,7 @@ mod tests { } } + /// Create an arbitrary boolean expression, guaranteed to be internally semantically valid. fn make_boolean_expression() -> Expression { match usize::arbitrary(&mut Gen::new(0)) % 10 { 0..=4 => Expression::Identifier(BOOL_FIELD.to_string()), @@ -243,6 +245,7 @@ mod tests { } } + /// Create an arbitrary comparison expression, guaranteed to be internally semantically valid. 
fn make_comparison_expression() -> Expression { let (left, right) = match usize::arbitrary(&mut Gen::new(0)) % 3 { 0 => (make_numeric_expression(), make_numeric_expression()), @@ -266,6 +269,7 @@ mod tests { }) } + /// Create an arbitrary string expression, guaranteed to be internally semantically valid. fn make_string_expression() -> Expression { match usize::arbitrary(&mut Gen::new(0)) % 8 { 0 => Expression::Identifier(STRING_FIELD.to_string()), @@ -311,6 +315,7 @@ mod tests { } } + /// Create an arbitrary array expression, guaranteed to be internally semantically valid. fn make_array_expression() -> Expression { match usize::arbitrary(&mut Gen::new(0)) % 6 { 0 => Expression::Identifier(ARRAY_FIELD.to_string()), @@ -360,6 +365,7 @@ mod tests { } } + /// Create an arbitrary date expression, guaranteed to be internally semantically valid. fn make_date_expression() -> Expression { match usize::arbitrary(&mut Gen::new(0)) % 5 { 0..=2 => Expression::Identifier(DATE_FIELD.to_string()), @@ -385,7 +391,7 @@ mod tests { } } - /// - Nested documents with metadata containing nested fields + /// Create an arbitrary object expression, guaranteed to be internally semantically valid. fn make_object_expression() -> Expression { match usize::arbitrary(&mut Gen::new(0)) % 5 { 0 => Expression::Identifier(OBJECT_FIELD.to_string()), @@ -450,7 +456,11 @@ mod tests { } } - /// - A FROM clause targeting the all_types collection + /// Generate a semantically valid query. For now, we always create standard-SELECT style queries + /// that select from the ALL_TYPES_COLLECTION datasource. We never produce Set queries or With + /// queries, and we never generate subquery expressions of any kind. This implementation is just + /// an early draft of how we could produce semantically valid queries by relying just on AST + /// primitives. 
fn generate_arbitrary_semantically_valid_query() -> Query { let (select_clause, select_fields) = generate_arbitrary_semantically_valid_select_clause(); @@ -486,7 +496,10 @@ mod tests { }) } - /// Returns both the SELECT clause and a vector of field names that can be referenced + /// Generates a SELECT clause and a vector of field names that can be referenced from that + /// SELECT clause. 20% of the time, this returns SELECT *; otherwise it returns a standard + /// SELECT clause that contains between 1 and 10 arbitrary expressions. If it returns a + /// SELECT *, the vector is empty. fn generate_arbitrary_semantically_valid_select_clause() -> (SelectClause, Vec) { let set_quantifier = SetQuantifier::arbitrary(&mut Gen::new(0)); @@ -546,9 +559,25 @@ mod tests { todo!() } + /// Generates an ORDER BY clause that only references fields (by name or by position) that are + /// valid in the SELECT list. fn generate_arbitrary_semantically_valid_order_by_clause( select_fields: Vec, ) -> OrderByClause { + if select_fields.is_empty() { + let field = match usize::arbitrary(&mut Gen::new(0)) % 3 { + 0 => INT_FIELD, + 1 => STRING_FIELD, + _ => OBJECTID_FIELD, + }; + return OrderByClause { + sort_specs: vec![SortSpec { + key: SortKey::Simple(Expression::Identifier(field.to_string())), + direction: SortDirection::arbitrary(&mut Gen::new(0)), + }], + }; + } + let num_sort_specs = ((1 + usize::arbitrary(&mut Gen::new(0)) % select_fields.len()) as f64 / 2f64) .ceil() as i32; @@ -588,106 +617,15 @@ mod tests { } impl Arbitrary for SemanticallyValidQuery { - /// Implements the Arbitrary trait for SemanticallyValidQuery. - /// - /// This function allows QuickCheck to generate arbitrary semantically valid - /// queries for property testing. It delegates to generate_arbitrary_semantically_valid_query - /// to create queries that are guaranteed to be both syntactically and semantically valid. 
- /// - /// @param _g - QuickCheck generator (unused as we use a fixed seed) - /// @return A SemanticallyValidQuery instance containing a valid query - /// This function allows QuickCheck to generate arbitrary semantically valid - /// queries for property testing. It delegates to generate_arbitrary_semantically_valid_query - /// to create queries that are guaranteed to be both syntactically and semantically valid. - /// - /// @param _g - QuickCheck generator (unused as we use a fixed seed) - /// @return A SemanticallyValidQuery instance containing a valid query fn arbitrary(_g: &mut Gen) -> Self { let query = generate_arbitrary_semantically_valid_query(); SemanticallyValidQuery { query } } } - /// Tests that semantically valid queries can be successfully translated to MongoDB pipelines. - /// - /// This QuickCheck property test verifies that: - /// 1. Arbitrary semantically valid queries can be pretty-printed to SQL strings - /// 2. The SQL strings can be successfully translated back to MongoDB pipelines - /// - /// This test ensures the first property from the requirements: semantically valid - /// queries "compile" via the translate_sql function without errors. - /// Tests that generated aggregation pipelines can be executed against MongoDB without errors. - /// - /// This QuickCheck property test verifies that: - /// 1. Arbitrary semantically valid queries can be pretty-printed to SQL strings - /// 2. The SQL strings can be successfully translated to MongoDB pipelines - /// 3. The resulting pipelines can be executed against a MongoDB instance without errors - /// - /// This test ensures the second property from the requirements: the aggregation - /// pipelines from the translations run against mongod without error. - /// - /// The test is skipped if MongoDB is unavailable or if translation fails. - /// Tests that semantically valid queries can be successfully translated to MongoDB pipelines. - /// - /// This QuickCheck property test verifies that: - /// 1. 
Arbitrary semantically valid queries can be pretty-printed to SQL strings - /// 2. The SQL strings can be successfully translated back to MongoDB pipelines - /// - /// This test ensures the first property from the requirements: semantically valid - /// queries "compile" via the translate_sql function without errors. - /// Tests that generated aggregation pipelines can be executed against MongoDB without errors. - /// - /// This QuickCheck property test verifies that: - /// 1. Arbitrary semantically valid queries can be pretty-printed to SQL strings - /// 2. The SQL strings can be successfully translated to MongoDB pipelines - /// 3. The resulting pipelines can be executed against a MongoDB instance without errors - /// - /// This test ensures the second property from the requirements: the aggregation - /// pipelines from the translations run against mongod without error. - /// - /// The test is skipped if MongoDB is unavailable or if translation fails. + /// Fuzz test that asserts the property that semantically valid MongoSQL queries should compile. #[test] fn prop_semantic_queries_translate() { - /// Inner property function for QuickCheck testing. - /// - /// This function: - /// 1. Pretty-prints the query to SQL - /// 2. Translates the SQL to a MongoDB pipeline - /// 3. Verifies the translation succeeds - /// - /// @param query - A semantically valid query to test - /// @return TestResult - Success if translation succeeds, discarded otherwise - /// Inner property function for QuickCheck testing. - /// - /// This function: - /// 1. Connects to MongoDB (skips test if unavailable) - /// 2. Pretty-prints the query to SQL - /// 3. Translates the SQL to a MongoDB pipeline - /// 4. Executes the pipeline against MongoDB - /// 5. Verifies execution succeeds - /// - /// @param query - A semantically valid query to test - /// @return TestResult - Success if pipeline executes without error, discarded otherwise - /// Inner property function for QuickCheck testing. 
- /// - /// This function: - /// 1. Pretty-prints the query to SQL - /// 2. Translates the SQL to a MongoDB pipeline - /// 3. Verifies the translation succeeds - /// - /// @param query - A semantically valid query to test - /// @return TestResult - Success if translation succeeds, discarded otherwise - /// Inner property function for QuickCheck testing. - /// - /// This function: - /// 1. Connects to MongoDB (skips test if unavailable) - /// 2. Pretty-prints the query to SQL - /// 3. Translates the SQL to a MongoDB pipeline - /// 4. Executes the pipeline against MongoDB - /// 5. Verifies execution succeeds - /// - /// @param query - A semantically valid query to test - /// @return TestResult - Success if pipeline executes without error, discarded otherwise fn property(query: SemanticallyValidQuery) -> TestResult { let sql = match query.query.pretty_print() { Err(_) => return TestResult::discard(), @@ -710,106 +648,16 @@ mod tests { .quickcheck(property as fn(SemanticallyValidQuery) -> TestResult); } - /// Creates a MongoDB client connection using the configured URI. - /// - /// This helper function attempts to establish a connection to a MongoDB instance - /// using the MONGODB_URI defined in the lazy_static block. It returns None if - /// the connection cannot be established, allowing tests to gracefully skip - /// when MongoDB is unavailable. - /// - /// @return Option - MongoDB client if connection succeeds, None otherwise - /// Creates a MongoDB client connection using the configured URI. - /// - /// This helper function attempts to establish a connection to a MongoDB instance - /// using the MONGODB_URI defined in the lazy_static block. It returns None if - /// the connection cannot be established, allowing tests to gracefully skip - /// when MongoDB is unavailable. 
- /// - /// @return Option - MongoDB client if connection succeeds, None otherwise fn get_mongodb_client() -> Option { Client::with_uri_str(&*MONGODB_URI).ok() } - /// Tests that semantically valid queries can be successfully translated to MongoDB pipelines. - /// - /// This QuickCheck property test verifies that: - /// 1. Arbitrary semantically valid queries can be pretty-printed to SQL strings - /// 2. The SQL strings can be successfully translated back to MongoDB pipelines - /// - /// This test ensures the first property from the requirements: semantically valid - /// queries "compile" via the translate_sql function without errors. - /// Tests that generated aggregation pipelines can be executed against MongoDB without errors. - /// - /// This QuickCheck property test verifies that: - /// 1. Arbitrary semantically valid queries can be pretty-printed to SQL strings - /// 2. The SQL strings can be successfully translated to MongoDB pipelines - /// 3. The resulting pipelines can be executed against a MongoDB instance without errors - /// - /// This test ensures the second property from the requirements: the aggregation - /// pipelines from the translations run against mongod without error. - /// - /// The test is skipped if MongoDB is unavailable or if translation fails. - /// Tests that semantically valid queries can be successfully translated to MongoDB pipelines. - /// - /// This QuickCheck property test verifies that: - /// 1. Arbitrary semantically valid queries can be pretty-printed to SQL strings - /// 2. The SQL strings can be successfully translated back to MongoDB pipelines - /// - /// This test ensures the first property from the requirements: semantically valid - /// queries "compile" via the translate_sql function without errors. - /// Tests that generated aggregation pipelines can be executed against MongoDB without errors. - /// - /// This QuickCheck property test verifies that: - /// 1. 
Arbitrary semantically valid queries can be pretty-printed to SQL strings - /// 2. The SQL strings can be successfully translated to MongoDB pipelines - /// 3. The resulting pipelines can be executed against a MongoDB instance without errors - /// - /// This test ensures the second property from the requirements: the aggregation - /// pipelines from the translations run against mongod without error. - /// - /// The test is skipped if MongoDB is unavailable or if translation fails. + /// Fuzz test that asserts the property that semantically valid MongoSQL queries should compile + /// and the translations should run successfully against mongod. We do not assert anything + /// about result set schemas or result sets, just that compilation succeeds and translations + /// execute without causing errors in mongod. #[test] fn prop_aggregation_pipelines_run() { - /// Inner property function for QuickCheck testing. - /// - /// This function: - /// 1. Pretty-prints the query to SQL - /// 2. Translates the SQL to a MongoDB pipeline - /// 3. Verifies the translation succeeds - /// - /// @param query - A semantically valid query to test - /// @return TestResult - Success if translation succeeds, discarded otherwise - /// Inner property function for QuickCheck testing. - /// - /// This function: - /// 1. Connects to MongoDB (skips test if unavailable) - /// 2. Pretty-prints the query to SQL - /// 3. Translates the SQL to a MongoDB pipeline - /// 4. Executes the pipeline against MongoDB - /// 5. Verifies execution succeeds - /// - /// @param query - A semantically valid query to test - /// @return TestResult - Success if pipeline executes without error, discarded otherwise - /// Inner property function for QuickCheck testing. - /// - /// This function: - /// 1. Pretty-prints the query to SQL - /// 2. Translates the SQL to a MongoDB pipeline - /// 3. 
Verifies the translation succeeds - /// - /// @param query - A semantically valid query to test - /// @return TestResult - Success if translation succeeds, discarded otherwise - /// Inner property function for QuickCheck testing. - /// - /// This function: - /// 1. Connects to MongoDB (skips test if unavailable) - /// 2. Pretty-prints the query to SQL - /// 3. Translates the SQL to a MongoDB pipeline - /// 4. Executes the pipeline against MongoDB - /// 5. Verifies execution succeeds - /// - /// @param query - A semantically valid query to test - /// @return TestResult - Success if pipeline executes without error, discarded otherwise fn property(query: SemanticallyValidQuery) -> TestResult { let client = match get_mongodb_client() { Some(client) => client, From 572c3ad431b4d929b05793a6c82827c5e08fc587 Mon Sep 17 00:00:00 2001 From: Matthew Chiaravalloti Date: Fri, 16 May 2025 17:00:48 -0400 Subject: [PATCH 38/38] Fix catalog mistake --- mongosql/src/ast/semantic_fuzz_test.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mongosql/src/ast/semantic_fuzz_test.rs b/mongosql/src/ast/semantic_fuzz_test.rs index 707abf932..1d9625adf 100644 --- a/mongosql/src/ast/semantic_fuzz_test.rs +++ b/mongosql/src/ast/semantic_fuzz_test.rs @@ -764,7 +764,7 @@ mod tests { "items": { "bsonType": "string" } }, "mixed_array_field": { - "bsonType": "array" + "bsonType": "array", "items": { "anyOf": [ { "bsonType": "string" },