Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 109 additions & 0 deletions rust/crates/cloudsearch-api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -628,12 +628,35 @@ fn parse_query(value: &Value) -> Result<SearchQuery, ApiError> {
}

fn parse_term_query(value: &Value) -> Result<TermQuery, ApiError> {
use cloudsearch_common::Fuzziness;

let object = value.as_object().ok_or_else(|| {
ApiError(CloudSearchError::InvalidSearchRequest(
"term query must be a JSON object".to_string(),
))
})?;

// Extract optional fuzziness before consuming the object
let fuzziness = object
.get("fuzziness")
.map(|fv| -> Result<Fuzziness, ApiError> {
match fv {
Value::String(s) if s.eq_ignore_ascii_case("auto") => Ok(Fuzziness::Auto),
Value::Number(n) if n.is_u64() => {
let n = usize::try_from(n.as_u64().unwrap()).map_err(|_| {
ApiError(CloudSearchError::InvalidSearchRequest(
"fuzziness value is too large".to_string(),
))
})?;
Ok(Fuzziness::Exact(n))
}
_ => Err(ApiError(CloudSearchError::InvalidSearchRequest(
"fuzziness must be 'auto' or a non-negative integer".to_string(),
))),
}
})
.transpose()?;

if object.contains_key("field") || object.contains_key("value") {
let field = object.get("field").and_then(Value::as_str).ok_or_else(|| {
ApiError(CloudSearchError::InvalidSearchRequest(
Expand All @@ -649,6 +672,7 @@ fn parse_term_query(value: &Value) -> Result<TermQuery, ApiError> {
return Ok(TermQuery {
field: field.to_string(),
value,
fuzziness,
});
}

Expand All @@ -662,6 +686,7 @@ fn parse_term_query(value: &Value) -> Result<TermQuery, ApiError> {
Ok(TermQuery {
field: field.clone(),
value: raw_value.clone(),
fuzziness,
})
}

Expand Down Expand Up @@ -1982,6 +2007,7 @@ mod tests {
query: Some(SearchQuery::Term(TermQuery {
field: "service".to_string(),
value: serde_json::json!("billing"),
fuzziness: None,
})),
..Default::default()
})
Expand Down Expand Up @@ -2027,6 +2053,7 @@ mod tests {
query: Some(SearchQuery::Term(TermQuery {
field: "service".to_string(),
value: serde_json::json!("billing"),
fuzziness: None,
})),
..Default::default()
})
Expand Down Expand Up @@ -2060,6 +2087,7 @@ mod tests {
filter: vec![SearchQuery::Term(TermQuery {
field: "level".to_string(),
value: serde_json::json!("info"),
fuzziness: None,
})],
..Default::default()
})),
Expand Down Expand Up @@ -4083,6 +4111,7 @@ mod tests {
query: Some(SearchQuery::Term(TermQuery {
field: "level".to_string(),
value: serde_json::json!("info"),
fuzziness: None,
})),
aggs: Some(std::collections::BTreeMap::from([
(
Expand Down Expand Up @@ -4447,4 +4476,84 @@ mod tests {
let metrics_str = String::from_utf8(metrics_body.to_vec()).expect("metrics to string");
assert!(metrics_str.contains("cloudsearch_merge_total"));
}

// A lowercase "auto" fuzziness string maps to `Fuzziness::Auto`.
#[test]
fn parse_term_query_with_fuzziness_auto() {
    use cloudsearch_common::Fuzziness;

    let request = serde_json::json!({ "field": "name", "value": "admin", "fuzziness": "auto" });
    let term = parse_term_query(&request).expect("should parse");

    assert_eq!(term.fuzziness, Some(Fuzziness::Auto));
}

// The "auto" keyword is matched case-insensitively, so "AUTO" is accepted too.
#[test]
fn parse_term_query_with_fuzziness_auto_uppercase() {
    use cloudsearch_common::Fuzziness;

    let request = serde_json::json!({ "field": "name", "value": "admin", "fuzziness": "AUTO" });
    let term = parse_term_query(&request).expect("should parse");

    assert_eq!(term.fuzziness, Some(Fuzziness::Auto));
}

// A non-negative integer fuzziness becomes an explicit edit distance.
#[test]
fn parse_term_query_with_fuzziness_exact_integer() {
    use cloudsearch_common::Fuzziness;

    let request = serde_json::json!({ "field": "name", "value": "admin", "fuzziness": 2 });
    let term = parse_term_query(&request).expect("should parse");

    assert_eq!(term.fuzziness, Some(Fuzziness::Exact(2)));
}

// Zero is a valid explicit distance and must not be collapsed into `None`.
#[test]
fn parse_term_query_with_fuzziness_zero() {
    use cloudsearch_common::Fuzziness;

    let request = serde_json::json!({ "field": "name", "value": "admin", "fuzziness": 0 });
    let term = parse_term_query(&request).expect("should parse");

    assert_eq!(term.fuzziness, Some(Fuzziness::Exact(0)));
}

// Without a "fuzziness" key the parsed term carries no fuzziness at all.
#[test]
fn parse_term_query_with_fuzziness_missing() {
    let request = serde_json::json!({ "field": "name", "value": "admin" });
    let term = parse_term_query(&request).expect("should parse");

    assert_eq!(term.fuzziness, None);
}

// A boolean is neither "auto" nor an integer, so parsing has to fail.
#[test]
fn parse_term_query_with_fuzziness_wrong_type_rejected() {
    let request = serde_json::json!({ "field": "name", "value": "admin", "fuzziness": true });
    let outcome = parse_term_query(&request);

    assert!(outcome.is_err(), "fuzziness: true should be rejected");
}

// Any string other than (case-insensitive) "auto" is rejected.
#[test]
fn parse_term_query_with_fuzziness_unknown_string_rejected() {
    let request = serde_json::json!({ "field": "name", "value": "admin", "fuzziness": "unknown" });
    let outcome = parse_term_query(&request);

    assert!(outcome.is_err(), "fuzziness: unknown should be rejected");
}
}
41 changes: 37 additions & 4 deletions rust/crates/cloudsearch-api/src/query_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
//! into the existing `SearchQuery` AST.

use cloudsearch_common::{
BoolQuery, CloudSearchError, RangeQuery, SearchQuery, TermQuery, WildcardQuery,
BoolQuery, CloudSearchError, Fuzziness, RangeQuery, SearchQuery, TermQuery, WildcardQuery,
};

/// Parse a query string into a `SearchQuery`.
Expand Down Expand Up @@ -157,6 +157,7 @@ impl<'a> Parser<'a> {
Ok(SearchQuery::Term(TermQuery {
field: "tag".to_string(),
value: serde_json::Value::String(word.to_string()),
fuzziness: None,
}))
}
}
Expand Down Expand Up @@ -245,6 +246,34 @@ impl<'a> Parser<'a> {
}));
}

// Fuzziness suffix: value~auto or value~N
//
// The suffix is whatever follows the LAST '~' in the value. Splitting (rather than
// requiring the value to *end* in '~') is what lets the documented forms
// `field:admin~auto` and `field:admin~2` reach this branch; a bare trailing '~'
// yields an empty suffix and is rejected below.
//
// NOTE: This fires before wildcard detection (* and ?) below. A value like
// "admin~auto*" will be parsed as a fuzziness query with suffix "auto*" (which
// fails validation) rather than a wildcard query. This is unlikely to affect real
// queries but is a known limitation of the current parse order.
if let Some((base_value, fuzz_suffix)) = value.rsplit_once('~') {
    let fuzziness = if fuzz_suffix.is_empty() {
        // "admin~" — a tilde with nothing after it.
        return Err(CloudSearchError::InvalidSearchRequest(
            "fuzziness suffix '~' must be followed by 'auto' or a number".to_string(),
        ));
    } else if fuzz_suffix.eq_ignore_ascii_case("auto") {
        Fuzziness::Auto
    } else if let Ok(dist) = fuzz_suffix.parse::<usize>() {
        Fuzziness::Exact(dist)
    } else {
        return Err(CloudSearchError::InvalidSearchRequest(format!(
            "invalid fuzziness suffix '~{fuzz_suffix}' — use '~auto' or '~N'"
        )));
    };

    // Only the part before the tilde is the term value.
    return Ok(SearchQuery::Term(TermQuery {
        field: field.to_string(),
        value: Self::parse_value(base_value),
        fuzziness: Some(fuzziness),
    }));
}

// Wildcard detection: contains * or ?
if value.contains('*') || value.contains('?') {
return Ok(SearchQuery::Wildcard(WildcardQuery {
Expand All @@ -258,6 +287,7 @@ impl<'a> Parser<'a> {
Ok(SearchQuery::Term(TermQuery {
field: field.to_string(),
value: json_value,
fuzziness: None,
}))
}

Expand Down Expand Up @@ -597,7 +627,8 @@ mod tests {
result,
SearchQuery::Term(TermQuery {
field: "status".to_string(),
value: serde_json::json!("active")
value: serde_json::json!("active"),
fuzziness: None,
})
);
}
Expand All @@ -609,7 +640,8 @@ mod tests {
result,
SearchQuery::Term(TermQuery {
field: "count".to_string(),
value: serde_json::json!(42)
value: serde_json::json!(42),
fuzziness: None,
})
);
}
Expand Down Expand Up @@ -672,7 +704,8 @@ mod tests {
result,
SearchQuery::Term(TermQuery {
field: "message".to_string(),
value: serde_json::json!("hello world")
value: serde_json::json!("hello world"),
fuzziness: None,
})
);
}
Expand Down
11 changes: 11 additions & 0 deletions rust/crates/cloudsearch-common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -289,9 +289,20 @@ pub struct PhraseQuery {
}

/// Edit-distance setting attached to a [`TermQuery`].
///
/// The `rename_all = "lowercase"` serde attribute lowercases the variant names,
/// so they appear as `auto` and `exact` in serialized form.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum Fuzziness {
/// Automatically choose edit distance: 0 for 1-2 chars, 1 for 3-5 chars, 2 for 6+ chars.
Auto,
/// Explicit edit distance threshold.
Exact(usize),
}

/// A term query: a single `field` / `value` pair with an optional fuzziness setting.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
pub struct TermQuery {
/// Name of the field the term applies to.
pub field: String,
/// The term itself, kept as an arbitrary JSON value.
pub value: serde_json::Value,
/// Optional fuzziness. Left out of the serialized output when `None`, and
/// defaults to `None` when the key is absent during deserialization.
#[serde(skip_serializing_if = "Option::is_none", default)]
pub fuzziness: Option<Fuzziness>,
}

#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
Expand Down
9 changes: 9 additions & 0 deletions rust/crates/cloudsearch-common/tests/round_trip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ fn test_search_query_term() {
round_trip(&SearchQuery::Term(TermQuery {
field: "status".to_string(),
value: serde_json::json!("active"),
fuzziness: None,
}));
}

Expand Down Expand Up @@ -150,15 +151,18 @@ fn test_search_query_bool() {
must: vec![SearchQuery::Term(TermQuery {
field: "status".to_string(),
value: serde_json::json!("active"),
fuzziness: None,
})],
should: vec![SearchQuery::Term(TermQuery {
field: "tag".to_string(),
value: serde_json::json!("featured"),
fuzziness: None,
})],
filter: vec![],
must_not: vec![SearchQuery::Term(TermQuery {
field: "deleted".to_string(),
value: serde_json::json!(true),
fuzziness: None,
})],
}));
}
Expand Down Expand Up @@ -597,6 +601,7 @@ fn test_search_request_all_fields() {
query: Some(SearchQuery::Term(TermQuery {
field: "status".to_string(),
value: serde_json::json!("active"),
fuzziness: None,
})),
from: Some(10),
size: Some(25),
Expand Down Expand Up @@ -626,6 +631,7 @@ fn test_term_query_string_value() {
round_trip(&TermQuery {
field: "name".to_string(),
value: serde_json::json!("alice"),
..Default::default()
});
}

Expand All @@ -634,6 +640,7 @@ fn test_term_query_numeric_value() {
round_trip(&TermQuery {
field: "count".to_string(),
value: serde_json::json!(42),
..Default::default()
});
}

Expand All @@ -642,6 +649,7 @@ fn test_term_query_bool_value() {
round_trip(&TermQuery {
field: "active".to_string(),
value: serde_json::json!(true),
..Default::default()
});
}

Expand Down Expand Up @@ -1065,6 +1073,7 @@ fn test_bool_query_with_should_and_filter() {
should: vec![SearchQuery::Term(TermQuery {
field: "tag".to_string(),
value: serde_json::json!("featured"),
fuzziness: None,
})],
filter: vec![SearchQuery::Range(RangeQuery {
field: "price".to_string(),
Expand Down
Loading