diff --git a/.gitignore b/.gitignore index e66ed64..1565a80 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,5 @@ test.sql META.json /vectorize-* site/ -uv.lock \ No newline at end of file +uv.lock +.vscode \ No newline at end of file diff --git a/core/src/init.rs b/core/src/init.rs index 239a242..94525df 100644 --- a/core/src/init.rs +++ b/core/src/init.rs @@ -330,7 +330,7 @@ pub async fn cleanup_job(pool: &PgPool, job_name: &str) -> Result<(), VectorizeE // Delete pending PGMQ messages for this job // We search for messages where the job_name matches let delete_messages_query = - format!("DELETE FROM pgmq.vectorize_jobs WHERE message->>'job_name' = $1"); + "DELETE FROM pgmq.vectorize_jobs WHERE message->>'job_name' = $1".to_string(); match sqlx::query(&delete_messages_query) .bind(job_name) .execute(pool) @@ -353,7 +353,7 @@ pub async fn cleanup_job(pool: &PgPool, job_name: &str) -> Result<(), VectorizeE let mut tx = pool.begin().await?; // Generate cleanup SQL statements - let cleanup_statements = vec![ + let cleanup_statements = [ // Drop triggers first (they depend on the function and table) query::drop_event_trigger(job_name, &job.src_schema, &job.src_table, "INSERT"), query::drop_event_trigger(job_name, &job.src_schema, &job.src_table, "UPDATE"), diff --git a/core/src/query.rs b/core/src/query.rs index 23c0922..1ebbbf9 100644 --- a/core/src/query.rs +++ b/core/src/query.rs @@ -93,7 +93,7 @@ impl<'de> serde::Deserialize<'de> for FilterValue { type Value = FilterValue; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str("a string in format 'operator.value' or just 'value'") + formatter.write_str("a string in format 'operator.value' or just 'value'.") } fn visit_str(self, value: &str) -> Result diff --git a/docs/server/api/search.md b/docs/server/api/search.md index df1e206..9139b1e 100644 --- a/docs/server/api/search.md +++ b/docs/server/api/search.md @@ -1,70 +1,107 @@ -## GET /api/v1/search + +## Hybrid Search 
Perform a hybrid semantic + full-text search against a previously initialized vectorize job. -URL +### /api/v1/search + +The following query parameters are available on both the GET and POST methods. + +- **GET**: Accepts parameters as URL query parameters. +- **POST**: Accepts parameters as a JSON object in the request body. + +Query parameters: - /api/v1/search +| Parameter | Type | Required | Default | Description | +| ----------- | :----: | :------: | :-------: | ----------------------------------------------------------------------------------------------------------------------------------------------- | +| job_name | string | yes | — | Name of the vectorize job to search. This identifies the table, schema, model and other job configuration. | +| query | string | yes | — | The user's search query string. | +| limit | int | no | 10 | Maximum number of results to return. | +| window_size | int | no | 5 * limit | Internal window size used by the hybrid search algorithm. | +| rrf_k | float | no | 60.0 | Reciprocal Rank Fusion parameter used by the hybrid ranking. | +| semantic_wt | float | no | 1.0 | Weight applied to the semantic score. | +| fts_wt | float | no | 1.0 | Weight applied to the full-text-search score. | +| filters | object | no | — | Additional filters passed as separate query parameters. The server parses values into typed filter values and validates keys/values for safety. | -Method - GET +### Notes on filters -Query parameters +- **GET**: Filters are supplied as individual URL query parameters (e.g., `product_category=outdoor`, `price=lt.10`). +- **POST**: Filters are supplied as a JSON object in the `filters` field (e.g., `{ "product_category": "outdoor", "price": "lt.10"}`). - - job_name (string) - required - - Name of the vectorize job to search. This identifies the table, schema, model and other job configuration. - - query (string) - required - - The user's search query string. 
- - limit (int) - optional, default: 10 - - Maximum number of results to return. - - window_size (int) - optional, default: 5 * limit - - Internal window size used by the hybrid search algorithm. - - rrf_k (float) - optional, default: 60.0 - - Reciprocal Rank Fusion param used by the hybrid ranking. - - semantic_wt (float) - optional, default: 1.0 - - Weight applied to the semantic score. - - fts_wt (float) - optional, default: 1.0 - - Weight applied to the full-text-search score. - - filters (object) - optional - - Additional filters are accepted as query params and are passed as typed filter values to the query builder. Filters are provided as URL query parameters and will be parsed into a map of keys to values. The server validates keys and raw string values for safety. +The operator defaults to `eq` when none is provided. + Therefore, `product_category=outdoor` and `product_category=eq.outdoor` are equivalent. -Notes on filters +Supported operators: - Filters are supplied as query parameters and the server will parse them into a BTreeMap of filter keys and typed values. The server validates string inputs to avoid SQL injection; only the job is allowed to specify table/column names on job creation. See the source for details about accepted filter types. +| Operator | Full Name | +|----------|-----------| +| `eq` | Equal | +| `gt` | Greater Than | +| `gte` | Greater Than or Equal | +| `lt` | Less Than | +| `lte` | Less Than or Equal | -Example request +The server parses and validates filter values according to the job's schema and allowed columns. 
+ +### GET /api/v1/search + +Example with multiple filters: ```bash curl -G "http://localhost:8080/api/v1/search" \ --data-urlencode "job_name=my_job" \ --data-urlencode "query=camping gear" \ - --data-urlencode "limit=2" + --data-urlencode "limit=2" \ + --data-urlencode "product_category=outdoor" \ + --data-urlencode "price=gt.10" ``` -Example response (200) - -The endpoint returns an array of JSON objects. The exact shape depends on the columns selected by the job (server uses `SELECT *` for results), plus additional ranking fields. Example returned item: - ```json [ { - "product_id": 39, - "product_name": "Hammock", "description": "Sling made of fabric or netting, suspended between two points for relaxation", - "product_category": "outdoor", + "fts_rank": null, "price": 40.0, - "updated_at": "2025-06-25T19:57:22.410561+00:00", - "semantic_rank": 1, - "similarity_score": 0.3192296909597241, - "rrf_score": 0.01639344262295082, - "fts_rank": null + "product_category": "outdoor", + "product_id": 39, + "product_name": "Hammock", + "rrf_score": 0.015873015873015872, + "semantic_rank": 3, + "similarity_score": 0.3863893266436258, + "updated_at": "2025-11-01T16:30:42.501294+00:00" } ] ``` -Errors +### POST /api/v1/search + +Pass parameters as a JSON object in the request body. 
Example: - - 400 / InvalidRequest - missing or invalid parameters - - 404 / NotFound - job not found - - 500 / InternalServerError - other server-side errors +```bash +curl -X POST "http://localhost:8080/api/v1/search" \ + -H "Content-Type: application/json" \ + -d '{ + "job_name": "my_job", + "query": "camping gear", + "limit": 2, + "filters": {"product_category": "outdoor", "price": "gt.10"} + }' +``` + +```json +[ + { + "description": "Sling made of fabric or netting, suspended between two points for relaxation", + "fts_rank": null, + "price": 40.0, + "product_category": "outdoor", + "product_id": 39, + "product_name": "Hammock", + "rrf_score": 0.015873015873015872, + "semantic_rank": 3, + "similarity_score": 0.3863893266436258, + "updated_at": "2025-11-01T16:30:42.501294+00:00" + } +] +``` diff --git a/server/src/routes/search.rs b/server/src/routes/search.rs index 7b880c6..2d89057 100644 --- a/server/src/routes/search.rs +++ b/server/src/routes/search.rs @@ -30,6 +30,40 @@ pub struct SearchRequest { pub filters: BTreeMap, } +// JSON-body variant of SearchRequest: filters arrive as an optional nested `filters` object instead of flattened query params +#[derive(Serialize, Deserialize, Debug, Clone, ToSchema, FromRow)] +pub struct SearchRequestPOST { + pub job_name: String, + pub query: String, + #[serde(default = "default_window_size")] + pub window_size: i32, + #[serde(default = "default_limit")] + pub limit: i32, + #[serde(default = "default_rrf_k")] + pub rrf_k: f32, + #[serde(default = "default_semantic_wt")] + pub semantic_wt: f32, + #[serde(default = "default_fts_wt")] + pub fts_wt: f32, + #[serde(default)] + pub filters: BTreeMap, +} + +impl From for SearchRequest { + fn from(request: SearchRequestPOST) -> Self { + SearchRequest { + job_name: request.job_name, + query: request.query, + window_size: request.window_size, + limit: request.limit, + rrf_k: request.rrf_k, + semantic_wt: request.semantic_wt, + fts_wt: request.fts_wt, + filters: request.filters, + } + } +} + fn default_semantic_wt() -> f32 { 1.0 } @@ -79,8 +113,34 @@ pub async fn search( 
app_state: web::Data, payload: web::Query, ) -> Result { - let payload = payload.into_inner(); + search_internal(app_state, payload.into_inner()).await +} +/// POST /api/v1/search: accepts a JSON body instead of URL query params for search +#[utoipa::path( + post, + path = "/api/v1/search", + request_body = SearchRequestPOST, + responses( + ( + status = 200, description = "Search results", + body = Vec, + ), + ), +)] +#[actix_web::post("/search")] +pub async fn search_json( + app_state: web::Data, + payload: web::Json, +) -> Result { + search_internal(app_state, payload.into_inner().into()).await +} + +// Internal function for search logic, used by both GET and POST +async fn search_internal( + app_state: web::Data, + payload: SearchRequest, +) -> Result { // check inputs and filters are valid if they exist and create a SQL string for them query::check_input(&payload.job_name)?; if !payload.filters.is_empty() { @@ -168,7 +227,6 @@ async fn get_vectorize_job( pool: &sqlx::PgPool, job_name: &str, ) -> Result { - // Changed return type match sqlx::query( "SELECT job_name, src_table, src_schema, src_columns, primary_key, update_time_col, model FROM vectorize.job diff --git a/server/src/server.rs b/server/src/server.rs index 192b508..cf2c680 100644 --- a/server/src/server.rs +++ b/server/src/server.rs @@ -7,6 +7,7 @@ pub fn route_config(configuration: &mut web::ServiceConfig) { web::scope("/api/v1") .service(routes::table::table) .service(routes::table::delete_table) - .service(routes::search::search), + .service(routes::search::search) + .service(routes::search::search_json), ); } diff --git a/server/tests/tests.rs b/server/tests/tests.rs index 92f63c4..2e9e9ca 100644 --- a/server/tests/tests.rs +++ b/server/tests/tests.rs @@ -247,6 +247,40 @@ async fn test_search_filters() { i ); } + + // equivalent but using POST + let filter_payload = json!({ + "job_name": job_name, + "query": "electronics", + "filters": { + "price": "gte.25", + "product_category": "eq.electronics" + }, + 
"limit": 5 + }); + + let resp = client + .post("http://localhost:8080/api/v1/search") + .header("Content-Type", "application/json") + .json(&filter_payload) + .send() + .await + .expect("Failed to send request"); + assert_eq!( + resp.status(), + reqwest::StatusCode::OK, + "Response status: {:?}", + resp.status() + ); + + let post_search_results: Vec = + resp.json().await.expect("Failed to parse search response"); + + assert_eq!(post_search_results.len(), 5); + for result in &post_search_results { + assert_eq!(result["product_category"].as_str().unwrap(), "electronics"); + assert!(result["price"].as_f64().unwrap() >= 25.0); + } } #[tokio::test]