Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ test.sql
META.json
/vectorize-*
site/
uv.lock
uv.lock
.vscode
4 changes: 2 additions & 2 deletions core/src/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ pub async fn cleanup_job(pool: &PgPool, job_name: &str) -> Result<(), VectorizeE
// Delete pending PGMQ messages for this job
// We search for messages where the job_name matches
let delete_messages_query =
format!("DELETE FROM pgmq.vectorize_jobs WHERE message->>'job_name' = $1");
"DELETE FROM pgmq.vectorize_jobs WHERE message->>'job_name' = $1".to_string();
match sqlx::query(&delete_messages_query)
.bind(job_name)
.execute(pool)
Expand All @@ -353,7 +353,7 @@ pub async fn cleanup_job(pool: &PgPool, job_name: &str) -> Result<(), VectorizeE
let mut tx = pool.begin().await?;

// Generate cleanup SQL statements
let cleanup_statements = vec![
let cleanup_statements = [
// Drop triggers first (they depend on the function and table)
query::drop_event_trigger(job_name, &job.src_schema, &job.src_table, "INSERT"),
query::drop_event_trigger(job_name, &job.src_schema, &job.src_table, "UPDATE"),
Expand Down
2 changes: 1 addition & 1 deletion core/src/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ impl<'de> serde::Deserialize<'de> for FilterValue {
type Value = FilterValue;

fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("a string in format 'operator.value' or just 'value'")
formatter.write_str("a string in format 'operator.value' or just 'value'.")
}

fn visit_str<E>(self, value: &str) -> Result<FilterValue, E>
Expand Down
121 changes: 79 additions & 42 deletions docs/server/api/search.md
Original file line number Diff line number Diff line change
@@ -1,70 +1,107 @@
## GET /api/v1/search

## Hybrid Search

Perform a hybrid semantic + full-text search against a previously initialized vectorize job.

URL
### /api/v1/search

The following query parameters are available on both the GET and POST methods.

- **GET**: Accepts parameters as URL query parameters.
- **POST**: Accepts parameters as a JSON object in the request body.

Query parameters:

/api/v1/search
| Parameter | Type | Required | Default | Description |
| ----------- | :----: | :------: | :-------: | ----------------------------------------------------------------------------------------------------------------------------------------------- |
| job_name | string | yes | — | Name of the vectorize job to search. This identifies the table, schema, model and other job configuration. |
| query | string | yes | — | The user's search query string. |
| limit | int | no | 10 | Maximum number of results to return. |
| window_size | int | no | 5 * limit | Internal window size used by the hybrid search algorithm. |
| rrf_k | float | no | 60.0 | Reciprocal Rank Fusion parameter used by the hybrid ranking. |
| semantic_wt | float | no | 1.0 | Weight applied to the semantic score. |
| fts_wt | float | no | 1.0 | Weight applied to the full-text-search score. |
| filters | object | no | — | Additional filters. With GET they are passed as separate query parameters; with POST they are passed as a JSON object in the `filters` field. The server parses values into typed filter values and validates keys/values for safety. |

Method

GET
### Notes on filters

Query parameters
- **GET**: Filters are supplied as individual URL query parameters (e.g., `product_category=outdoor`, `price=lt.10`).
- **POST**: Filters are supplied as a JSON object in the `filters` field (e.g., `{ "product_category": "outdoor", "price": "lt.10"}`).

- job_name (string) - required
- Name of the vectorize job to search. This identifies the table, schema, model and other job configuration.
- query (string) - required
- The user's search query string.
- limit (int) - optional, default: 10
- Maximum number of results to return.
- window_size (int) - optional, default: 5 * limit
- Internal window size used by the hybrid search algorithm.
- rrf_k (float) - optional, default: 60.0
- Reciprocal Rank Fusion param used by the hybrid ranking.
- semantic_wt (float) - optional, default: 1.0
- Weight applied to the semantic score.
- fts_wt (float) - optional, default: 1.0
- Weight applied to the full-text-search score.
- filters (object) - optional
- Additional filters are accepted as query params and are passed as typed filter values to the query builder. Filters are provided as URL query parameters and will be parsed into a map of keys to values. The server validates keys and raw string values for safety.
The operator defaults to `eq` (equal) if one is not provided.
Therefore, `product_category=outdoor` and `product_category=eq.outdoor` are equivalent.

Notes on filters
Supported operators:

Filters are supplied as query parameters and the server will parse them into a BTreeMap of filter keys and typed values. The server validates string inputs to avoid SQL injection; only the job is allowed to specify table/column names on job creation. See the source for details about accepted filter types.
| Operator | Full Name |
|----------|-----------|
| `eq` | Equal |
| `gt` | Greater Than |
| `gte` | Greater Than or Equal |
| `lt` | Less Than |
| `lte` | Less Than or Equal |

Example request
The server parses and validates filter values according to the job's schema and allowed columns.

### GET /api/v1/search

Example with multiple filters:

```bash
curl -G "http://localhost:8080/api/v1/search" \
--data-urlencode "job_name=my_job" \
--data-urlencode "query=camping gear" \
--data-urlencode "limit=2"
--data-urlencode "limit=2" \
--data-urlencode "product_category=outdoor" \
--data-urlencode "price=gt.10"
```

Example response (200)

The endpoint returns an array of JSON objects. The exact shape depends on the columns selected by the job (server uses `SELECT *` for results), plus additional ranking fields. Example returned item:

```json
[
{
"product_id": 39,
"product_name": "Hammock",
"description": "Sling made of fabric or netting, suspended between two points for relaxation",
"product_category": "outdoor",
"fts_rank": null,
"price": 40.0,
"updated_at": "2025-06-25T19:57:22.410561+00:00",
"semantic_rank": 1,
"similarity_score": 0.3192296909597241,
"rrf_score": 0.01639344262295082,
"fts_rank": null
"product_category": "outdoor",
"product_id": 39,
"product_name": "Hammock",
"rrf_score": 0.015873015873015872,
"semantic_rank": 3,
"similarity_score": 0.3863893266436258,
"updated_at": "2025-11-01T16:30:42.501294+00:00"
}
]
```

Errors
### POST /api/v1/search

Pass parameters as a JSON object in the request body. Example:

- 400 / InvalidRequest - missing or invalid parameters
- 404 / NotFound - job not found
- 500 / InternalServerError - other server-side errors
```bash
curl -X POST "http://localhost:8080/api/v1/search" \
-H "Content-Type: application/json" \
-d '{
"job_name": "my_job",
"query": "camping gear",
"limit": 2,
"filters": {"product_category": "outdoor", "price": "gt.10"}
}'
```

```json
[
{
"description": "Sling made of fabric or netting, suspended between two points for relaxation",
"fts_rank": null,
"price": 40.0,
"product_category": "outdoor",
"product_id": 39,
"product_name": "Hammock",
"rrf_score": 0.015873015873015872,
"semantic_rank": 3,
"similarity_score": 0.3863893266436258,
"updated_at": "2025-11-01T16:30:42.501294+00:00"
}
]
```
62 changes: 60 additions & 2 deletions server/src/routes/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,39 @@ pub struct SearchRequest {
pub filters: BTreeMap<String, FilterValue>,
}

/// JSON request body for `POST /search`.
///
/// Carries the same fields as [`SearchRequest`] (the GET query-string form), except
/// that `filters` is a nested object instead of being flattened into the top level.
#[derive(Serialize, Deserialize, Debug, Clone, ToSchema, FromRow)]
pub struct SearchRequestPOST {
    /// Name of the vectorize job to search.
    pub job_name: String,
    /// The user's search query string.
    pub query: String,
    /// Falls back to `default_window_size()` when omitted.
    #[serde(default = "default_window_size")]
    pub window_size: i32,
    /// Maximum number of results to return; falls back to `default_limit()` when omitted.
    #[serde(default = "default_limit")]
    pub limit: i32,
    /// Falls back to `default_rrf_k()` when omitted.
    #[serde(default = "default_rrf_k")]
    pub rrf_k: f32,
    /// Weight applied to the semantic score; falls back to `default_semantic_wt()` when omitted.
    #[serde(default = "default_semantic_wt")]
    pub semantic_wt: f32,
    /// Weight applied to the full-text-search score; falls back to `default_fts_wt()` when omitted.
    #[serde(default = "default_fts_wt")]
    pub fts_wt: f32,
    /// Optional column filters keyed by column name.
    ///
    /// `#[serde(default)]` makes the field optional: without it, a body that omits
    /// `filters` is rejected with a "missing field" error instead of being treated
    /// as an unfiltered search.
    #[serde(default)]
    pub filters: BTreeMap<String, FilterValue>,
}

impl From<SearchRequestPOST> for SearchRequest {
fn from(request: SearchRequestPOST) -> Self {
SearchRequest {
job_name: request.job_name,
query: request.query,
window_size: request.window_size,
limit: request.limit,
rrf_k: request.rrf_k,
semantic_wt: request.semantic_wt,
fts_wt: request.fts_wt,
filters: request.filters,
}
}
}

fn default_semantic_wt() -> f32 {
1.0
}
Expand Down Expand Up @@ -79,8 +112,34 @@ pub async fn search(
app_state: web::Data<AppState>,
payload: web::Query<SearchRequest>,
) -> Result<HttpResponse, ServerError> {
let payload = payload.into_inner();
search_internal(app_state, payload.into_inner()).await
}

/// POST /api/v1/search: accepts the search parameters as a JSON request body
/// instead of URL query params.
///
/// The body is deserialized into [`SearchRequestPOST`], converted into a
/// [`SearchRequest`], and handed to `search_internal`.
#[utoipa::path(
    post,
    // Documented path must match the actual route: the "/search" handler below is
    // mounted under the "/api/v1" scope.
    path = "/api/v1/search",
    request_body = SearchRequestPOST,
    responses(
        (
            status = 200, description = "Search results",
            body = Vec<serde_json::Value>,
        ),
    ),
)]
#[actix_web::post("/search")]
pub async fn search_json(
    app_state: web::Data<AppState>,
    payload: web::Json<SearchRequestPOST>,
) -> Result<HttpResponse, ServerError> {
    // Unwrap the JSON extractor, convert to the shared request type, and delegate.
    search_internal(app_state, payload.into_inner().into()).await
}

// Internal function for search logic, used by both GET and POST
async fn search_internal(
app_state: web::Data<AppState>,
payload: SearchRequest,
) -> Result<HttpResponse, ServerError> {
// check inputs and filters are valid if they exist and create a SQL string for them
query::check_input(&payload.job_name)?;
if !payload.filters.is_empty() {
Expand Down Expand Up @@ -168,7 +227,6 @@ async fn get_vectorize_job(
pool: &sqlx::PgPool,
job_name: &str,
) -> Result<VectorizeJob, ServerError> {
// Changed return type
match sqlx::query(
"SELECT job_name, src_table, src_schema, src_columns, primary_key, update_time_col, model
FROM vectorize.job
Expand Down
3 changes: 2 additions & 1 deletion server/src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ pub fn route_config(configuration: &mut web::ServiceConfig) {
web::scope("/api/v1")
.service(routes::table::table)
.service(routes::table::delete_table)
.service(routes::search::search),
.service(routes::search::search)
.service(routes::search::search_json),
);
}
34 changes: 34 additions & 0 deletions server/tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,40 @@ async fn test_search_filters() {
i
);
}

// equivalent but using POST
let filter_payload = json!({
"job_name": job_name,
"query": "electronics",
"filters": {
"price": "gte.25",
"product_category": "eq.electronics"
},
"limit": 5
});

let resp = client
.post("http://localhost:8080/api/v1/search")
.header("Content-Type", "application/json")
.json(&filter_payload)
.send()
.await
.expect("Failed to send request");
assert_eq!(
resp.status(),
reqwest::StatusCode::OK,
"Response status: {:?}",
resp.status()
);

let post_search_results: Vec<serde_json::Value> =
resp.json().await.expect("Failed to parse search response");

assert_eq!(post_search_results.len(), 5);
for result in &post_search_results {
assert_eq!(result["product_category"].as_str().unwrap(), "electronics");
assert!(result["price"].as_f64().unwrap() >= 25.0);
}
}

#[tokio::test]
Expand Down