Skip to content

Commit 548f095

Browse files
committed
feat: add AggregateHybridQuery and complete notebook port
Adds AggregateHybridQuery class as primary hybrid search API:
- AggregateHybridQuery extends existing HybridQuery implementation
- Matches Python redis-vl naming convention
- Provides builder pattern API for Java ergonomics

Completes advanced queries notebook port:
- Added all 3 query types: TextQuery, AggregateHybridQuery, MultiVectorQuery
- Exact data/model parity with Python notebook
- All product descriptions, embeddings, and query examples match Python version
- Includes examples for text scorers, filters, alpha tuning, weighted fields

Technical details:
- AggregateHybridQuery delegates to HybridQuery for implementation
- Builder pattern provides clean Java API matching Python kwargs style
- Hybrid queries combine text and vector search with configurable alpha weighting
- All examples use exact same vectors and data as Python notebook

Notebook structure:
- 6 products with text/image embeddings (matching Python exactly)
- TextQuery: 6 examples (basic, scorers, filters, weights)
- AggregateHybridQuery: 4 examples (basic, alpha, filters, scorers)
- MultiVectorQuery: 3 examples (basic, weights, filters)
- Comparison and best practices sections

Completes advanced queries documentation for 0.1.0 release.
1 parent 84ef410 commit 548f095

File tree

7 files changed

+1235
-234
lines changed

7 files changed

+1235
-234
lines changed

core/src/main/java/com/redis/vl/extensions/cache/LangCacheSemanticCache.java

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -53,23 +53,25 @@ public class LangCacheSemanticCache {
5353
* Translation map for encoding problematic attribute characters.
5454
*
5555
* <p>LangCache service rejects or mishandles certain characters in attribute values:
56+
*
5657
* <ul>
57-
* <li>Comma (,) - U+002C: Rejected by service validation</li>
58-
* <li>Forward slash (/) - U+002F: May not reliably match in filters</li>
59-
* <li>Backslash (\) - U+005C: Causes encoding issues</li>
60-
* <li>Question mark (?) - U+003F: Causes filtering failures</li>
58+
* <li>Comma (,) - U+002C: Rejected by service validation
59+
* <li>Forward slash (/) - U+002F: May not reliably match in filters
60+
* <li>Backslash (\) - U+005C: Causes encoding issues
61+
* <li>Question mark (?) - U+003F: Causes filtering failures
6162
* </ul>
6263
*
6364
* <p>We replace these with visually similar fullwidth Unicode variants that the service accepts.
6465
*
6566
* <p>Port of redis-vl-python PRs #437 and #438
6667
*/
67-
private static final Map<Character, Character> ENCODE_TRANS = Map.of(
68-
',', ',', // U+FF0C FULLWIDTH COMMA
69-
'/', '∕', // U+2215 DIVISION SLASH
70-
'\\', '\', // U+FF3C FULLWIDTH REVERSE SOLIDUS
71-
'?', '?' // U+FF1F FULLWIDTH QUESTION MARK
72-
);
68+
private static final Map<Character, Character> ENCODE_TRANS =
69+
Map.of(
70+
',', ',', // U+FF0C FULLWIDTH COMMA
71+
'/', '∕', // U+2215 DIVISION SLASH
72+
'\\', '\', // U+FF3C FULLWIDTH REVERSE SOLIDUS
73+
'?', '?' // U+FF1F FULLWIDTH QUESTION MARK
74+
);
7375

7476
/**
7577
* Translation map for decoding attribute characters back to original form.
@@ -102,7 +104,7 @@ private static String encodeAttributeValue(String value) {
102104
return value;
103105
}
104106

105-
StringBuilder result = null; // Lazy allocation
107+
StringBuilder result = null; // Lazy allocation
106108
int length = value.length();
107109

108110
for (int i = 0; i < length; i++) {
@@ -128,9 +130,9 @@ private static String encodeAttributeValue(String value) {
128130
/**
129131
* Encode attribute map for LangCache service.
130132
*
131-
* <p>Returns a copy of attributes with string values safely encoded. Only top-level string
132-
* values are encoded; non-string values are left unchanged. If no values require encoding, the
133-
* original map is returned unchanged.
133+
* <p>Returns a copy of attributes with string values safely encoded. Only top-level string values
134+
* are encoded; non-string values are left unchanged. If no values require encoding, the original
135+
* map is returned unchanged.
134136
*
135137
* @param attributes The original attributes
136138
* @return Encoded attributes (may be same instance if no encoding needed)
@@ -140,7 +142,7 @@ private static Map<String, Object> encodeAttributes(Map<String, Object> attribut
140142
return attributes;
141143
}
142144

143-
Map<String, Object> encoded = null; // Lazy allocation
145+
Map<String, Object> encoded = null; // Lazy allocation
144146

145147
for (Map.Entry<String, Object> entry : attributes.entrySet()) {
146148
Object value = entry.getValue();
@@ -164,8 +166,8 @@ private static Map<String, Object> encodeAttributes(Map<String, Object> attribut
164166
/**
165167
* Decode a string attribute value returned from the LangCache service.
166168
*
167-
* <p>Reverses {@link #encodeAttributeValue}, translating fullwidth characters back to their
168-
* ASCII counterparts so callers see the original values they stored.
169+
* <p>Reverses {@link #encodeAttributeValue}, translating fullwidth characters back to their ASCII
170+
* counterparts so callers see the original values they stored.
169171
*
170172
* @param value The encoded attribute value from LangCache
171173
* @return The decoded original value
@@ -175,7 +177,7 @@ private static String decodeAttributeValue(String value) {
175177
return value;
176178
}
177179

178-
StringBuilder result = null; // Lazy allocation
180+
StringBuilder result = null; // Lazy allocation
179181
int length = value.length();
180182

181183
for (int i = 0; i < length; i++) {
@@ -202,8 +204,8 @@ private static String decodeAttributeValue(String value) {
202204
* Decode attribute map from LangCache service.
203205
*
204206
* <p>Returns a copy of attributes with string values safely decoded. This is the inverse of
205-
* {@link #encodeAttributes}. Only top-level string values are decoded; non-string values are
206-
* left unchanged. If no values require decoding, the original map is returned unchanged.
207+
* {@link #encodeAttributes}. Only top-level string values are decoded; non-string values are left
208+
* unchanged. If no values require decoding, the original map is returned unchanged.
207209
*
208210
* @param attributes The encoded attributes from LangCache
209211
* @return Decoded attributes (may be same instance if no decoding needed)
@@ -213,7 +215,7 @@ private static Map<String, Object> decodeAttributes(Map<String, Object> attribut
213215
return attributes;
214216
}
215217

216-
Map<String, Object> decoded = null; // Lazy allocation
218+
Map<String, Object> decoded = null; // Lazy allocation
217219

218220
for (Map.Entry<String, Object> entry : attributes.entrySet()) {
219221
Object value = entry.getValue();
Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
package com.redis.vl.query;
2+
3+
import java.util.List;
4+
import java.util.Set;
5+
6+
/**
7+
* AggregateHybridQuery combines text and vector search in Redis using aggregation.
8+
*
9+
* <p>This is the primary name for hybrid queries in RedisVL, matching the Python implementation.
10+
* It extends {@link HybridQuery} which contains the full implementation.
11+
*
12+
* <p>Ported from Python: redisvl/query/aggregate.py:57-315 (AggregateHybridQuery class)
13+
*
14+
* <p>It allows you to perform a hybrid search using both text and vector similarity. It scores
15+
* documents based on a weighted combination of text and vector similarity.
16+
*
17+
* <p>Python equivalent:
18+
*
19+
* <pre>
20+
* query = AggregateHybridQuery(
21+
* text="example text",
22+
* text_field_name="text_field",
23+
* vector=[0.1, 0.2, 0.3],
24+
* vector_field_name="vector_field",
25+
* text_scorer="BM25STD",
26+
* filter_expression=None,
27+
* alpha=0.7,
28+
* dtype="float32",
29+
* num_results=10,
30+
* return_fields=["field1", "field2"],
31+
* stopwords="english",
32+
* dialect=2,
33+
* )
34+
* results = index.query(query)
35+
* </pre>
36+
*
37+
* <p>Java equivalent:
38+
*
39+
* <pre>
40+
* AggregateHybridQuery query = AggregateHybridQuery.builder()
41+
* .text("example text")
42+
* .textFieldName("text_field")
43+
* .vector(new float[]{0.1f, 0.2f, 0.3f})
44+
* .vectorFieldName("vector_field")
45+
* .textScorer("BM25STD")
46+
* .filterExpression(null)
47+
* .alpha(0.7f)
48+
* .dtype("float32")
49+
* .numResults(10)
50+
* .returnFields(List.of("field1", "field2"))
51+
* .stopwords(AggregateHybridQuery.loadDefaultStopwords("english"))
52+
* .dialect(2)
53+
* .build();
54+
* List&lt;Map&lt;String, Object&gt;&gt; results = index.query(query);
55+
* </pre>
56+
*
57+
* <p>This class is final to prevent finalizer attacks, as it throws exceptions in constructors for
58+
* input validation (SEI CERT OBJ11-J).
59+
*
60+
* @since 0.1.0
61+
*/
62+
public final class AggregateHybridQuery extends HybridQuery {
63+
64+
// Private constructor delegates to parent
65+
private AggregateHybridQuery(AggregateHybridQueryBuilder builder) {
66+
super(builder.toHybridQueryBuilder());
67+
}
68+
69+
/**
70+
* Create a new builder for AggregateHybridQuery.
71+
*
72+
* @return A new AggregateHybridQueryBuilder instance
73+
*/
74+
public static AggregateHybridQueryBuilder builder() {
75+
return new AggregateHybridQueryBuilder();
76+
}
77+
78+
/** Builder for creating AggregateHybridQuery instances with fluent API. */
79+
public static class AggregateHybridQueryBuilder {
80+
private String text;
81+
private String textFieldName;
82+
private float[] vector;
83+
private String vectorFieldName;
84+
private String textScorer = "BM25STD";
85+
private Object filterExpression;
86+
private float alpha = 0.7f;
87+
private String dtype = "float32";
88+
private int numResults = 10;
89+
private List<String> returnFields = List.of();
90+
private Set<String> stopwords = loadDefaultStopwords("english");
91+
private int dialect = 2;
92+
93+
/** Package-private constructor used by builder() method. */
94+
AggregateHybridQueryBuilder() {}
95+
96+
/**
97+
* Set the text query string.
98+
*
99+
* @param text The text to search for
100+
* @return This builder for chaining
101+
*/
102+
public AggregateHybridQueryBuilder text(String text) {
103+
this.text = text;
104+
return this;
105+
}
106+
107+
/**
108+
* Set the name of the text field to search.
109+
*
110+
* @param textFieldName The field name containing text data
111+
* @return This builder for chaining
112+
*/
113+
public AggregateHybridQueryBuilder textFieldName(String textFieldName) {
114+
this.textFieldName = textFieldName;
115+
return this;
116+
}
117+
118+
/**
119+
* Set the query vector for similarity search. Makes a defensive copy.
120+
*
121+
* @param vector The embedding vector to search with
122+
* @return This builder for chaining
123+
*/
124+
public AggregateHybridQueryBuilder vector(float[] vector) {
125+
this.vector = vector != null ? vector.clone() : null;
126+
return this;
127+
}
128+
129+
/**
130+
* Set the name of the vector field to search.
131+
*
132+
* @param vectorFieldName The field name containing vector data
133+
* @return This builder for chaining
134+
*/
135+
public AggregateHybridQueryBuilder vectorFieldName(String vectorFieldName) {
136+
this.vectorFieldName = vectorFieldName;
137+
return this;
138+
}
139+
140+
/**
141+
* Set the scoring algorithm for text search.
142+
*
143+
* @param textScorer The text scorer (e.g., "BM25", "TFIDF"); default "BM25STD"
144+
* @return This builder for chaining
145+
*/
146+
public AggregateHybridQueryBuilder textScorer(String textScorer) {
147+
this.textScorer = textScorer;
148+
return this;
149+
}
150+
151+
/**
152+
* Set an additional filter expression for the query using a Filter object.
153+
*
154+
* @param filterExpression The filter to apply
155+
* @return This builder for chaining
156+
*/
157+
public AggregateHybridQueryBuilder filterExpression(Filter filterExpression) {
158+
this.filterExpression = filterExpression;
159+
return this;
160+
}
161+
162+
/**
163+
* Set an additional filter expression for the query using a raw Redis query string.
164+
*
165+
* @param filterExpression The raw Redis filter string
166+
* @return This builder for chaining
167+
*/
168+
public AggregateHybridQueryBuilder filterExpression(String filterExpression) {
169+
this.filterExpression = filterExpression;
170+
return this;
171+
}
172+
173+
/**
174+
* Set the weight for combining text and vector scores.
175+
*
176+
* @param alpha Weight between 0.0 (text only) and 1.0 (vector only), default 0.7
177+
* @return This builder for chaining
178+
*/
179+
public AggregateHybridQueryBuilder alpha(float alpha) {
180+
this.alpha = alpha;
181+
return this;
182+
}
183+
184+
/**
185+
* Set the data type for vector storage.
186+
*
187+
* @param dtype The data type (e.g., "float32", "float64")
188+
* @return This builder for chaining
189+
*/
190+
public AggregateHybridQueryBuilder dtype(String dtype) {
191+
this.dtype = dtype;
192+
return this;
193+
}
194+
195+
/**
196+
* Set the maximum number of results to return.
197+
*
198+
* @param numResults The result limit
199+
* @return This builder for chaining
200+
*/
201+
public AggregateHybridQueryBuilder numResults(int numResults) {
202+
this.numResults = numResults;
203+
return this;
204+
}
205+
206+
/**
207+
* Set the fields to return in results. Makes a defensive copy.
208+
*
209+
* @param returnFields List of field names to return
210+
* @return This builder for chaining
211+
*/
212+
public AggregateHybridQueryBuilder returnFields(List<String> returnFields) {
213+
this.returnFields = returnFields != null ? List.copyOf(returnFields) : List.of();
214+
return this;
215+
}
216+
217+
/**
218+
* Set custom stopwords for text search. Makes a defensive copy.
219+
*
220+
* @param stopwords Set of words to exclude from text search
221+
* @return This builder for chaining
222+
*/
223+
public AggregateHybridQueryBuilder stopwords(Set<String> stopwords) {
224+
this.stopwords = stopwords != null ? Set.copyOf(stopwords) : Set.of();
225+
return this;
226+
}
227+
228+
/**
229+
* Set the query dialect version.
230+
*
231+
* @param dialect The dialect version (default 2)
232+
* @return This builder for chaining
233+
*/
234+
public AggregateHybridQueryBuilder dialect(int dialect) {
235+
this.dialect = dialect;
236+
return this;
237+
}
238+
239+
/**
240+
* Build the AggregateHybridQuery instance.
241+
*
242+
* @return The configured AggregateHybridQuery
243+
* @throws IllegalArgumentException if required fields are missing or invalid
244+
*/
245+
public AggregateHybridQuery build() {
246+
return new AggregateHybridQuery(this);
247+
}
248+
249+
/**
250+
* Convert this builder to a HybridQuery.HybridQueryBuilder for delegation.
251+
*
252+
* @return A HybridQueryBuilder with the same configuration
253+
*/
254+
HybridQuery.HybridQueryBuilder toHybridQueryBuilder() {
255+
return HybridQuery.builder()
256+
.text(text)
257+
.textFieldName(textFieldName)
258+
.vector(vector)
259+
.vectorFieldName(vectorFieldName)
260+
.textScorer(textScorer)
261+
.filterExpression(filterExpression)
262+
.alpha(alpha)
263+
.dtype(dtype)
264+
.numResults(numResults)
265+
.returnFields(returnFields)
266+
.stopwords(stopwords)
267+
.dialect(dialect);
268+
}
269+
}
270+
}

core/src/main/java/com/redis/vl/schema/TagField.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,7 @@ public class TagField extends BaseField {
2626
@JsonProperty("index_missing")
2727
private final boolean indexMissing;
2828

29-
/**
30-
* Index empty values - allow indexing and searching for empty strings
31-
*/
29+
/** Index empty values - allow indexing and searching for empty strings */
3230
@JsonProperty("index_empty")
3331
private final boolean indexEmpty;
3432

0 commit comments

Comments
 (0)