Skip to content

Commit 4e7358c

Browse files
authored
feat(query): Inverted index support search Variant inner fields (#18861)
* feat(query): Inverted index support search Variant inner fields
* fix
1 parent d2bd648 commit 4e7358c

File tree

18 files changed

+816
-380
lines changed

18 files changed

+816
-380
lines changed

Cargo.lock

Lines changed: 291 additions & 166 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -506,10 +506,10 @@ strum = "0.24.1"
506506
sub-cache = "0.2.1"
507507
sys-info = "0.9"
508508
sysinfo = "0.34.2"
509-
tantivy = "0.22.0"
510-
tantivy-common = "0.7.0"
509+
tantivy = "0.25.0"
510+
tantivy-common = "0.10.0"
511511
tantivy-fst = "0.5"
512-
tantivy-jieba = "0.11.0"
512+
tantivy-jieba = "0.17.0"
513513
temp-env = "0.3.0"
514514
tempfile = "3.4.0"
515515
terminal_size = "0.4.2"
@@ -664,8 +664,8 @@ recursive = { git = "https://github.com/datafuse-extras/recursive.git", rev = "1
664664
sled = { git = "https://github.com/datafuse-extras/sled", tag = "v0.34.7-datafuse.1" }
665665
state-machine-api = { git = "https://github.com/databendlabs/state-machine-api.git", tag = "v0.3.4" }
666666
sub-cache = { git = "https://github.com/databendlabs/sub-cache", tag = "v0.2.1" }
667-
tantivy = { git = "https://github.com/datafuse-extras/tantivy", rev = "7502370" }
668-
tantivy-common = { git = "https://github.com/datafuse-extras/tantivy", rev = "7502370", package = "tantivy-common" }
669-
tantivy-jieba = { git = "https://github.com/datafuse-extras/tantivy-jieba", rev = "0e300e9" }
667+
tantivy = { git = "https://github.com/datafuse-extras/tantivy", rev = "9065a4d" }
668+
tantivy-common = { git = "https://github.com/datafuse-extras/tantivy", rev = "9065a4d", package = "tantivy-common" }
669+
tantivy-jieba = { git = "https://github.com/datafuse-extras/tantivy-jieba", rev = "ac27464" }
670670
watcher = { git = "https://github.com/databendlabs/watcher", tag = "v0.4.2" }
671671
xorfilter-rs = { git = "https://github.com/datafuse-extras/xorfilter", tag = "databend-alpha.4" }

src/common/metrics/src/metrics/storage.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ static BLOCK_INVERTED_INDEX_GENERATE_MILLISECONDS: LazyLock<Histogram> = LazyLoc
174174
static BLOCK_INVERTED_INDEX_READ_MILLISECONDS: LazyLock<Histogram> = LazyLock::new(|| {
175175
register_histogram_in_milliseconds("fuse_block_inverted_index_read_milliseconds")
176176
});
177+
static BLOCK_INVERTED_INDEX_READ_BYTES: LazyLock<Counter> =
178+
LazyLock::new(|| register_counter("fuse_block_inverted_index_read_bytes"));
177179
static BLOCK_INVERTED_INDEX_SEARCH_MILLISECONDS: LazyLock<Histogram> = LazyLock::new(|| {
178180
register_histogram_in_milliseconds("fuse_block_inverted_index_search_milliseconds")
179181
});
@@ -602,6 +604,10 @@ pub fn metrics_inc_block_inverted_index_read_milliseconds(c: u64) {
602604
BLOCK_INVERTED_INDEX_READ_MILLISECONDS.observe(c as f64);
603605
}
604606

607+
pub fn metrics_inc_block_inverted_index_read_bytes(c: u64) {
608+
BLOCK_INVERTED_INDEX_READ_BYTES.inc_by(c);
609+
}
610+
605611
pub fn metrics_inc_block_inverted_index_search_milliseconds(c: u64) {
606612
BLOCK_INVERTED_INDEX_SEARCH_MILLISECONDS.observe(c as f64);
607613
}

src/query/config/src/config.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3318,15 +3318,15 @@ pub struct CacheConfig {
33183318
#[clap(
33193319
long = "cache-inverted-index-meta-count",
33203320
value_name = "VALUE",
3321-
default_value = "3000"
3321+
default_value = "30000"
33223322
)]
33233323
pub inverted_index_meta_count: u64,
33243324

33253325
/// Max bytes of cached inverted index filters used. Set it to 0 to disable it.
33263326
#[clap(
33273327
long = "cache-inverted-index-filter-size",
33283328
value_name = "VALUE",
3329-
default_value = "2147483648"
3329+
default_value = "64424509440"
33303330
)]
33313331
pub inverted_index_filter_size: u64,
33323332

src/query/config/src/inner.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -756,8 +756,8 @@ impl Default for CacheConfig {
756756
table_bloom_index_filter_size: 2147483648,
757757
disk_cache_table_bloom_index_data_size: 0,
758758
disk_cache_table_bloom_index_meta_size: 0,
759-
inverted_index_meta_count: 3000,
760-
inverted_index_filter_size: 2147483648,
759+
inverted_index_meta_count: 30000,
760+
inverted_index_filter_size: 64424509440,
761761
inverted_index_filter_memory_ratio: 0,
762762
vector_index_meta_count: 30000,
763763
vector_index_filter_size: 64424509440,

src/query/ee/tests/it/inverted_index/index_refresh.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ async fn test_fuse_do_refresh_inverted_index() -> Result<()> {
160160
let queries = vec![
161161
("rust".to_string(), vec![0, 1]),
162162
("java".to_string(), vec![2]),
163-
("data".to_string(), vec![1, 4, 5]),
163+
("data".to_string(), vec![4, 1, 5]),
164164
];
165165

166166
for (query_text, ids) in queries.into_iter() {
Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,23 @@
11
---------- TABLE INFO ------------
22
DB.Table: 'system'.'caches', Table: caches-table_id:1, ver:0, Engine: SystemCache
33
-------- TABLE CONTENTS ----------
4-
+-------------+----------------------------------------------+----------+----------+------------+----------+----------+----------+----------+
5-
| Column 0 | Column 1 | Column 2 | Column 3 | Column 4 | Column 5 | Column 6 | Column 7 | Column 8 |
6-
+-------------+----------------------------------------------+----------+----------+------------+----------+----------+----------+----------+
7-
| 'test-node' | 'memory_cache_bloom_index_file_meta_data' | 0 | 0 | 3000 | 'count' | 0 | 0 | 0 |
8-
| 'test-node' | 'memory_cache_bloom_index_filter' | 0 | 0 | 2147483648 | 'bytes' | 0 | 0 | 0 |
9-
| 'test-node' | 'memory_cache_column_oriented_segment_info' | 0 | 0 | 1073741824 | 'bytes' | 0 | 0 | 0 |
10-
| 'test-node' | 'memory_cache_compact_segment_info' | 0 | 0 | 1073741824 | 'bytes' | 0 | 0 | 0 |
11-
| 'test-node' | 'memory_cache_iceberg_table' | 0 | 0 | 1024 | 'count' | 0 | 0 | 0 |
12-
| 'test-node' | 'memory_cache_inverted_index_file' | 0 | 0 | 2147483648 | 'bytes' | 0 | 0 | 0 |
13-
| 'test-node' | 'memory_cache_inverted_index_file_meta_data' | 0 | 0 | 3000 | 'count' | 0 | 0 | 0 |
14-
| 'test-node' | 'memory_cache_parquet_meta_data' | 0 | 0 | 3000 | 'count' | 0 | 0 | 0 |
15-
| 'test-node' | 'memory_cache_prune_partitions' | 0 | 0 | 256 | 'count' | 0 | 0 | 0 |
16-
| 'test-node' | 'memory_cache_segment_statistics' | 0 | 0 | 1073741824 | 'bytes' | 0 | 0 | 0 |
17-
| 'test-node' | 'memory_cache_table_snapshot' | 0 | 0 | 256 | 'count' | 0 | 0 | 0 |
18-
| 'test-node' | 'memory_cache_table_statistics' | 0 | 0 | 256 | 'count' | 0 | 0 | 0 |
19-
+-------------+----------------------------------------------+----------+----------+------------+----------+----------+----------+----------+
4+
+-------------+----------------------------------------------+----------+----------+-------------+----------+----------+----------+----------+
5+
| Column 0 | Column 1 | Column 2 | Column 3 | Column 4 | Column 5 | Column 6 | Column 7 | Column 8 |
6+
+-------------+----------------------------------------------+----------+----------+-------------+----------+----------+----------+----------+
7+
| 'test-node' | 'memory_cache_bloom_index_file_meta_data' | 0 | 0 | 3000 | 'count' | 0 | 0 | 0 |
8+
| 'test-node' | 'memory_cache_bloom_index_filter' | 0 | 0 | 2147483648 | 'bytes' | 0 | 0 | 0 |
9+
| 'test-node' | 'memory_cache_column_oriented_segment_info' | 0 | 0 | 1073741824 | 'bytes' | 0 | 0 | 0 |
10+
| 'test-node' | 'memory_cache_compact_segment_info' | 0 | 0 | 1073741824 | 'bytes' | 0 | 0 | 0 |
11+
| 'test-node' | 'memory_cache_iceberg_table' | 0 | 0 | 1024 | 'count' | 0 | 0 | 0 |
12+
| 'test-node' | 'memory_cache_inverted_index_file' | 0 | 0 | 64424509440 | 'bytes' | 0 | 0 | 0 |
13+
| 'test-node' | 'memory_cache_inverted_index_file_meta_data' | 0 | 0 | 30000 | 'count' | 0 | 0 | 0 |
14+
| 'test-node' | 'memory_cache_parquet_meta_data' | 0 | 0 | 3000 | 'count' | 0 | 0 | 0 |
15+
| 'test-node' | 'memory_cache_prune_partitions' | 0 | 0 | 256 | 'count' | 0 | 0 | 0 |
16+
| 'test-node' | 'memory_cache_segment_statistics' | 0 | 0 | 1073741824 | 'bytes' | 0 | 0 | 0 |
17+
| 'test-node' | 'memory_cache_table_snapshot' | 0 | 0 | 256 | 'count' | 0 | 0 | 0 |
18+
| 'test-node' | 'memory_cache_table_statistics' | 0 | 0 | 256 | 'count' | 0 | 0 | 0 |
19+
| 'test-node' | 'memory_cache_vector_index_file' | 0 | 0 | 64424509440 | 'bytes' | 0 | 0 | 0 |
20+
| 'test-node' | 'memory_cache_vector_index_file_meta_data' | 0 | 0 | 30000 | 'count' | 0 | 0 | 0 |
21+
+-------------+----------------------------------------------+----------+----------+-------------+----------+----------+----------+----------+
2022

2123

src/query/service/tests/it/storages/testdata/configs_table_basic.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ DB.Table: 'system'.'configs', Table: configs-table_id:1, ver:0, Engine: SystemCo
1717
| 'cache' | 'enable_table_meta_cache' | 'true' | '' |
1818
| 'cache' | 'iceberg_table_meta_count' | '1024' | '' |
1919
| 'cache' | 'inverted_index_filter_memory_ratio' | '0' | '' |
20-
| 'cache' | 'inverted_index_filter_size' | '2147483648' | '' |
21-
| 'cache' | 'inverted_index_meta_count' | '3000' | '' |
20+
| 'cache' | 'inverted_index_filter_size' | '64424509440' | '' |
21+
| 'cache' | 'inverted_index_meta_count' | '30000' | '' |
2222
| 'cache' | 'meta_service_ownership_cache' | 'false' | '' |
2323
| 'cache' | 'segment_block_metas_count' | '0' | '' |
2424
| 'cache' | 'segment_statistics_bytes' | '1073741824' | '' |

src/query/storages/common/index/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ bitvec = { workspace = true }
2222
bytemuck = { workspace = true }
2323
bytes = { workspace = true }
2424
cbordata = { workspace = true }
25+
crc32fast = { workspace = true }
2526
fastrace = { workspace = true }
2627
feistel-permutation-rs = { workspace = true }
2728
goldenfile = { workspace = true }

0 commit comments

Comments
 (0)