Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
83 commits
Select commit Hold shift + click to select a range
654ceb0
added space inefficient code
Feb 11, 2024
1c46db0
added multifilter gt
Feb 12, 2024
6776ee1
minor bug fix for large base files
Feb 12, 2024
23d4d27
setting up AND queries
Feb 12, 2024
5d6134a
write code for penalty method
Mar 2, 2024
e5f449e
commented warning for fewer than K results
Mar 27, 2024
7406bbd
minor edits for penalty method
Mar 27, 2024
8309924
resolved conflict
Mar 27, 2024
6e7d4fa
small bug fix in penalty calculation
Mar 29, 2024
3be62b5
small change to final post-processing
Apr 2, 2024
13e7a2d
Minor changes and clang-format
Apr 2, 2024
f760ebf
Add roaring bitmaps and sample snippet to search_memory_index.cpp
Apr 2, 2024
6ac465a
Add roaring bitmaps to graph traversal and bruteforce plan
Apr 4, 2024
2d8ca0c
gt now prints some stats
Apr 5, 2024
768457f
minor clang-format
Apr 5, 2024
a07d9c2
Revert graph traversal to regular set intersection
Apr 5, 2024
cdd4f9e
added a tester util for bitmap perf
Apr 9, 2024
62ca20d
added id_list and started cluster store
Apr 9, 2024
8da6899
added cluster_store.cpp
Apr 9, 2024
525953e
setup ivf clustering in index class for build and load. yet to handle…
Apr 9, 2024
cfe12d1
minor bug fix
Apr 10, 2024
11f39d4
minor bug fix in indexing
Apr 10, 2024
2d9d113
minor fix
Apr 10, 2024
e9f3b89
minor fix
Apr 10, 2024
fda3b8d
Add clustering to search path
Apr 10, 2024
22ba983
Merge branch 'rakri/multifilter_with_query_planning' of https://githu…
Apr 10, 2024
341f1c3
fixed some bugs: TODO: add shared pointers and destructor for id_list
Apr 10, 2024
f450421
Add query stats
Apr 10, 2024
9982540
changing roaring from c to c++
Apr 11, 2024
f759bc3
Add avg time to perform each step of clustering
Apr 11, 2024
1cada97
minor changes: doesn't load empty clusters, and intersects filters bef…
Apr 16, 2024
42ba9e3
removed instrumentation into IFDEF
Apr 17, 2024
44df9c9
minor edit
Apr 17, 2024
50e47cd
Re-add query planning with clusters
Apr 17, 2024
be23b84
Minor changes
Apr 19, 2024
ba450a2
Add per-cluster bitmap for faster queries
Apr 20, 2024
126e56f
Minor change to gen random slice with label file also
Apr 25, 2024
94ed971
Add a sampling-based intersection technique to query plan
Apr 26, 2024
e5784d0
Remove union step and do dist calculation on the fly while filter matc…
suri-kumkaran Apr 26, 2024
dc94f97
Add timing for the sample-based intersection
Apr 29, 2024
220325c
Resolve merge conflicts
Apr 29, 2024
e071fe8
Minor changes to labels to points data structure. Hard Coded 5000 lab…
May 1, 2024
d7bb2c6
minor bug fix
May 1, 2024
9f2c714
Add some performance improvements for graph and bruteforce search
May 1, 2024
baaddce
Undo Ravi's change to _labels_to_pts for a possibly better option
May 1, 2024
8237560
minor bug fix in cluster centroid calculation
May 1, 2024
6581827
Merge branch 'rakri/multifilter_with_query_planning' of https://githu…
May 1, 2024
3c66392
Changed the estimation code to accept sample files, and added some st…
May 1, 2024
8254e3c
towards sgemv and scratch-ing more stuff
May 2, 2024
3e2a0d3
continuing optims
May 3, 2024
2a4fd6c
minor bug fix
May 7, 2024
3d8c5f1
replaced math utils with sgemv in cluster store
May 8, 2024
f90addb
minor bug fix
May 9, 2024
d9ca810
minor changes
May 10, 2024
8591906
clang-format
May 11, 2024
caffb4f
added back build index
May 13, 2024
6e37cdd
nuanced prints in search_memory_index
May 13, 2024
ff5c3c2
Remove cout
May 11, 2024
e2cf3a9
Add search path tracing for query 1 and generalize filtered diskann b…
May 13, 2024
4d306fb
minor fix
May 14, 2024
2114cf4
Add penalty distance to iterate
May 14, 2024
d1b3fab
minor commit conflict
May 15, 2024
5c0d23d
Make query stats print for all graph-routed queries
May 15, 2024
5acfb0a
commented some debug code
May 15, 2024
8776842
rebased
May 16, 2024
c2786b8
Add some improvements for QPS gain
May 21, 2024
a183e26
Add some bugfixes
May 21, 2024
2a2c874
minor changes
Sep 15, 2024
81d63a3
filtered GT now takes AND of ORs
Sep 18, 2024
34c9ce6
minor changes
Sep 18, 2024
ede9ea1
minor styling fix
Sep 19, 2024
dea551a
added some commented code to print more useful debugging txt
Sep 20, 2024
9907d53
added flag to decide start point
Sep 20, 2024
1a89fc0
minor assert removed in GT
Sep 20, 2024
ca2fb3b
made some changes for num local start pts
Oct 24, 2024
a2e6a0a
first commit to generalize to AND of ORs
Feb 28, 2025
d546f46
made changes to and of or for iterate, etc.
Mar 5, 2025
dc7f559
minor fixes
Mar 5, 2025
d4f609b
added post-processing module
Mar 5, 2025
3e45bc7
added minor instrumentation
Mar 11, 2025
55b605f
minor instr. bug
Mar 11, 2025
a8bfb30
cleaned up and removed clustering
Mar 18, 2025
b06049d
minor changes
Mar 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
{
"files.associations": {
"chrono": "cpp",
"shared_mutex": "cpp",
"cctype": "cpp",
"clocale": "cpp",
"cmath": "cpp",
"cstdarg": "cpp",
"cstddef": "cpp",
"cstdio": "cpp",
"cstdlib": "cpp",
"cstring": "cpp",
"ctime": "cpp",
"cwchar": "cpp",
"cwctype": "cpp",
"any": "cpp",
"array": "cpp",
"atomic": "cpp",
"strstream": "cpp",
"bit": "cpp",
"*.tcc": "cpp",
"bitset": "cpp",
"cinttypes": "cpp",
"codecvt": "cpp",
"complex": "cpp",
"condition_variable": "cpp",
"cstdint": "cpp",
"deque": "cpp",
"list": "cpp",
"map": "cpp",
"set": "cpp",
"unordered_map": "cpp",
"unordered_set": "cpp",
"vector": "cpp",
"exception": "cpp",
"algorithm": "cpp",
"functional": "cpp",
"iterator": "cpp",
"memory": "cpp",
"memory_resource": "cpp",
"numeric": "cpp",
"optional": "cpp",
"random": "cpp",
"ratio": "cpp",
"string": "cpp",
"string_view": "cpp",
"system_error": "cpp",
"tuple": "cpp",
"type_traits": "cpp",
"utility": "cpp",
"fstream": "cpp",
"initializer_list": "cpp",
"iomanip": "cpp",
"iosfwd": "cpp",
"iostream": "cpp",
"istream": "cpp",
"limits": "cpp",
"mutex": "cpp",
"new": "cpp",
"ostream": "cpp",
"sstream": "cpp",
"stdexcept": "cpp",
"streambuf": "cpp",
"thread": "cpp",
"cfenv": "cpp",
"typeindex": "cpp",
"typeinfo": "cpp",
"variant": "cpp",
"compare": "cpp",
"concepts": "cpp",
"future": "cpp",
"numbers": "cpp",
"semaphore": "cpp",
"stop_token": "cpp"
}
}
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ if (MSVC)
endif()

include_directories(${PROJECT_SOURCE_DIR}/include)
include_directories(${PROJECT_SOURCE_DIR}/roaring)

if(NOT PYBIND)
set(DISKANN_RELEASE_UNUSED_TCMALLOC_MEMORY_AT_CHECKPOINTS ON)
Expand Down
5 changes: 5 additions & 0 deletions apps/build_memory_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ int main(int argc, char **argv)
std::string data_type, dist_fn, data_path, index_path_prefix, label_file, universal_label, label_type;
uint32_t num_threads, R, L, Lf, build_PQ_bytes;
float alpha;
uint32_t inter_size;
bool use_pq_build, use_opq;

po::options_description desc{
Expand Down Expand Up @@ -70,6 +71,9 @@ int main(int argc, char **argv)
program_options_utils::FILTERED_LBUILD);
optional_configs.add_options()("label_type", po::value<std::string>(&label_type)->default_value("uint"),
program_options_utils::LABEL_TYPE_DESCRIPTION);
optional_configs.add_options()(
"min_inter", po::value<uint32_t>(&inter_size)->default_value(1),
"Sets the minimum intersection size between filter sets. Defaults to 1, giving filtered-diskann");

// Merge required and optional parameters
desc.add(required_configs).add(optional_configs);
Expand Down Expand Up @@ -119,6 +123,7 @@ int main(int argc, char **argv)

size_t data_num, data_dim;
diskann::get_bin_metadata(data_path, data_num, data_dim);
min_inter_size = inter_size;

auto index_build_params = diskann::IndexWriteParametersBuilder(L, R)
.with_filter_list_size(Lf)
Expand Down
12 changes: 7 additions & 5 deletions apps/search_disk_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre
const uint32_t num_threads, const uint32_t recall_at, const uint32_t beamwidth,
const uint32_t num_nodes_to_cache, const uint32_t search_io_limit,
const std::vector<uint32_t> &Lvec, const float fail_if_recall_below,
const std::vector<std::string> &query_filters, const bool use_reorder_data = false)
const std::vector<std::vector<std::string>> &query_filters, const bool use_reorder_data = false)
{
diskann::cout << "Search parameters: #threads: " << num_threads << ", ";
if (beamwidth <= 0)
Expand Down Expand Up @@ -239,11 +239,11 @@ int search_disk_index(diskann::Metric &metric, const std::string &index_path_pre
LabelT label_for_search;
if (query_filters.size() == 1)
{ // one label for all queries
label_for_search = _pFlashIndex->get_converted_label(query_filters[0]);
label_for_search = _pFlashIndex->get_converted_label(query_filters[0][0]);
}
else
{ // one label for each query
label_for_search = _pFlashIndex->get_converted_label(query_filters[i]);
label_for_search = _pFlashIndex->get_converted_label(query_filters[i][0]);
}
_pFlashIndex->cached_beam_search(
query + (i * query_aligned_dim), recall_at, L, query_result_ids_64.data() + (i * recall_at),
Expand Down Expand Up @@ -434,10 +434,12 @@ int main(int argc, char **argv)
return -1;
}

std::vector<std::string> query_filters;
std::vector<std::vector<std::string>> query_filters;
if (filter_label != "")
{
query_filters.push_back(filter_label);
std::vector<std::string> tmp;
tmp.push_back(filter_label);
query_filters.push_back(tmp);
}
else if (query_filters_file != "")
{
Expand Down
Loading
Loading