diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 000000000..2144f6234 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,97 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "(gdb) Debug build_memory_index", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/apps/build_memory_index", + "args": [ + "--data_type", + "float", + "--dist_fn", + "l2", + "--data_path", + "data/sift/sift_learn.fbin", + "--index_path_prefix", + "data/sift/index_sift_learn_R32_L50_A1.2", + "-R", + "32", + "-L", + "50", + "--alpha", + "1.2" + ], + "stopAtEntry": false, + "cwd": "${workspaceFolder}/build", + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ], + "preLaunchTask": "build-debug" + }, + { + "name": "(gdb) Launch simple_test", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/apps/simple_test", + "args": [ + "${workspaceFolder}/test_data.bin" + ], + "stopAtEntry": false, + "cwd": "${workspaceFolder}", + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ], + "preLaunchTask": "generate-test-data" + }, + { + "name": "(gdb) Launch test_streaming_scenario", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/apps/test_streaming_scenario", + "args": [ + "--data_type", + "float", + "--dist_fn", + "l2", + "--index_path_prefix", + "test_index", + "--data_path", + "YOUR_DATA_PATH.bin", + "--active_window", + "10000", + "--consolidate_interval", + "1000", + "--start_point_norm", + "1.0" + ], + "stopAtEntry": false, + "cwd": "${workspaceFolder}", + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ], + "preLaunchTask": "build-debug" + } + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..d8777ff0b --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,75 @@ +{ + "files.associations": { + "iostream": "cpp", + "ostream": "cpp", + "cctype": "cpp", + "clocale": "cpp", + "cmath": "cpp", + "cstdarg": "cpp", + "cstddef": "cpp", + "cstdio": "cpp", + "cstdlib": "cpp", + "cstring": "cpp", + "ctime": "cpp", + "cwchar": "cpp", + "cwctype": "cpp", + "any": "cpp", + "array": "cpp", + "atomic": "cpp", + "strstream": "cpp", + "bit": "cpp", + "bitset": "cpp", + "chrono": "cpp", + "codecvt": "cpp", + "compare": "cpp", + "complex": "cpp", + "concepts": "cpp", + "condition_variable": "cpp", + "cstdint": "cpp", + "deque": "cpp", + "list": "cpp", + "map": "cpp", + "set": "cpp", + "string": "cpp", + "unordered_map": "cpp", + "unordered_set": "cpp", + "vector": "cpp", + "exception": "cpp", + "algorithm": "cpp", + "functional": "cpp", + "iterator": "cpp", + "memory": "cpp", + "memory_resource": "cpp", + "numeric": "cpp", + "optional": "cpp", + "random": "cpp", + "ratio": "cpp", + "string_view": "cpp", + "system_error": "cpp", + "tuple": "cpp", + "type_traits": "cpp", + "utility": "cpp", + "fstream": "cpp", + "future": "cpp", + "initializer_list": "cpp", + "iomanip": "cpp", + "iosfwd": "cpp", + "istream": "cpp", + "limits": "cpp", + "mutex": "cpp", + "new": "cpp", + "numbers": "cpp", + "semaphore": "cpp", + "shared_mutex": "cpp", + "sstream": "cpp", + "stdexcept": "cpp", + "stop_token": "cpp", + "streambuf": "cpp", + "thread": "cpp", + "cfenv": "cpp", + "cinttypes": "cpp", + "typeindex": "cpp", + "typeinfo": "cpp", + "variant": "cpp" + } +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 000000000..088756f0d --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,29 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "build-debug", + "type": "shell", + //"command": "rm -rf build && mkdir build && cd build && cmake .. && make -j", + "command": "cd build && cmake -DCMAKE_BUILD_TYPE=Debug .. && make -j", + "group": { + "kind": "build", + "isDefault": true + }, + "problemMatcher": [ + "$gcc" + ], + "detail": "Clean, configure and build the DiskANN project for debugging." + }, + { + "label": "generate-test-data", + "type": "shell", + "command": "${workspaceFolder}/build/apps/utils/generate_test_data test_data.bin 10000 128", + "dependsOn": [ + "build-debug" + ], + "problemMatcher": [], + "detail": "Generate a binary data file for testing." + } + ] +} \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 3d3d2b860..f2c0bf30f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,6 +19,11 @@ cmake_minimum_required(VERSION 3.15) project(diskann) +# Set default build type to Debug for easier debugging setup +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Debug CACHE STRING "Choose the type of build (Debug, Release, etc.).") +endif() + set(CMAKE_STANDARD 17) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt index e42c0b6cb..312f4e1e5 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -28,6 +28,10 @@ target_link_libraries(test_streaming_scenario ${PROJECT_NAME} ${DISKANN_TOOLS_TC add_executable(test_insert_deletes_consolidate test_insert_deletes_consolidate.cpp) target_link_libraries(test_insert_deletes_consolidate ${PROJECT_NAME} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS} Boost::program_options) +add_executable(simple_test simple_test.cpp) +target_link_libraries(simple_test ${PROJECT_NAME}) +target_compile_options(simple_test PRIVATE -g) + if (NOT MSVC) install(TARGETS build_memory_index build_stitched_index @@ -37,6 +41,7 @@ if (NOT MSVC) range_search_disk_index test_streaming_scenario test_insert_deletes_consolidate + simple_test RUNTIME ) endif() diff --git a/apps/simple_test.cpp b/apps/simple_test.cpp new file mode 100644 index 000000000..25984b631 --- /dev/null +++ b/apps/simple_test.cpp @@ -0,0 +1,61 @@ +#include +#include "index.h" +#include "utils.h" + +int main(int argc, char **argv) +{ + if (argc != 2) + { + std::cout << "Usage: " << argv[0] << " " << std::endl; + return 1; + } + + std::string data_path = argv[1]; + diskann::Metric metric = diskann::Metric::L2; + size_t num_points, dim; + diskann::get_bin_metadata(data_path, num_points, dim); + + // Build parameters + unsigned R = 24; + unsigned L = 100; + float alpha = 1.2f; + unsigned num_threads = 4; + + // Create index + auto write_params = + diskann::IndexWriteParametersBuilder(L, R).with_num_threads(num_threads).with_alpha(alpha).build(); + auto search_params = std::make_shared(L, num_threads); + diskann::Index index(metric, dim, num_points, + std::make_shared(write_params), search_params); + index.build(data_path.c_str(), num_points); + + // Search parameters + unsigned Lsearch = 100; + unsigned K = 10; // Number of neighbors to search for + unsigned num_queries = 1; + std::vector query_result_ids(num_queries * K); + std::vector query_result_dists(num_queries * K); + + // Load query data (using the first point from the dataset as a query) + float *query_data = nullptr; + diskann::alloc_aligned((void **)&query_data, dim * sizeof(float), 8 * sizeof(float)); + std::ifstream reader(data_path, std::ios::binary); + reader.seekg(2 * sizeof(int)); // Skip npts and dim + reader.read((char *)query_data, dim * sizeof(float)); + reader.close(); + + // Add a pause to allow the debugger to attach properly + // std::cout << "\nPress Enter to start the search..." << std::endl; + // std::cin.get(); + + // Perform search + index.search(query_data, K, Lsearch, query_result_ids.data(), query_result_dists.data()); + + std::cout << "Search complete. Found " << K << " neighbors." << std::endl; + std::cout << "Nearest neighbor ID: " << query_result_ids[0] << " with distance " << query_result_dists[0] + << std::endl; + + diskann::aligned_free(query_data); + + return 0; +} diff --git a/apps/utils/CMakeLists.txt b/apps/utils/CMakeLists.txt index 3b8cf223c..46f505e93 100644 --- a/apps/utils/CMakeLists.txt +++ b/apps/utils/CMakeLists.txt @@ -78,6 +78,8 @@ target_link_libraries(generate_synthetic_labels ${PROJECT_NAME} Boost::program_o add_executable(stats_label_data stats_label_data.cpp) target_link_libraries(stats_label_data ${PROJECT_NAME} Boost::program_options) +add_executable(generate_test_data generate_test_data.cpp) + if (NOT MSVC) include(GNUInstallDirs) install(TARGETS fvecs_to_bin @@ -105,6 +107,7 @@ if (NOT MSVC) create_disk_layout generate_synthetic_labels stats_label_data + generate_test_data RUNTIME ) -endif() \ No newline at end of file +endif() diff --git a/apps/utils/generate_test_data.cpp b/apps/utils/generate_test_data.cpp new file mode 100644 index 000000000..f143abf55 --- /dev/null +++ b/apps/utils/generate_test_data.cpp @@ -0,0 +1,45 @@ +#include +#include +#include +#include +#include + +void generate_random_data(const std::string& filename, int num_points, int dim) { + std::ofstream writer(filename, std::ios::binary); + if (!writer) { + std::cerr << "Error opening file for writing: " << filename << std::endl; + return; + } + + writer.write(reinterpret_cast(&num_points), sizeof(int)); + writer.write(reinterpret_cast(&dim), sizeof(int)); + + std::mt19937 rng(42); // Seed for reproducibility + std::uniform_real_distribution dist(0.0f, 1.0f); + + std::vector buffer(dim); + for (int i = 0; i < num_points; ++i) { + for (int j = 0; j < dim; ++j) { + buffer[j] = dist(rng); + } + writer.write(reinterpret_cast(buffer.data()), dim * sizeof(float)); + } + + writer.close(); + std::cout << "Successfully generated " << num_points << " points of dimension " << dim << " to " << filename << std::endl; +} + +int main(int argc, char** argv) { + if (argc != 4) { + std::cout << "Usage: " << argv[0] << " " << std::endl; + return 1; + } + + std::string filename = argv[1]; + int num_points = std::stoi(argv[2]); + int dim = std::stoi(argv[3]); + + generate_random_data(filename, num_points, dim); + + return 0; +} diff --git a/test_data.bin b/test_data.bin new file mode 100644 index 000000000..6ffeea3e7 Binary files /dev/null and b/test_data.bin differ