diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt
--- a/apps/CMakeLists.txt
+++ b/apps/CMakeLists.txt
@@ -28,6 +28,15 @@ target_link_libraries(test_streaming_scenario ${PROJECT_NAME} ${DISKANN_TOOLS_TC
 add_executable(test_insert_deletes_consolidate test_insert_deletes_consolidate.cpp)
 target_link_libraries(test_insert_deletes_consolidate ${PROJECT_NAME} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS} Boost::program_options)
 
+if (MSVC)
+    # Windows-only benchmarks: they use WindowsAlignedFileReader (NOTE(review): confirm it is unavailable on other toolchains).
+    add_executable(benchmark_reads benchmark_reads.cpp)
+    target_link_libraries(benchmark_reads ${PROJECT_NAME} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS} Boost::program_options)
+
+    add_executable(benchmark_reads_single_threaded benchmark_reads_single_threaded.cpp)
+    target_link_libraries(benchmark_reads_single_threaded ${PROJECT_NAME} ${DISKANN_TOOLS_TCMALLOC_LINK_OPTIONS} Boost::program_options)
+endif()
+
 if (NOT MSVC)
     install(TARGETS build_memory_index
                     build_stitched_index
diff --git a/apps/benchmark_reads.cpp b/apps/benchmark_reads.cpp
new file mode 100644
--- /dev/null
+++ b/apps/benchmark_reads.cpp
@@ -0,0 +1,158 @@
+// Multi-threaded random-read benchmark for WindowsAlignedFileReader.
+//
+// Usage: benchmark_reads [thread_count] [batch_size] [index_file]
+//
+// Issues TOTAL_READS sector-sized reads at random sector offsets, in batches of
+// batch_size requests per read() call, and prints the value of Timer::elapsed().
+
+#include <cstdint>
+#include <iostream>
+#include <random>
+#include <sstream>
+#include <string>
+#include <vector>
+#include "windows_aligned_file_reader.h"
+#include "aligned_file_reader.h"
+#include "utils.h"
+#include "timer.h"
+#include <omp.h>
+
+using namespace std;
+using namespace diskann;
+
+#define SECTOR_LEN 4096
+#define TOTAL_READS 1000000
+
+// Index file that is read when no path is given on the command line.
+static const char *const DEFAULT_INDEX_FILE = "F:\\indices\\turing_10m\\disk_index_disk.index";
+
+// Largest sector id that is sampled.
+// NOTE(review): presumably sized for the default index file -- confirm before pointing at other files.
+static const uint64_t MAX_SECTOR_ID = 1650000;
+
+// One pre-generated sector id per read. The elements are 64-bit so that
+// id * SECTOR_LEN cannot overflow: 1650000 * 4096 does not fit in 32 bits.
+vector<uint64_t> random_sector_ids(TOTAL_READS);
+
+// Points every request in read_reqs at the sector ids belonging to batch
+// batch_num, then issues the whole batch through reader in one read() call.
+void do_reads(WindowsAlignedFileReader *reader, vector<AlignedRead> &read_reqs, int batch_num)
+{
+    // NOTE(review): auto&& avoids copying the context per batch if get_ctx() returns a reference -- confirm.
+    auto &&ctx = reader->get_ctx();
+    const size_t batch_size = read_reqs.size();
+    const size_t first_id = static_cast<size_t>(batch_num) * batch_size;
+
+    for (size_t i = 0; i < batch_size; i++)
+    {
+        read_reqs[i].offset = random_sector_ids[first_id + i] * SECTOR_LEN;
+    }
+
+    reader->read(read_reqs, ctx, false);
+}
+
+// Parses text as an integer >= 1 into value. Returns false and leaves value untouched otherwise.
+static bool parse_positive_int(const char *text, int &value)
+{
+    istringstream iss(text);
+    int parsed = 0;
+    if (!(iss >> parsed) || parsed < 1)
+    {
+        return false;
+    }
+    value = parsed;
+    return true;
+}
+
+// Opens file_name, registers thread_count OpenMP threads with the reader, and
+// times TOTAL_READS / batches_of batches of batches_of random sector reads.
+void do_multiple_reads_with_threads(int thread_count, int batches_of, const string &file_name = DEFAULT_INDEX_FILE)
+{
+    // batches_of is used as a divisor below and thread_count sizes the OpenMP team.
+    if (thread_count < 1 || batches_of < 1)
+    {
+        std::cerr << "thread_count and batch size must both be >= 1" << std::endl;
+        return;
+    }
+
+    WindowsAlignedFileReader reader;
+    reader.open(file_name);
+
+    vector<vector<AlignedRead>> read_reqs(thread_count);
+
+    omp_set_num_threads(thread_count);
+
+#pragma omp parallel for num_threads((int)thread_count)
+    for (int i = 0; i < thread_count; i++)
+    {
+        reader.register_thread();
+        read_reqs[i].reserve(batches_of);
+
+        // One sector-sized, sector-aligned buffer per request.
+        for (int j = 0; j < batches_of; ++j)
+        {
+            char *buf = nullptr;
+            alloc_aligned((void **)&buf, SECTOR_LEN, SECTOR_LEN);
+
+            AlignedRead read;
+            read.buf = buf;
+            read.len = SECTOR_LEN;
+            read_reqs[i].push_back(read);
+        }
+    }
+
+    // Generate all sector ids up front so random number generation is not timed.
+    uniform_int_distribution<uint64_t> distrib(0, MAX_SECTOR_ID);
+    random_device rd;
+    mt19937 gen(rd());
+    for (auto &sector_id : random_sector_ids)
+    {
+        sector_id = distrib(gen);
+    }
+
+    const int no_of_reads = TOTAL_READS / batches_of;
+    Timer timer;
+#pragma omp parallel for schedule(dynamic, 1)
+    for (int i = 0; i < no_of_reads; i++)
+    {
+        do_reads(&reader, read_reqs[omp_get_thread_num()], i);
+    }
+    // Elapsed time; microseconds according to the original author's commented-out message.
+    std::cout << timer.elapsed() << std::endl;
+
+    reader.close();
+
+    // Release the aligned buffers allocated above.
+    for (auto &thread_reqs : read_reqs)
+    {
+        for (auto &req : thread_reqs)
+        {
+            aligned_free(req.buf);
+        }
+    }
+}
+
+int main(int argc, char *argv[])
+{
+    int thread_count = 1;
+    int batches_of = 128;
+    string file_name = DEFAULT_INDEX_FILE;
+
+    if (argc >= 2 && !parse_positive_int(argv[1], thread_count))
+    {
+        std::cerr << "Invalid thread count: " << argv[1] << std::endl;
+        return 1;
+    }
+    if (argc >= 3 && !parse_positive_int(argv[2], batches_of))
+    {
+        std::cerr << "Invalid batch size: " << argv[2] << std::endl;
+        return 1;
+    }
+    if (argc >= 4)
+    {
+        file_name = argv[3];
+    }
+
+    do_multiple_reads_with_threads(thread_count, batches_of, file_name);
+    return 0;
+}
diff --git a/apps/benchmark_reads_single_threaded.cpp b/apps/benchmark_reads_single_threaded.cpp
new file mode 100644
--- /dev/null
+++ b/apps/benchmark_reads_single_threaded.cpp
@@ -0,0 +1,136 @@
+// Single-threaded random-read benchmark for WindowsAlignedFileReader.
+//
+// Usage: benchmark_reads_single_threaded [batch_size] [index_file]
+//
+// Issues TOTAL_READS sector-sized reads at random sector offsets, in batches of
+// batch_size requests per read() call, and prints the value of Timer::elapsed().
+
+#include <cstdint>
+#include <iostream>
+#include <random>
+#include <sstream>
+#include <string>
+#include <vector>
+#include "windows_aligned_file_reader.h"
+#include "aligned_file_reader.h"
+#include "utils.h"
+#include "timer.h"
+
+using namespace std;
+using namespace diskann;
+
+#define SECTOR_LEN 4096
+#define TOTAL_READS 1000000
+
+// Index file that is read when no path is given on the command line.
+static const char *const DEFAULT_INDEX_FILE = "F:\\indices\\turing_10m\\disk_index_disk.index";
+
+// Largest sector id that is sampled.
+// NOTE(review): presumably sized for the default index file -- confirm before pointing at other files.
+static const uint64_t MAX_SECTOR_ID = 1650000;
+
+// One pre-generated sector id per read. The elements are 64-bit so that
+// id * SECTOR_LEN cannot overflow: 1650000 * 4096 does not fit in 32 bits.
+vector<uint64_t> random_sector_ids(TOTAL_READS);
+
+// Points every request in read_reqs at the sector ids belonging to batch
+// batch_num, then issues the whole batch through reader in one read() call.
+void do_reads(WindowsAlignedFileReader *reader, vector<AlignedRead> &read_reqs, int batch_num)
+{
+    // NOTE(review): auto&& avoids copying the context per batch if get_ctx() returns a reference -- confirm.
+    auto &&ctx = reader->get_ctx();
+    const size_t batch_size = read_reqs.size();
+    const size_t first_id = static_cast<size_t>(batch_num) * batch_size;
+
+    for (size_t i = 0; i < batch_size; i++)
+    {
+        read_reqs[i].offset = random_sector_ids[first_id + i] * SECTOR_LEN;
+    }
+
+    reader->read(read_reqs, ctx, false);
+}
+
+// Parses text as an integer >= 1 into value. Returns false and leaves value untouched otherwise.
+static bool parse_positive_int(const char *text, int &value)
+{
+    istringstream iss(text);
+    int parsed = 0;
+    if (!(iss >> parsed) || parsed < 1)
+    {
+        return false;
+    }
+    value = parsed;
+    return true;
+}
+
+// Opens file_name and times TOTAL_READS / batches_of batches of batches_of
+// random sector reads, all issued from the calling thread.
+void do_reads_in_batches_of(int batches_of, const string &file_name = DEFAULT_INDEX_FILE)
+{
+    // batches_of is used as a divisor below and sizes the read buffer.
+    if (batches_of < 1)
+    {
+        std::cerr << "batch size must be >= 1" << std::endl;
+        return;
+    }
+
+    WindowsAlignedFileReader reader;
+    reader.open(file_name);
+    reader.register_thread();
+
+    // One contiguous aligned buffer, sliced into one sector per request. The size is
+    // computed in size_t because batches_of * SECTOR_LEN can exceed the range of int.
+    const size_t batch_size = static_cast<size_t>(batches_of);
+    char *buf = nullptr;
+    alloc_aligned((void **)&buf, batch_size * SECTOR_LEN, SECTOR_LEN);
+
+    vector<AlignedRead> read_reqs;
+    read_reqs.reserve(batch_size);
+    for (size_t i = 0; i < batch_size; ++i)
+    {
+        AlignedRead read;
+        read.len = SECTOR_LEN;
+        read.buf = buf + i * SECTOR_LEN;
+        read_reqs.push_back(read);
+    }
+
+    // Generate all sector ids up front so random number generation is not timed.
+    uniform_int_distribution<uint64_t> distrib(0, MAX_SECTOR_ID);
+    random_device rd;
+    mt19937 gen(rd());
+    for (auto &sector_id : random_sector_ids)
+    {
+        sector_id = distrib(gen);
+    }
+
+    const int no_of_reads = TOTAL_READS / batches_of;
+    Timer timer;
+    for (int i = 0; i < no_of_reads; i++)
+    {
+        do_reads(&reader, read_reqs, i);
+    }
+    // Elapsed time; microseconds according to the original author's commented-out message.
+    std::cout << timer.elapsed() << std::endl;
+
+    reader.close();
+    aligned_free(buf);
+}
+
+int main(int argc, char *argv[])
+{
+    int batches_of = 10;
+    string file_name = DEFAULT_INDEX_FILE;
+
+    if (argc >= 2 && !parse_positive_int(argv[1], batches_of))
+    {
+        std::cerr << "Invalid batch size: " << argv[1] << std::endl;
+        return 1;
+    }
+    if (argc >= 3)
+    {
+        file_name = argv[2];
+    }
+
+    do_reads_in_batches_of(batches_of, file_name);
+    return 0;
+}
diff --git a/src/windows_aligned_file_reader.cpp b/src/windows_aligned_file_reader.cpp
index 3650b928a..1b77d6373 100644
--- a/src/windows_aligned_file_reader.cpp
+++ b/src/windows_aligned_file_reader.cpp
@@ -18,7 +18,7 @@ void WindowsAlignedFileReader::open(const std::string &fname)
     m_filename = fname;
 #endif
 
-    this->register_thread();
+    // this->register_thread();
 }
 
 void WindowsAlignedFileReader::close()