Skip to content

Commit d54db94

Browse files
committed
fix ssd index building
1 parent 595f3a0 commit d54db94

File tree

2 files changed

+30
-17
lines changed

2 files changed

+30
-17
lines changed

include/disk_utils.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,7 @@ DISKANN_DLLEXPORT int build_merged_vamana_index(std::string base_file, diskann::
7979
uint32_t R, double sampling_rate, double ram_budget,
8080
std::string mem_index_path, std::string medoids_file,
8181
std::string centroids_file, size_t build_pq_bytes, bool use_opq,
82-
uint32_t num_threads, bool use_filters = false,
82+
uint32_t num_threads, bool use_filters = false, bool use_integer_labels = false,
8383
const std::string &label_file = std::string(""),
8484
const std::string &labels_to_medoids_file = std::string(""),
8585
const std::string &universal_label = "", const uint32_t Lf = 0);
@@ -95,7 +95,7 @@ DISKANN_DLLEXPORT int build_disk_index(
9595
const char *dataFilePath, const char *indexFilePath, const char *indexBuildParameters,
9696
diskann::Metric _compareMetric, bool use_opq = false,
9797
const std::string &codebook_prefix = "", // default is empty for no codebook pass in
98-
bool use_filters = false,
98+
bool use_filters = false, bool use_integer_labels = false,
9999
const std::string &label_file = std::string(""), // default is empty string for no label_file
100100
const std::string &universal_label = "", const uint32_t filter_threshold = 0,
101101
const uint32_t Lf = 0,

src/disk_utils.cpp

Lines changed: 28 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -628,7 +628,7 @@ template <typename T, typename LabelT>
628628
int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R,
629629
double sampling_rate, double ram_budget, std::string mem_index_path,
630630
std::string medoids_file, std::string centroids_file, size_t build_pq_bytes, bool use_opq,
631-
uint32_t num_threads, bool use_filters, const std::string &label_file,
631+
uint32_t num_threads, bool use_filters, bool use_integer_labels, const std::string &label_file,
632632
const std::string &labels_to_medoids_file, const std::string &universal_label,
633633
const uint32_t Lf, uint32_t universal_label_num = 0,
634634
const char* seller_file_path = nullptr,
@@ -663,6 +663,10 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr
663663
std::make_shared<diskann::IndexWriteParameters>(paras), nullptr,
664664
defaults::NUM_FROZEN_POINTS_STATIC, false, false, false,
665665
build_pq_bytes > 0, build_pq_bytes, use_opq, use_filters);
666+
if (use_integer_labels)
667+
{
668+
_index.enable_integer_label();
669+
}
666670
if (!use_filters)
667671
_index.build(base_file.c_str(), base_num);
668672
else
@@ -1113,7 +1117,7 @@ void create_disk_layout(const std::string base_file, const std::string mem_index
11131117

11141118
template <typename T, typename LabelT>
11151119
int build_disk_index(const char *dataFilePath, const char *indexFilePath, const char *indexBuildParameters,
1116-
diskann::Metric compareMetric, bool use_opq, const std::string &codebook_prefix, bool use_filters,
1120+
diskann::Metric compareMetric, bool use_opq, const std::string &codebook_prefix, bool use_filters, bool use_integer_labels,
11171121
const std::string &label_file, const std::string &universal_label, const uint32_t filter_threshold,
11181122
const uint32_t Lf,
11191123
const char* reorderDataFilePath,
@@ -1203,6 +1207,7 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const
12031207
std::string mem_labels_file = mem_index_path + "_labels.txt";
12041208
std::string disk_labels_file = disk_index_path + "_labels.txt";
12051209
std::string disk_bitmask_labels_file = disk_index_path + "_bitmask_labels.bin";
1210+
std::string disk_integer_labels_file = disk_index_path + "_integer_labels.bin";
12061211
std::string mem_univ_label_file = mem_index_path + "_universal_label.txt";
12071212
std::string disk_univ_label_file = disk_index_path + "_universal_label.txt";
12081213
std::string disk_labels_int_map_file = disk_index_path + "_labels_map.txt";
@@ -1346,7 +1351,7 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const
13461351
timer.reset();
13471352
diskann::build_merged_vamana_index<T, LabelT>(data_file_to_use.c_str(), diskann::Metric::L2, L, R, p_val,
13481353
indexing_ram_budget, mem_index_path, medoids_path, centroids_path,
1349-
build_pq_bytes, use_opq, num_threads, use_filters, labels_file_to_use,
1354+
build_pq_bytes, use_opq, num_threads, use_filters, use_integer_labels, labels_file_to_use,
13501355
labels_to_medoids_path, universal_label, Lf, universal_label_id,
13511356
sellerFilePath, num_diverse_build);
13521357
diskann::cout << timer.elapsed_seconds_for_step("building merged vamana index") << std::endl;
@@ -1388,6 +1393,14 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const
13881393
std::remove(bitmask_label_file.c_str());
13891394
}
13901395

1396+
// rename integer label file
1397+
std::string integer_label_file = std::string(mem_index_path) + "_integer_labels.bin";
1398+
if (file_exists(integer_label_file))
1399+
{
1400+
copy_file(integer_label_file, disk_integer_labels_file);
1401+
std::remove(integer_label_file.c_str());
1402+
}
1403+
13911404
std::remove(augmented_data_file.c_str());
13921405
std::remove(augmented_labels_file.c_str());
13931406
std::remove(labels_file_to_use.c_str());
@@ -1475,7 +1488,7 @@ template DISKANN_DLLEXPORT int build_disk_index<int8_t, uint32_t>(const char *da
14751488
const char *indexBuildParameters,
14761489
diskann::Metric compareMetric, bool use_opq,
14771490
const std::string &codebook_prefix, bool use_filters,
1478-
const std::string &label_file,
1491+
bool use_integer_labels, const std::string &label_file,
14791492
const std::string &universal_label,
14801493
const uint32_t filter_threshold, const uint32_t Lf,
14811494
const char* reorderDataFilePath, const char* sellerFilePath,
@@ -1484,7 +1497,7 @@ template DISKANN_DLLEXPORT int build_disk_index<uint8_t, uint32_t>(const char *d
14841497
const char *indexBuildParameters,
14851498
diskann::Metric compareMetric, bool use_opq,
14861499
const std::string &codebook_prefix, bool use_filters,
1487-
const std::string &label_file,
1500+
bool use_integer_labels, const std::string &label_file,
14881501
const std::string &universal_label,
14891502
const uint32_t filter_threshold, const uint32_t Lf,
14901503
const char* reorderDataFilePath, const char* sellerFilePath,
@@ -1493,7 +1506,7 @@ template DISKANN_DLLEXPORT int build_disk_index<float, uint32_t>(const char *dat
14931506
const char *indexBuildParameters,
14941507
diskann::Metric compareMetric, bool use_opq,
14951508
const std::string &codebook_prefix, bool use_filters,
1496-
const std::string &label_file,
1509+
bool use_integer_labels, const std::string &label_file,
14971510
const std::string &universal_label,
14981511
const uint32_t filter_threshold, const uint32_t Lf,
14991512
const char* reorderDataFilePath, const char* sellerFilePath,
@@ -1503,7 +1516,7 @@ template DISKANN_DLLEXPORT int build_disk_index<int8_t, uint16_t>(const char *da
15031516
const char *indexBuildParameters,
15041517
diskann::Metric compareMetric, bool use_opq,
15051518
const std::string &codebook_prefix, bool use_filters,
1506-
const std::string &label_file,
1519+
bool use_integer_labels, const std::string &label_file,
15071520
const std::string &universal_label,
15081521
const uint32_t filter_threshold, const uint32_t Lf,
15091522
const char* reorderDataFilePath, const char* sellerFilePath,
@@ -1512,7 +1525,7 @@ template DISKANN_DLLEXPORT int build_disk_index<uint8_t, uint16_t>(const char *d
15121525
const char *indexBuildParameters,
15131526
diskann::Metric compareMetric, bool use_opq,
15141527
const std::string &codebook_prefix, bool use_filters,
1515-
const std::string &label_file,
1528+
bool use_integer_labels, const std::string &label_file,
15161529
const std::string &universal_label,
15171530
const uint32_t filter_threshold, const uint32_t Lf,
15181531
const char* reorderDataFilePath, const char* sellerFilePath,
@@ -1521,7 +1534,7 @@ template DISKANN_DLLEXPORT int build_disk_index<float, uint16_t>(const char *dat
15211534
const char *indexBuildParameters,
15221535
diskann::Metric compareMetric, bool use_opq,
15231536
const std::string &codebook_prefix, bool use_filters,
1524-
const std::string &label_file,
1537+
bool use_integer_labels, const std::string &label_file,
15251538
const std::string &universal_label,
15261539
const uint32_t filter_threshold, const uint32_t Lf,
15271540
const char* reorderDataFilePath, const char* sellerFilePath,
@@ -1530,32 +1543,32 @@ template DISKANN_DLLEXPORT int build_disk_index<float, uint16_t>(const char *dat
15301543
template DISKANN_DLLEXPORT int build_merged_vamana_index<int8_t, uint32_t>(
15311544
std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate,
15321545
double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file,
1533-
size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file,
1546+
size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, bool use_integer_labels, const std::string &label_file,
15341547
const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf);
15351548
template DISKANN_DLLEXPORT int build_merged_vamana_index<float, uint32_t>(
15361549
std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate,
15371550
double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file,
1538-
size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file,
1551+
size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, bool use_integer_labels, const std::string &label_file,
15391552
const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf);
15401553
template DISKANN_DLLEXPORT int build_merged_vamana_index<uint8_t, uint32_t>(
15411554
std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate,
15421555
double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file,
1543-
size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file,
1556+
size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, bool use_integer_labels, const std::string &label_file,
15441557
const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf);
15451558
// Label=16_t
15461559
template DISKANN_DLLEXPORT int build_merged_vamana_index<int8_t, uint16_t>(
15471560
std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate,
15481561
double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file,
1549-
size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file,
1562+
size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, bool use_integer_labels, const std::string &label_file,
15501563
const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf);
15511564
template DISKANN_DLLEXPORT int build_merged_vamana_index<float, uint16_t>(
15521565
std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate,
15531566
double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file,
1554-
size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file,
1567+
size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, bool use_integer_labels, const std::string &label_file,
15551568
const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf);
15561569
template DISKANN_DLLEXPORT int build_merged_vamana_index<uint8_t, uint16_t>(
15571570
std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate,
15581571
double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file,
1559-
size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file,
1572+
size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, bool use_integer_labels, const std::string &label_file,
15601573
const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf);
15611574
}; // namespace diskann

0 commit comments

Comments
 (0)