@@ -628,7 +628,7 @@ template <typename T, typename LabelT>
628628int build_merged_vamana_index (std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R,
629629 double sampling_rate, double ram_budget, std::string mem_index_path,
630630 std::string medoids_file, std::string centroids_file, size_t build_pq_bytes, bool use_opq,
631- uint32_t num_threads, bool use_filters, const std::string &label_file,
631+ uint32_t num_threads, bool use_filters, bool use_integer_labels, const std::string &label_file,
632632 const std::string &labels_to_medoids_file, const std::string &universal_label,
633633 const uint32_t Lf, uint32_t universal_label_num = 0 ,
634634 const char * seller_file_path = nullptr ,
@@ -663,6 +663,10 @@ int build_merged_vamana_index(std::string base_file, diskann::Metric compareMetr
663663 std::make_shared<diskann::IndexWriteParameters>(paras), nullptr ,
664664 defaults::NUM_FROZEN_POINTS_STATIC, false , false , false ,
665665 build_pq_bytes > 0 , build_pq_bytes, use_opq, use_filters);
666+ if (use_integer_labels)
667+ {
668+ _index.enable_integer_label ();
669+ }
666670 if (!use_filters)
667671 _index.build (base_file.c_str (), base_num);
668672 else
@@ -1113,7 +1117,7 @@ void create_disk_layout(const std::string base_file, const std::string mem_index
11131117
11141118template <typename T, typename LabelT>
11151119int build_disk_index (const char *dataFilePath, const char *indexFilePath, const char *indexBuildParameters,
1116- diskann::Metric compareMetric, bool use_opq, const std::string &codebook_prefix, bool use_filters,
1120+ diskann::Metric compareMetric, bool use_opq, const std::string &codebook_prefix, bool use_filters, bool use_integer_labels,
11171121 const std::string &label_file, const std::string &universal_label, const uint32_t filter_threshold,
11181122 const uint32_t Lf,
11191123 const char * reorderDataFilePath,
@@ -1203,6 +1207,7 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const
12031207 std::string mem_labels_file = mem_index_path + " _labels.txt" ;
12041208 std::string disk_labels_file = disk_index_path + " _labels.txt" ;
12051209 std::string disk_bitmask_labels_file = disk_index_path + " _bitmask_labels.bin" ;
1210+ std::string disk_integer_labels_file = disk_index_path + " _integer_labels.bin" ;
12061211 std::string mem_univ_label_file = mem_index_path + " _universal_label.txt" ;
12071212 std::string disk_univ_label_file = disk_index_path + " _universal_label.txt" ;
12081213 std::string disk_labels_int_map_file = disk_index_path + " _labels_map.txt" ;
@@ -1346,7 +1351,7 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const
13461351 timer.reset ();
13471352 diskann::build_merged_vamana_index<T, LabelT>(data_file_to_use.c_str (), diskann::Metric::L2, L, R, p_val,
13481353 indexing_ram_budget, mem_index_path, medoids_path, centroids_path,
1349- build_pq_bytes, use_opq, num_threads, use_filters, labels_file_to_use,
1354+ build_pq_bytes, use_opq, num_threads, use_filters, use_integer_labels, labels_file_to_use,
13501355 labels_to_medoids_path, universal_label, Lf, universal_label_id,
13511356 sellerFilePath, num_diverse_build);
13521357 diskann::cout << timer.elapsed_seconds_for_step (" building merged vamana index" ) << std::endl;
@@ -1388,6 +1393,14 @@ int build_disk_index(const char *dataFilePath, const char *indexFilePath, const
13881393 std::remove (bitmask_label_file.c_str ());
13891394 }
13901395
1396+ // move the integer label file next to the disk index: copy it to the disk path, then delete the mem-index copy
1397+ std::string integer_label_file = std::string (mem_index_path) + " _integer_labels.bin" ;
1398+ if (file_exists (integer_label_file))
1399+ {
1400+ copy_file (integer_label_file, disk_integer_labels_file);
1401+ std::remove (integer_label_file.c_str ());
1402+ }
1403+
13911404 std::remove (augmented_data_file.c_str ());
13921405 std::remove (augmented_labels_file.c_str ());
13931406 std::remove (labels_file_to_use.c_str ());
@@ -1475,7 +1488,7 @@ template DISKANN_DLLEXPORT int build_disk_index<int8_t, uint32_t>(const char *da
14751488 const char *indexBuildParameters,
14761489 diskann::Metric compareMetric, bool use_opq,
14771490 const std::string &codebook_prefix, bool use_filters,
1478- const std::string &label_file,
1491+ bool use_integer_labels, const std::string &label_file,
14791492 const std::string &universal_label,
14801493 const uint32_t filter_threshold, const uint32_t Lf,
14811494 const char * reorderDataFilePath, const char * sellerFilePath,
@@ -1484,7 +1497,7 @@ template DISKANN_DLLEXPORT int build_disk_index<uint8_t, uint32_t>(const char *d
14841497 const char *indexBuildParameters,
14851498 diskann::Metric compareMetric, bool use_opq,
14861499 const std::string &codebook_prefix, bool use_filters,
1487- const std::string &label_file,
1500+ bool use_integer_labels, const std::string &label_file,
14881501 const std::string &universal_label,
14891502 const uint32_t filter_threshold, const uint32_t Lf,
14901503 const char * reorderDataFilePath, const char * sellerFilePath,
@@ -1493,7 +1506,7 @@ template DISKANN_DLLEXPORT int build_disk_index<float, uint32_t>(const char *dat
14931506 const char *indexBuildParameters,
14941507 diskann::Metric compareMetric, bool use_opq,
14951508 const std::string &codebook_prefix, bool use_filters,
1496- const std::string &label_file,
1509+ bool use_integer_labels, const std::string &label_file,
14971510 const std::string &universal_label,
14981511 const uint32_t filter_threshold, const uint32_t Lf,
14991512 const char * reorderDataFilePath, const char * sellerFilePath,
@@ -1503,7 +1516,7 @@ template DISKANN_DLLEXPORT int build_disk_index<int8_t, uint16_t>(const char *da
15031516 const char *indexBuildParameters,
15041517 diskann::Metric compareMetric, bool use_opq,
15051518 const std::string &codebook_prefix, bool use_filters,
1506- const std::string &label_file,
1519+ bool use_integer_labels, const std::string &label_file,
15071520 const std::string &universal_label,
15081521 const uint32_t filter_threshold, const uint32_t Lf,
15091522 const char * reorderDataFilePath, const char * sellerFilePath,
@@ -1512,7 +1525,7 @@ template DISKANN_DLLEXPORT int build_disk_index<uint8_t, uint16_t>(const char *d
15121525 const char *indexBuildParameters,
15131526 diskann::Metric compareMetric, bool use_opq,
15141527 const std::string &codebook_prefix, bool use_filters,
1515- const std::string &label_file,
1528+ bool use_integer_labels, const std::string &label_file,
15161529 const std::string &universal_label,
15171530 const uint32_t filter_threshold, const uint32_t Lf,
15181531 const char * reorderDataFilePath, const char * sellerFilePath,
@@ -1521,7 +1534,7 @@ template DISKANN_DLLEXPORT int build_disk_index<float, uint16_t>(const char *dat
15211534 const char *indexBuildParameters,
15221535 diskann::Metric compareMetric, bool use_opq,
15231536 const std::string &codebook_prefix, bool use_filters,
1524- const std::string &label_file,
1537+ bool use_integer_labels, const std::string &label_file,
15251538 const std::string &universal_label,
15261539 const uint32_t filter_threshold, const uint32_t Lf,
15271540 const char * reorderDataFilePath, const char * sellerFilePath,
@@ -1530,32 +1543,32 @@ template DISKANN_DLLEXPORT int build_disk_index<float, uint16_t>(const char *dat
15301543template DISKANN_DLLEXPORT int build_merged_vamana_index<int8_t , uint32_t >(
15311544 std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate,
15321545 double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file,
1533- size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file,
1546+ size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, bool use_integer_labels, const std::string &label_file,
15341547 const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf);
15351548template DISKANN_DLLEXPORT int build_merged_vamana_index<float , uint32_t >(
15361549 std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate,
15371550 double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file,
1538- size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file,
1551+ size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, bool use_integer_labels, const std::string &label_file,
15391552 const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf);
15401553template DISKANN_DLLEXPORT int build_merged_vamana_index<uint8_t , uint32_t >(
15411554 std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate,
15421555 double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file,
1543- size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file,
1556+ size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, bool use_integer_labels, const std::string &label_file,
15441557 const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf);
15451558// LabelT = uint16_t
15461559template DISKANN_DLLEXPORT int build_merged_vamana_index<int8_t , uint16_t >(
15471560 std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate,
15481561 double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file,
1549- size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file,
1562+ size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, bool use_integer_labels, const std::string &label_file,
15501563 const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf);
15511564template DISKANN_DLLEXPORT int build_merged_vamana_index<float , uint16_t >(
15521565 std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate,
15531566 double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file,
1554- size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file,
1567+ size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, bool use_integer_labels, const std::string &label_file,
15551568 const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf);
15561569template DISKANN_DLLEXPORT int build_merged_vamana_index<uint8_t , uint16_t >(
15571570 std::string base_file, diskann::Metric compareMetric, uint32_t L, uint32_t R, double sampling_rate,
15581571 double ram_budget, std::string mem_index_path, std::string medoids_path, std::string centroids_file,
1559- size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, const std::string &label_file,
1572+ size_t build_pq_bytes, bool use_opq, uint32_t num_threads, bool use_filters, bool use_integer_labels, const std::string &label_file,
15601573 const std::string &labels_to_medoids_file, const std::string &universal_label, const uint32_t Lf);
15611574}; // namespace diskann
0 commit comments