|
21 | 21 | package com.apple.foundationdb.record.metadata; |
22 | 22 |
|
23 | 23 | import com.apple.foundationdb.annotation.API; |
| 24 | +import com.apple.foundationdb.async.hnsw.Config; |
24 | 25 | import com.apple.foundationdb.async.rtree.RTree; |
25 | 26 | import com.apple.foundationdb.record.provider.foundationdb.IndexMaintainer; |
26 | 27 |
|
@@ -223,6 +224,143 @@ public class IndexOptions { |
223 | 224 | */ |
224 | 225 | public static final String RTREE_USE_NODE_SLOT_INDEX = "rtreeUseNodeSlotIndex"; |
225 | 226 |
|
| 227 | + /** |
| 228 | + * HNSW-only: The random seed that is used to probabilistically determine the highest layer of an insert into an |
| 229 | + * HNSW structure. See {@link Config#getRandomSeed()}. The default random seed is |
| 230 | + * {@link Config#DEFAULT_RANDOM_SEED}. |
| 231 | + */ |
| 232 | + public static final String HNSW_RANDOM_SEED = "hnswRandomSeed"; |
| 233 | + |
| 234 | + /** |
| 235 | + * HNSW-only: The metric that is used to determine distances between vectors. The default metric is |
| 236 | + * {@link Config#DEFAULT_METRIC}. See {@link Config#getMetric()}. |
| 237 | + */ |
| 238 | + public static final String HNSW_METRIC = "hnswMetric"; |
| 239 | + |
| 240 | + /** |
| 241 | + * HNSW-only: The number of dimensions used. All vectors must have exactly this number of dimensions. This option |
| 242 | + * must be set when interacting with a vector index as there is no default. |
| 243 | + * See {@link Config#getNumDimensions()}. |
| 244 | + */ |
| 245 | + public static final String HNSW_NUM_DIMENSIONS = "hnswNumDimensions"; |
| 246 | + |
| 247 | + /** |
| 248 | + * HNSW-only: Indicator if all layers except layer {@code 0} use inlining. If inlining is used, each node is |
| 249 | + * persisted as a key/value pair per neighbor which includes the vectors of the neighbors but not for itself. If |
| 250 | + * inlining is not used, each node is persisted as exactly one key/value pair per node which stores its own vector |
| 251 | + * but specifically excludes the vectors of the neighbors. The default value is set to |
| 252 | + * {@link Config#DEFAULT_USE_INLINING}. See {@link Config#isUseInlining()}. |
| 253 | + */ |
| 254 | + public static final String HNSW_USE_INLINING = "hnswUseInlining"; |
| 255 | + |
| 256 | + /** |
| 257 | + * HNSW-only: This option (named {@code M} by the HNSW paper) is the connectivity value for all nodes stored on |
| 258 | + * any layer. While by no means enforced or even enforceable, we strive to create and maintain exactly {@code m} |
| 259 | + * neighbors for a node. Due to insert/delete operations it is possible that the actual number of neighbors a node |
| 260 | + * references is not exactly {@code m} at any given time. The default value is set to {@link Config#DEFAULT_M}. |
| 261 | + * See {@link Config#getM()}. |
| 262 | + */ |
| 263 | + public static final String HNSW_M = "hnswM"; |
| 264 | + |
| 265 | + /** |
| 266 | + * HNSW-only: This attribute (named {@code M_max} by the HNSW paper) is the maximum connectivity value for nodes |
| 267 | + * stored on a layer greater than {@code 0}. A node can never have more than {@code mMax} neighbors. That means that |
| 268 | + * neighbors of a node are pruned if the actual number of neighbors would otherwise exceed {@code mMax}. Note that |
| 269 | + * this option must be greater than or equal to {@link #HNSW_M}. The default value is set to |
| 270 | + * {@link Config#DEFAULT_M_MAX}. See {@link Config#getMMax()}. |
| 271 | + */ |
| 272 | + public static final String HNSW_M_MAX = "hnswMax"; |
| 273 | + |
| 274 | + /** |
| 275 | + * HNSW-only: This option (named {@code M_max0} by the HNSW paper) is the maximum connectivity value for nodes |
| 276 | + * stored on layer {@code 0}. We will never create more than {@code mMax0} neighbors for a node that is stored on |
| 277 | + * that layer. That means that we even prune the neighbors of a node if the actual number of neighbors would |
| 278 | + * otherwise exceed {@code mMax0}. Note that this option must be greater than or equal to {@link #HNSW_M_MAX}. |
| 279 | + * The default value is set to {@link Config#DEFAULT_M_MAX_0}. See {@link Config#getMMax0()}. |
| 280 | + */ |
| 281 | + public static final String HNSW_M_MAX_0 = "hnswMax0"; |
| 282 | + |
| 283 | + /** |
| 284 | + * HNSW-only: Maximum size of the search queues (one independent queue per layer) that are used during the insertion |
| 285 | + * of a new node. If {@code HNSW_EF_CONSTRUCTION} is set to {@code 1}, the search naturally follows a greedy |
| 286 | + * approach (monotonic descent), whereas a high number for {@code HNSW_EF_CONSTRUCTION} allows for a more nuanced |
| 287 | + * search that can tolerate (false) local minima. The default value is set to {@link Config#DEFAULT_EF_CONSTRUCTION}. |
| 288 | + * See {@link Config#getEfConstruction()}. |
| 289 | + */ |
| 290 | + public static final String HNSW_EF_CONSTRUCTION = "hnswEfConstruction"; |
| 291 | + |
| 292 | + /** |
| 293 | + * HNSW-only: Indicator to signal if, during the insertion of a node, the set of nearest neighbors of that node is |
| 294 | + * to be extended by the actual neighbors of those neighbors to form a set of candidates that the new node may be |
| 295 | + * connected to during the insert operation. The default value is set to {@link Config#DEFAULT_EXTEND_CANDIDATES}. |
| 296 | + * See {@link Config#isExtendCandidates()}. |
| 297 | + */ |
| 298 | + public static final String HNSW_EXTEND_CANDIDATES = "hnswExtendCandidates"; |
| 299 | + |
| 300 | + /** |
| 301 | + * HNSW-only: Indicator to signal if, during the insertion of a node, candidates that have been discarded due to not |
| 302 | + * satisfying the select-neighbor heuristic may get added back in to pad the set of neighbors if the new node would |
| 303 | + * otherwise have too few neighbors (see {@link Config#getM()}). The default value is set to |
| 304 | + * {@link Config#DEFAULT_KEEP_PRUNED_CONNECTIONS}. See {@link Config#isKeepPrunedConnections()}. |
| 305 | + */ |
| 306 | + public static final String HNSW_KEEP_PRUNED_CONNECTIONS = "hnswKeepPrunedConnections"; |
| 307 | + |
| 308 | + /** |
| 309 | + * HNSW-only: If sampling is necessary (currently iff {@link #HNSW_USE_RABITQ} is {@code "true"}), this option |
| 310 | + * represents the probability of a vector being inserted to also be written into the samples subspace of the HNSW |
| 311 | + * structure. The vectors in that subspace are continuously aggregated until a total {@link #HNSW_STATS_THRESHOLD} |
| 312 | + * has been reached. The default value is set to {@link Config#DEFAULT_SAMPLE_VECTOR_STATS_PROBABILITY}. See |
| 313 | + * {@link Config#getSampleVectorStatsProbability()}. |
| 314 | + */ |
| 315 | + public static final String HNSW_SAMPLE_VECTOR_STATS_PROBABILITY = "hnswSampleVectorStatsProbability"; |
| 316 | + |
| 317 | + /** |
| 318 | + * HNSW-only: If sampling is necessary (currently iff {@link #HNSW_USE_RABITQ} is {@code "true"}), this option |
| 319 | + * represents the probability of the samples subspace to be further aggregated (rolled-up) when a new vector is |
| 320 | + * inserted. The vectors in that subspace are continuously aggregated until a total |
| 321 | + * {@link #HNSW_STATS_THRESHOLD} has been reached. The default value is set to |
| 322 | + * {@link Config#DEFAULT_MAINTAIN_STATS_PROBABILITY}. See {@link Config#getMaintainStatsProbability()}. |
| 323 | + */ |
| 324 | + public static final String HNSW_MAINTAIN_STATS_PROBABILITY = "hnswMaintainStatsProbability"; |
| 325 | + |
| 326 | + /** |
| 327 | + * HNSW-only: If sampling is necessary (currently iff {@link #HNSW_USE_RABITQ} is {@code "true"}), this option |
| 328 | + * represents the threshold (being a number of vectors) that when reached causes the stats maintenance logic to |
| 329 | + * compute the actual statistics (currently the centroid of the vectors that have been inserted so far). The result |
| 330 | + * is then inserted into the access info subspace of the index. The default value is set to |
| 331 | + * {@link Config#DEFAULT_STATS_THRESHOLD}. See {@link Config#getStatsThreshold()}. |
| 332 | + */ |
| 333 | + public static final String HNSW_STATS_THRESHOLD = "hnswStatsThreshold"; |
| 334 | + |
| 335 | + /** |
| 336 | + * HNSW-only: Indicator if we should use RaBitQ quantization. See {@link com.apple.foundationdb.rabitq.RaBitQuantizer} |
| 337 | + * for more details. The default value is set to {@link Config#DEFAULT_USE_RABITQ}. |
| 338 | + * See {@link Config#isUseRaBitQ()}. |
| 339 | + */ |
| 340 | + public static final String HNSW_USE_RABITQ = "hnswUseRaBitQ"; |
| 341 | + |
| 342 | + /** |
| 343 | + * HNSW-only: Number of bits per dimension iff {@link #HNSW_USE_RABITQ} is set to {@code "true"}, ignored |
| 344 | + * otherwise. If RaBitQ encoding is used, a vector is stored using roughly |
| 345 | + * {@code 25 + numDimensions * (numExBits + 1) / 8} bytes. The default value is set to |
| 346 | + * {@link Config#DEFAULT_RABITQ_NUM_EX_BITS}. See {@link Config#getRaBitQNumExBits()}. |
| 347 | + */ |
| 348 | + public static final String HNSW_RABITQ_NUM_EX_BITS = "hnswRaBitQNumExBits"; |
| 349 | + |
| 350 | + /** |
| 351 | + * HNSW-only: Maximum number of concurrent node fetches during search and modification operations. The default value |
| 352 | + * is set to {@link Config#DEFAULT_MAX_NUM_CONCURRENT_NODE_FETCHES}. |
| 353 | + * See {@link Config#getMaxNumConcurrentNodeFetches()}. |
| 354 | + */ |
| 355 | + public static final String HNSW_MAX_NUM_CONCURRENT_NODE_FETCHES = "hnswMaxNumConcurrentNodeFetches"; |
| 356 | + |
| 357 | + /** |
| 358 | + * HNSW-only: Maximum number of concurrent neighborhood fetches during modification operations when the neighbors |
| 359 | + * are pruned. The default value is set to {@link Config#DEFAULT_MAX_NUM_CONCURRENT_NEIGHBOR_FETCHES}. |
| 360 | + * See {@link Config#getMaxNumConcurrentNeighborhoodFetches()}. |
| 361 | + */ |
| 362 | + public static final String HNSW_MAX_NUM_CONCURRENT_NEIGHBORHOOD_FETCHES = "hnswMaxNumConcurrentNeighborhoodFetches"; |
| 363 | + |
226 | 364 | private IndexOptions() { /* private constructor: prevents instantiation from outside this class */ |
227 | 365 | } |
228 | 366 | } |
0 commit comments