|
| 1 | +package org.fastfilter.xor; |
| 2 | + |
| 3 | +import java.util.Locale; |
| 4 | + |
| 5 | +import org.fastfilter.utils.Hash; |
| 6 | +import org.fastfilter.utils.RandomGenerator; |
| 7 | + |
| 8 | +public class ProbabilityFuse { |
| 9 | + |
| 10 | + private static final int HASHES = 3; |
| 11 | + private static final int FUSE_ARITY = 3; |
| 12 | + private static final int FUSE_SEGMENT_COUNT = 100; |
| 13 | + private static final int FUSE_SLOTS = FUSE_SEGMENT_COUNT + FUSE_ARITY - 1; |
| 14 | + |
| 15 | + public static void main(String... args) { |
| 16 | + for(int size = 100; size < 200000; size *= 1.1) { |
| 17 | + System.out.print("size " + size); |
| 18 | + double start = Math.max(0.1, Math.min(0.8, Math.log10(size / 100) /4)); |
| 19 | + for(double factor = start + 0.1; factor > 0.0; factor -= 0.01) { |
| 20 | + int successCount = 0; |
| 21 | + int testCount = 100; |
| 22 | + for(int seed = 0; seed < testCount; seed++) { |
| 23 | + long[] keys = new long[size]; |
| 24 | + RandomGenerator.createRandomUniqueListFast(keys, seed); |
| 25 | + boolean success = testMapping(keys, factor, seed); |
| 26 | + if (success) { |
| 27 | + successCount++; |
| 28 | + } |
| 29 | + } |
| 30 | + double p = 1.0 * successCount / testCount; |
| 31 | + if (p > 0.9 || factor < 0.15) { |
| 32 | + System.out.printf(Locale.ENGLISH, " %2.2f %2.2f", factor, start); |
| 33 | + break; |
| 34 | + } |
| 35 | + } |
| 36 | + System.out.println(); |
| 37 | + } |
| 38 | + } |
| 39 | + |
| 40 | + public static boolean testMapping(long[] keys, double factor, long seed) { |
| 41 | + int size = keys.length; |
| 42 | + int arrayLength = getArrayLength(size, factor); |
| 43 | + int segmentLength = arrayLength / FUSE_SLOTS; |
| 44 | + int m = arrayLength; |
| 45 | + long[] reverseOrder = new long[size]; |
| 46 | + byte[] reverseH = new byte[size]; |
| 47 | + int reverseOrderPos; |
| 48 | + seed = Hash.randomSeed(); |
| 49 | + byte[] t2count = new byte[m]; |
| 50 | + long[] t2 = new long[m]; |
| 51 | + for (long k : keys) { |
| 52 | + for (int hi = 0; hi < HASHES; hi++) { |
| 53 | + int h = getHash(segmentLength, k, seed, hi); |
| 54 | + t2[h] ^= k; |
| 55 | + if (t2count[h] > 120) { |
| 56 | + // probably something wrong with the hash function |
| 57 | + throw new IllegalArgumentException(); |
| 58 | + } |
| 59 | + t2count[h]++; |
| 60 | + } |
| 61 | + } |
| 62 | + reverseOrderPos = 0; |
| 63 | + int[] alone = new int[arrayLength]; |
| 64 | + int alonePos = 0; |
| 65 | + for (int i = 0; i < arrayLength; i++) { |
| 66 | + if (t2count[ i] == 1) { |
| 67 | + alone[alonePos++] = i; |
| 68 | + } |
| 69 | + } |
| 70 | + int found = -1; |
| 71 | + while (alonePos > 0) { |
| 72 | + int i = alone[--alonePos]; |
| 73 | + if (t2count[i] <= 0) { |
| 74 | + continue; |
| 75 | + } |
| 76 | + if (t2count[i] != 1) { |
| 77 | + throw new AssertionError(); |
| 78 | + } |
| 79 | + --t2count[i]; |
| 80 | + long k = t2[i]; |
| 81 | + for (int hi = 0; hi < HASHES; hi++) { |
| 82 | + int h = getHash(segmentLength, k, seed, hi); |
| 83 | + int newCount = --t2count[h]; |
| 84 | + if (h == i) { |
| 85 | + found = hi; |
| 86 | + } else { |
| 87 | + if (newCount == 1) { |
| 88 | + alone[alonePos++] = h; |
| 89 | + } |
| 90 | + t2[h] ^= k; |
| 91 | + } |
| 92 | + } |
| 93 | + reverseOrder[reverseOrderPos] = k; |
| 94 | + reverseH[reverseOrderPos] = (byte) found; |
| 95 | + reverseOrderPos++; |
| 96 | + } |
| 97 | + return reverseOrderPos == size; |
| 98 | + } |
| 99 | + |
| 100 | + private static int getHash(int segmentLength, long key, long seed, int index) { |
| 101 | + long hash = Hash.hash64(key, seed); |
| 102 | + int r0 = (int) ((0xBF58476D1CE4E5B9L * hash) >> 32); |
| 103 | + int seg = Hash.reduce(r0, FUSE_SEGMENT_COUNT); |
| 104 | + int r = (int) Long.rotateLeft(hash, 21 * index); |
| 105 | + return (seg + index) * segmentLength + Hash.reduce(r, segmentLength); |
| 106 | + } |
| 107 | + |
| 108 | + private static int getArrayLength(int size, double factor) { |
| 109 | + int capacity = (int) (1.0 / factor * size) + 64; |
| 110 | + capacity = (capacity + FUSE_SLOTS - 1) / FUSE_SLOTS * FUSE_SLOTS; |
| 111 | + return capacity; |
| 112 | + } |
| 113 | + |
| 114 | +} |
0 commit comments