Skip to content

Commit 3295a0e

Browse files
committed
Fuse Filters #21
1 parent 5eeaa9b commit 3295a0e

File tree

1 file changed

+114
-0
lines changed

1 file changed

+114
-0
lines changed
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
package org.fastfilter.xor;
2+
3+
import java.util.Locale;
4+
5+
import org.fastfilter.utils.Hash;
6+
import org.fastfilter.utils.RandomGenerator;
7+
8+
public class ProbabilityFuse {
9+
10+
private static final int HASHES = 3;
11+
private static final int FUSE_ARITY = 3;
12+
private static final int FUSE_SEGMENT_COUNT = 100;
13+
private static final int FUSE_SLOTS = FUSE_SEGMENT_COUNT + FUSE_ARITY - 1;
14+
15+
public static void main(String... args) {
16+
for(int size = 100; size < 200000; size *= 1.1) {
17+
System.out.print("size " + size);
18+
double start = Math.max(0.1, Math.min(0.8, Math.log10(size / 100) /4));
19+
for(double factor = start + 0.1; factor > 0.0; factor -= 0.01) {
20+
int successCount = 0;
21+
int testCount = 100;
22+
for(int seed = 0; seed < testCount; seed++) {
23+
long[] keys = new long[size];
24+
RandomGenerator.createRandomUniqueListFast(keys, seed);
25+
boolean success = testMapping(keys, factor, seed);
26+
if (success) {
27+
successCount++;
28+
}
29+
}
30+
double p = 1.0 * successCount / testCount;
31+
if (p > 0.9 || factor < 0.15) {
32+
System.out.printf(Locale.ENGLISH, " %2.2f %2.2f", factor, start);
33+
break;
34+
}
35+
}
36+
System.out.println();
37+
}
38+
}
39+
40+
public static boolean testMapping(long[] keys, double factor, long seed) {
41+
int size = keys.length;
42+
int arrayLength = getArrayLength(size, factor);
43+
int segmentLength = arrayLength / FUSE_SLOTS;
44+
int m = arrayLength;
45+
long[] reverseOrder = new long[size];
46+
byte[] reverseH = new byte[size];
47+
int reverseOrderPos;
48+
seed = Hash.randomSeed();
49+
byte[] t2count = new byte[m];
50+
long[] t2 = new long[m];
51+
for (long k : keys) {
52+
for (int hi = 0; hi < HASHES; hi++) {
53+
int h = getHash(segmentLength, k, seed, hi);
54+
t2[h] ^= k;
55+
if (t2count[h] > 120) {
56+
// probably something wrong with the hash function
57+
throw new IllegalArgumentException();
58+
}
59+
t2count[h]++;
60+
}
61+
}
62+
reverseOrderPos = 0;
63+
int[] alone = new int[arrayLength];
64+
int alonePos = 0;
65+
for (int i = 0; i < arrayLength; i++) {
66+
if (t2count[ i] == 1) {
67+
alone[alonePos++] = i;
68+
}
69+
}
70+
int found = -1;
71+
while (alonePos > 0) {
72+
int i = alone[--alonePos];
73+
if (t2count[i] <= 0) {
74+
continue;
75+
}
76+
if (t2count[i] != 1) {
77+
throw new AssertionError();
78+
}
79+
--t2count[i];
80+
long k = t2[i];
81+
for (int hi = 0; hi < HASHES; hi++) {
82+
int h = getHash(segmentLength, k, seed, hi);
83+
int newCount = --t2count[h];
84+
if (h == i) {
85+
found = hi;
86+
} else {
87+
if (newCount == 1) {
88+
alone[alonePos++] = h;
89+
}
90+
t2[h] ^= k;
91+
}
92+
}
93+
reverseOrder[reverseOrderPos] = k;
94+
reverseH[reverseOrderPos] = (byte) found;
95+
reverseOrderPos++;
96+
}
97+
return reverseOrderPos == size;
98+
}
99+
100+
private static int getHash(int segmentLength, long key, long seed, int index) {
101+
long hash = Hash.hash64(key, seed);
102+
int r0 = (int) ((0xBF58476D1CE4E5B9L * hash) >> 32);
103+
int seg = Hash.reduce(r0, FUSE_SEGMENT_COUNT);
104+
int r = (int) Long.rotateLeft(hash, 21 * index);
105+
return (seg + index) * segmentLength + Hash.reduce(r, segmentLength);
106+
}
107+
108+
private static int getArrayLength(int size, double factor) {
109+
int capacity = (int) (1.0 / factor * size) + 64;
110+
capacity = (capacity + FUSE_SLOTS - 1) / FUSE_SLOTS * FUSE_SLOTS;
111+
return capacity;
112+
}
113+
114+
}

0 commit comments

Comments
 (0)