From 911eb40a15fe0f81fb021e026fc0084339661f92 Mon Sep 17 00:00:00 2001 From: Nazri Ramliy Date: Fri, 8 Aug 2014 09:58:53 +0800 Subject: [PATCH 1/9] Add bootstrap script for intializing TDD MakefileTest.am The goal is to be able to do something like this: $ make check If something breaks $ prove test --- autogen.sh | 3 +++ test/.gitignore | 2 ++ test/Makefile.am | 2 ++ test/MakefileTest.am.stub | 3 +++ test/README.tdd | 15 +++++++++++++++ test/bootstrap.sh | 26 ++++++++++++++++++++++++++ 6 files changed, 51 insertions(+) create mode 100644 test/.gitignore create mode 100644 test/MakefileTest.am.stub create mode 100644 test/README.tdd create mode 100755 test/bootstrap.sh diff --git a/autogen.sh b/autogen.sh index 3666e6f..75ca7f2 100755 --- a/autogen.sh +++ b/autogen.sh @@ -19,6 +19,9 @@ DIE=0 cd modules; ./build_modules.sh bootstrap cd ../ +cd test +./bootstrap.sh +cd ../ echo "running... [ "${BLD_SRT}"libtoolize"${BLD_END}" ]" (libtoolize --automake || glibtoolize --automake) diff --git a/test/.gitignore b/test/.gitignore new file mode 100644 index 0000000..c8a0ce0 --- /dev/null +++ b/test/.gitignore @@ -0,0 +1,2 @@ +MakefileTest.am +*.t diff --git a/test/Makefile.am b/test/Makefile.am index cfa0c35..88472ca 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -4,6 +4,8 @@ ferite_SOURCES = main.c dot.c dot.h cache_ferite_SOURCES = cache-ferite.c ferite_script_SOURCES = main_script.c dot.c dot.h +include $(srcdir)/MakefileTest.am + EXTRA_DIST = dot.h ferite_DEPENDENCIES = $(top_builddir)/src/libferite.la diff --git a/test/MakefileTest.am.stub b/test/MakefileTest.am.stub new file mode 100644 index 0000000..696c2c4 --- /dev/null +++ b/test/MakefileTest.am.stub @@ -0,0 +1,3 @@ +TEST_EXTENSIONS = .t +TESTS = +check_PROGRAMS = diff --git a/test/README.tdd b/test/README.tdd new file mode 100644 index 0000000..6dff168 --- /dev/null +++ b/test/README.tdd @@ -0,0 +1,15 @@ +To add a new test file: + +1. Create the new file, say, foo-test.c +2. Edit MakefileTest.am.stub and add this entry: + + TEST: foo-test.c + +3. Run bootstrap.sh to update MakefileTest.am: + + $ ./bootstrap.sh + +4. Finally run make check. This will compile the new test and run it with all + the existing tests: + + $ make check diff --git a/test/bootstrap.sh b/test/bootstrap.sh new file mode 100755 index 0000000..191078f --- /dev/null +++ b/test/bootstrap.sh @@ -0,0 +1,26 @@ +#!/bin/sh + +echo "# This file was generated by bootstrap.sh" > MakefileTest.am +awk ' +/^TEST: .*\.c\>/ { + src = $2 + exe = $2 + derived = $2 + sources = $0 + gsub("TEST: *"src" *", "", sources) + if (length(sources)) + sources = sources " tap.c" + else + sources = sources "tap.c" + + gsub("\\.c$", ".t", exe) + gsub("[-.]", "_", derived) + gsub("_c$", "_t", derived) + + print "TESTS += " exe\ + "\ncheck_PROGRAMS += " exe\ + "\n"derived"_SOURCES = " src" "sources\ + "\n"derived"_DEPENDENCIES = $(top_builddir)/src/libferite.la\n" + next +} +{ print }' MakefileTest.am.stub >> MakefileTest.am From 39ff48fe1e8d6c943bdd96daab0cc250badc4455 Mon Sep 17 00:00:00 2001 From: Nazri Ramliy Date: Fri, 8 Aug 2014 12:48:55 +0800 Subject: [PATCH 2/9] Add simple stupid tap producer If the need arises the we should choose a more mature tap producer [1]. [1] http://en.wikipedia.org/wiki/Test_Anything_Protocol#List_of_TAP_producers --- test/tap.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ test/tap.h | 11 +++++++ 2 files changed, 97 insertions(+) create mode 100644 test/tap.c create mode 100644 test/tap.h diff --git a/test/tap.c b/test/tap.c new file mode 100644 index 0000000..3bfa776 --- /dev/null +++ b/test/tap.c @@ -0,0 +1,86 @@ +#include +#include "tap.h" + +int ntest = 0; +int nfailed = 0; + +char *OK = "ok"; +char *NOT_OK = "not ok"; + +char *update_counters_and_get_status_str(int status) { + ntest++; + if (!status) { + nfailed++; + return NOT_OK; + } + return OK; +} + +int vok(int status, char *msg, va_list ap) { + char *out = update_counters_and_get_status_str(status); + printf("%s %d - ", out, ntest); + vprintf(msg, ap); + printf("\n"); + return status; +} + +int ok(int status, char *msg, ...) { + va_list ap; + char *out = update_counters_and_get_status_str(status); + printf("%s %d - ", out, ntest); + va_start(ap, msg); + vprintf(msg, ap); + va_end(ap); + printf("\n"); + return status; +} + +int is(int got, int want, char *msg, ...) { + int success = 0; + va_list ap; + if (got == want) { + success = 1; + } + va_start(ap, msg); + vok (success, msg, ap); + va_end(ap); + if (! success) { + printf("want: %d\n", want); + printf(" got: %d\n", got); + } + return success; +} + +int is_str(char *got, char *want, char *msg, ...) { + int success = 0; + va_list ap; + if (got == NULL && want == NULL) { + success = 1; + } else if (got == NULL || want == NULL) { + success = 0; + } else { + success = strcmp(got, want) == 0; + } + va_start(ap, msg); + vok (success, msg, ap); + va_end(ap); + if (! success) { + printf("want: \"%s\"\n", want); + printf(" got: \"%s\"\n", got); + } + return success; +} + +void diag(char *msg, ...) { + va_list ap; + printf("# "); + va_start(ap, msg); + vprintf(msg, ap); + va_end(ap); + printf("\n"); +} + +int done_testing() { + printf("1..%d\n", ntest); + return nfailed != 0; +} diff --git a/test/tap.h b/test/tap.h new file mode 100644 index 0000000..b78fdc9 --- /dev/null +++ b/test/tap.h @@ -0,0 +1,11 @@ +#ifndef TAP_H +#define TAP_H +#include +#include +int vok(int status, char *msg, va_list ap); +int ok(int status, char *msg, ...); +int is_str(char *a, char *b, char *msg, ...); +int is(int a, int b, char *msg, ...); +void diag(char *msg, ...); +int done_testing(); +#endif From 32d6de01c7af74029edb0eb3fcbb8070503ceb54 Mon Sep 17 00:00:00 2001 From: Nazri Ramliy Date: Fri, 8 Aug 2014 12:11:52 +0800 Subject: [PATCH 3/9] Add test for ferite amt array --- test/MakefileTest.am.stub | 2 ++ test/ferite_amt-array.c | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 test/ferite_amt-array.c diff --git a/test/MakefileTest.am.stub b/test/MakefileTest.am.stub index 696c2c4..bc9a9df 100644 --- a/test/MakefileTest.am.stub +++ b/test/MakefileTest.am.stub @@ -1,3 +1,5 @@ TEST_EXTENSIONS = .t TESTS = check_PROGRAMS = + +TEST: ferite_amt-array.c diff --git a/test/ferite_amt-array.c b/test/ferite_amt-array.c new file mode 100644 index 0000000..2e10cb1 --- /dev/null +++ b/test/ferite_amt-array.c @@ -0,0 +1,26 @@ +#include "tap.h" +#include "ferite.h" + +void testArrayCreate() { + FeriteAMT *amt = ferite_AMTArray_Create(NULL); + is (amt->lower_bound, 0, "Create: lower bound must be 0"); + is (amt->upper_bound, 0, "Create: upper bound must be 0"); +} + +void testArraySetAndGet() { + char *got; + char *want = "Item 10"; + + FeriteAMT *amt = ferite_AMTArray_Create(NULL); + ferite_amt_set(NULL, amt, 10, want); + is (amt->upper_bound, 10, "After add at 10 upper bound must be 10"); + got = ferite_amt_get(NULL, amt, 10); + is_str(got, want, "Get array[10] must return '%s'", want); +} + +int main(int argc, char *argv[]) { + ferite_init(argc, argv); + testArrayCreate(); + testArraySetAndGet(); + return done_testing(); +} From fd3e6b2f119cde1feb9dc3ad10467b20e27bc605 Mon Sep 17 00:00:00 2001 From: Nazri Ramliy Date: Fri, 8 Aug 2014 12:15:25 +0800 Subject: [PATCH 4/9] Bugfix ferite_loworderindex breaks when shiftAmount is 2 Buggy behavior is shown by ferite_amt-hash-index-from-hash.c The buggy code was: return (((index << (shiftAmount - AMT_SHIFT_AMOUNT)) >> 27) & 0x1F); where shiftAmount is unsigned int and AMT_SHIFT_AMOUNT is 5. The function was called successively with shiftAmount set to 32, 27, 22, 17, 12, 7, 2 (minus AMT_SHIFT_AMOUNT each time). When shiftAmount is less than AMT_SHIFT_AMOUNT, shiftAmount - AMT_SHIFT_AMOUNT becomes: (unsigned int)(2) - 5 which is a small unsigned integer that underflow to a very large unsigned integer hence the buggy behavior. Note that this bug has no effect on the behavior of the AMT as the value is expected to be any value within 0-31 - the only visible effect is in the actual placement of the hash items in memory. --- src/ferite_amt.c | 2 +- test/MakefileTest.am.stub | 1 + test/ferite_amt-index-from-hash.c | 134 ++++++++++++++++++++++++++++++ 3 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 test/ferite_amt-index-from-hash.c diff --git a/src/ferite_amt.c b/src/ferite_amt.c index 052301c..a00b0db 100644 --- a/src/ferite_amt.c +++ b/src/ferite_amt.c @@ -192,7 +192,7 @@ unsigned int ferite_highorderindex( unsigned int index, unsigned int shiftAmount return (((index << (AMT_SHIFT_START - shiftAmount)) >> 27) & 0x1F); } unsigned int ferite_loworderindex( unsigned int index, unsigned int shiftAmount ) { - return (((index << (shiftAmount - AMT_SHIFT_AMOUNT)) >> 27) & 0x1F); + return (index >> (AMT_SHIFT_START - shiftAmount)) & 0x1F; } FeriteAMTTree *ferite_amt_tree_create( FeriteScript *script, FeriteAMTTree *parent, int index_type ) { FeriteAMTTree *tree = fmalloc(sizeof(FeriteAMTTree)); diff --git a/test/MakefileTest.am.stub b/test/MakefileTest.am.stub index bc9a9df..cd2e444 100644 --- a/test/MakefileTest.am.stub +++ b/test/MakefileTest.am.stub @@ -3,3 +3,4 @@ TESTS = check_PROGRAMS = TEST: ferite_amt-array.c +TEST: ferite_amt-index-from-hash.c diff --git a/test/ferite_amt-index-from-hash.c b/test/ferite_amt-index-from-hash.c new file mode 100644 index 0000000..f20f2f1 --- /dev/null +++ b/test/ferite_amt-index-from-hash.c @@ -0,0 +1,134 @@ +#include "tap.h" +#include "ferite.h" + +#define FeriteAMT_SHIFT_AMOUNT (5) // This must equal to the one defined in ObjAMT.c +#define FeriteAMT_SHIFT_START (32) + +struct TestVector { + char *desc; + unsigned int hash; + int values[7]; +}; + +struct TestVector highOrderIndexTests[] = { + { + "High Order All Ones", + 0xffffffff, + // Hex: f f f f f f f f + // Binary: 1111 1111 1111 1111 1111 1111 1111 1111 ... + //5-bit Decimal: 31 31 31 31 31 31 24 + { 31, 31, 31, 31, 31, 31, 24 } + }, + { + "High Order All Zeroes", + 0x00000000, + // Hex: 0 0 0 0 0 0 0 0 + // Binary: 0000 0000 0000 0000 0000 0000 0000 0000 ... + //5-bit Decimal: 0 0 0 0 0 0 0 + { 0, 0, 0, 0, 0, 0, 0 } + }, + { + "High Order Sequence", + 0x08864299, + // Hex: 0 8 8 6 4 2 9 9 + // Binary: 0000 1000 1000 0110 0100 0010 1001 1001 ... + //5-bit Decimal: 1 2 3 4 5 6 8 + { 1, 2, 3, 4, 5, 6, 8 } + }, + { + "High Order Sequence Desc", + 0xffbbcde9, + // Hex: f f b b c d e 9 + // Binary: 1111 1111 1011 1011 1100 1101 1110 1001 ... + //5-bit Decimal: 31 30 29 28 27 26 8 + { 31, 30, 29, 28, 27, 26, 8 } + } + +}; + +struct TestVector lowOrderIndexTests[] = { + { + "Low Order All Ones", + 0xffffffff, + // Hex: f f f f f f f f + // Binary: ... 1111 1111 1111 1111 1111 1111 1111 1111 + //x5-bit Decimal: 3 31 31 31 31 31 31 + { 3, 31, 31, 31, 31, 31, 31 } + }, + { + "Low Order All Zeroes", + 0x00000000, + // Hex: 0 0 0 0 0 0 0 0 + // Binary: ... 0000 0000 0000 0000 0000 0000 0000 0000 + //x5-bit Decimal: 0 0 0 0 0 0 0 + { 0, 0, 0, 0, 0, 0, 0 } + }, + { + "Low Order Sequence", + 0xcc520c41, + // Hex: c c 5 2 0 c 4 1 + // Binary: ... 1100 1100 0101 0010 0000 1100 0100 0001 + // 5-bit Decimal: 3 6 5 4 3 2 1 + { 3, 6, 5, 4, 3, 2, 1 } + }, + { + "Low Order Sequence Desc", + 0xfdde6f59, + // Hex: f d d e 6 f 5 9 + // Binary: ... 1111 1101 1101 1110 0110 1111 0101 1001 + // 5-bit Decimal: 3 30 29 28 27 26 25 + { 3, 30, 29, 28, 27, 26, 25 } + } +}; + + +void runHighOrderIndexTests(char *desc, unsigned int hash, int values[], size_t sz) { + int shiftAmount = 32; + size_t i = 0; + while (i < sz) { + int want = values[i]; + int got = ferite_highorderindex(hash, shiftAmount); + is(got, want, "%s: ferite_highorderindex(0x%x, %d) must return %d", + desc, hash, shiftAmount, want); + shiftAmount -= FeriteAMT_SHIFT_AMOUNT; + i++; + } +} + +void runLowOrderIndexTests(char *desc, unsigned int hash, int values[], size_t sz) { + int shiftAmount = 32; + size_t i = 0; + + while (i < sz) { + int want = values[sz - 1 - i]; + int got = ferite_loworderindex(hash, shiftAmount); + is (got, want, "%s: ferite_loworderindex(0x%x, %d) must return %d", + desc, hash, shiftAmount, want); + shiftAmount -= FeriteAMT_SHIFT_AMOUNT; + i++; + } +} + +void testHighOrderIndex() { + size_t i; + for (i = 0; i < sizeof(highOrderIndexTests)/sizeof(highOrderIndexTests[0]); i++) { + struct TestVector *t = &highOrderIndexTests[i]; + runHighOrderIndexTests(t->desc, t->hash, t->values, + sizeof(t->values)/sizeof(t->values[0])) ; + } +} + +void testLowOrderIndex() { + size_t i; + for (i = 0; i < sizeof(lowOrderIndexTests)/sizeof(lowOrderIndexTests[0]); i++) { + struct TestVector *t = &lowOrderIndexTests[i]; + runLowOrderIndexTests(t->desc, t->hash, t->values, + sizeof(t->values)/sizeof(t->values[0])) ; + } +} + +int main() { + testHighOrderIndex(); + testLowOrderIndex(); + return done_testing(); +} From 22f6ae15b80a356087caaffe850174c9dcda4ec6 Mon Sep 17 00:00:00 2001 From: Nazri Ramliy Date: Fri, 8 Aug 2014 12:20:49 +0800 Subject: [PATCH 5/9] Add ferite_hamt_index() with tests This function is for when the hash bits are exhausted - the case when the hashes of two different keys collides. --- src/ferite_amt.c | 105 ++++++++++++++++ test/MakefileTest.am.stub | 1 + test/ferite_amt-index-from-hash-and-string.c | 120 +++++++++++++++++++ 3 files changed, 226 insertions(+) create mode 100644 test/ferite_amt-index-from-hash-and-string.c diff --git a/src/ferite_amt.c b/src/ferite_amt.c index a00b0db..9671100 100644 --- a/src/ferite_amt.c +++ b/src/ferite_amt.c @@ -194,6 +194,111 @@ unsigned int ferite_highorderindex( unsigned int index, unsigned int shiftAmount unsigned int ferite_loworderindex( unsigned int index, unsigned int shiftAmount ) { return (index >> (AMT_SHIFT_START - shiftAmount)) & 0x1F; } + +/* Return the successive AMT_SHIFT_AMOUNT-bit sized value taken from the bits + * in hash and key based on depth. The bits in hash is exhausted first, + * followed by the bits in key, starting from the last character since that's + * the most likely place that the keys differ from each other. + * + * depth 0 corresponds to the first hamt level. + * + * The bits in key are used starting from msb. + * + * Note that the key could be an empty string, indicated by len == 0. + */ +size_t ferite_hamt_index(unsigned int hash, char *key, size_t len, unsigned int depth) { + const size_t nHashBits = 8 * sizeof(unsigned int); + size_t i; + int hi; // Index to key where the msb bits are to be taken from + int li; // Index to key where the lsb bits are to be taken from + unsigned char fbi; // offset to the first bit in the byte, from MSB + + size_t keyBitsUsed; + + // The MSB bit offset of index in key, starting from the MSB of the last + // character in key + size_t keyBitIdx; + + size_t nbits = 8 * sizeof(hash); + + // The number of complete AMT_SHIFT_AMOUNT bits that we can get from + // the hash + size_t nHashIndeces = nbits / AMT_SHIFT_AMOUNT; + size_t remBits = AMT_SHIFT_START % AMT_SHIFT_AMOUNT; + + if (depth < nHashIndeces) { + return ferite_highorderindex(hash, + AMT_SHIFT_START - (depth * AMT_SHIFT_AMOUNT)); + } + + if (depth == nHashIndeces) { + // We're at the boundary between last bits of hash and the + // first character in key. Combine the MSB bits from the last + // character in key with the remainder bit from the hash + // to complete the AMT_SHIFT_AMOUNT-bit index. + // + // The bits from the hash forms the MSB portion, while the MSB bits + // from the last character of key will from the LSB portion. + unsigned char hv = hash & ((1 << remBits) - 1); + unsigned char k; + unsigned char lv; + if (len == 0) { + k = '\0'; + lv = 0; + } else { + k = key[len-1]; + lv = k >> (8 - (AMT_SHIFT_AMOUNT - remBits)); + } + return (hv << (AMT_SHIFT_AMOUNT - remBits)) | lv; + } + + if (len == 0) { + return 0; + } + + i = depth - nHashIndeces - 1; + + // The number of bits already used in key. + // We start using bits from the end of key, character by character, starting + // from the MSB. + keyBitsUsed = depth * AMT_SHIFT_AMOUNT - nHashBits; + + // The MSB bit offset of index in key, starting from the MSB of the last + // character in key + keyBitIdx = keyBitsUsed; + + hi = (len - 1) - ((keyBitIdx) / 8); + li = (len - 1) - ((keyBitIdx + AMT_SHIFT_AMOUNT - 1) / 8); + + fbi = keyBitIdx % 8; + + if (li >= 0 && hi == li) { + // The bits are contained in the same byte + unsigned char v = key[hi]; + unsigned char sr = 8 - AMT_SHIFT_AMOUNT - fbi; + unsigned char indexMask = (1 << AMT_SHIFT_AMOUNT) - 1; + return (v >> sr) & indexMask; + } else if (li >= 0 && hi != li) { + // The bits are split across two bytes + unsigned char hv = key[hi]; + unsigned char lv = key[li]; + unsigned char nHiBits = 8 - fbi; + unsigned char nLoBits = AMT_SHIFT_AMOUNT - nHiBits; + unsigned char sr = 8 - nLoBits; + + hv = hv & ((1 << (8 - fbi)) - 1); + lv = lv >> sr; + return (hv << nLoBits) | lv; + } else { + // li is negative - We are at the first character of the key + unsigned char v = key[hi]; + unsigned char sl = fbi - (8 - AMT_SHIFT_AMOUNT); + + return (v & ((1 << (8 - fbi))-1)) << sl; + } +} + + FeriteAMTTree *ferite_amt_tree_create( FeriteScript *script, FeriteAMTTree *parent, int index_type ) { FeriteAMTTree *tree = fmalloc(sizeof(FeriteAMTTree)); memset(tree, 0, sizeof(FeriteAMTTree)); diff --git a/test/MakefileTest.am.stub b/test/MakefileTest.am.stub index cd2e444..ec782c0 100644 --- a/test/MakefileTest.am.stub +++ b/test/MakefileTest.am.stub @@ -4,3 +4,4 @@ check_PROGRAMS = TEST: ferite_amt-array.c TEST: ferite_amt-index-from-hash.c +TEST: ferite_amt-index-from-hash-and-string.c diff --git a/test/ferite_amt-index-from-hash-and-string.c b/test/ferite_amt-index-from-hash-and-string.c new file mode 100644 index 0000000..83e562a --- /dev/null +++ b/test/ferite_amt-index-from-hash-and-string.c @@ -0,0 +1,120 @@ +#include "tap.h" +#include "ferite.h" + +struct HashAndKeyAndIndeces { + char *desc; + unsigned int maxDepth; + unsigned int hash; + char *key; + int expectedIndeces[20]; +}; + +#define BYTES_IN_HASH sizeof(unsigned int) +#define NBITS(nbytes) (8 * (nbytes)) +#define NBITS2DEPTH(nbits) ((nbits/5) + (nbits%5?1:0)) +#define maxDepthForHashWithKey(str) NBITS2DEPTH(NBITS(BYTES_IN_HASH + sizeof(str) - 1)) +#define TEST(str, hash, key) str " with key = \"" key "\"", \ + maxDepthForHashWithKey(key), \ + hash, \ + key +struct HashAndKeyAndIndeces testVectors[] = { + { + TEST("Sequential", 0x08864299, ""), + // MSB...LSB MSB...LSB + // ab: 0110 0001 0110 0010 + // Hex: 0 8 8 6 4 2 9 9| + // Binary: 0000 1000 1000 0110 0100 0010 1001 1001|... + /*5-bit Decimal:*/{ 1, 2, 3, 4, 5, 6, 8 } + // depth: 0 1 2 3 4 5 6 + }, + { + TEST("Sequential", 0x08864298, ""), + // MSB...LSB MSB...LSB + // ab: 0110 0001 0110 0010 + // Hex: 0 8 8 6 4 2 9 8| + // Binary: 0000 1000 1000 0110 0100 0010 1001 1000|... + /*5-bit Decimal:*/{ 1, 2, 3, 4, 5, 6, 0 } + // depth: 0 1 2 3 4 5 6 + }, + { + TEST("Sequential", 0x08864299, "a"), + // MSB LSB + // a: 0110 0001 a + // Hex: 0 8 8 6 4 2 9 9|MSB LSB + // Binary: 0000 1000 1000 0110 0100 0010 1001 1001|0110 0001 + /*5-bit Decimal:*/{ 1, 2, 3, 4, 5, 6, 11, 1 } + // depth: 0 1 2 3 4 5 6 7 + }, + { + TEST("Sequential", 0x08864299, "ab"), + // MSB...LSB MSB...LSB + // ab: 0110 0001 0110 0010 b a + // Hex: 0 8 8 6 4 2 9 9| + // Binary: 0000 1000 1000 0110 0100 0010 1001 1001|0110 0010 0110 0001.. + /*5-bit Decimal:*/{ 1, 2, 3, 4, 5, 6, 11, 2, 12, 4 } + // depth: 0 1 2 3 4 5 6 7 8 9 + }, + { + TEST("Sequential", 0x08864290, "ab"), + // MSB...LSB MSB...LSB + // ab: 0110 0001 0110 0010 b a + // Hex: 0 8 8 6 4 2 9 0| + // Binary: 0000 1000 1000 0110 0100 0010 1001 0000|0110 0010 0110 0001.. + /*5-bit Decimal:*/{ 1, 2, 3, 4, 5, 4, 3, 2, 12, 4 } + // depth: 0 1 2 3 4 5 6 7 8 9 + }, + { + TEST("Sequential", 0x08864299, "AIW"), + // W I A + // Hex: 0 8 8 6 4 2 9 9| 5 7 4 9 4 1 + // Binary: 0000 1000 1000 0110 0100 0010 1001 1001|0101 0111 0100 1001 0100 0001 .... + /*5-bit Decimal:*/{ 1, 2, 3, 4, 5, 6, 10, 23, 9, 5, 0, 16 } + // depth: 0 1 2 3 4 5 6 7 8 9, 10, 11 + }, + { + TEST("Sequential", 0x08864299, "abcdef"), + // MSB...LSB MSB...LSB + // ab: 0110 0001 0110 0010 f e d c b a + // Hex: 0 8 8 6 4 2 9 9| + // Binary: 0000 1000 1000 0110 0100 0010 1001 1001|0110 0110 0110 0101 0110 0100 0110 0011 0110 0010 0110 0001 + /*5-bit Decimal:*/{ 1, 2, 3, 4, 5, 6, 11, 6, 12, 21, 18, 6, 6, 24, 19, 1 } + // depth: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + }, + { + TEST("Sequential", 0x08864299, "abcdefg"), + // MSB...LSB MSB...LSB + // ab: 0110 0001 0110 0010 g f e d c b a + // Hex: 0 8 8 6 4 2 9 9| + // Binary: 0000 1000 1000 0110 0100 0010 1001 1001|0110 0111 0110 0110 0110 0101 0110 0100 0110 0011 0110 0010 0110 0001 .. + /*5-bit Decimal:*/{ 1, 2, 3, 4, 5, 6, 11, 7, 12, 25, 18, 22, 8, 24, 27, 2, 12, 4 } + // depth: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 + }, +}; + +void testIndexFromHashAndString() { + size_t ntest = sizeof(testVectors) / sizeof(testVectors[0]); + size_t i; + for (i = 0; i < ntest; i++) { + struct HashAndKeyAndIndeces *t = &testVectors[i]; + size_t d = 0; + size_t keyLen = strlen(t->key); + for (d = 0; d < t->maxDepth; d++) { + size_t got = ferite_hamt_index(t->hash, t->key, keyLen, d); + is (got, t->expectedIndeces[d], + "Index From Hash And String: %s depth = %d", t->desc, d); + } + } +} + +int main() +{ + is (maxDepthForHashWithKey("a"), 8, + "hash with key \"a\" must have depth %d", + maxDepthForHashWithKey("a")); + is (maxDepthForHashWithKey("ab"), 10, + "hash with key \"ab\" must have depth %d", + maxDepthForHashWithKey("ab")); + + testIndexFromHashAndString(); + return done_testing(); +} From 6b2cb0bb29dceae43f96d09e95c9386a99100ebf Mon Sep 17 00:00:00 2001 From: Nazri Ramliy Date: Fri, 8 Aug 2014 12:29:01 +0800 Subject: [PATCH 6/9] Bugfix amt set and get did not handle hash collision Upon hash bits exhaustion we use the bits from the key. While this is not an optimal source of bits due to the non-random nature of the ascii characters in the keys which results in non optimal use of RAM, it should be good enough for use here under the assumption that hash collisions are rare. --- src/ferite_amt.c | 141 +++++++++++++++++++++---- test/MakefileTest.am.stub | 1 + test/ferite_amt-hash-set-and-get.c | 158 +++++++++++++++++++++++++++++ test/test_amt.c | 99 ++++++++++++++++++ test/test_amt.h | 30 ++++++ 5 files changed, 407 insertions(+), 22 deletions(-) create mode 100644 test/ferite_amt-hash-set-and-get.c create mode 100644 test/test_amt.c create mode 100644 test/test_amt.h diff --git a/src/ferite_amt.c b/src/ferite_amt.c index 9671100..e13916d 100644 --- a/src/ferite_amt.c +++ b/src/ferite_amt.c @@ -431,10 +431,16 @@ void ferite_amt_print( FeriteScript *script, FeriteAMT *tree ) { void *_ferite_amt_set( FeriteScript *script, FeriteAMT *tree, unsigned long index, char *optional_key, void *value ) { FeriteAMTTree *root = tree->root; - unsigned long shiftAmount = AMT_SHIFT_START; + int shiftAmount = AMT_SHIFT_START; unsigned long baseIndex = AMT_APPLY_SHIFT(tree, index); + size_t amtDepth = 0; + size_t keyLen = 0; FeriteAMTNode *baseItem = NULL; - + + if (optional_key != NULL) { + keyLen = strlen(optional_key); + } + if( index > tree->upper_bound ) tree->upper_bound = index; if( index < tree->lower_bound ) @@ -444,14 +450,45 @@ void *_ferite_amt_set( FeriteScript *script, FeriteAMT *tree, unsigned long inde while( FE_TRUE ) { if( !(GETBIT(root->map, baseIndex)) ) { tree->total++; - AMT_SET( script, root, AMT_APPLY_SHIFT(tree, index), ferite_amt_create_value_node(script, index, optional_key, value) ); + AMT_SET( script, root, baseIndex, ferite_amt_create_value_node(script, index, optional_key, value) ); return value; } else { baseItem = AMT_GET( script, root, baseIndex ); if( IS_NODE(baseItem) ) { - if( (baseItem->type == FeriteAMTType_ANode && baseItem->u.value.id == index) || - (baseItem->type == FeriteAMTType_HNode && baseItem->u.value.id == index) ) { + int replace = 0; + + // What to do on hash collision + // - Array: replace old with new + // - Hash: replace only if the keys exist and are matching. + // Otherwise split the node and use the keys for getting the + // unique hash bits. + if (baseItem->u.value.id == index) { + if ( baseItem->type == FeriteAMTType_ANode) { + replace = 1; + } else if (baseItem->type == FeriteAMTType_HNode) { + if (optional_key == NULL) { + fprintf(stderr, "ferite_amt_set(): " + "hash collides (%lu) but missing optional_key, " + "existing key = '%s'", + index, baseItem->u.value.key); + return NULL; + } + if (baseItem->u.value.key == NULL) { + fprintf(stderr, "ferite_amt_set(): " + "Error: Hash collides (%lu) but existing " + "key is NULL, while new " + "optional key = '%s'", + index, optional_key); + return NULL; + } + if (strcmp(baseItem->u.value.key, optional_key) == 0) { + replace = 1; + } + } + } + + if( replace ) { void *old_value = NODE_VALUE(baseItem); if( baseItem->type == FeriteAMTType_HNode ) ffree(baseItem->u.value.key); @@ -466,19 +503,33 @@ void *_ferite_amt_set( FeriteScript *script, FeriteAMT *tree, unsigned long inde return old_value; } else { /* We have to split this node */ + size_t oldKeyLen = 0; FeriteAMTNode *old_node = baseItem; - int old_id = 0, new_id = 0; - while( AMT_APPLY_SHIFT(tree, old_node->u.value.id) == AMT_APPLY_SHIFT(tree, index) ) { + if (old_node->type == FeriteAMTType_HNode) { + oldKeyLen = strlen(old_node->u.value.key); + } + int old_id = AMT_APPLY_SHIFT(tree, old_node->u.value.id); + int new_id = AMT_APPLY_SHIFT(tree, index); + while( old_id == new_id ) { + amtDepth++; FeriteAMTNode *new_node = ferite_amt_create_tree_node( script,root ); AMT_SET( script, root, baseIndex, new_node ); baseItem = AMT_GET( script, root, baseIndex ); root = baseItem->u.tree; - AMT_SHIFT_AND_CHECK( NULL ); - baseIndex = AMT_APPLY_SHIFT(tree, index); + + if ((shiftAmount - AMT_SHIFT_AMOUNT) > 0) { + shiftAmount -= AMT_SHIFT_AMOUNT; + old_id = AMT_APPLY_SHIFT(tree, old_node->u.value.id); + new_id = AMT_APPLY_SHIFT(tree, index); + baseIndex = new_id; + } else { + // We ran out of the hash bits in index, harvest + // additional hash bits from optional_key + old_id = ferite_hamt_index(index, old_node->u.value.key, oldKeyLen, amtDepth); + new_id = ferite_hamt_index(index, optional_key, keyLen, amtDepth); + baseIndex = new_id; + } } - // Old Node - old_id = AMT_APPLY_SHIFT(tree, old_node->u.value.id); - new_id = AMT_APPLY_SHIFT(tree, index); AMT_SET( script, root, old_id, old_node ); AMT_SET( script, root, new_id, ferite_amt_create_value_node(script, index, optional_key, value) ); tree->total++; @@ -486,11 +537,22 @@ void *_ferite_amt_set( FeriteScript *script, FeriteAMT *tree, unsigned long inde } break; } else if( baseItem->type == FeriteAMTType_Tree ) { - AMT_SHIFT_AND_CHECK( NULL ); - baseIndex = AMT_APPLY_SHIFT(tree, index); + amtDepth++; + if ((shiftAmount - AMT_SHIFT_AMOUNT) > 0) { + shiftAmount -= AMT_SHIFT_AMOUNT; + baseIndex = AMT_APPLY_SHIFT(tree, index); + } else { + // Ran out of hash bits, optional_key is no longer + // optional + if (optional_key == NULL) { + fprintf(stderr, "ferite_amt_set(): " + "Error: Ran out of hash bits for index %lu, but optional_key is null", + index); + return NULL; + } + baseIndex = ferite_hamt_index(index, optional_key, keyLen, amtDepth); + } root = baseItem->u.tree; - } else { - printf("base item: %p -> %d\n", baseItem, baseItem->type); } } } @@ -502,11 +564,10 @@ void *ferite_amt_set( FeriteScript *script, FeriteAMT *tree, unsigned long index void *ferite_amt_add( FeriteScript *script, FeriteAMT *tree, void *value ) { return _ferite_amt_set( script, tree, tree->upper_bound++, NULL, value ); } -FeriteAMTNode *_ferite_amt_get( FeriteScript *script, FeriteAMT *tree, unsigned long _index ) { +FeriteAMTNode *_ferite_amt_get( FeriteScript *script, FeriteAMT *tree, unsigned long index ) { FeriteAMTTree *root = tree->root; - unsigned int index = _index; - unsigned int shiftAmount = AMT_SHIFT_START; - unsigned int baseIndex = AMT_APPLY_SHIFT(tree, index); + int shiftAmount = AMT_SHIFT_START; + unsigned long baseIndex = AMT_APPLY_SHIFT(tree, index); FeriteAMTNode *baseItem = NULL; tree->last_index = index; @@ -518,7 +579,10 @@ FeriteAMTNode *_ferite_amt_get( FeriteScript *script, FeriteAMT *tree, unsigned if( IS_NODE(baseItem) ) { return baseItem; } else if( baseItem->type == FeriteAMTType_Tree ) { - AMT_SHIFT_AND_CHECK( NULL ); + shiftAmount -= 5; + if( shiftAmount <= 0 ) { + return NULL; + } baseIndex = AMT_APPLY_SHIFT(tree, index); root = baseItem->u.tree; } @@ -526,6 +590,39 @@ FeriteAMTNode *_ferite_amt_get( FeriteScript *script, FeriteAMT *tree, unsigned } } +FeriteAMTNode *_ferite_hamt_get_with_key( FeriteScript *script, FeriteAMT *tree, unsigned long index, char *key ) { + FeriteAMTTree *root = tree->root; + int shiftAmount = AMT_SHIFT_START; + unsigned long baseIndex = AMT_APPLY_SHIFT(tree, index); + FeriteAMTNode *baseItem = NULL; + size_t keyLen = strlen(key); + size_t amtDepth = 0; + + tree->last_index = index; + while( FE_TRUE ) { + if( !(GETBIT(root->map, baseIndex)) ) + return NULL; + else { + baseItem = AMT_GET( script, root, baseIndex ); + if( baseItem->type == FeriteAMTType_ANode || baseItem->type == FeriteAMTType_HNode ) { + return baseItem; + } else if( baseItem->type == FeriteAMTType_Tree ) { + amtDepth++; + if ((shiftAmount - AMT_SHIFT_AMOUNT) > 0) { + shiftAmount -= AMT_SHIFT_AMOUNT; + baseIndex = AMT_APPLY_SHIFT(tree, index); + } else { + // We ran out of the hash bits in index, use the key for + // the hash bits + baseIndex = ferite_hamt_index(index, key, keyLen, amtDepth); + } + root = baseItem->u.tree; + } + } + } +} + + void *ferite_amt_get( FeriteScript *script, FeriteAMT *tree, unsigned long index ) { FeriteAMTNode *baseItem = _ferite_amt_get( script, tree, index ); if( baseItem && NODE_ID(baseItem) == index ) { @@ -641,7 +738,7 @@ void *ferite_hamt_set( FeriteScript *script, FeriteAMT *tree, char *key, void *v return _ferite_amt_set( script, tree, ferite_hamt_hash_gen(key), key, value ); } void *ferite_hamt_get( FeriteScript *script, FeriteAMT *tree, char *key ) { - FeriteAMTNode *baseItem = _ferite_amt_get( script, tree, ferite_hamt_hash_gen(key) ); + FeriteAMTNode *baseItem = _ferite_hamt_get_with_key( script, tree, ferite_hamt_hash_gen(key), key ); if( baseItem && strcmp(baseItem->u.value.key,key) == 0 ) return NODE_VALUE(baseItem); return NULL; diff --git a/test/MakefileTest.am.stub b/test/MakefileTest.am.stub index ec782c0..29f51dd 100644 --- a/test/MakefileTest.am.stub +++ b/test/MakefileTest.am.stub @@ -5,3 +5,4 @@ check_PROGRAMS = TEST: ferite_amt-array.c TEST: ferite_amt-index-from-hash.c TEST: ferite_amt-index-from-hash-and-string.c +TEST: ferite_amt-hash-set-and-get.c test_amt.c diff --git a/test/ferite_amt-hash-set-and-get.c b/test/ferite_amt-hash-set-and-get.c new file mode 100644 index 0000000..68ccfd6 --- /dev/null +++ b/test/ferite_amt-hash-set-and-get.c @@ -0,0 +1,158 @@ +#include "tap.h" +#include "test_amt.h" + +#include "ferite.h" + +void test2HashCollisions() { + // The int hashes for the keys in the following key-value pair collides + struct HashTest collides[] = { + /* + * Note: ferite_AMTHash_create() delegates to + * ferite_amt_create(script, FeriteAMT_High) which sets the FeriteAMT to use the + * ferite_highorderindex for shifting the hash, and + * ferite_highorderindex() has this signature: + * + * unsigned int ferite_highorderindex( unsigned int index, unsigned int shiftAmount ); + * + * So only the lower 32 bits (the int equivalent) of the long hash are + * used for getting the index. + * + * This should be fixed so that the index function uses all the bits in + * the long hash instead of casting it to int. + */ + + /* Different long hash but same int hash */ + // long 16871661377530893130 int 174592842 + { "object:Workflow.Tag/view:1/44" , "foo", "foo" }, + // long 12494831704221422410 int 174592842 + { "object:Workflow.Action/view:1/231293" , "bar", "bar" }, + + /* Same long hash and same int hash */ + // long 99461941 int 99461941 + { "hones", "value1", "value1" }, + // long 99461941 int 99461941 + { "hop's", "value2", "value2" }, + }; + int ntest = sizeof(collides) / sizeof(collides[0]); + + testHashPutAndGet("Hash Collision", collides, ntest); +} + +void testNoCollision() { + struct HashTest tests[] = { + { "one", "two", "two" }, + { "three", "four", "four" }, + { "five", "six", "six" }, + { "seven", "8", "8" }, + { "nine", "10", "10" }, + }; + int ntest = sizeof(tests) / sizeof(tests[0]); + + testHashPutAndGet("No Collision", tests, ntest); + +} + +void testPutWithZeroLengthKey() { + struct CustomHashTest tests[] = { + { 1, "", "value for empty key", "value for empty key" }, + }; + int ntest = sizeof(tests) / sizeof(tests[0]); + + testCustomHashPutAndGet("One Zero Length", tests, ntest); +} + +void testPutWithZeroLengthKeyWithNonZeroLengthKey() { + struct CustomHashTest tests[] = { + { 1, "", "value for empty key", "value for empty key" }, + { 1, "foo", "value for foo", "value for foo" }, + }; + int ntest = sizeof(tests) / sizeof(tests[0]); + + testCustomHashPutAndGet("Zero Length With Nonzero Length", tests, ntest); +} + +void testCustomSameHash() { + struct CustomHashTest tests[] = { + { 1, "key1", "value1", "value1" }, + { 1, "key2", "value2", "value2" }, + { 1, "k", "value3", "value3" }, + { 1, "key2a", "value4", "value4" }, + }; + int ntest = sizeof(tests) / sizeof(tests[0]); + + testCustomHashPutAndGet("CustomSameHash", tests, ntest); +} + +void testCollisionWithMissingOptionalKeyMustReturnNull() { + char *desc = "Missing Optional Key"; + FeriteScript *script = NULL; + FeriteAMT *amt = ferite_AMTHash_Create( script ); + char *key = "foo"; + char *value = "a"; + char *got = (char *) _ferite_amt_set(NULL, amt, 1, key, value); + + is_str(got, value, "%s: Put '%s' must return '%s'", desc, key, value); + + key = NULL; + value ="b"; + got = (char *) _ferite_amt_set(NULL, amt, 1, key, value); + is_str(got, NULL, "%s: NULL key must return NULL", desc); +} + +void testCollisionWithMatchingKeyMustReplaceExistingOne() { + char *desc = "Collision With Matching Key"; + FeriteScript *script = NULL; + FeriteAMT *amt = ferite_AMTHash_Create( script ); + char *key = "foo"; + char *value = "a"; + char *got = (char *) _ferite_amt_set(NULL, amt, 1, key, value); + + is_str(got, value, "%s: Put '%s' must return '%s'", desc, key, value); + + key = "foo"; + value ="b"; + got = (char *) _ferite_amt_set(NULL, amt, 1, key, value); + is_str(got, "a", "%s: Put '%s' must return '%s'", desc, key, "a"); + + got = (char *)ferite_hamt_get(NULL, amt, key); + ok (got != NULL, "%s: ferite_hamt_get must not return NULL", desc); + if (got) { + is_str(got, "b", "%s: Get '%s' must return '%s'", desc, key, value); + } +} + +void testReplaceExistingMustReturnOldValue() { + struct HashTest tests[] = { + { "foo", "elephant", "elephant" }, + { "foo", "hippo", "elephant" }, + }; + int ntest = sizeof(tests) / sizeof(tests[0]); + + testHashPut("Replace must return old", tests, ntest); + +} + +void test3HashCollisions() { + struct CustomHashTest tests[] = { + { 1, "f", "mammal", "mammal" }, + { 1, "fo", "bird", "bird" }, + { 1, "foo", "fish", "fish" }, + }; + int ntest = sizeof(tests) / sizeof(tests[0]); + + testCustomHashPutAndGet("3 Hash Collisions", tests, ntest); +} + +int main(int argc, char *argv[]) { + ferite_init(argc, argv); + testNoCollision(); + test2HashCollisions(); + testCustomSameHash(); + testCollisionWithMissingOptionalKeyMustReturnNull(); + testCollisionWithMatchingKeyMustReplaceExistingOne(); + testReplaceExistingMustReturnOldValue(); + testPutWithZeroLengthKey(); + testPutWithZeroLengthKeyWithNonZeroLengthKey(); + test3HashCollisions(); + return done_testing(); +} diff --git a/test/test_amt.c b/test/test_amt.c new file mode 100644 index 0000000..14eca48 --- /dev/null +++ b/test/test_amt.c @@ -0,0 +1,99 @@ +#include "test_amt.h" +#include "tap.h" + +FeriteAMT *_testHashPut(FeriteAMT *amt, char *desc, struct HashTest *tests, size_t ntests) { + struct HashTest *p; + size_t i; + + if (amt == NULL) { + amt = ferite_AMTHash_Create( NULL ); + } + + for (i = 0; i < ntests; i++) { + p = &tests[i]; + char *key = p->key; + char *value = p->value; + char *want = p->returnedValue; + char *ret = ferite_hamt_set( NULL, amt, key, value ); + is_str (ret, want, "%s: Put '%s' with value '%s' must return '%s'", desc, key, value, want); + } + return amt; +} + +void _testHashGet(FeriteAMT *amt, char *desc, struct HashTest *tests, size_t ntests) { + struct HashTest *p; + char *got; + size_t i; + + if (amt == NULL) { + amt = ferite_AMTHash_Create( NULL ); + } + for (i = 0; i < ntests; i++) { + p = &tests[i]; + char *key = p->key; + char *want = p->value; + got = (char *)ferite_hamt_get(NULL, amt, key); + is_str(got, want, "%s: Get '%s' must return '%s'", desc, key, want); + } +} + +FeriteAMT *testHashPut(char *desc, struct HashTest *tests, size_t ntests) { + return _testHashPut(NULL, desc, tests, ntests); +} + +FeriteAMT *testHashPutAndGet(char *desc, struct HashTest *tests, size_t ntests) { + FeriteScript *script = NULL; + FeriteAMT *amt = ferite_AMTHash_Create( script ); + + _testHashPut(amt, desc, tests, ntests); + _testHashGet(amt, desc, tests, ntests); + return amt; +} + +FeriteAMT *testCustomHashPut(FeriteAMT *amt, char *desc, struct CustomHashTest *tests, size_t ntests) { + size_t i; + FeriteScript *script = NULL; + + if (amt == NULL) { + amt = ferite_AMTHash_Create( script ); + } + + for (i = 0; i < ntests; i++) { + struct CustomHashTest *t = &tests[i]; + unsigned long hash = t->hash; + char *key =t->key; + char *value = t->value; + char *want = t->returnedValue; + char *got = (char *) _ferite_amt_set( NULL, amt, hash, key, value ); + is_str (got, want, "%s: hash %lu Put '%s' with value '%s' must return '%s'", + desc, hash, key, value, want); + } + return amt; +} + +void testCustomHashGet(FeriteAMT *amt, char *desc, struct CustomHashTest *tests, size_t ntests) { + size_t i; + + for (i = 0; i < ntests; i++) { + struct CustomHashTest *t = &tests[i]; + unsigned long hash = t->hash; + char *key =t->key; + char *want = t->value; + FeriteAMTNode *baseItem = _ferite_hamt_get_with_key(NULL, amt, hash, key); + + ok (baseItem != NULL, "Must get baseItem for key '%s'", key); + if (baseItem) { + char *got = baseItem->u.value.data; + is_str(got, want, "%s: hash %lu Get '%s' must return '%s'", + desc, hash, key, want); + } + } +} + +FeriteAMT *testCustomHashPutAndGet(char *desc, struct CustomHashTest *tests, size_t ntests) { + FeriteAMT *amt = testCustomHashPut(NULL, desc, tests, ntests); + + testCustomHashGet(amt, desc, tests, ntests); + return amt; +} + diff --git a/test/test_amt.h b/test/test_amt.h new file mode 100644 index 0000000..51c3a4f --- /dev/null +++ b/test/test_amt.h @@ -0,0 +1,30 @@ +#ifndef TEST_AMT_H +#define TEST_AMT_H +#include "ferite.h" + +#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) + +struct HashTest { + char *key; + char *value; + char *returnedValue; /* Expected returned value when storing */ +}; + +struct CustomHashTest { + unsigned long hash; /* Custom hash for the key, skips + ferite_hamt_hash_gen() */ + char *key; + char *value; + char *returnedValue; +}; + +FeriteAMT *_testHashPut(FeriteAMT *amt, char *desc, struct HashTest *tests, size_t ntests); +void _testHashGet(FeriteAMT *amt, char *desc, struct HashTest *tests, size_t ntests); +FeriteAMT * testHashPut(char *desc, struct HashTest *tests, size_t ntests); +FeriteAMT *testHashPutAndGet(char *desc, struct HashTest *tests, size_t ntests); +FeriteAMT *testCustomHashPutAndGet(char *desc, struct CustomHashTest *tests, size_t ntests); +FeriteAMT *testCustomHashPut(FeriteAMT *amt, char *desc, struct CustomHashTest *tests, size_t ntests); +void testCustomHashGet(FeriteAMT *amt, char *desc, struct CustomHashTest *tests, size_t ntests); + + +#endif From 38093d33ca90a1d77bfe8b6b176996a25fc40708 Mon Sep 17 00:00:00 2001 From: Nazri Ramliy Date: Fri, 8 Aug 2014 12:30:12 +0800 Subject: [PATCH 7/9] Teach ferite amt how to delete collided hash items --- src/ferite_amt.c | 21 ++++++++++++-- test/MakefileTest.am.stub | 1 + test/ferite_amt-hash-delete.c | 54 +++++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 test/ferite_amt-hash-delete.c diff --git a/src/ferite_amt.c b/src/ferite_amt.c index e13916d..1bff9c3 100644 --- a/src/ferite_amt.c +++ b/src/ferite_amt.c @@ -634,9 +634,16 @@ void *ferite_amt_get( FeriteScript *script, FeriteAMT *tree, unsigned long index void *_ferite_amt_delete( FeriteScript *script, FeriteAMT *tree, unsigned long index, char *optional_key ) { FeriteAMTTree *root = tree->root; - unsigned long shiftAmount = AMT_SHIFT_START; + int shiftAmount = AMT_SHIFT_START; unsigned long baseIndex = AMT_APPLY_SHIFT(tree, index); FeriteAMTNode *baseItem = NULL; + unsigned long rootIndex = baseIndex; + size_t amtDepth = 0; + size_t keyLen = 0; + + if (optional_key != NULL) { + keyLen = strlen(optional_key); + } while( FE_TRUE ) { if( !(GETBIT(root->map, baseIndex)) ) @@ -670,8 +677,16 @@ void *_ferite_amt_delete( FeriteScript *script, FeriteAMT *tree, unsigned long i } return data; } else if( baseItem->type == FeriteAMTType_Tree ) { - AMT_SHIFT_AND_CHECK( NULL ); - baseIndex = AMT_APPLY_SHIFT(tree, index); + amtDepth++; + if ((shiftAmount - AMT_SHIFT_AMOUNT) > 0) { + shiftAmount -= AMT_SHIFT_AMOUNT; + baseIndex = AMT_APPLY_SHIFT(tree, index); + } else { + // We ran out of the hash bits in index, use the key for + // the hash bits + baseIndex = ferite_hamt_index(index, optional_key, keyLen, amtDepth); + } + root = baseItem->u.tree; } } diff --git a/test/MakefileTest.am.stub b/test/MakefileTest.am.stub index 29f51dd..7d381fa 100644 --- a/test/MakefileTest.am.stub +++ b/test/MakefileTest.am.stub @@ -6,3 +6,4 @@ TEST: ferite_amt-array.c TEST: ferite_amt-index-from-hash.c TEST: ferite_amt-index-from-hash-and-string.c TEST: ferite_amt-hash-set-and-get.c test_amt.c +TEST: ferite_amt-hash-delete.c test_amt.c diff --git a/test/ferite_amt-hash-delete.c b/test/ferite_amt-hash-delete.c new file mode 100644 index 0000000..b375a89 --- /dev/null +++ b/test/ferite_amt-hash-delete.c @@ -0,0 +1,54 @@ +#include "tap.h" +#include "test_amt.h" +#include "ferite.h" + +void testDeleteOneItem() { + struct HashTest tests[] = { + { "foo", "bar", "bar" }, + }; + int ntests = ARRAY_SIZE(tests); + FeriteAMT *amt = testHashPut("One Item", tests, ntests); + char *value = ferite_hamt_delete(NULL, amt, "foo"); + is_str(value, "bar", "One Item: Delete '%s' must return '%s'", "foo", "bar"); + value = ferite_hamt_get(NULL, amt, "foo"); + ok(value == NULL, "One Item: Get deleted item must return NULL"); +} + +void testDeleteTwoItems() { + struct HashTest tests[] = { + { "foo", "bar", "bar" }, + { "baz", "quux", "quux" }, + }; + int ntests = ARRAY_SIZE(tests); + FeriteAMT *amt = testHashPut("Two items:", tests, ntests); + char *value = ferite_hamt_delete(NULL, amt, "foo"); + is_str(value, "bar", "Two Items: Delete '%s' must return '%s'", "foo", "bar"); + value = ferite_hamt_get(NULL, amt, "foo"); + ok(value == NULL, "Two Items: Get deleted item must return NULL"); + + value = ferite_hamt_delete(NULL, amt, "baz"); + is_str(value, "quux", "Two Items: Delete '%s' must return '%s'", "baz", "quux"); + value = ferite_hamt_get(NULL, amt, "baz"); + ok(value == NULL, "Two Items: Get deleted 2nd item must return NULL"); +} + +void testDeleteWithCollision() { + char *desc = "Delete With Collison"; + struct CustomHashTest tests[] = { + { 1, "f", "january", "january" }, + { 1, "fo", "february", "february" }, + { 1, "fooo", "march", "march" }, + }; + int ntests = ARRAY_SIZE(tests); + FeriteAMT *amt = testCustomHashPut(NULL, desc, tests, ntests); + char *value = _ferite_amt_delete(NULL, amt, 1, tests[0].key); + is_str(value, tests[0].value, "%s: Delete '%s' must return '%s'", desc, tests[0].key, tests[0].value); +} + +int main(int argc, char *argv[]) { + ferite_init(argc, argv); + testDeleteOneItem(); + testDeleteTwoItems(); + testDeleteWithCollision(); + return done_testing(); +} From c4180210e788480d9a9a7a98d0c646031038e923 Mon Sep 17 00:00:00 2001 From: Nazri Ramliy Date: Fri, 8 Aug 2014 16:47:14 +0800 Subject: [PATCH 8/9] ferite_hamt_hash_gen(): Avoid unnecessary strlen(key) --- src/ferite_amt.c | 9 ++++----- test/MakefileTest.am.stub | 1 + test/ferite_amt-hash-gen.c | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 5 deletions(-) create mode 100644 test/ferite_amt-hash-gen.c diff --git a/src/ferite_amt.c b/src/ferite_amt.c index 1bff9c3..7150cfc 100644 --- a/src/ferite_amt.c +++ b/src/ferite_amt.c @@ -741,11 +741,10 @@ int ferite_amt_cmp( FeriteScript *script, FeriteAMT *left, FeriteAMT *right, AMT } unsigned long ferite_hamt_hash_gen( char *key ) { - size_t i, keylen = strlen(key); - unsigned long hashval = 0; - - for( i = 0; i < keylen; i++ ) - hashval = *key++ + 31 * hashval; + unsigned long hashval = 0; + + while(*key) + hashval = *key++ + 31 * hashval; return hashval; } diff --git a/test/MakefileTest.am.stub b/test/MakefileTest.am.stub index 7d381fa..fe6ada6 100644 --- a/test/MakefileTest.am.stub +++ b/test/MakefileTest.am.stub @@ -5,5 +5,6 @@ check_PROGRAMS = TEST: ferite_amt-array.c TEST: ferite_amt-index-from-hash.c TEST: ferite_amt-index-from-hash-and-string.c +TEST: ferite_amt-hash-gen.c TEST: ferite_amt-hash-set-and-get.c test_amt.c TEST: ferite_amt-hash-delete.c test_amt.c diff --git a/test/ferite_amt-hash-gen.c b/test/ferite_amt-hash-gen.c new file mode 100644 index 0000000..711be1b --- /dev/null +++ b/test/ferite_amt-hash-gen.c @@ -0,0 +1,34 @@ +#include "tap.h" +#include "ferite.h" + +struct TestVector { + char *key; + unsigned long hash; +} tests[] = { + { + "elephant", + 2877622596369UL, + }, + { + "giraffe", + 94527212404UL, + } +}; + +void testHashGen() { + size_t i; + size_t ntests = sizeof(tests) / sizeof(tests[0]); + + for (i = 0; i < ntests; i++) { + unsigned long got = ferite_hamt_hash_gen(tests[i].key); + is (got, tests[i].hash, + "key %s must hash to %lu", + tests[i].key, tests[i].hash); + } +} + +int main() { + testHashGen(); + done_testing(); + return 0; +} From 98fe984ad1f422494ca7d0131eaeb2caeda107cd Mon Sep 17 00:00:00 2001 From: Nazri Ramliy Date: Tue, 12 Aug 2014 11:09:31 +0800 Subject: [PATCH 9/9] Remove unused macro AMT_SHIFT_AND_CHECK --- src/ferite_amt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ferite_amt.c b/src/ferite_amt.c index 7150cfc..7d6a9db 100644 --- a/src/ferite_amt.c +++ b/src/ferite_amt.c @@ -73,7 +73,6 @@ static inline int bitcount (unsigned int n) #define AMT_SHIFT_AMOUNT 5 #define AMT_SHIFT_START (32) #define AMT_APPLY_SHIFT( FE_FE_TRUEE, INDEX ) (FE_FE_TRUEE->index_function)( INDEX, shiftAmount ) -#define AMT_SHIFT_AND_CHECK( VALUE ) do { shiftAmount -= 5; if( shiftAmount <= 0 ) { return NULL; } } while(0) #define AMT_UNSHIFT_AND_CHECK( VALUE ) do { shiftAmount += 5; if( shiftAmount > AMT_SHIFT_START ) { return NULL; } } while(0) #define AMT_CompressedBaseSize 4