diff --git a/.gitmodules b/.gitmodules index 85e69a6..7cda21a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,9 @@ [submodule "bats"] path = test/bats-core url = https://github.com/bats-core/bats-core +[submodule "test/test_helper/bats-assert"] + path = test/test_helper/bats-assert + url = https://github.com/bats-core/bats-assert.git +[submodule "test/test_helper/bats-support"] + path = test/test_helper/bats-support + url = https://github.com/bats-core/bats-support.git diff --git a/Makefile b/Makefile index dfea7e7..a5aa287 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ $(ODIR)/%.o: $(SRC)%.c $(CC) -g -c $< -o $@ $(CFLAGS) dirs: - mkdir -p data build/parser build/planner/operators build/binder build/io build/executor build/executor/statements build/bufferpool build/operators + mkdir -p data build/parser build/planner/operators build/binder build/io build/executor build/executor/statements build/operators build/util/hashmap clean: rm -f ./build/squel $(OBJ) diff --git a/perf/results/count.csv b/perf/results/count.csv index aaf89ab..5af0628 100644 --- a/perf/results/count.csv +++ b/perf/results/count.csv @@ -1,7 +1,7 @@ filetype;records;time -CSV;100000;0:00.05 +CSV;100000;0:00.03 TDB;100000;0:00.00 -CSV;1000000;0:00.46 +CSV;1000000;0:00.30 TDB;1000000;0:00.04 -Command terminated by signal 2 -CSV;10000000;0:01.30 +CSV;10000000;0:03.36 +TDB;10000000;0:00.36 diff --git a/perf/results/join.csv b/perf/results/join.csv index 5d44775..7760768 100644 --- a/perf/results/join.csv +++ b/perf/results/join.csv @@ -1,9 +1,9 @@ filetype;records_left;records_right;time -CSV;10000;100;0:00.06 -TDB;10000;100;0:00.04 -CSV;10000;1000;0:00.40 -TDB;10000;1000;0:00.39 -CSV;100000;100;0:00.45 -TDB;100000;100;0:00.38 -CSV;100000;1000;0:03.93 -TDB;100000;1000;0:03.80 +CSV;10000;100;0:00.05 +TDB;10000;100;0:00.05 +CSV;10000;1000;0:00.50 +TDB;10000;1000;0:00.51 +CSV;100000;100;0:00.55 +TDB;100000;100;0:00.50 +CSV;100000;1000;0:05.08 +TDB;100000;1000;0:05.01 diff --git a/src/bufferpool/bufferpool.c b/src/bufferpool/bufferpool.c deleted file mode 100644 index d843741..0000000 --- a/src/bufferpool/bufferpool.c +++ /dev/null @@ -1,84 +0,0 @@ -#include "../include/bufferpool/bufferpool.h" -#include "../include/planner/planner.h" - - -void growBufferpoolIfNeedBe(size_t size) { - - if (buffpool->used + (long) size < buffpool->capacity) return; - long oldCapacity = buffpool->capacity; - buffpool->capacity *= 2; - buffpool->pool = realloc(buffpool->pool, buffpool->capacity); - checkPtrNotNull(buffpool->pool, "Could not allocate memory for bufferpool"); - memset(buffpool->pool + oldCapacity, 0, oldCapacity); - -} - -void copyToBufferPool(int destinationoffset, void* source, size_t size) { - growBufferpoolIfNeedBe(size); - void* destination = getTuple(destinationoffset); - memcpy(destination, source, size); -} - -int addToBufferPoolFromOffset(int originOffset, size_t size) { - growBufferpoolIfNeedBe(size); - void* target = getNextFreeSlot(); - memcpy(target, getTuple(originOffset), size); - int offset = buffpool->used; - buffpool->used += size; - return offset; -} - - -int addToBufferPool(void* source, size_t size) { - growBufferpoolIfNeedBe(size); - void* target = getNextFreeSlot(); - memcpy(target, source, size); - int offset = buffpool->used; - buffpool->used += size; - return offset; -} - -void reserveSpaceBufferpool(int offset, size_t size) { - growBufferpoolIfNeedBe(size); - void* from = getTuple(offset); - memset(from, 0, size); - buffpool->used += size; -} - - -int getCurrentOffset() { return buffpool->used; } - -void* getCol(int pooloffset, size_t colOffset) { - return buffpool->pool + pooloffset + colOffset; -} - -void* getTuple(int pooloffset) { - return buffpool->pool + pooloffset; -} - -void* getNextFreeSlot() { - return buffpool->pool + buffpool->used; -} - -void getColAsChar(char* target, int pooloffset, size_t colOffset, Datatype type) { - if (type == DTYPE_STR) { - strcpy(target, getCol(pooloffset, colOffset)); - return; - } - if (type == DTYPE_INT) { - char tmp[CHARMAXSIZE]; - sprintf(tmp, "%d", *(int*) getCol(pooloffset, colOffset)); - memcpy(target, tmp, strlen(tmp)); - return; - } - if (type == DTYPE_LONG) { - char tmp[CHARMAXSIZE]; - sprintf(tmp, "%ld", *(long*) getCol(pooloffset, colOffset)); - memcpy(target, tmp, strlen(tmp)); - return; - } - printf("Don't know how to represent type %d as char\n", type); - exit(1); -} - - diff --git a/src/executor/executeStatement.c b/src/executor/executeStatement.c index ace3cca..e6fbd79 100644 --- a/src/executor/executeStatement.c +++ b/src/executor/executeStatement.c @@ -16,6 +16,9 @@ void executeStatement(Node* node) { case STMTINSERT: executeInsert(node); break; + case STMTEXPLAIN: + executeExplain(node); + break; default: printf("Don't know how execute statement of type %d\n", node->type); exit(1); diff --git a/src/executor/executor.c b/src/executor/executor.c index 726676e..053caa8 100644 --- a/src/executor/executor.c +++ b/src/executor/executor.c @@ -1,7 +1,7 @@ #include "../include/executor/executor.h" +#include "../include/executor/tuple.h" -Bufferpool* buffpool; void assignGetTupleFunction(Operator *op) { @@ -25,19 +25,19 @@ void assignGetTupleFunction(Operator *op) { case (OP_JOIN): op->getTuple = &joinGetTuple; break; + case (OP_HASHJOIN): + op->getTuple = &hashjoinGetTuple; + break; case (OP_AGGREGATE): op->getTuple = &aggregateGetTuple; break; default: - printf("Don't know how to handle op-type %d\n", op->type); + printf("EXECUTOR-error: Don't know how to handle op-type %d\n", op->type); exit(1); } } - - - void doAssignGetTupleFunction(Operator* p_op) { if (p_op == NULL) { @@ -50,23 +50,19 @@ void doAssignGetTupleFunction(Operator* p_op) { doAssignGetTupleFunction(p_op->child); } - if (p_op->type == OP_JOIN) { + if (p_op->type == OP_JOIN || p_op->type == OP_HASHJOIN) { doAssignGetTupleFunction(p_op->info.join.left); doAssignGetTupleFunction(p_op->info.join.right); } } -void execute(Operator* op, bool printColNames, void (*tupleHandler)(int pooloffset)) { +void execute(Operator* op, bool printColNames, void (*tupleHandler)(Tuple* tpl)) { if (op == NULL) { return; } - buffpool = calloc(1, sizeof(Bufferpool)); - buffpool->pool = calloc(BUFFERPOOLSIZE, 1); - buffpool->capacity = BUFFERPOOLSIZE; - buffpool->used = 0; doAssignGetTupleFunction(op); @@ -86,14 +82,15 @@ void execute(Operator* op, bool printColNames, void (*tupleHandler)(int pooloffs } // Get tuples one by one - int offset; + Tuple* tpl = initTupleOfSize(TUPLESIZE); for (;;) { - offset = op->getTuple(op); - if (offset == -1) break; + op->getTuple(op, tpl); + if (isTupleEmpty(tpl)) break; + + tupleHandler(tpl); - tupleHandler(offset); }; + freeTuple(tpl); + - free(buffpool->pool); - free(buffpool); } diff --git a/src/executor/statements/explain.c b/src/executor/statements/explain.c new file mode 100644 index 0000000..8314855 --- /dev/null +++ b/src/executor/statements/explain.c @@ -0,0 +1,69 @@ +#include "../../include/executor/executor.h" + +void printOp(Operator* op) { + + switch (op->type) { + case OP_SCANTDB: + printf("OP_SCANTDB"); + break; + case OP_SCAN: + printf("OP_SCAN"); + break; + case OP_PROJECT: + printf("OP_PROJECT"); + break; + case OP_FILTER: + printf("OP_FILTER"); + break; + case OP_JOIN: + printf("OP_JOIN"); + break; + case OP_AGGREGATE: + printf("OP_AGGREGATE"); + break; + case OP_HASHJOIN: + printf("OP_HASHJOIN"); + break; + default: + printf("EXPLAIN-error: Unknown operation type"); + break; + } + +} + +void explainOp(Operator* op) { + + if (!op) return; + + printOp(op); + printf("\n"); + + if (op->type == OP_FILTER) { + explainOp(op->info.filter.next); + } + + if (op->type == OP_JOIN || op->type == OP_HASHJOIN) { + explainOp(op->info.join.filter); + explainOp(op->info.join.left); + explainOp(op->info.join.right); + } + + if (op->child) { + explainOp(op->child); + } +} + + +void executeExplain(Node* node) { + + /* Plan the query */ + Operator* queryplan = planQuery(node->next); + + /* Print the query plan */ + printf("******* EXPLAIN **********\n"); + explainOp(queryplan); + printf("**************************\n"); + + freeQueryplan(queryplan); +} + diff --git a/src/executor/statements/insert.c b/src/executor/statements/insert.c index c443c70..277725f 100644 --- a/src/executor/statements/insert.c +++ b/src/executor/statements/insert.c @@ -5,14 +5,14 @@ size_t tupleSize = 0; FILE* f = NULL; -void handleTupleInsert(int offset) { +void handleTupleInsert(Tuple* tpl) { if (f == NULL) { printf("No file to insert to\n"); exit(1); } - size_t bytesWritten = fwrite(getTuple(offset), tupleSize, 1, f); + size_t bytesWritten = fwrite(tpl->data, tupleSize, 1, f); assert(bytesWritten > 0); } diff --git a/src/executor/tuple.c b/src/executor/tuple.c new file mode 100644 index 0000000..90b29ac --- /dev/null +++ b/src/executor/tuple.c @@ -0,0 +1,36 @@ +#include "../include/executor/tuple.h" + + +Tuple* initTuple() { + Tuple* tpl = malloc(sizeof(Tuple)); + tpl->size = 0; + return tpl; +} + + +Tuple* initTupleOfSize(size_t p_size) { + Tuple* tpl = malloc(sizeof(Tuple)); + tpl->data = calloc(1, p_size); + tpl->size = p_size; + return tpl; +} + + +void* getTupleCol(Tuple* tpl, size_t colOffset) { + return tpl->data + colOffset; +} + +void freeTuple(Tuple* tpl) { + if (tpl->data) { + free(tpl->data); + } + free(tpl); +} + +size_t isTupleEmpty(Tuple* tpl) { + return tpl->size == 0 ? 1 : 0; +} + +void markTupleAsEmpty(Tuple* tpl) { + tpl->size = 0; +} \ No newline at end of file diff --git a/src/executor/tuplebuffer.c b/src/executor/tuplebuffer.c new file mode 100644 index 0000000..74080fa --- /dev/null +++ b/src/executor/tuplebuffer.c @@ -0,0 +1,76 @@ +#include "../include/executor/tuplebuffer.h" + + + +TupleBuffer* initTupleBuffer(size_t p_capacity, size_t p_tuplesize) { + TupleBuffer* buff = malloc(sizeof(TupleBuffer)); + buff->capacity = p_capacity; + buff->tupledatasize = p_tuplesize; + buff->tuples = malloc(p_capacity * sizeof(Tuple)); + buff->data = malloc(p_capacity * p_tuplesize); + buff->size = 0; + buff->cursor = 0; + return buff; +} + + +void resizeTupleBuffer(TupleBuffer* buff) { + + buff->capacity *= 2; + + Tuple* tmpTpl = realloc(buff->tuples, buff->capacity * sizeof(Tuple)); + + if (tmpTpl == NULL) { + printf("ERROR: Could resize tuplebuffer from %ld to %ld\n", buff->capacity, buff->capacity * 2); + exit(1); + } + + buff->tuples = tmpTpl; + + void* tmpData = realloc(buff->data, buff->capacity * buff->tupledatasize); + + if (tmpData == NULL) { + printf("ERROR: Could resize tuplebuffer data from %ld to %ld\n", buff->capacity * buff->tupledatasize, buff->capacity * buff->tupledatasize * 2); + exit(1); + } + + buff->data = tmpData; +} + + +Tuple* getTupleFromBuffer(TupleBuffer* buff) { + + if (buff->size >= (buff->capacity-1)) { + resizeTupleBuffer(buff); + } + + Tuple* tpl = &buff->tuples[buff->size++]; + tpl->size = buff->tupledatasize; + tpl->data = buff->data + buff->cursor; + buff->cursor += buff->tupledatasize; + return tpl; +} + +void updateTupleDataptr(TupleBuffer* buff, Tuple* tpl, size_t idx) { + tpl->data = buff->data + (idx * buff->tupledatasize); +} + + +void freeTupleBuffer(TupleBuffer* buff) { + free(buff->tuples); + free(buff->data); + free(buff); +} + + +Tuple* getTupleByIndex(TupleBuffer* buff, size_t idx) { + updateTupleDataptr(buff, &buff->tuples[idx], idx); + return &buff->tuples[idx]; +} + +size_t isTupleBufferEmpty(TupleBuffer* buff) { + if (buff->size > 0) { + return 0; + } + return 1; +} \ No newline at end of file diff --git a/src/include/bufferpool/bufferpool.h b/src/include/bufferpool/bufferpool.h deleted file mode 100644 index abc0797..0000000 --- a/src/include/bufferpool/bufferpool.h +++ /dev/null @@ -1,40 +0,0 @@ -#pragma once -#include -#include "../const.h" -#include "../parser/parsetree.h" - -/* - The bufferpool maintains a pool for tuples - to which tuples can be added and removed (freed) from. - - A tuple is struct with a fixed size despite - the fact that the data is likely not fixed in size. - - A tuple contains it's data as a string. Columns - are pointers to the string. - - The rest of the system passes around pointers to the buffer pool. - -*/ - - -typedef struct { - void* pool; - long capacity; - long used; -} Bufferpool; - -extern Bufferpool* buffpool; - - -void* getNextFreeSlot(); -void getColAsChar(char* target, int pooloffset, size_t colIdx, Datatype type); -void copyToBufferPool(int destinationoffset, void* source, size_t size); -int addToBufferPool(void* source, size_t size); -int addToBufferPoolFromOffset(int offset, size_t size); -void reserveSpaceBufferpool(int offset, size_t size); - -int getCurrentOffset(); - -void* getTuple(int pooloffset); -void* getCol(int pooloffset, size_t colOffset); \ No newline at end of file diff --git a/src/include/const.h b/src/include/const.h index 74e230e..287f302 100644 --- a/src/include/const.h +++ b/src/include/const.h @@ -14,9 +14,10 @@ #define JOINTUPLESIZE 1000 #define SCANTUPLESIZE 2000 -// Bufferpool -#define BUFFERPOOLSIZE 100000 -#define JOINPTRBUFFER 100000 +// Defaults for query execution +#define JOINBUFFSIZE 100000 +#define TUPLESIZE 500 +#define HTSIZE 3000 // Define max size (in chars) of expressions and query #define MAXQUERYSIZE 1000 diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 6cb1b0c..58d11da 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -4,8 +4,8 @@ #include "../const.h" #include "../parser/parsetree.h" #include "../planner/planner.h" -#include "../bufferpool/bufferpool.h" #include "../operators/join.h" +#include "../operators/hashjoin.h" #include "../operators/filter.h" #include "../operators/scan.h" #include "../operators/scanTDB.h" @@ -13,13 +13,11 @@ #include "../operators/aggregate.h" #include "../io/tdb.h" - extern char *buffercache; extern char *bufferscan; -extern Bufferpool* buffpool; - -void execute(Operator* op, bool printColNames, void (*tupleHandler)(int pooloffset)); +void execute(Operator* op, bool printColNames, void (*tupleHandler)(Tuple* tpl)); void executeStatement(Node* node); void executeCreateTable(Node* node); -void executeInsert(Node* node); \ No newline at end of file +void executeInsert(Node* node); +void executeExplain(Node* node); \ No newline at end of file diff --git a/src/include/executor/tuple.h b/src/include/executor/tuple.h new file mode 100644 index 0000000..935d03b --- /dev/null +++ b/src/include/executor/tuple.h @@ -0,0 +1,23 @@ +#pragma once +#include +#include + +typedef struct { + void* data; + size_t size; +} Tuple; + + + + +Tuple* initTuple(); + +Tuple* initTupleOfSize(size_t p_size); + +void* getTupleCol(Tuple* tpl, size_t colOffset); + +void freeTuple(Tuple* tpl); + +size_t isTupleEmpty(Tuple* tpl); + +void markTupleAsEmpty(Tuple* tpl); diff --git a/src/include/executor/tuplebuffer.h b/src/include/executor/tuplebuffer.h new file mode 100644 index 0000000..4dc2ce2 --- /dev/null +++ b/src/include/executor/tuplebuffer.h @@ -0,0 +1,21 @@ +#pragma once +#include "tuple.h" +#include + +typedef struct { + Tuple* tuples; + void* data; + size_t cursor; + size_t tupledatasize; + size_t size; + size_t capacity; +} TupleBuffer; + +TupleBuffer* initTupleBuffer(size_t p_capacity, size_t p_tuplesize); +void resizeTupleBuffer(TupleBuffer* buff); +void addTupleToBuffer(Tuple* tpl, TupleBuffer* buff); +void freeTupleBuffer(TupleBuffer* buff); +void updateTupleDataptr(TupleBuffer* buff, Tuple* tpl, size_t idx); +Tuple* getTupleByIndex(TupleBuffer* buff, size_t idx); +Tuple* getTupleFromBuffer(TupleBuffer* buff); +size_t isTupleBufferEmpty(TupleBuffer* buff); diff --git a/src/include/operators/aggregate.h b/src/include/operators/aggregate.h index 82060cc..f0586a9 100644 --- a/src/include/operators/aggregate.h +++ b/src/include/operators/aggregate.h @@ -1,7 +1,8 @@ #pragma once #include -#include "../bufferpool/bufferpool.h" #include "../planner/planner.h" #include "../executor/executor.h" +#include "../executor/tuple.h" -int aggregateGetTuple(Operator* op); \ No newline at end of file + +void aggregateGetTuple(Operator* op, Tuple* tpl); \ No newline at end of file diff --git a/src/include/operators/filter.h b/src/include/operators/filter.h index 17c5eb1..f1582ea 100644 --- a/src/include/operators/filter.h +++ b/src/include/operators/filter.h @@ -1,7 +1,7 @@ #pragma once #include -#include "../bufferpool/bufferpool.h" #include "../planner/planner.h" +#include "../executor/tuple.h" -int filterGetTuple(Operator* op); -bool evaluateTuplesAgainstFilterOps(int poolOffset1, int poolOffset2, Operator* op); \ No newline at end of file +void filterGetTuple(Operator* op, Tuple* tpl); +bool evaluateTuplesAgainstFilterOps(Tuple* tpl1, Tuple* tpl2, Operator* op); \ No newline at end of file diff --git a/src/include/operators/hashjoin.h b/src/include/operators/hashjoin.h new file mode 100644 index 0000000..24a5d86 --- /dev/null +++ b/src/include/operators/hashjoin.h @@ -0,0 +1,10 @@ +#pragma once +#include "../planner/planner.h" +#include "../executor/executor.h" +#include "../executor/tuple.h" +#include "../executor/tuplebuffer.h" +#include "./join.h" +#include "../util/hashmap.h" + + +void hashjoinGetTuple(Operator* op, Tuple* tpl); \ No newline at end of file diff --git a/src/include/operators/join.h b/src/include/operators/join.h index d6e7983..2a6fbec 100644 --- a/src/include/operators/join.h +++ b/src/include/operators/join.h @@ -1,6 +1,9 @@ #pragma once -#include "../bufferpool/bufferpool.h" #include "../planner/planner.h" #include "../executor/executor.h" +#include "../executor/tuple.h" +#include "../executor/tuplebuffer.h" -int joinGetTuple(Operator* op); \ No newline at end of file + +void joinGetTuple(Operator* op, Tuple* tpl); +void concatTuples(Tuple* returnTpl, Tuple* leftTpl, Tuple* rightTpl, ResultSet* left, ResultSet* right); \ No newline at end of file diff --git a/src/include/operators/project.h b/src/include/operators/project.h index ad551fb..e4b4a10 100644 --- a/src/include/operators/project.h +++ b/src/include/operators/project.h @@ -1,5 +1,5 @@ #pragma once -#include "../bufferpool/bufferpool.h" #include "../planner/planner.h" +#include "../executor/tuple.h" -int projectGetTuple(Operator* op); \ No newline at end of file +void projectGetTuple(Operator* op, Tuple* tpl); \ No newline at end of file diff --git a/src/include/operators/scan.h b/src/include/operators/scan.h index 1e223c0..da6ad44 100644 --- a/src/include/operators/scan.h +++ b/src/include/operators/scan.h @@ -1,6 +1,6 @@ #pragma once -#include "../bufferpool/bufferpool.h" #include "../planner/planner.h" #include "../executor/executor.h" +#include "../executor/tuple.h" -int scanGetTuple(Operator* op); \ No newline at end of file +void scanGetTuple(Operator* op, Tuple* tpl); \ No newline at end of file diff --git a/src/include/operators/scanTDB.h b/src/include/operators/scanTDB.h index 692df5f..f34c20d 100644 --- a/src/include/operators/scanTDB.h +++ b/src/include/operators/scanTDB.h @@ -1,7 +1,8 @@ #pragma once -#include "../bufferpool/bufferpool.h" #include "../planner/planner.h" #include "../executor/executor.h" +#include "../executor/tuple.h" +#include "../executor/tuplebuffer.h" #include -int scanTDBGetTuple(Operator* op); \ No newline at end of file +void scanTDBGetTuple(Operator* op, Tuple* tpl); \ No newline at end of file diff --git a/src/include/parser/parsetree.h b/src/include/parser/parsetree.h index 7b6e84f..0482b18 100644 --- a/src/include/parser/parsetree.h +++ b/src/include/parser/parsetree.h @@ -32,6 +32,7 @@ enum nodeType { AND, STMTCREATE, STMTINSERT, + STMTEXPLAIN, TABLE, OR }; diff --git a/src/include/planner/planner.h b/src/include/planner/planner.h index d99a45f..01f86ea 100644 --- a/src/include/planner/planner.h +++ b/src/include/planner/planner.h @@ -6,6 +6,9 @@ #include "../io/tdb.h" #include "../parser/utils.h" #include "../parser/parsetree.h" +#include "../executor/tuple.h" +#include "../executor/tuplebuffer.h" +#include "../util/hashmap.h" typedef enum { @@ -14,7 +17,8 @@ typedef enum { OP_PROJECT, OP_FILTER, OP_JOIN, - OP_AGGREGATE + OP_AGGREGATE, + OP_HASHJOIN } OperatorType; typedef enum ComparisonType { @@ -98,11 +102,11 @@ typedef struct { struct Operator* left; struct Operator* right; struct Operator* filter; - int lastTupleOffset; - int filterTupleOffset; - int rightTuples[JOINPTRBUFFER]; - int rightTupleIdx; - int rightTupleCount; + Hashmap* hashmap; + TupleBuffer* rightTuples; + Tuple* leftTuple; + size_t rightTupleIdx; + size_t rightTupleCount; bool rightTuplesCollected; } JoinInfo; @@ -127,7 +131,7 @@ typedef struct Operator { ResultSet resultDescription; int iteratorTupleOffset; struct Operator* child; - int (*getTuple) (struct Operator* op); + void (*getTuple) (struct Operator* op, Tuple* tpl); } Operator; void freeQueryplan(Operator *node); diff --git a/src/include/squel.h b/src/include/squel.h new file mode 100644 index 0000000..d72a828 --- /dev/null +++ b/src/include/squel.h @@ -0,0 +1,16 @@ +#pragma once + + +typedef enum { + OPT_HTSIZE +} Option; + +typedef struct Options { + size_t htsize; +} Options; + +extern Options* OPTIONS; + +Options* initOptions(); + +size_t getOption(Option opt); \ No newline at end of file diff --git a/src/include/util/hashmap.h b/src/include/util/hashmap.h new file mode 100644 index 0000000..97a3bff --- /dev/null +++ b/src/include/util/hashmap.h @@ -0,0 +1,34 @@ +#pragma once +#include +#include +#include +#include + + + +typedef struct MapNode { + char key[100]; // TODO no magic; + size_t values[10000]; + size_t obs; + size_t cursor; + struct MapNode* next; +} MapNode; + + +typedef struct { + MapNode* data; + size_t table_size; +} Hashmap; + + +Hashmap* initHashmap(size_t table_size); +void insertToHashmap(Hashmap* map, const char* key, size_t value); +size_t isInHashmap(Hashmap* map, const char* value); +size_t _isInHashmap(Hashmap* map, MapNode* node, const char* key); +void freeHashmap(Hashmap* map); +size_t getValueFromHashmap(Hashmap* map, const char* key); +size_t _getValueFromHashmap(Hashmap* map, MapNode* node, const char* key); +void resetCursor(Hashmap* map, const char* key); +void _tryInsert(Hashmap* map, const char* key, size_t value, MapNode* node); + +unsigned int hash(const char *key, size_t table_size); \ No newline at end of file diff --git a/src/operators/aggregate.c b/src/operators/aggregate.c index dcacb28..1742aa8 100644 --- a/src/operators/aggregate.c +++ b/src/operators/aggregate.c @@ -1,103 +1,31 @@ #include "../include/operators/aggregate.h" -long doCount(Operator* opToIterate) { - int offset = opToIterate->getTuple(opToIterate); - int result = 0; - while (offset >= 0) { - offset = opToIterate->getTuple(opToIterate); - result++; - }; - - return result; +long count(long result, long num __attribute__((unused))) { + return result + 1; } -long doAverage(Operator* opToIterate, size_t colOffset) { - - - int offset = 0; - long sum = 0; - long count = 0; - - for (;;) { - offset = opToIterate->getTuple(opToIterate); - if (offset == -1) { - break; - } - sum += *(long*) getCol(offset,colOffset); - count++; - }; - long result = 0.0; - if (count > 0) { - result = sum / (double) count; - } - return result; +long max(long result, long num) { + return num > result ? num : result; } -long doSum(Operator* opToIterate, size_t colOffset) { - - - int offset = 0; - long long result = 0; - - for (;;) { - offset = opToIterate->getTuple(opToIterate); - if (offset == -1) { - break; - } - result += *(long*) getCol(offset,colOffset); - - }; - - return result; +long sum(long result, long num) { + return num + result; } -long doMax(Operator* opToIterate, size_t colOffset) { - - - int offset = 0; - long result = 0, tmp = 0; - - for (;;) { - offset = opToIterate->getTuple(opToIterate); - if (offset == -1) { - break; - } - tmp = *(long*) getCol(offset,colOffset); - result = tmp > result ? tmp : result; - - }; - - return result; +long min(long result, long num) { + return num < result ? num : result; } -long doMin(Operator* opToIterate, size_t colOffset) { - int offset = 0; - long result = __LONG_MAX__, tmp = 0; - - for (;;) { - offset = opToIterate->getTuple(opToIterate); - if (offset == -1) { - break; - } - tmp = *(long*) getCol(offset,colOffset); - result = tmp < result ? tmp : result; - - }; - - return result; -} - - - -int aggregateGetTuple(Operator* op) { +void aggregateGetTuple(Operator* op, Tuple* tpl) { checkPtrNotNull(op->child, "OP_AGGREGATE has no child."); checkPtrNotNull(op->child->getTuple, "Child of OP_AGGREGATE has no getTuple-method."); if (op->info.aggregate.aggregationDone) { - return -1; + markTupleAsEmpty(tpl); + return; } // TODO: @@ -107,35 +35,64 @@ int aggregateGetTuple(Operator* op) { // } - // Build new tuple to store result + size_t colOffset = op->child->resultDescription.pCols[op->info.aggregate.colToAggregate]; + + + long (*agg_fun)(long result, long num); + long result = 0, tmp = 0; - long result = 0; switch(op->info.aggregate.aggtype) { case COUNT: - result = doCount(op->child); + agg_fun = count; break; case SUM: - result = doSum(op->child, op->child->resultDescription.pCols[op->info.aggregate.colToAggregate]); + agg_fun = sum; break; case AVG: - result = doAverage(op->child, op->child->resultDescription.pCols[op->info.aggregate.colToAggregate]); + agg_fun = sum; // See below why break; case MAX: - result = doMax(op->child, op->child->resultDescription.pCols[op->info.aggregate.colToAggregate]); + agg_fun = max; break; case MIN: - result = doMin(op->child, op->child->resultDescription.pCols[op->info.aggregate.colToAggregate]); + agg_fun = min; + result = __LONG_MAX__; break; default: printf("Aggregation type (%d) not implemented\n", op->info.aggregate.aggtype); exit(1); } + + size_t observations = 0; + + Tuple* tmpTpl = initTupleOfSize(TUPLESIZE); + + for (;;) { + + op->child->getTuple(op->child, tmpTpl); + if (isTupleEmpty(tmpTpl)) { + break; + } + tmp = *(long*) (tmpTpl->data + colOffset); + result = agg_fun(result, tmp); + observations++; + }; + + freeTuple(tmpTpl); + + + if (op->info.aggregate.aggtype == AVG) { + result = result / observations; + } + + op->resultDescription.columnCount = 1; op->resultDescription.pCols[0] = 0; op->info.aggregate.aggregationDone = true; + + *(long*)(tpl->data) = result; - return addToBufferPool(&result, sizeof(result)); } diff --git a/src/operators/filter.c b/src/operators/filter.c index 475b1ab..1d8b587 100644 --- a/src/operators/filter.c +++ b/src/operators/filter.c @@ -1,13 +1,13 @@ #include "../include/operators/filter.h" -bool evaluateTupleAgainstFilterOp(int poolOffset1, int poolOffset2, Operator* op) { +bool evaluateTupleAgainstFilterOp(Tuple* tpl1, Tuple* tpl2, Operator* op) { - if (poolOffset1 == -1) { + if (tpl1 == NULL) { return false; } - if (poolOffset2 == -1) { + if (tpl2 == NULL) { return false; } @@ -44,18 +44,18 @@ bool evaluateTupleAgainstFilterOp(int poolOffset1, int poolOffset2, Operator* op switch (dtype1) { case DTYPE_STR: cmpRes = strcmp( - (char*) getCol(poolOffset1,idx1Offset), - (char*) getCol(poolOffset2,idx2Offset) + (char*) getTupleCol(tpl1,idx1Offset), + (char*) getTupleCol(tpl2,idx2Offset) ); break; case DTYPE_INT: - int number1 = *(int*) getCol(poolOffset1,idx1Offset); - int number2 = *(int*) getCol(poolOffset2,idx2Offset); + int number1 = *(int*) getTupleCol(tpl1,idx1Offset); + int number2 = *(int*) getTupleCol(tpl2,idx2Offset); cmpRes = number1 - number2; break; case DTYPE_LONG: - long lnumber1 = *(long*) getCol(poolOffset1,idx1Offset); - long lnumber2 = *(long*) getCol(poolOffset2,idx2Offset); + long lnumber1 = *(long*) getTupleCol(tpl1,idx1Offset); + long lnumber2 = *(long*) getTupleCol(tpl2,idx2Offset); cmpRes = lnumber1 - lnumber2; break; default: @@ -87,14 +87,14 @@ bool evaluateTupleAgainstFilterOp(int poolOffset1, int poolOffset2, Operator* op Datatype constDatatype = dtype2; size_t colOffset = idx1Offset; size_t constIdx = 2; - int poolOffset = poolOffset1; + Tuple* tpl = tpl1; if (compType == CMP_CONST_COL) { // Guess was wrong, fix it constDatatype = dtype1; constIdx = 0; colOffset = idx2Offset; - poolOffset = poolOffset2; + tpl = tpl2; } // Now we have to only deal with 4 combinations of all the eight possible // 'cause datatypes must match @@ -105,10 +105,10 @@ bool evaluateTupleAgainstFilterOp(int poolOffset1, int poolOffset2, Operator* op // DTYPE_INT vs. IDENT_COL + NUMBER switch (constDatatype) { case DTYPE_STR: - cmpRes = strcmp(op->info.filter.charConstants[constIdx], getCol(poolOffset,colOffset)); + cmpRes = strcmp(op->info.filter.charConstants[constIdx], getTupleCol(tpl,colOffset)); break; case DTYPE_LONG: - long colNumber = *(long*) getCol(poolOffset,colOffset); + long colNumber = *(long*) getTupleCol(tpl,colOffset); long constNumber = (long) op->info.filter.numConstants[constIdx]; // Order matters here if (constIdx == 0) { @@ -145,7 +145,7 @@ bool evaluateTupleAgainstFilterOp(int poolOffset1, int poolOffset2, Operator* op return matches; } -bool evaluateTuplesAgainstFilterOps(int poolOffset1, int poolOffset2, Operator* op) { +bool evaluateTuplesAgainstFilterOps(Tuple* tpl1, Tuple* tpl2, Operator* op) { bool rtrnValue = true, result = true; @@ -155,7 +155,7 @@ bool evaluateTuplesAgainstFilterOps(int poolOffset1, int poolOffset2, Operator* while (p_op != NULL) { - result = evaluateTupleAgainstFilterOp(poolOffset1, poolOffset2, p_op); + result = evaluateTupleAgainstFilterOp(tpl1, tpl2, p_op); switch (boolOp) { case AND: @@ -176,7 +176,7 @@ bool evaluateTuplesAgainstFilterOps(int poolOffset1, int poolOffset2, Operator* return rtrnValue; } -int filterGetTuple(Operator* op) { +void filterGetTuple(Operator* op, Tuple* tpl) { if (op == NULL) { printf("FILTER_OP: Passed a NULL-pointer to filterGetTuple\n"); @@ -197,21 +197,17 @@ int filterGetTuple(Operator* op) { exit(1); } - - int poolOffset = 0; - while (true) { /* Get new tuples until found something that passes the filter */ - poolOffset = op->child->getTuple(op->child); + op->child->getTuple(op->child, tpl); - if (poolOffset == -1) { - return -1; + if (isTupleEmpty(tpl)) { + break; } - if (evaluateTuplesAgainstFilterOps(poolOffset, poolOffset, op)) break; + if (evaluateTuplesAgainstFilterOps(tpl, tpl, op)) break; } - return poolOffset; } diff --git a/src/operators/hashjoin.c b/src/operators/hashjoin.c new file mode 100644 index 0000000..eede747 --- /dev/null +++ b/src/operators/hashjoin.c @@ -0,0 +1,98 @@ +#include "../include/operators/hashjoin.h" +#include "../include/squel.h" + + +void hashjoinGetTuple(Operator* op, Tuple* tpl) { + if ( + op->info.join.left == NULL || + op->info.join.right == NULL + ) { + printf("Join left or right operator is NULL\n"); + exit(1); + } + + int joinColIdx = op->info.join.filter->info.filter.boolExprList[2]; + int joinColOffset = op->info.join.filter->resultDescription.pCols[joinColIdx]; + + if (!op->info.join.hashmap) { + op->info.join.hashmap = initHashmap(getOption(OPT_HTSIZE)); + op->info.join.rightTuples = initTupleBuffer(JOINBUFFSIZE, TUPLESIZE); + } + + + Tuple* rightTuple; + const char* joinValue; + + // This is only entered first time the operator is called + while (!op->info.join.rightTuplesCollected) { + + rightTuple = getTupleFromBuffer(op->info.join.rightTuples); + + op->info.join.right->getTuple(op->info.join.right, rightTuple); + + if (isTupleEmpty(rightTuple)) { + op->info.join.rightTuplesCollected = true; + continue; + } + // Get value of join column + joinValue = (const char*) getTupleCol(rightTuple, joinColOffset); + + insertToHashmap(op->info.join.hashmap, joinValue, op->info.join.rightTupleCount); + + op->info.join.rightTupleCount++; + } + + + // Join + + if (op->info.join.leftTuple == NULL) { + op->info.join.leftTuple = initTupleOfSize(TUPLESIZE); + } + + if (isTupleEmpty(op->info.join.leftTuple)) { + op->info.join.left->getTuple(op->info.join.left, op->info.join.leftTuple); + } + + joinColIdx = op->info.join.filter->info.filter.boolExprList[0]; + joinColOffset = op->info.join.filter->resultDescription.pCols[joinColIdx]; + + + int tupleIdx; + do { + joinValue = (const char*) getTupleCol(op->info.join.leftTuple, joinColOffset); + + if (!isInHashmap(op->info.join.hashmap, joinValue)) { + resetCursor(op->info.join.hashmap, joinValue); + op->info.join.left->getTuple(op->info.join.left, op->info.join.leftTuple); + continue; + } + + + tupleIdx = getValueFromHashmap(op->info.join.hashmap, joinValue); + if (tupleIdx < 0) continue; + + rightTuple = getTupleByIndex(op->info.join.rightTuples, tupleIdx); + + // Create a new tuple by concating the tuples + concatTuples( + tpl, + op->info.join.leftTuple, + rightTuple, + &op->info.join.left->resultDescription, + &op->info.join.right->resultDescription + ); + + return; + + } while (!isTupleEmpty(op->info.join.leftTuple)); + + // Join complete, we can free the buffer and the tuples associated + freeTupleBuffer(op->info.join.rightTuples); + freeTuple(op->info.join.leftTuple); + + freeHashmap(op->info.join.hashmap); + markTupleAsEmpty(tpl); + +} + + diff --git a/src/operators/join.c b/src/operators/join.c index 69b5076..8e629da 100644 --- a/src/operators/join.c +++ b/src/operators/join.c @@ -1,7 +1,7 @@ #include "../include/operators/join.h" -void concatTuples(int tupleOffset,int leftOffset,int rightOffset, ResultSet* left, ResultSet* right) { +void concatTuples(Tuple* returnTpl, Tuple* leftTpl, Tuple* rightTpl, ResultSet* left, ResultSet* right) { if ( left == NULL || @@ -11,16 +11,12 @@ void concatTuples(int tupleOffset,int leftOffset,int rightOffset, ResultSet* lef exit(1); } - void* address = getTuple(tupleOffset); - - memset(address, 0, left->size + right->size); - memcpy(address, getTuple(leftOffset), left->size); - memcpy(address + left->size, getTuple(rightOffset), right->size); + memcpy(returnTpl->data, leftTpl->data, left->size); + memcpy(returnTpl->data + left->size, rightTpl->data, right->size); } -int joinGetTuple(Operator* op) { - +void joinGetTuple(Operator* op, Tuple* tpl) { if ( op->info.join.left == NULL || op->info.join.right == NULL @@ -37,83 +33,76 @@ int joinGetTuple(Operator* op) { We store one of the tables in the join in memory. Which is why the tuples from the right table are copied - to the buffer pool. Their original location will be - rewritten by child operators iterating over tuples. - - + to a buffer. */ - - - int rightTupleOffset = 0, originalOffset; - // Reuse this and only create a new tuple if it passes the filter - int offset = 0; - - // Reserve space from the buffer pool so that we can concatenate tuples - if (op->info.join.filterTupleOffset == -1) { - op->info.join.filterTupleOffset = getCurrentOffset(); - reserveSpaceBufferpool(op->info.join.filterTupleOffset, JOINTUPLESIZE); - } + if (!op->info.join.rightTuples) { + op->info.join.rightTuples = initTupleBuffer(JOINBUFFSIZE, TUPLESIZE); + } + Tuple* rightTuple; // This is only entered first time the operator is called while (!op->info.join.rightTuplesCollected) { - - originalOffset = op->info.join.right->getTuple(op->info.join.right); + + rightTuple = getTupleFromBuffer(op->info.join.rightTuples); + + op->info.join.right->getTuple(op->info.join.right, rightTuple); - if (originalOffset == -1) { + if (isTupleEmpty(rightTuple)) { op->info.join.rightTuplesCollected = true; - op->info.join.lastTupleOffset = -1; - op->info.join.rightTupleIdx = 0; - continue; + continue; } - rightTupleOffset = addToBufferPoolFromOffset(originalOffset, op->info.join.right->resultDescription.size); - - op->info.join.rightTuples[op->info.join.rightTupleIdx++] = rightTupleOffset; op->info.join.rightTupleCount++; + } - if (op->info.join.rightTupleCount >= JOINPTRBUFFER) { - printf("Can't fit the right table in the query into joinbuffer. Increase JOINPTRBUFFER\n"); - exit(1); - } + + // Nested join loop + // For each tuple if left relation + // For each tuple in right relation + // if join_predicates(left,right) return tuple(left,right) + + if (op->info.join.leftTuple == NULL) { + op->info.join.leftTuple = initTupleOfSize(TUPLESIZE); + } + + if (isTupleEmpty(op->info.join.leftTuple)) { + op->info.join.left->getTuple(op->info.join.left, op->info.join.leftTuple); } - // Join loop do { if (op->info.join.rightTupleIdx >= op->info.join.rightTupleCount) { op->info.join.rightTupleIdx = 0; - op->info.join.lastTupleOffset = -1; - } - - if (op->info.join.lastTupleOffset == -1) { - offset = op->info.join.left->getTuple(op->info.join.left); - if (offset == -1) { - return -1; + op->info.join.left->getTuple(op->info.join.left, op->info.join.leftTuple); + if (isTupleEmpty(op->info.join.leftTuple)) { + break; } - op->info.join.lastTupleOffset = offset; + + continue; } - rightTupleOffset = op->info.join.rightTuples[op->info.join.rightTupleIdx++]; + rightTuple = getTupleByIndex(op->info.join.rightTuples, op->info.join.rightTupleIdx++); - - if (evaluateTuplesAgainstFilterOps(op->info.join.lastTupleOffset, rightTupleOffset, op->info.join.filter)) { + if (evaluateTuplesAgainstFilterOps(op->info.join.leftTuple, rightTuple, op->info.join.filter)) { // Create a new tuple by concating the tuples concatTuples( - op->info.join.filterTupleOffset, - op->info.join.lastTupleOffset, - rightTupleOffset, + tpl, + op->info.join.leftTuple, + rightTuple, &op->info.join.left->resultDescription, &op->info.join.right->resultDescription ); - if (op->iteratorTupleOffset == -1) { - op->iteratorTupleOffset = addToBufferPool(getTuple(op->info.join.filterTupleOffset), op->resultDescription.size); - } else { - copyToBufferPool(op->iteratorTupleOffset, getTuple(op->info.join.filterTupleOffset), op->resultDescription.size); - - } - return op->iteratorTupleOffset; + + return; } - } while(true); - + } while(!isTupleEmpty(op->info.join.leftTuple)); + + // Join complete, we can free the buffer and the tuples associated + freeTupleBuffer(op->info.join.rightTuples); + freeTuple(op->info.join.leftTuple); + markTupleAsEmpty(tpl); + } + + diff --git a/src/operators/project.c b/src/operators/project.c index 009f6ff..2a498aa 100644 --- a/src/operators/project.c +++ b/src/operators/project.c @@ -1,6 +1,6 @@ #include "../include/operators/project.h" -int projectGetTuple(Operator* op) { +void projectGetTuple(Operator* op, Tuple* tpl) { checkPtrNotNull(op->child, "OP_PROJECT has no child"); checkPtrNotNull(op->child->getTuple, "Child of OP_PROJECT has no getTuple-method"); @@ -13,11 +13,5 @@ int projectGetTuple(Operator* op) { This is an unfortunate extra function call :( */ - int pooloffset = op->child->getTuple(op->child); - - if (pooloffset == -1) { - return -1; - } - - return pooloffset; + op->child->getTuple(op->child, tpl); } \ No newline at end of file diff --git a/src/operators/scan.c b/src/operators/scan.c index 40082d9..ea34d39 100644 --- a/src/operators/scan.c +++ b/src/operators/scan.c @@ -1,6 +1,6 @@ #include "../include/operators/scan.h" -int scanGetTuple(Operator* op) { +void scanGetTuple(Operator* op, Tuple* tpl) { checkPtrNotNull(op, "NULL pointer passed to scanGetTuple"); @@ -24,7 +24,8 @@ int scanGetTuple(Operator* op) { if (line == NULL) { free(lineBuffer); fclose(op->info.scan.tablefile); - return -1; + markTupleAsEmpty(tpl); + return; } @@ -44,7 +45,7 @@ int scanGetTuple(Operator* op) { size_t tplSize = 0; - void* diskBuffer = calloc(1, SCANTUPLESIZE); + void* diskBuffer = tpl->data; void* diskBufferCursor = diskBuffer; checkPtrNotNull(diskBuffer, "could not allocate buffer for scan"); @@ -117,14 +118,6 @@ int scanGetTuple(Operator* op) { i++; }; - // Write to bufferpool - if (op->iteratorTupleOffset == -1) { - op->iteratorTupleOffset = addToBufferPool(diskBuffer, tplSize); - } else { - copyToBufferPool(op->iteratorTupleOffset, diskBuffer, tplSize); - } - - // // ---------------- Useful for debuggin. Leave it be for a while ------------------ // tpldata = diskBuffer; // printf("tpldata at: ", diskBuffer); @@ -147,8 +140,5 @@ int scanGetTuple(Operator* op) { op->resultDescription.size = tplSize; free(lineBuffer); - free(diskBuffer); - - return op->iteratorTupleOffset; } diff --git a/src/operators/scanTDB.c b/src/operators/scanTDB.c index 88dfd0b..5a44475 100644 --- a/src/operators/scanTDB.c +++ b/src/operators/scanTDB.c @@ -29,28 +29,26 @@ void fillBuffer(Operator* op) { } -int scanTDBGetTuple(Operator* op) { +void scanTDBGetTuple(Operator* op, Tuple* tpl) { checkPtrNotNull(op, "NULL pointer passed to scanTDBGetTuple"); if (op->info.scan.fileRead && op->info.scan.recordsInBuffer == 0) { free(op->info.scan.buffer); - return -1; + markTupleAsEmpty(tpl); + return; } if (op->info.scan.recordsInBuffer == 0) { fillBuffer(op); - return scanTDBGetTuple(op); + scanTDBGetTuple(op, tpl); + return; } size_t bufferDataOffset = (op->info.scan.recordsInBuffer - 1) * op->info.scan.recordSize; op->info.scan.recordsInBuffer--; - // Write to bufferpool - if (op->iteratorTupleOffset == -1) { - op->iteratorTupleOffset = addToBufferPool(op->info.scan.buffer + bufferDataOffset, op->info.scan.recordSize); - } else { - copyToBufferPool(op->iteratorTupleOffset, op->info.scan.buffer + bufferDataOffset, op->info.scan.recordSize); - } - return op->iteratorTupleOffset; + + memcpy(tpl->data, op->info.scan.buffer + bufferDataOffset, op->info.scan.recordSize); + } \ No newline at end of file diff --git a/src/parser/parser.c b/src/parser/parser.c index 9c85243..b45325d 100644 --- a/src/parser/parser.c +++ b/src/parser/parser.c @@ -442,6 +442,13 @@ void insert() { } +void explain() { + keyword("EXPLAIN", STMTEXPLAIN); + skipWhite(); + query(); +} + + size_t parse(char* input, Node* p_root) { root = p_root; @@ -451,6 +458,12 @@ size_t parse(char* input, Node* p_root) { qsize = strlen(rawSql); getNextChar(); + + if (peekWordMatches("EXPLAIN")) { + explain(); + return nodeCount; + } + if (peekWordMatches("CREATE")) { create(); return nodeCount; diff --git a/src/planner/operators/join.c b/src/planner/operators/join.c index 52b7354..e586650 100644 --- a/src/planner/operators/join.c +++ b/src/planner/operators/join.c @@ -40,6 +40,28 @@ Operator* makeJoinFilterOps( return filterOps; } +OperatorType deduceJoinType(Operator* filterOp) { + // Atm we can do a hash join + // if and only if: + // - There's only one join condition + // - The condition is an equality comparison + + if (filterOp->info.filter.next) { + return OP_JOIN; + } + + if (filterOp->info.filter.boolExprListSize < 3) { + return OP_JOIN; + } + + if (filterOp->info.filter.boolExprList[1] != -1) { + return OP_JOIN; + } + + return OP_HASHJOIN; + +} + Operator* makeJoinOp(Operator* left, Operator* right, Node* ON) { @@ -54,12 +76,10 @@ Operator* makeJoinOp(Operator* left, Operator* right, Node* ON) { Operator* opJoin = (Operator*) calloc(1, sizeof(Operator)); opJoin->info.join.left = left; opJoin->info.join.right = right; - opJoin->type = OP_JOIN; opJoin->info.join.rightTupleCount = 0; opJoin->info.join.rightTupleIdx = 0; opJoin->info.join.rightTuplesCollected = false; opJoin->iteratorTupleOffset = -1; - opJoin->info.join.filterTupleOffset = -1; copyResultDescription(opJoin->info.join.left, opJoin, 0); @@ -95,5 +115,9 @@ Operator* makeJoinOp(Operator* left, Operator* right, Node* ON) { Operator* opFilter = makeJoinFilterOps(ON, opJoin, left->resultDescription, right->resultDescription); opJoin->info.join.filter = opFilter; + + opJoin->type = deduceJoinType(opFilter); + + return opJoin; } \ No newline at end of file diff --git a/src/planner/operators/scanTDB.c b/src/planner/operators/scanTDB.c index d0005cf..9b569d4 100644 --- a/src/planner/operators/scanTDB.c +++ b/src/planner/operators/scanTDB.c @@ -38,7 +38,6 @@ Operator* makeScanTDBOp(Node* node) { op->info.scan.fileRead = false; op->info.scan.recordsInBuffer = 0; op->iteratorTupleOffset = -1; - op->info.scan.columnOffsets[0] = 0; @@ -55,8 +54,8 @@ Operator* makeScanTDBOp(Node* node) { op->resultDescription.columnCount = tbldef.colCount; op->resultDescription.size = op->info.scan.recordSize; - op->info.scan.bufferSize = op->info.scan.recordSize * TDBSCANBUFFRECORDS; - op->info.scan.buffer = malloc(op->info.scan.bufferSize); + op->info.scan.bufferSize = op->info.scan.recordSize * TDBSCANBUFFRECORDS; + op->info.scan.buffer = malloc(op->info.scan.bufferSize); if (op->info.scan.buffer == NULL) { printf("Failed to allocate memory for scanTDB\n"); diff --git a/src/planner/planner.c b/src/planner/planner.c index b595a0d..1bc1df3 100644 --- a/src/planner/planner.c +++ b/src/planner/planner.c @@ -11,7 +11,7 @@ void freeQueryplan(Operator *node) { } - if (node->type == OP_JOIN) { + if (node->type == OP_JOIN || node->type == OP_HASHJOIN) { freeQueryplan(node->info.join.left); freeQueryplan(node->info.join.right); freeQueryplan(node->info.join.filter); diff --git a/src/squel.c b/src/squel.c index 83571dd..945ad14 100644 --- a/src/squel.c +++ b/src/squel.c @@ -2,12 +2,14 @@ #include "./include/parser/parser.h" #include "./include/planner/planner.h" #include "./include/io/tdb.h" -#include "./include/bufferpool/bufferpool.h" +#include "./include/const.h" +#include "./include/squel.h" #define METADATABUFFSIZE 10 - +// Globals :/ ResultSet* resultDescToPrint = NULL; +Options* OPTIONS; void printTree(Node *node) { @@ -28,7 +30,29 @@ void printTree(Node *node) { } } -void printTuple(int offset) { +void valueToChar(char* target, Tuple* tpl, size_t colOffset, Datatype type) { + if (type == DTYPE_STR) { + strcpy(target, tpl->data + colOffset); + return; + } + if (type == DTYPE_INT) { + char tmp[CHARMAXSIZE]; + sprintf(tmp, "%d", *(int*) (tpl->data + colOffset)); + memcpy(target, tmp, strlen(tmp)); + return; + } + if (type == DTYPE_LONG) { + char tmp[CHARMAXSIZE]; + sprintf(tmp, "%ld", *(long*) (tpl->data + colOffset)); + memcpy(target, tmp, strlen(tmp)); + return; + } + printf("Don't know how to represent type %d as char\n", type); + exit(1); +} + + +void printTuple(Tuple* tpl) { if (resultDescToPrint == NULL) { printf("No result set to print?\n"); @@ -39,7 +63,7 @@ void printTuple(int offset) { for (size_t i = 0; i < resultDescToPrint->columnCount; i++) { memset(buff, 0, CHARMAXSIZE); - getColAsChar(buff, offset ,resultDescToPrint->pCols[i], resultDescToPrint->columns[i].type); + valueToChar(buff, tpl ,resultDescToPrint->pCols[i], resultDescToPrint->columns[i].type); if (i == 0) printf("%s",buff); else printf(";%s",buff); @@ -50,6 +74,22 @@ void printTuple(int offset) { } +Options* initOptions() { + OPTIONS = malloc(sizeof(Options)); + OPTIONS->htsize = HTSIZE; + return OPTIONS; +} + +size_t getOption(Option opt) { + switch(opt) { + case OPT_HTSIZE: + return OPTIONS->htsize; + } + + printf("getOption: Tried to retrieve an unknown option\n"); + exit(1); +} + int main(int argc, char* argv[]) { if (argc == 1) { @@ -57,16 +97,42 @@ int main(int argc, char* argv[]) { exit(1); } - if (strlen(argv[1]) >= MAXQUERYSIZE) { + Options* opts = initOptions(); + + size_t query_arg = 1; + + // Loop through the arguments + for (int i = 1; i < argc; i++) { + + if (strcmp(argv[i], "--help") == 0) { + printf("Help: See README.md.\n"); + return 0; + } + else if (strcmp(argv[i], "--htsize") == 0) { + i++; + char* endptr; + size_t htsize = strtoull(argv[i], &endptr, 10); + + if (endptr == argv[i]) { + printf("--htsize expects an integer\n"); + exit(1); + } + + opts->htsize = htsize; + + query_arg += 2; + } + } + + + if (strlen(argv[query_arg]) >= MAXQUERYSIZE) { printf("Error: Query length exceeds maximum.\n"); exit(1); } /* Allocate memory for parse tree and parse the raw query */ Node* parsetree = createParsetree(); - parse(argv[1], parsetree); - - // printTree(parsetree); + parse(argv[query_arg], parsetree); // It's either a SELECT or a STMT Operator* queryplan = NULL; @@ -89,4 +155,6 @@ int main(int argc, char* argv[]) { freeQueryplan(queryplan); } + free(opts); + } \ No newline at end of file diff --git a/src/util/hashmap.c b/src/util/hashmap.c new file mode 100644 index 0000000..6acbd02 --- /dev/null +++ b/src/util/hashmap.c @@ -0,0 +1,126 @@ +#include "../include/util/hashmap.h" + +Hashmap* initHashmap(size_t table_size) { + Hashmap* map = malloc(sizeof(Hashmap)); + map->data = calloc(table_size, sizeof(MapNode)); + if (map->data == NULL) { + printf("Error: unable to reserve %ld bytes\n", (sizeof(MapNode) * table_size) / 1024); + exit(1); + } + map->table_size = table_size; + return map; +} + +void insertToHashmap(Hashmap* map, const char* key, size_t value) { + unsigned int idx = hash(key, map->table_size); + + MapNode* node = &map->data[idx]; + _tryInsert(map, key, value, node); +} + + + +void _tryInsert(Hashmap* map __attribute__((unused)), const char* key, size_t value, MapNode* node) { + + if (strlen(key) == 0) return; // Where do these come from? + + if (node->obs == 0) { + + memcpy(node->key, key, strlen(key)); + + } else { + + if (strcmp(key, node->key) != 0) { + if (!node->next) { + node->next = calloc(1, sizeof(MapNode)); + } + + _tryInsert(map, key, value, node->next); + return; + } + } + node->values[node->obs] = value; + + if (node->obs >= 10000) { + // printf("OUT OF BOUNDS\n"); // TODOs + return; + } + node->obs++; +} + +size_t _isInHashmap(Hashmap* map, MapNode* node, const char* key) { + if (strcmp(key, node->key) != 0) { + if (!node->next) { + return 0; + } + return _isInHashmap(map, node->next, key); + } + if (node->cursor == node->obs) return 0; + return node->obs > 0 ? 1 : 0; +} + + +size_t isInHashmap(Hashmap* map, const char* key) { + unsigned int idx = hash(key, map->table_size); + MapNode* node = &map->data[idx]; + return _isInHashmap(map, node, key); +} + + +void resetCursor(Hashmap* map, const char* key) { + unsigned int idx = hash(key, map->table_size); + map->data[idx].cursor = 0; +} + +size_t getValueFromHashmap(Hashmap* map, const char* key) { + unsigned int idx = hash(key, map->table_size); + MapNode* node = &map->data[idx]; + return _getValueFromHashmap(map, node, key); +} + +size_t _getValueFromHashmap(Hashmap* map, MapNode* node, const char* key) { + + if (strcmp(key, node->key) != 0) { + if (!node->next) { + return 0; + } + + return _getValueFromHashmap(map, node->next, key); + } + + if (node->cursor == node->obs) return -1; + + return node->values[node->cursor++]; + +} + +void freeHashMapNode(MapNode* node) { + if (!node) return; + + if (node->next) { + freeHashMapNode(node->next); + } + free(node); +} + +void freeHashmap(Hashmap* map) { + MapNode* node; + for (size_t i = 0; i < map->table_size; i++) { + node = &map->data[i]; + + if (node == 0) break; + + freeHashMapNode(node->next); // Only adjacents need to be freed + + } + free(map->data); + free(map); +} + +unsigned int hash(const char *key, size_t table_size) { + unsigned long int hashval = 0; + while (*key) { + hashval = (hashval << 5) + *key++; + } + return hashval % table_size; +} \ No newline at end of file diff --git a/test/bats-core b/test/bats-core index de96df0..261b029 160000 --- a/test/bats-core +++ b/test/bats-core @@ -1 +1 @@ -Subproject commit de96df03197ecc51635463fd9e35e26638191a90 +Subproject commit 261b029f3b3957a154f3e69abcbf19fe3e265c0a diff --git a/test/data/animals.csv b/test/data/animals.csv new file mode 100644 index 0000000..cff6b78 --- /dev/null +++ b/test/data/animals.csv @@ -0,0 +1,5 @@ +animal;size +monkey;small +cat;small +whale;very big +horse;medium \ No newline at end of file diff --git a/test/data/fruits.csv b/test/data/fruits.csv new file mode 100644 index 0000000..98b24cf --- /dev/null +++ b/test/data/fruits.csv @@ -0,0 +1,5 @@ +fruit;size +grape;small +strawberry;small +watermelon;very big +orange;medium \ No newline at end of file diff --git a/test/hashmap_test.c b/test/hashmap_test.c new file mode 100644 index 0000000..ec2df1f --- /dev/null +++ b/test/hashmap_test.c @@ -0,0 +1,28 @@ +#include "../src/util/hashmap.c" +#include + +int main() { + + Hashmap* map = initHashmap(1000); + + insertToHashmap(map, "12345", 1442); + insertToHashmap(map, "12346", 2); + insertToHashmap(map, "12X46", 3); + + if (isInHashmap(map, "12345")) { + printf("12345 in map with value %ld\n", getValueFromHashmap(map, "12345")); + } + + if (isInHashmap(map, "12X46")) { + printf("12X46 in map\n"); + } + + if (isInHashmap(map, "123fASFA")) { + printf("12X46 in map\n"); + } + + + freeHashmap(map); + + return 0; +} \ No newline at end of file diff --git a/test/test-explain.bats b/test/test-explain.bats new file mode 100644 index 0000000..3b84ca6 --- /dev/null +++ b/test/test-explain.bats @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +setup() { + load './test_helper/bats-support/load' + load './test_helper/bats-assert/load' + run make +} + +@test "EXPLAIN - subquery \w WHERE" { + run ./build/squel "EXPLAIN SELECT col3 FROM (SELECT col3,col1 FROM './test/data/small.csv') WHERE col3>100" + expected_output=$(printf "******* EXPLAIN **********\nOP_PROJECT\nOP_FILTER\nOP_PROJECT\nOP_SCAN\n**************************") + assert_output "$expected_output" +} + +@test "EXPLAIN - hash join" { + run ./build/squel "EXPLAIN SELECT col1,col3,int FROM './test/data/small.csv' JOIN './test/data/small2.csv' ON col3=int" + expected_output=$(printf "******* EXPLAIN **********\nOP_PROJECT\nOP_HASHJOIN\nOP_FILTER\nOP_SCAN\nOP_SCAN\n**************************\n") + assert_output "$expected_output" + +} + +@test "EXPLAIN - join with nested loop join" { + run ./build/squel "EXPLAIN SELECT col1,col3,int FROM './test/data/small.csv' JOIN './test/data/small2.csv' ON col3>int" + expected_output=$(printf "******* EXPLAIN **********\nOP_PROJECT\nOP_JOIN\nOP_FILTER\nOP_SCAN\nOP_SCAN\n**************************\n") + assert_output "$expected_output" +} + + diff --git a/test/test_hashmap.bats b/test/test_hashmap.bats new file mode 100644 index 0000000..fd5eb73 --- /dev/null +++ b/test/test_hashmap.bats @@ -0,0 +1,14 @@ + +#!/usr/bin/env bats + +setup_file() { + run rm ./build/hashmap_test.o + run gcc ./test/hashmap_test.c -o ./build/hashmap_test.o +} + +@test "Hashmap functionality" { + run ./build/hashmap_test.o + [[ $"${lines[0]}" == "12345 in map with value 1442" ]] + [[ $"${lines[1]}" == "12X46 in map" ]] +} + diff --git a/test/test_helper/bats-assert b/test/test_helper/bats-assert new file mode 160000 index 0000000..0ec504e --- /dev/null +++ b/test/test_helper/bats-assert @@ -0,0 +1 @@ +Subproject commit 0ec504eb523fd87af924ad77e1221ee4fb8c1596 diff --git a/test/test_helper/bats-support b/test/test_helper/bats-support new file mode 160000 index 0000000..9bf10e8 --- /dev/null +++ b/test/test_helper/bats-support @@ -0,0 +1 @@ +Subproject commit 9bf10e876dd6b624fe44423f0b35e064225f7556 diff --git a/test/test_join_duplicate.bats b/test/test_join_duplicate.bats new file mode 100644 index 0000000..7bba3cc --- /dev/null +++ b/test/test_join_duplicate.bats @@ -0,0 +1,29 @@ +#!/usr/bin/env bats + +setup_file() { + run make +} + +@test "Join animals to fruits duplicating rows" { + run ./build/squel "SELECT a.size,a.animal,f.fruit FROM './test/data/animals.csv' AS a JOIN './test/data/fruits.csv' AS f ON a.size=f.size" + [[ $"${lines[0]}" == "size;animal;fruit" ]] + [[ $"${lines[1]}" == "small;monkey;grape" ]] + [[ $"${lines[2]}" == "small;monkey;strawberry" ]] + [[ $"${lines[3]}" == "small;cat;grape" ]] + [[ $"${lines[4]}" == "small;cat;strawberry" ]] + [[ $"${lines[5]}" == "very big;whale;watermelon" ]] + [[ $"${lines[6]}" == "medium;horse;orange" ]] + +} + +@test "Join fruits to animals duplicating rows" { + run ./build/squel "SELECT a.size,a.animal,f.fruit FROM './test/data/fruits.csv' AS f JOIN './test/data/animals.csv' AS a ON a.size=f.size" + [[ $"${lines[0]}" == "size;animal;fruit" ]] + [[ $"${lines[1]}" == "small;monkey;grape" ]] + [[ $"${lines[2]}" == "small;cat;grape" ]] + [[ $"${lines[3]}" == "small;monkey;strawberry" ]] + [[ $"${lines[4]}" == "small;cat;strawberry" ]] + [[ $"${lines[5]}" == "very big;whale;watermelon" ]] + [[ $"${lines[6]}" == "medium;horse;orange" ]] + +} diff --git a/test/test_simple_join.bats b/test/test_simple_join.bats index 57bc7d3..b5dc044 100644 --- a/test/test_simple_join.bats +++ b/test/test_simple_join.bats @@ -30,6 +30,11 @@ setup_file() { [[ $"${lines[5]}" == "" ]] } - +@test "Hashjoin with small hashtable" { + run ./build/squel --htsize 10 "SELECT COUNT(u.unemployed) FROM './test/data/lt_unemployed.csv' AS lt JOIN './test/data/unemployed.csv' AS u ON u.time=lt.time" + [[ $"${lines[0]}" == "unemployed" ]] + [[ $"${lines[1]}" == "213" ]] + [[ $"${lines[2]}" == "" ]] +} diff --git a/testi.csv b/testi.csv new file mode 100644 index 0000000..c4c625f --- /dev/null +++ b/testi.csv @@ -0,0 +1,215 @@ +unemployed +8471 +8361 +8119 +7931 +7765 +7808 +7828 +7544 +7130 +6984 +6774 +6683 +6525 +6228 +6018 +5840 +5719 +5649 +5659 +5323 +5027 +4817 +4629 +4572 +4478 +4308 +4197 +4049 +3921 +3958 +3996 +3744 +3660 +3497 +3392 +3459 +3373 +3308 +3243 +3249 +3233 +3383 +3410 +3419 +3493 +3583 +3779 +4045 +4295 +4433 +4607 +4714 +4900 +5159 +5391 +5358 +5351 +5364 +5389 +5467 +5527 +5403 +5364 +5315 +5259 +5351 +5494 +5420 +5341 +5182 +5119 +5194 +5281 +5418 +5420 +5555 +5737 +5748 +5899 +5818 +5668 +5850 +5921 +6137 +6242 +6283 +6315 +6491 +6636 +7013 +7373 +7508 +7600 +7858 +8086 +8573 +9041 +9381 +9644 +9919 +10181 +10732 +11069 +11106 +11453 +11663 +11947 +12598 +13118 +13409 +13800 +14243 +14675 +15583 +16116 +16222 +16356 +15726 +15790 +16350 +16883 +17204 +17503 +17620 +18374 +18778 +18944 +19141 +18852 +18290 +17914 +17893 +17934 +17337 +16466 +16050 +15858 +15983 +15973 +15576 +14888 +13789 +13055 +12698 +12440 +12124 +11758 +11545 +11309 +11372 +11364 +10980 +10714 +10673 +10556 +10671 +10639 +10442 +10328 +10265 +10297 +10572 +10777 +10618 +10565 +10363 +10331 +10546 +10619 +10636 +10843 +11197 +11587 +12439 +12853 +13033 +13535 +13857 +14439 +15406 +15980 +16730 +18785 +20460 +20858 +21261 +21570 +21226 +20678 +20262 +19745 +19655 +19255 +18849 +18038 +17137 +16875 +16871 +16850 +16308 +15919 +15575 +15325 +15450 +15451 +15413 +15454 +15613 +16583 +16814 +17118 +16871 +16678 +2234883 diff --git a/tmp.csv b/tmp.csv new file mode 100644 index 0000000..788d0f2 --- /dev/null +++ b/tmp.csv @@ -0,0 +1,3 @@ +long_term_unemployed;time +8413;2006-01-01 +8303;2006-02-01