From 41ff0f24c19767137deee39b2390d4410772c1b7 Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Tue, 4 Mar 2025 23:54:13 +0200 Subject: [PATCH 01/29] operators return a pointer to tuple --- src/executor/executor.c | 10 +++---- src/executor/statements/insert.c | 4 +-- src/executor/tuple.c | 8 +++++ src/include/executor/executor.h | 2 +- src/include/executor/tuple.h | 11 +++++++ src/include/operators/aggregate.h | 4 ++- src/include/operators/filter.h | 3 +- src/include/operators/join.h | 3 +- src/include/operators/project.h | 3 +- src/include/operators/scan.h | 3 +- src/include/operators/scanTDB.h | 3 +- src/include/planner/planner.h | 3 +- src/operators/aggregate.c | 50 +++++++++++++++++-------------- src/operators/filter.c | 15 +++++----- src/operators/join.c | 8 +++-- src/operators/project.c | 10 ++----- src/operators/scan.c | 10 ++++--- src/operators/scanTDB.c | 11 +++++-- src/squel.c | 28 +++++++++++++++-- 19 files changed, 125 insertions(+), 64 deletions(-) create mode 100644 src/executor/tuple.c create mode 100644 src/include/executor/tuple.h diff --git a/src/executor/executor.c b/src/executor/executor.c index 726676e..a4ed7ca 100644 --- a/src/executor/executor.c +++ b/src/executor/executor.c @@ -57,7 +57,7 @@ void doAssignGetTupleFunction(Operator* p_op) { } -void execute(Operator* op, bool printColNames, void (*tupleHandler)(int pooloffset)) { +void execute(Operator* op, bool printColNames, void (*tupleHandler)(Tuple* tpl)) { if (op == NULL) { return; @@ -86,12 +86,12 @@ void execute(Operator* op, bool printColNames, void (*tupleHandler)(int pooloffs } // Get tuples one by one - int offset; + Tuple* tpl; for (;;) { - offset = op->getTuple(op); - if (offset == -1) break; + tpl = op->getTuple(op); + if (tpl == NULL) break; - tupleHandler(offset); + tupleHandler(tpl); }; free(buffpool->pool); diff --git a/src/executor/statements/insert.c b/src/executor/statements/insert.c index c443c70..277725f 100644 --- a/src/executor/statements/insert.c +++ b/src/executor/statements/insert.c @@ -5,14 +5,14 @@ size_t tupleSize = 0; FILE* f = NULL; -void handleTupleInsert(int offset) { +void handleTupleInsert(Tuple* tpl) { if (f == NULL) { printf("No file to insert to\n"); exit(1); } - size_t bytesWritten = fwrite(getTuple(offset), tupleSize, 1, f); + size_t bytesWritten = fwrite(tpl->data, tupleSize, 1, f); assert(bytesWritten > 0); } diff --git a/src/executor/tuple.c b/src/executor/tuple.c new file mode 100644 index 0000000..398931c --- /dev/null +++ b/src/executor/tuple.c @@ -0,0 +1,8 @@ +#include "../include/executor/tuple.h" + + +Tuple* initTuple(size_t size) { + Tuple* tpl = malloc(sizeof(Tuple)); // Heap allocation + tpl->size = size; + return tpl; +} \ No newline at end of file diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 6cb1b0c..a0abaee 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -19,7 +19,7 @@ extern char *bufferscan; extern Bufferpool* buffpool; -void execute(Operator* op, bool printColNames, void (*tupleHandler)(int pooloffset)); +void execute(Operator* op, bool printColNames, void (*tupleHandler)(Tuple* tpl)); void executeStatement(Node* node); void executeCreateTable(Node* node); void executeInsert(Node* node); \ No newline at end of file diff --git a/src/include/executor/tuple.h b/src/include/executor/tuple.h new file mode 100644 index 0000000..6170267 --- /dev/null +++ b/src/include/executor/tuple.h @@ -0,0 +1,11 @@ +#pragma once +#include +#include + +typedef struct { + void* data; + size_t size; +} Tuple; + + +Tuple* initTuple(size_t size); \ No newline at end of file diff --git a/src/include/operators/aggregate.h b/src/include/operators/aggregate.h index 82060cc..fd8f2db 100644 --- a/src/include/operators/aggregate.h +++ b/src/include/operators/aggregate.h @@ -3,5 +3,7 @@ #include "../bufferpool/bufferpool.h" #include "../planner/planner.h" #include "../executor/executor.h" +#include "../executor/tuple.h" -int aggregateGetTuple(Operator* op); \ No newline at end of file + +Tuple* aggregateGetTuple(Operator* op); \ No newline at end of file diff --git a/src/include/operators/filter.h b/src/include/operators/filter.h index 17c5eb1..c969599 100644 --- a/src/include/operators/filter.h +++ b/src/include/operators/filter.h @@ -2,6 +2,7 @@ #include #include "../bufferpool/bufferpool.h" #include "../planner/planner.h" +#include "../executor/tuple.h" -int filterGetTuple(Operator* op); +Tuple* filterGetTuple(Operator* op); bool evaluateTuplesAgainstFilterOps(int poolOffset1, int poolOffset2, Operator* op); \ No newline at end of file diff --git a/src/include/operators/join.h b/src/include/operators/join.h index d6e7983..25ffebe 100644 --- a/src/include/operators/join.h +++ b/src/include/operators/join.h @@ -2,5 +2,6 @@ #include "../bufferpool/bufferpool.h" #include "../planner/planner.h" #include "../executor/executor.h" +#include "../executor/tuple.h" -int joinGetTuple(Operator* op); \ No newline at end of file +Tuple* joinGetTuple(Operator* op); \ No newline at end of file diff --git a/src/include/operators/project.h b/src/include/operators/project.h index ad551fb..6a228d9 100644 --- a/src/include/operators/project.h +++ b/src/include/operators/project.h @@ -1,5 +1,6 @@ #pragma once #include "../bufferpool/bufferpool.h" #include "../planner/planner.h" +#include "../executor/tuple.h" -int projectGetTuple(Operator* op); \ No newline at end of file +Tuple* projectGetTuple(Operator* op); \ No newline at end of file diff --git a/src/include/operators/scan.h b/src/include/operators/scan.h index 1e223c0..f43aa63 100644 --- a/src/include/operators/scan.h +++ b/src/include/operators/scan.h @@ -2,5 +2,6 @@ #include "../bufferpool/bufferpool.h" #include "../planner/planner.h" #include "../executor/executor.h" +#include "../executor/tuple.h" -int scanGetTuple(Operator* op); \ No newline at end of file +Tuple* scanGetTuple(Operator* op); \ No newline at end of file diff --git a/src/include/operators/scanTDB.h b/src/include/operators/scanTDB.h index 692df5f..8b97cdb 100644 --- a/src/include/operators/scanTDB.h +++ b/src/include/operators/scanTDB.h @@ -2,6 +2,7 @@ #include "../bufferpool/bufferpool.h" #include "../planner/planner.h" #include "../executor/executor.h" +#include "../executor/tuple.h" #include -int scanTDBGetTuple(Operator* op); \ No newline at end of file +Tuple* scanTDBGetTuple(Operator* op); \ No newline at end of file diff --git a/src/include/planner/planner.h b/src/include/planner/planner.h index d99a45f..620eb25 100644 --- a/src/include/planner/planner.h +++ b/src/include/planner/planner.h @@ -6,6 +6,7 @@ #include "../io/tdb.h" #include "../parser/utils.h" #include "../parser/parsetree.h" +#include "../executor/tuple.h" typedef enum { @@ -127,7 +128,7 @@ typedef struct Operator { ResultSet resultDescription; int iteratorTupleOffset; struct Operator* child; - int (*getTuple) (struct Operator* op); + Tuple* (*getTuple) (struct Operator* op); } Operator; void freeQueryplan(Operator *node); diff --git a/src/operators/aggregate.c b/src/operators/aggregate.c index dcacb28..13deb22 100644 --- a/src/operators/aggregate.c +++ b/src/operators/aggregate.c @@ -1,10 +1,10 @@ #include "../include/operators/aggregate.h" long doCount(Operator* opToIterate) { - int offset = opToIterate->getTuple(opToIterate); + Tuple* tpl = opToIterate->getTuple(opToIterate); int result = 0; - while (offset >= 0) { - offset = opToIterate->getTuple(opToIterate); + while (tpl != NULL) { + tpl = opToIterate->getTuple(opToIterate); result++; }; @@ -14,16 +14,16 @@ long doCount(Operator* opToIterate) { long doAverage(Operator* opToIterate, size_t colOffset) { - int offset = 0; + Tuple* tpl; long sum = 0; long count = 0; for (;;) { - offset = opToIterate->getTuple(opToIterate); - if (offset == -1) { + tpl = opToIterate->getTuple(opToIterate); + if (tpl == NULL) { break; } - sum += *(long*) getCol(offset,colOffset); + sum += *(long*) tpl->data + colOffset; count++; }; long result = 0.0; @@ -36,15 +36,15 @@ long doAverage(Operator* opToIterate, size_t colOffset) { long doSum(Operator* opToIterate, size_t colOffset) { - int offset = 0; + Tuple* tpl ; long long result = 0; for (;;) { - offset = opToIterate->getTuple(opToIterate); - if (offset == -1) { + tpl = opToIterate->getTuple(opToIterate); + if (tpl == NULL) { break; } - result += *(long*) getCol(offset,colOffset); + result += *(long*) tpl->data + colOffset; }; @@ -54,15 +54,15 @@ long doSum(Operator* opToIterate, size_t colOffset) { long doMax(Operator* opToIterate, size_t colOffset) { - int offset = 0; + Tuple* tpl; long result = 0, tmp = 0; for (;;) { - offset = opToIterate->getTuple(opToIterate); - if (offset == -1) { + tpl = opToIterate->getTuple(opToIterate); + if (tpl == NULL) { break; } - tmp = *(long*) getCol(offset,colOffset); + tmp = *(long*) tpl->data + colOffset; result = tmp > result ? tmp : result; }; @@ -73,15 +73,15 @@ long doMax(Operator* opToIterate, size_t colOffset) { long doMin(Operator* opToIterate, size_t colOffset) { - int offset = 0; + Tuple* tpl; long result = __LONG_MAX__, tmp = 0; for (;;) { - offset = opToIterate->getTuple(opToIterate); - if (offset == -1) { + tpl = opToIterate->getTuple(opToIterate); + if (tpl == NULL) { break; } - tmp = *(long*) getCol(offset,colOffset); + tmp = *(long*) tpl->data + colOffset; result = tmp < result ? tmp : result; }; @@ -91,13 +91,13 @@ long doMin(Operator* opToIterate, size_t colOffset) { -int aggregateGetTuple(Operator* op) { +Tuple* aggregateGetTuple(Operator* op) { checkPtrNotNull(op->child, "OP_AGGREGATE has no child."); checkPtrNotNull(op->child->getTuple, "Child of OP_AGGREGATE has no getTuple-method."); if (op->info.aggregate.aggregationDone) { - return -1; + return NULL; } // TODO: @@ -137,5 +137,11 @@ int aggregateGetTuple(Operator* op) { op->info.aggregate.aggregationDone = true; - return addToBufferPool(&result, sizeof(result)); + Tuple* tpl = initTuple(sizeof(result)); + + long* res_ptr = malloc(sizeof(result)); + + *res_ptr = result; + + return tpl; } diff --git a/src/operators/filter.c b/src/operators/filter.c index 475b1ab..34ccc38 100644 --- a/src/operators/filter.c +++ b/src/operators/filter.c @@ -176,7 +176,7 @@ bool evaluateTuplesAgainstFilterOps(int poolOffset1, int poolOffset2, Operator* return rtrnValue; } -int filterGetTuple(Operator* op) { +Tuple* filterGetTuple(Operator* op) { if (op == NULL) { printf("FILTER_OP: Passed a NULL-pointer to filterGetTuple\n"); @@ -198,20 +198,21 @@ int filterGetTuple(Operator* op) { } - int poolOffset = 0; + Tuple* tpl = 0; while (true) { /* Get new tuples until found something that passes the filter */ - poolOffset = op->child->getTuple(op->child); + tpl = op->child->getTuple(op->child); - if (poolOffset == -1) { - return -1; + if (tpl == NULL) { + break; } - if (evaluateTuplesAgainstFilterOps(poolOffset, poolOffset, op)) break; + if (evaluateTuplesAgainstFilterOps(0, 0, op)) break; } - return poolOffset; + + return tpl; } diff --git a/src/operators/join.c b/src/operators/join.c index 69b5076..7912cb6 100644 --- a/src/operators/join.c +++ b/src/operators/join.c @@ -19,7 +19,7 @@ void concatTuples(int tupleOffset,int leftOffset,int rightOffset, ResultSet* lef } -int joinGetTuple(Operator* op) { +Tuple* joinGetTuple(Operator* op) { if ( op->info.join.left == NULL || @@ -42,8 +42,9 @@ int joinGetTuple(Operator* op) { */ - - + return NULL; + + /* int rightTupleOffset = 0, originalOffset; // Reuse this and only create a new tuple if it passes the filter int offset = 0; @@ -115,5 +116,6 @@ int joinGetTuple(Operator* op) { return op->iteratorTupleOffset; } } while(true); + */ } diff --git a/src/operators/project.c b/src/operators/project.c index 009f6ff..858c8ab 100644 --- a/src/operators/project.c +++ b/src/operators/project.c @@ -1,6 +1,6 @@ #include "../include/operators/project.h" -int projectGetTuple(Operator* op) { +Tuple* projectGetTuple(Operator* op) { checkPtrNotNull(op->child, "OP_PROJECT has no child"); checkPtrNotNull(op->child->getTuple, "Child of OP_PROJECT has no getTuple-method"); @@ -13,11 +13,5 @@ int projectGetTuple(Operator* op) { This is an unfortunate extra function call :( */ - int pooloffset = op->child->getTuple(op->child); - - if (pooloffset == -1) { - return -1; - } - - return pooloffset; + return op->child->getTuple(op->child); } \ No newline at end of file diff --git a/src/operators/scan.c b/src/operators/scan.c index 40082d9..09cd80b 100644 --- a/src/operators/scan.c +++ b/src/operators/scan.c @@ -1,6 +1,6 @@ #include "../include/operators/scan.h" -int scanGetTuple(Operator* op) { +Tuple* scanGetTuple(Operator* op) { checkPtrNotNull(op, "NULL pointer passed to scanGetTuple"); @@ -24,7 +24,7 @@ int scanGetTuple(Operator* op) { if (line == NULL) { free(lineBuffer); fclose(op->info.scan.tablefile); - return -1; + return NULL; } @@ -147,8 +147,10 @@ int scanGetTuple(Operator* op) { op->resultDescription.size = tplSize; free(lineBuffer); - free(diskBuffer); - return op->iteratorTupleOffset; + Tuple* tpl = initTuple(tplSize); + tpl->data = diskBuffer; + + return tpl; } diff --git a/src/operators/scanTDB.c b/src/operators/scanTDB.c index 88dfd0b..3de5574 100644 --- a/src/operators/scanTDB.c +++ b/src/operators/scanTDB.c @@ -29,13 +29,13 @@ void fillBuffer(Operator* op) { } -int scanTDBGetTuple(Operator* op) { +Tuple* scanTDBGetTuple(Operator* op) { checkPtrNotNull(op, "NULL pointer passed to scanTDBGetTuple"); if (op->info.scan.fileRead && op->info.scan.recordsInBuffer == 0) { free(op->info.scan.buffer); - return -1; + return NULL; } if (op->info.scan.recordsInBuffer == 0) { @@ -52,5 +52,10 @@ int scanTDBGetTuple(Operator* op) { } else { copyToBufferPool(op->iteratorTupleOffset, op->info.scan.buffer + bufferDataOffset, op->info.scan.recordSize); } - return op->iteratorTupleOffset; + + + Tuple* tpl = initTuple(op->info.scan.recordSize); + tpl->data = op->info.scan.buffer + bufferDataOffset; + + return tpl; } \ No newline at end of file diff --git a/src/squel.c b/src/squel.c index 83571dd..8469934 100644 --- a/src/squel.c +++ b/src/squel.c @@ -28,7 +28,31 @@ void printTree(Node *node) { } } -void printTuple(int offset) { +void valueToChar(char* target, Tuple* tpl, size_t colOffset, Datatype type) { + if (type == DTYPE_STR) { + strcpy(target, tpl->data + colOffset); + return; + } + if (type == DTYPE_INT) { + char tmp[CHARMAXSIZE]; + sprintf(tmp, "%d", *(int*) (tpl->data + colOffset)); + memcpy(target, tmp, strlen(tmp)); + return; + } + if (type == DTYPE_LONG) { + char tmp[CHARMAXSIZE]; + sprintf(tmp, "%ld", *(long*) (tpl->data + colOffset)); + memcpy(target, tmp, strlen(tmp)); + return; + } + printf("Don't know how to represent type %d as char\n", type); + exit(1); +} + + + + +void printTuple(Tuple* tpl) { if (resultDescToPrint == NULL) { printf("No result set to print?\n"); @@ -39,7 +63,7 @@ void printTuple(int offset) { for (size_t i = 0; i < resultDescToPrint->columnCount; i++) { memset(buff, 0, CHARMAXSIZE); - getColAsChar(buff, offset ,resultDescToPrint->pCols[i], resultDescToPrint->columns[i].type); + valueToChar(buff, tpl ,resultDescToPrint->pCols[i], resultDescToPrint->columns[i].type); if (i == 0) printf("%s",buff); else printf(";%s",buff); From 07407e82745d5e89332b0ef8841e077761a7a730 Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Wed, 5 Mar 2025 00:02:47 +0200 Subject: [PATCH 02/29] filter uses Tuple --- src/executor/tuple.c | 4 ++++ src/include/executor/tuple.h | 4 +++- src/include/operators/filter.h | 2 +- src/operators/filter.c | 32 ++++++++++++++++---------------- 4 files changed, 24 insertions(+), 18 deletions(-) diff --git a/src/executor/tuple.c b/src/executor/tuple.c index 398931c..ed2143f 100644 --- a/src/executor/tuple.c +++ b/src/executor/tuple.c @@ -5,4 +5,8 @@ Tuple* initTuple(size_t size) { Tuple* tpl = malloc(sizeof(Tuple)); // Heap allocation tpl->size = size; return tpl; +} + +void* getTupleCol(Tuple* tpl, size_t colOffset) { + return tpl->data + colOffset; } \ No newline at end of file diff --git a/src/include/executor/tuple.h b/src/include/executor/tuple.h index 6170267..6e3978f 100644 --- a/src/include/executor/tuple.h +++ b/src/include/executor/tuple.h @@ -8,4 +8,6 @@ typedef struct { } Tuple; -Tuple* initTuple(size_t size); \ No newline at end of file +Tuple* initTuple(size_t size); + +void* getTupleCol(Tuple* tpl, size_t colOffset); \ No newline at end of file diff --git a/src/include/operators/filter.h b/src/include/operators/filter.h index c969599..f4ff59e 100644 --- a/src/include/operators/filter.h +++ b/src/include/operators/filter.h @@ -5,4 +5,4 @@ #include "../executor/tuple.h" Tuple* filterGetTuple(Operator* op); -bool evaluateTuplesAgainstFilterOps(int poolOffset1, int poolOffset2, Operator* op); \ No newline at end of file +bool evaluateTuplesAgainstFilterOps(Tuple* tpl1, Tuple* tpl2, Operator* op); \ No newline at end of file diff --git a/src/operators/filter.c b/src/operators/filter.c index 34ccc38..13d9d54 100644 --- a/src/operators/filter.c +++ b/src/operators/filter.c @@ -1,13 +1,13 @@ #include "../include/operators/filter.h" -bool evaluateTupleAgainstFilterOp(int poolOffset1, int poolOffset2, Operator* op) { +bool evaluateTupleAgainstFilterOp(Tuple* tpl1, Tuple* tpl2, Operator* op) { - if (poolOffset1 == -1) { + if (tpl1 == NULL) { return false; } - if (poolOffset2 == -1) { + if (tpl2 == NULL) { return false; } @@ -44,18 +44,18 @@ bool evaluateTupleAgainstFilterOp(int poolOffset1, int poolOffset2, Operator* op switch (dtype1) { case DTYPE_STR: cmpRes = strcmp( - (char*) getCol(poolOffset1,idx1Offset), - (char*) getCol(poolOffset2,idx2Offset) + (char*) getTupleCol(tpl1,idx1Offset), + (char*) getTupleCol(tpl2,idx2Offset) ); break; case DTYPE_INT: - int number1 = *(int*) getCol(poolOffset1,idx1Offset); - int number2 = *(int*) getCol(poolOffset2,idx2Offset); + int number1 = *(int*) getTupleCol(tpl1,idx1Offset); + int number2 = *(int*) getTupleCol(tpl2,idx2Offset); cmpRes = number1 - number2; break; case DTYPE_LONG: - long lnumber1 = *(long*) getCol(poolOffset1,idx1Offset); - long lnumber2 = *(long*) getCol(poolOffset2,idx2Offset); + long lnumber1 = *(long*) getTupleCol(tpl1,idx1Offset); + long lnumber2 = *(long*) getTupleCol(tpl2,idx2Offset); cmpRes = lnumber1 - lnumber2; break; default: @@ -87,14 +87,14 @@ bool evaluateTupleAgainstFilterOp(int poolOffset1, int poolOffset2, Operator* op Datatype constDatatype = dtype2; size_t colOffset = idx1Offset; size_t constIdx = 2; - int poolOffset = poolOffset1; + Tuple* tpl = tpl1; if (compType == CMP_CONST_COL) { // Guess was wrong, fix it constDatatype = dtype1; constIdx = 0; colOffset = idx2Offset; - poolOffset = poolOffset2; + tpl = tpl2; } // Now we have to only deal with 4 combinations of all the eight possible // 'cause datatypes must match @@ -105,10 +105,10 @@ bool evaluateTupleAgainstFilterOp(int poolOffset1, int poolOffset2, Operator* op // DTYPE_INT vs. IDENT_COL + NUMBER switch (constDatatype) { case DTYPE_STR: - cmpRes = strcmp(op->info.filter.charConstants[constIdx], getCol(poolOffset,colOffset)); + cmpRes = strcmp(op->info.filter.charConstants[constIdx], getTupleCol(tpl,colOffset)); break; case DTYPE_LONG: - long colNumber = *(long*) getCol(poolOffset,colOffset); + long colNumber = *(long*) getTupleCol(tpl,colOffset); long constNumber = (long) op->info.filter.numConstants[constIdx]; // Order matters here if (constIdx == 0) { @@ -145,7 +145,7 @@ bool evaluateTupleAgainstFilterOp(int poolOffset1, int poolOffset2, Operator* op return matches; } -bool evaluateTuplesAgainstFilterOps(int poolOffset1, int poolOffset2, Operator* op) { +bool evaluateTuplesAgainstFilterOps(Tuple* tpl1, Tuple* tpl2, Operator* op) { bool rtrnValue = true, result = true; @@ -155,7 +155,7 @@ bool evaluateTuplesAgainstFilterOps(int poolOffset1, int poolOffset2, Operator* while (p_op != NULL) { - result = evaluateTupleAgainstFilterOp(poolOffset1, poolOffset2, p_op); + result = evaluateTupleAgainstFilterOp(tpl1, tpl2, p_op); switch (boolOp) { case AND: @@ -209,7 +209,7 @@ Tuple* filterGetTuple(Operator* op) { break; } - if (evaluateTuplesAgainstFilterOps(0, 0, op)) break; + if (evaluateTuplesAgainstFilterOps(tpl, tpl, op)) break; } From ec8bd3684e497c76cda12f5f6e409d9d7dcfd886 Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Wed, 5 Mar 2025 20:33:47 +0200 Subject: [PATCH 03/29] free tuples after use --- src/executor/executor.c | 4 +++- src/executor/tuple.c | 7 +++++++ src/include/executor/tuple.h | 4 +++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/executor/executor.c b/src/executor/executor.c index a4ed7ca..2714c4a 100644 --- a/src/executor/executor.c +++ b/src/executor/executor.c @@ -1,5 +1,5 @@ #include "../include/executor/executor.h" - +#include "../include/executor/tuple.h" Bufferpool* buffpool; @@ -92,6 +92,8 @@ void execute(Operator* op, bool printColNames, void (*tupleHandler)(Tuple* tpl)) if (tpl == NULL) break; tupleHandler(tpl); + + freeTuple(tpl); }; free(buffpool->pool); diff --git a/src/executor/tuple.c b/src/executor/tuple.c index ed2143f..99049fb 100644 --- a/src/executor/tuple.c +++ b/src/executor/tuple.c @@ -9,4 +9,11 @@ Tuple* initTuple(size_t size) { void* getTupleCol(Tuple* tpl, size_t colOffset) { return tpl->data + colOffset; +} + +void freeTuple(Tuple* tpl) { + if (tpl->data) { + free(tpl->data); + } + free(tpl); } \ No newline at end of file diff --git a/src/include/executor/tuple.h b/src/include/executor/tuple.h index 6e3978f..acb3320 100644 --- a/src/include/executor/tuple.h +++ b/src/include/executor/tuple.h @@ -10,4 +10,6 @@ typedef struct { Tuple* initTuple(size_t size); -void* getTupleCol(Tuple* tpl, size_t colOffset); \ No newline at end of file +void* getTupleCol(Tuple* tpl, size_t colOffset); + +void freeTuple(Tuple* tpl); \ No newline at end of file From d2ec05693f3d4ede0de753125f346e3c8a9cd6a7 Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Wed, 5 Mar 2025 20:38:08 +0200 Subject: [PATCH 04/29] fix aggregate; clean bufferpool stuff --- src/operators/aggregate.c | 4 +--- src/operators/scan.c | 8 -------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/src/operators/aggregate.c b/src/operators/aggregate.c index 13deb22..605f52f 100644 --- a/src/operators/aggregate.c +++ b/src/operators/aggregate.c @@ -136,12 +136,10 @@ Tuple* aggregateGetTuple(Operator* op) { op->resultDescription.pCols[0] = 0; op->info.aggregate.aggregationDone = true; - Tuple* tpl = initTuple(sizeof(result)); - long* res_ptr = malloc(sizeof(result)); - *res_ptr = result; + tpl->data = res_ptr; return tpl; } diff --git a/src/operators/scan.c b/src/operators/scan.c index 09cd80b..bc93cdd 100644 --- a/src/operators/scan.c +++ b/src/operators/scan.c @@ -117,14 +117,6 @@ Tuple* scanGetTuple(Operator* op) { i++; }; - // Write to bufferpool - if (op->iteratorTupleOffset == -1) { - op->iteratorTupleOffset = addToBufferPool(diskBuffer, tplSize); - } else { - copyToBufferPool(op->iteratorTupleOffset, diskBuffer, tplSize); - } - - // // ---------------- Useful for debuggin. Leave it be for a while ------------------ // tpldata = diskBuffer; // printf("tpldata at: ", diskBuffer); From cf3cb6813b039f0868900ce77e2029b7db1b1d2b Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Wed, 5 Mar 2025 21:25:26 +0200 Subject: [PATCH 05/29] tuplebuffer added; some joins work now --- src/executor/tuple.c | 4 +- src/executor/tuplebuffer.c | 34 +++++++++++++ src/include/executor/tuple.h | 4 +- src/include/executor/tuplebuffer.h | 15 ++++++ src/include/operators/join.h | 2 + src/include/planner/planner.h | 9 ++-- src/operators/aggregate.c | 2 +- src/operators/join.c | 81 ++++++++++++------------------ src/operators/scan.c | 2 +- src/operators/scanTDB.c | 2 +- src/planner/operators/join.c | 1 - 11 files changed, 94 insertions(+), 62 deletions(-) create mode 100644 src/executor/tuplebuffer.c create mode 100644 src/include/executor/tuplebuffer.h diff --git a/src/executor/tuple.c b/src/executor/tuple.c index 99049fb..3436d47 100644 --- a/src/executor/tuple.c +++ b/src/executor/tuple.c @@ -1,9 +1,9 @@ #include "../include/executor/tuple.h" -Tuple* initTuple(size_t size) { +Tuple* initTuple() { Tuple* tpl = malloc(sizeof(Tuple)); // Heap allocation - tpl->size = size; + tpl->size = 0; return tpl; } diff --git a/src/executor/tuplebuffer.c b/src/executor/tuplebuffer.c new file mode 100644 index 0000000..c7a0314 --- /dev/null +++ b/src/executor/tuplebuffer.c @@ -0,0 +1,34 @@ +#include "../include/executor/tuplebuffer.h" + + +TupleBuffer* initTupleBuffer(size_t p_capacity) { + TupleBuffer* buff = malloc(sizeof(TupleBuffer)); + buff->capacity = p_capacity; + buff->tuples = malloc(p_capacity * sizeof(Tuple*)); + buff->size = 0; + return buff; +} + + +void resizeTupleBuffer(TupleBuffer* buff) { + buff->capacity *= 2; + buff->tuples = realloc(buff->tuples, buff->capacity * sizeof(Tuple)); +} + +void addTupleToBuffer(Tuple* tpl, TupleBuffer* buff) { + if (buff->size == buff->capacity) { + resizeTupleBuffer(buff); + } + + buff->tuples[buff->size++] = tpl; +} + +void freeTupleBuffer(TupleBuffer* buff) { + free(buff->tuples); + free(buff); +} + + +Tuple* getTupleByIndex(TupleBuffer* buff, size_t idx) { + return buff->tuples[idx]; +} \ No newline at end of file diff --git a/src/include/executor/tuple.h b/src/include/executor/tuple.h index acb3320..2e32012 100644 --- a/src/include/executor/tuple.h +++ b/src/include/executor/tuple.h @@ -8,7 +8,9 @@ typedef struct { } Tuple; -Tuple* initTuple(size_t size); + + +Tuple* initTuple(); void* getTupleCol(Tuple* tpl, size_t colOffset); diff --git a/src/include/executor/tuplebuffer.h b/src/include/executor/tuplebuffer.h new file mode 100644 index 0000000..f26839a --- /dev/null +++ b/src/include/executor/tuplebuffer.h @@ -0,0 +1,15 @@ +#pragma once +#include "tuple.h" + +typedef struct { + Tuple** tuples; + size_t size; + size_t capacity; +} TupleBuffer; + +TupleBuffer* initTupleBuffer(size_t p_capacity); +void resizeTupleBuffer(TupleBuffer* buff); +void addTupleToBuffer(Tuple* tpl, TupleBuffer* buff); +void freeTupleBuffer(TupleBuffer* buff); +Tuple* getTupleByIndex(TupleBuffer* buff, size_t idx); + diff --git a/src/include/operators/join.h b/src/include/operators/join.h index 25ffebe..7e2bf7e 100644 --- a/src/include/operators/join.h +++ b/src/include/operators/join.h @@ -3,5 +3,7 @@ #include "../planner/planner.h" #include "../executor/executor.h" #include "../executor/tuple.h" +#include "../executor/tuplebuffer.h" + Tuple* joinGetTuple(Operator* op); \ No newline at end of file diff --git a/src/include/planner/planner.h b/src/include/planner/planner.h index 620eb25..893608e 100644 --- a/src/include/planner/planner.h +++ b/src/include/planner/planner.h @@ -7,6 +7,7 @@ #include "../parser/utils.h" #include "../parser/parsetree.h" #include "../executor/tuple.h" +#include "../executor/tuplebuffer.h" typedef enum { @@ -99,11 +100,9 @@ typedef struct { struct Operator* left; struct Operator* right; struct Operator* filter; - int lastTupleOffset; - int filterTupleOffset; - int rightTuples[JOINPTRBUFFER]; - int rightTupleIdx; - int rightTupleCount; + TupleBuffer* rightTuples; + size_t rightTupleIdx; + size_t rightTupleCount; bool rightTuplesCollected; } JoinInfo; diff --git a/src/operators/aggregate.c b/src/operators/aggregate.c index 605f52f..9122168 100644 --- a/src/operators/aggregate.c +++ b/src/operators/aggregate.c @@ -136,7 +136,7 @@ Tuple* aggregateGetTuple(Operator* op) { op->resultDescription.pCols[0] = 0; op->info.aggregate.aggregationDone = true; - Tuple* tpl = initTuple(sizeof(result)); + Tuple* tpl = initTuple(); long* res_ptr = malloc(sizeof(result)); *res_ptr = result; tpl->data = res_ptr; diff --git a/src/operators/join.c b/src/operators/join.c index 7912cb6..c16d942 100644 --- a/src/operators/join.c +++ b/src/operators/join.c @@ -1,7 +1,7 @@ #include "../include/operators/join.h" -void concatTuples(int tupleOffset,int leftOffset,int rightOffset, ResultSet* left, ResultSet* right) { +void concatTuples(Tuple* returnTpl, Tuple* leftTpl, Tuple* rightTpl, ResultSet* left, ResultSet* right) { if ( left == NULL || @@ -11,12 +11,10 @@ void concatTuples(int tupleOffset,int leftOffset,int rightOffset, ResultSet* lef exit(1); } - void* address = getTuple(tupleOffset); - - memset(address, 0, left->size + right->size); - memcpy(address, getTuple(leftOffset), left->size); - memcpy(address + left->size, getTuple(rightOffset), right->size); - + void* address = calloc(1, left->size + right->size); + memcpy(address, leftTpl->data, left->size); + memcpy(address + left->size, rightTpl->data, right->size); + returnTpl->data = address; } Tuple* joinGetTuple(Operator* op) { @@ -42,35 +40,23 @@ Tuple* joinGetTuple(Operator* op) { */ - return NULL; - /* - int rightTupleOffset = 0, originalOffset; - // Reuse this and only create a new tuple if it passes the filter - int offset = 0; - - // Reserve space from the buffer pool so that we can concatenate tuples - if (op->info.join.filterTupleOffset == -1) { - op->info.join.filterTupleOffset = getCurrentOffset(); - reserveSpaceBufferpool(op->info.join.filterTupleOffset, JOINTUPLESIZE); + if (!op->info.join.rightTuples) { + op->info.join.rightTuples = initTupleBuffer(100); // TODO NO MAGIC NUMBERS } - + Tuple* rightTuple; // This is only entered first time the operator is called while (!op->info.join.rightTuplesCollected) { - originalOffset = op->info.join.right->getTuple(op->info.join.right); + rightTuple = op->info.join.right->getTuple(op->info.join.right); - if (originalOffset == -1) { + if (rightTuple == NULL) { op->info.join.rightTuplesCollected = true; - op->info.join.lastTupleOffset = -1; - op->info.join.rightTupleIdx = 0; - continue; + continue; } - rightTupleOffset = addToBufferPoolFromOffset(originalOffset, op->info.join.right->resultDescription.size); - - op->info.join.rightTuples[op->info.join.rightTupleIdx++] = rightTupleOffset; + addTupleToBuffer(rightTuple, op->info.join.rightTuples); op->info.join.rightTupleCount++; if (op->info.join.rightTupleCount >= JOINPTRBUFFER) { @@ -79,43 +65,38 @@ Tuple* joinGetTuple(Operator* op) { } } - // Join loop + // Nested join loop + // For each tuple if left relation + // For each tuple in right relation + // if join_predicates(left,right) return tuple(left,right) + op->info.join.rightTupleIdx = 0; + Tuple* leftTuple = op->info.join.left->getTuple(op->info.join.left); do { if (op->info.join.rightTupleIdx >= op->info.join.rightTupleCount) { - op->info.join.rightTupleIdx = 0; - op->info.join.lastTupleOffset = -1; - } - - if (op->info.join.lastTupleOffset == -1) { - offset = op->info.join.left->getTuple(op->info.join.left); - if (offset == -1) { - return -1; + op->info.join.rightTupleIdx = 0; + leftTuple = op->info.join.left->getTuple(op->info.join.left); + if (leftTuple == NULL) { + return NULL; } - op->info.join.lastTupleOffset = offset; } - rightTupleOffset = op->info.join.rightTuples[op->info.join.rightTupleIdx++]; + rightTuple = getTupleByIndex(op->info.join.rightTuples,op->info.join.rightTupleIdx++); - if (evaluateTuplesAgainstFilterOps(op->info.join.lastTupleOffset, rightTupleOffset, op->info.join.filter)) { + if (evaluateTuplesAgainstFilterOps(leftTuple, rightTuple, op->info.join.filter)) { + + Tuple* newTuple = initTuple(); // Create a new tuple by concating the tuples concatTuples( - op->info.join.filterTupleOffset, - op->info.join.lastTupleOffset, - rightTupleOffset, + newTuple, + leftTuple, + rightTuple, &op->info.join.left->resultDescription, &op->info.join.right->resultDescription ); - if (op->iteratorTupleOffset == -1) { - op->iteratorTupleOffset = addToBufferPool(getTuple(op->info.join.filterTupleOffset), op->resultDescription.size); - } else { - copyToBufferPool(op->iteratorTupleOffset, getTuple(op->info.join.filterTupleOffset), op->resultDescription.size); - - } - return op->iteratorTupleOffset; + return newTuple; } } while(true); - */ - } + diff --git a/src/operators/scan.c b/src/operators/scan.c index bc93cdd..abf5a97 100644 --- a/src/operators/scan.c +++ b/src/operators/scan.c @@ -140,7 +140,7 @@ Tuple* scanGetTuple(Operator* op) { free(lineBuffer); - Tuple* tpl = initTuple(tplSize); + Tuple* tpl = initTuple(); tpl->data = diskBuffer; return tpl; diff --git a/src/operators/scanTDB.c b/src/operators/scanTDB.c index 3de5574..cd09fb0 100644 --- a/src/operators/scanTDB.c +++ b/src/operators/scanTDB.c @@ -54,7 +54,7 @@ Tuple* scanTDBGetTuple(Operator* op) { } - Tuple* tpl = initTuple(op->info.scan.recordSize); + Tuple* tpl = initTuple(); tpl->data = op->info.scan.buffer + bufferDataOffset; return tpl; diff --git a/src/planner/operators/join.c b/src/planner/operators/join.c index 52b7354..9e7c452 100644 --- a/src/planner/operators/join.c +++ b/src/planner/operators/join.c @@ -59,7 +59,6 @@ Operator* makeJoinOp(Operator* left, Operator* right, Node* ON) { opJoin->info.join.rightTupleIdx = 0; opJoin->info.join.rightTuplesCollected = false; opJoin->iteratorTupleOffset = -1; - opJoin->info.join.filterTupleOffset = -1; copyResultDescription(opJoin->info.join.left, opJoin, 0); From 386a228e761a93262530f9998282d909d42d6d63 Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Wed, 5 Mar 2025 22:05:00 +0200 Subject: [PATCH 06/29] fix join memory leaks (and logic) --- src/executor/tuplebuffer.c | 1 + src/include/planner/planner.h | 1 + src/operators/join.c | 35 ++++++++++++++++++++--------------- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/executor/tuplebuffer.c b/src/executor/tuplebuffer.c index c7a0314..3a70902 100644 --- a/src/executor/tuplebuffer.c +++ b/src/executor/tuplebuffer.c @@ -24,6 +24,7 @@ void addTupleToBuffer(Tuple* tpl, TupleBuffer* buff) { } void freeTupleBuffer(TupleBuffer* buff) { + for (size_t i = 0; i < buff->size; i++) freeTuple(buff->tuples[i]); free(buff->tuples); free(buff); } diff --git a/src/include/planner/planner.h b/src/include/planner/planner.h index 893608e..edbb075 100644 --- a/src/include/planner/planner.h +++ b/src/include/planner/planner.h @@ -101,6 +101,7 @@ typedef struct { struct Operator* right; struct Operator* filter; TupleBuffer* rightTuples; + Tuple* leftTuple; size_t rightTupleIdx; size_t rightTupleCount; bool rightTuplesCollected; diff --git a/src/operators/join.c b/src/operators/join.c index c16d942..eca8e0a 100644 --- a/src/operators/join.c +++ b/src/operators/join.c @@ -35,14 +35,11 @@ Tuple* joinGetTuple(Operator* op) { We store one of the tables in the join in memory. Which is why the tuples from the right table are copied - to the buffer pool. Their original location will be - rewritten by child operators iterating over tuples. - - + to a buffer. */ if (!op->info.join.rightTuples) { - op->info.join.rightTuples = initTupleBuffer(100); // TODO NO MAGIC NUMBERS + op->info.join.rightTuples = initTupleBuffer(JOINPTRBUFFER); } Tuple* rightTuple; @@ -69,21 +66,25 @@ Tuple* joinGetTuple(Operator* op) { // For each tuple if left relation // For each tuple in right relation // if join_predicates(left,right) return tuple(left,right) - op->info.join.rightTupleIdx = 0; - Tuple* leftTuple = op->info.join.left->getTuple(op->info.join.left); + + if (op->info.join.leftTuple == NULL) { + op->info.join.leftTuple = op->info.join.left->getTuple(op->info.join.left); + } + + Tuple* leftTuple = op->info.join.leftTuple; + do { if (op->info.join.rightTupleIdx >= op->info.join.rightTupleCount) { - op->info.join.rightTupleIdx = 0; - leftTuple = op->info.join.left->getTuple(op->info.join.left); - if (leftTuple == NULL) { - return NULL; - } + op->info.join.rightTupleIdx = 0; + freeTuple(leftTuple); + op->info.join.leftTuple = op->info.join.left->getTuple(op->info.join.left); + leftTuple = op->info.join.leftTuple; + continue; } - rightTuple = getTupleByIndex(op->info.join.rightTuples,op->info.join.rightTupleIdx++); + rightTuple = getTupleByIndex(op->info.join.rightTuples, op->info.join.rightTupleIdx++); - if (evaluateTuplesAgainstFilterOps(leftTuple, rightTuple, op->info.join.filter)) { Tuple* newTuple = initTuple(); @@ -97,6 +98,10 @@ Tuple* joinGetTuple(Operator* op) { ); return newTuple; } - } while(true); + } while(leftTuple != NULL); + + // Join complete, we can free the buffer and the tuples associated + freeTupleBuffer(op->info.join.rightTuples); + return NULL; } From c9356f727f6459a92bc92b0881cb4e0fd5bfe308 Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Thu, 6 Mar 2025 21:50:45 +0200 Subject: [PATCH 07/29] all tests pass again --- src/executor/tuple.c | 9 ++ src/executor/tuplebuffer.c | 7 + src/include/executor/tuple.h | 2 + src/include/executor/tuplebuffer.h | 2 +- src/include/operators/scanTDB.h | 1 + src/operators/aggregate.c | 8 +- src/operators/scanTDB.c | 11 +- src/planner/operators/scanTDB.c | 5 +- testi.csv | 215 +++++++++++++++++++++++++++++ tmp.csv | 3 + 10 files changed, 246 insertions(+), 17 deletions(-) create mode 100644 testi.csv create mode 100644 tmp.csv diff --git a/src/executor/tuple.c b/src/executor/tuple.c index 3436d47..1667c2e 100644 --- a/src/executor/tuple.c +++ b/src/executor/tuple.c @@ -7,6 +7,15 @@ Tuple* initTuple() { return tpl; } + +Tuple* initTupleOfSize(size_t p_size) { + Tuple* tpl = malloc(sizeof(Tuple)); // Heap allocation + tpl->data = calloc(1, p_size); + tpl->size = p_size; + return tpl; +} + + void* getTupleCol(Tuple* tpl, size_t colOffset) { return tpl->data + colOffset; } diff --git a/src/executor/tuplebuffer.c b/src/executor/tuplebuffer.c index 3a70902..c3985f6 100644 --- a/src/executor/tuplebuffer.c +++ b/src/executor/tuplebuffer.c @@ -32,4 +32,11 @@ void freeTupleBuffer(TupleBuffer* buff) { Tuple* getTupleByIndex(TupleBuffer* buff, size_t idx) { return buff->tuples[idx]; +} + +size_t isTupleBufferEmpty(TupleBuffer* buff) { + if (buff->size > 0) { + return 0; + } + return 1; } \ No newline at end of file diff --git a/src/include/executor/tuple.h b/src/include/executor/tuple.h index 2e32012..91c0dea 100644 --- a/src/include/executor/tuple.h +++ b/src/include/executor/tuple.h @@ -12,6 +12,8 @@ typedef struct { Tuple* initTuple(); +Tuple* initTupleOfSize(size_t p_size); + void* getTupleCol(Tuple* tpl, size_t colOffset); void freeTuple(Tuple* tpl); \ No newline at end of file diff --git a/src/include/executor/tuplebuffer.h b/src/include/executor/tuplebuffer.h index f26839a..91c72ae 100644 --- a/src/include/executor/tuplebuffer.h +++ b/src/include/executor/tuplebuffer.h @@ -12,4 +12,4 @@ void resizeTupleBuffer(TupleBuffer* buff); void addTupleToBuffer(Tuple* tpl, TupleBuffer* buff); void freeTupleBuffer(TupleBuffer* buff); Tuple* getTupleByIndex(TupleBuffer* buff, size_t idx); - +size_t isTupleBufferEmpty(TupleBuffer* buff); diff --git a/src/include/operators/scanTDB.h b/src/include/operators/scanTDB.h index 8b97cdb..f5da46a 100644 --- a/src/include/operators/scanTDB.h +++ b/src/include/operators/scanTDB.h @@ -3,6 +3,7 @@ #include "../planner/planner.h" #include "../executor/executor.h" #include "../executor/tuple.h" +#include "../executor/tuplebuffer.h" #include Tuple* scanTDBGetTuple(Operator* op); \ No newline at end of file diff --git a/src/operators/aggregate.c b/src/operators/aggregate.c index 9122168..f1089e5 100644 --- a/src/operators/aggregate.c +++ b/src/operators/aggregate.c @@ -23,7 +23,7 @@ long doAverage(Operator* opToIterate, size_t colOffset) { if (tpl == NULL) { break; } - sum += *(long*) tpl->data + colOffset; + sum += *(long*) (tpl->data + colOffset); count++; }; long result = 0.0; @@ -44,7 +44,7 @@ long doSum(Operator* opToIterate, size_t colOffset) { if (tpl == NULL) { break; } - result += *(long*) tpl->data + colOffset; + result += *(long*) (tpl->data + colOffset); }; @@ -62,7 +62,7 @@ long doMax(Operator* opToIterate, size_t colOffset) { if (tpl == NULL) { break; } - tmp = *(long*) tpl->data + colOffset; + tmp = *(long*) (tpl->data + colOffset); result = tmp > result ? tmp : result; }; @@ -81,7 +81,7 @@ long doMin(Operator* opToIterate, size_t colOffset) { if (tpl == NULL) { break; } - tmp = *(long*) tpl->data + colOffset; + tmp = *(long*) (tpl->data + colOffset); result = tmp < result ? tmp : result; }; diff --git a/src/operators/scanTDB.c b/src/operators/scanTDB.c index cd09fb0..100e00a 100644 --- a/src/operators/scanTDB.c +++ b/src/operators/scanTDB.c @@ -46,16 +46,9 @@ Tuple* scanTDBGetTuple(Operator* op) { size_t bufferDataOffset = (op->info.scan.recordsInBuffer - 1) * op->info.scan.recordSize; op->info.scan.recordsInBuffer--; - // Write to bufferpool - if (op->iteratorTupleOffset == -1) { - op->iteratorTupleOffset = addToBufferPool(op->info.scan.buffer + bufferDataOffset, op->info.scan.recordSize); - } else { - copyToBufferPool(op->iteratorTupleOffset, op->info.scan.buffer + bufferDataOffset, op->info.scan.recordSize); - } - - Tuple* tpl = initTuple(); - tpl->data = op->info.scan.buffer + bufferDataOffset; + Tuple* tpl = initTupleOfSize(op->info.scan.recordSize); + memcpy(tpl->data, op->info.scan.buffer + bufferDataOffset, op->info.scan.recordSize); return tpl; } \ No newline at end of file diff --git a/src/planner/operators/scanTDB.c b/src/planner/operators/scanTDB.c index d0005cf..9b569d4 100644 --- a/src/planner/operators/scanTDB.c +++ b/src/planner/operators/scanTDB.c @@ -38,7 +38,6 @@ Operator* makeScanTDBOp(Node* node) { op->info.scan.fileRead = false; op->info.scan.recordsInBuffer = 0; op->iteratorTupleOffset = -1; - op->info.scan.columnOffsets[0] = 0; @@ -55,8 +54,8 @@ Operator* makeScanTDBOp(Node* node) { op->resultDescription.columnCount = tbldef.colCount; op->resultDescription.size = op->info.scan.recordSize; - op->info.scan.bufferSize = op->info.scan.recordSize * TDBSCANBUFFRECORDS; - op->info.scan.buffer = malloc(op->info.scan.bufferSize); + op->info.scan.bufferSize = op->info.scan.recordSize * TDBSCANBUFFRECORDS; + op->info.scan.buffer = malloc(op->info.scan.bufferSize); if (op->info.scan.buffer == NULL) { printf("Failed to allocate memory for scanTDB\n"); diff --git a/testi.csv b/testi.csv new file mode 100644 index 0000000..c4c625f --- /dev/null +++ b/testi.csv @@ -0,0 +1,215 @@ +unemployed +8471 +8361 +8119 +7931 +7765 +7808 +7828 +7544 +7130 +6984 +6774 +6683 +6525 +6228 +6018 +5840 +5719 +5649 +5659 +5323 +5027 +4817 +4629 +4572 +4478 +4308 +4197 +4049 +3921 +3958 +3996 +3744 +3660 +3497 +3392 +3459 +3373 +3308 +3243 +3249 +3233 +3383 +3410 +3419 +3493 +3583 +3779 +4045 +4295 +4433 +4607 +4714 +4900 +5159 +5391 +5358 +5351 +5364 +5389 +5467 +5527 +5403 +5364 +5315 +5259 +5351 +5494 +5420 +5341 +5182 +5119 +5194 +5281 +5418 +5420 +5555 +5737 +5748 +5899 +5818 +5668 +5850 +5921 +6137 +6242 +6283 +6315 +6491 +6636 +7013 +7373 +7508 +7600 +7858 +8086 +8573 +9041 +9381 +9644 +9919 +10181 +10732 +11069 +11106 +11453 +11663 +11947 +12598 +13118 +13409 +13800 +14243 +14675 +15583 +16116 +16222 +16356 +15726 +15790 +16350 +16883 +17204 +17503 +17620 +18374 +18778 +18944 +19141 +18852 +18290 +17914 +17893 +17934 +17337 +16466 +16050 +15858 +15983 +15973 +15576 +14888 +13789 +13055 +12698 +12440 +12124 +11758 +11545 +11309 +11372 +11364 +10980 +10714 +10673 +10556 +10671 +10639 +10442 +10328 +10265 +10297 +10572 +10777 +10618 +10565 +10363 +10331 +10546 +10619 +10636 +10843 +11197 +11587 +12439 +12853 +13033 +13535 +13857 +14439 +15406 +15980 +16730 +18785 +20460 +20858 +21261 +21570 +21226 +20678 +20262 +19745 +19655 +19255 +18849 +18038 +17137 +16875 +16871 +16850 +16308 +15919 +15575 +15325 +15450 +15451 +15413 +15454 +15613 +16583 +16814 +17118 +16871 +16678 +2234883 diff --git a/tmp.csv b/tmp.csv new file mode 100644 index 0000000..788d0f2 --- /dev/null +++ b/tmp.csv @@ -0,0 +1,3 @@ +long_term_unemployed;time +8413;2006-01-01 +8303;2006-02-01 From b1fe932da5661b390909e8fc6537ce7c4e7c934c Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Thu, 6 Mar 2025 22:42:56 +0200 Subject: [PATCH 08/29] fix memleak on aggregates --- src/operators/aggregate.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/operators/aggregate.c b/src/operators/aggregate.c index f1089e5..d81f0be 100644 --- a/src/operators/aggregate.c +++ b/src/operators/aggregate.c @@ -1,11 +1,12 @@ #include "../include/operators/aggregate.h" long doCount(Operator* opToIterate) { - Tuple* tpl = opToIterate->getTuple(opToIterate); int result = 0; - while (tpl != NULL) { - tpl = opToIterate->getTuple(opToIterate); + while (true) { + Tuple* tpl = opToIterate->getTuple(opToIterate); + if (tpl == NULL) break; result++; + freeTuple(tpl); }; return result; @@ -14,17 +15,17 @@ long doCount(Operator* opToIterate) { long doAverage(Operator* opToIterate, size_t colOffset) { - Tuple* tpl; long sum = 0; long count = 0; for (;;) { - tpl = opToIterate->getTuple(opToIterate); + Tuple* tpl = opToIterate->getTuple(opToIterate); if (tpl == NULL) { break; } sum += *(long*) (tpl->data + colOffset); count++; + freeTuple(tpl); }; long result = 0.0; if (count > 0) { @@ -36,16 +37,15 @@ long doAverage(Operator* opToIterate, size_t colOffset) { long doSum(Operator* opToIterate, size_t colOffset) { - Tuple* tpl ; long long result = 0; for (;;) { - tpl = opToIterate->getTuple(opToIterate); + Tuple* tpl = opToIterate->getTuple(opToIterate); if (tpl == NULL) { break; } result += *(long*) (tpl->data + colOffset); - + freeTuple(tpl); }; return result; @@ -54,16 +54,16 @@ long doSum(Operator* opToIterate, size_t colOffset) { long doMax(Operator* opToIterate, size_t colOffset) { - Tuple* tpl; long result = 0, tmp = 0; for (;;) { - tpl = opToIterate->getTuple(opToIterate); + Tuple* tpl = opToIterate->getTuple(opToIterate); if (tpl == NULL) { break; } tmp = *(long*) (tpl->data + colOffset); result = tmp > result ? tmp : result; + freeTuple(tpl); }; @@ -73,16 +73,16 @@ long doMax(Operator* opToIterate, size_t colOffset) { long doMin(Operator* opToIterate, size_t colOffset) { - Tuple* tpl; long result = __LONG_MAX__, tmp = 0; for (;;) { - tpl = opToIterate->getTuple(opToIterate); + Tuple* tpl = opToIterate->getTuple(opToIterate); if (tpl == NULL) { break; } tmp = *(long*) (tpl->data + colOffset); result = tmp < result ? tmp : result; + freeTuple(tpl); }; From 53bc9f4616f969cba1a409bc37a295989e7626dc Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Sun, 9 Mar 2025 09:28:39 +0200 Subject: [PATCH 09/29] refactor aggregate-operator --- src/operators/aggregate.c | 64 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/src/operators/aggregate.c b/src/operators/aggregate.c index d81f0be..ab34b54 100644 --- a/src/operators/aggregate.c +++ b/src/operators/aggregate.c @@ -89,6 +89,22 @@ long doMin(Operator* opToIterate, size_t colOffset) { return result; } +long count(long result, long num __attribute__((unused))) { + return result + 1; +} + +long max(long result, long num) { + return num > result ? num : result; +} + +long sum(long result, long num) { + return num + result; +} + +long min(long result, long num) { + return num < result ? num : result; +} + Tuple* aggregateGetTuple(Operator* op) { @@ -107,10 +123,12 @@ Tuple* aggregateGetTuple(Operator* op) { // } + size_t colOffset = op->child->resultDescription.pCols[op->info.aggregate.colToAggregate]; + // Build new tuple to store result - long result = 0; + /* switch(op->info.aggregate.aggtype) { case COUNT: result = doCount(op->child); @@ -131,6 +149,50 @@ Tuple* aggregateGetTuple(Operator* op) { printf("Aggregation type (%d) not implemented\n", op->info.aggregate.aggtype); exit(1); } + */ + long (*agg_fun)(long result, long num); + long result = 0, tmp = 0; + + + switch(op->info.aggregate.aggtype) { + case COUNT: + agg_fun = count; + break; + case SUM: + agg_fun = sum; + break; + case AVG: + agg_fun = sum; + break; + case MAX: + agg_fun = max; + break; + case MIN: + agg_fun = min; + result = __LONG_MAX__; + break; + default: + printf("Aggregation type (%d) not implemented\n", op->info.aggregate.aggtype); + exit(1); + } + + + size_t observations = 0; + for (;;) { + Tuple* tpl = op->child->getTuple(op->child); + if (tpl == NULL) { + break; + } + tmp = *(long*) (tpl->data + colOffset); + result = agg_fun(result, tmp); + freeTuple(tpl); + observations++; + }; + + if (op->info.aggregate.aggtype == AVG) { + result = result / observations; + } + op->resultDescription.columnCount = 1; op->resultDescription.pCols[0] = 0; From 8e375cee22e4b0b3f4913df3e8863bf01bf01fce Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Sun, 9 Mar 2025 09:29:53 +0200 Subject: [PATCH 10/29] cleanup --- src/operators/aggregate.c | 115 +------------------------------------- 1 file changed, 1 insertion(+), 114 deletions(-) diff --git a/src/operators/aggregate.c b/src/operators/aggregate.c index ab34b54..fe1a814 100644 --- a/src/operators/aggregate.c +++ b/src/operators/aggregate.c @@ -1,94 +1,5 @@ #include "../include/operators/aggregate.h" -long doCount(Operator* opToIterate) { - int result = 0; - while (true) { - Tuple* tpl = opToIterate->getTuple(opToIterate); - if (tpl == NULL) break; - result++; - freeTuple(tpl); - }; - - return result; -} - -long doAverage(Operator* opToIterate, size_t colOffset) { - - - long sum = 0; - long count = 0; - - for (;;) { - Tuple* tpl = opToIterate->getTuple(opToIterate); - if (tpl == NULL) { - break; - } - sum += *(long*) (tpl->data + colOffset); - count++; - freeTuple(tpl); - }; - long result = 0.0; - if (count > 0) { - result = sum / (double) count; - } - return result; -} - -long doSum(Operator* opToIterate, size_t colOffset) { - - - long long result = 0; - - for (;;) { - Tuple* tpl = opToIterate->getTuple(opToIterate); - if (tpl == NULL) { - break; - } - result += *(long*) (tpl->data + colOffset); - freeTuple(tpl); - }; - - return result; -} - -long doMax(Operator* opToIterate, size_t colOffset) { - - - long result = 0, tmp = 0; - - for (;;) { - Tuple* tpl = opToIterate->getTuple(opToIterate); - if (tpl == NULL) { - break; - } - tmp = *(long*) (tpl->data + colOffset); - result = tmp > result ? tmp : result; - freeTuple(tpl); - - }; - - return result; -} - -long doMin(Operator* opToIterate, size_t colOffset) { - - - long result = __LONG_MAX__, tmp = 0; - - for (;;) { - Tuple* tpl = opToIterate->getTuple(opToIterate); - if (tpl == NULL) { - break; - } - tmp = *(long*) (tpl->data + colOffset); - result = tmp < result ? tmp : result; - freeTuple(tpl); - - }; - - return result; -} - long count(long result, long num __attribute__((unused))) { return result + 1; } @@ -125,31 +36,7 @@ Tuple* aggregateGetTuple(Operator* op) { size_t colOffset = op->child->resultDescription.pCols[op->info.aggregate.colToAggregate]; - // Build new tuple to store result - - /* - switch(op->info.aggregate.aggtype) { - case COUNT: - result = doCount(op->child); - break; - case SUM: - result = doSum(op->child, op->child->resultDescription.pCols[op->info.aggregate.colToAggregate]); - break; - case AVG: - result = doAverage(op->child, op->child->resultDescription.pCols[op->info.aggregate.colToAggregate]); - break; - case MAX: - result = doMax(op->child, op->child->resultDescription.pCols[op->info.aggregate.colToAggregate]); - break; - case MIN: - result = doMin(op->child, op->child->resultDescription.pCols[op->info.aggregate.colToAggregate]); - break; - default: - printf("Aggregation type (%d) not implemented\n", op->info.aggregate.aggtype); - exit(1); - } - */ long (*agg_fun)(long result, long num); long result = 0, tmp = 0; @@ -162,7 +49,7 @@ Tuple* aggregateGetTuple(Operator* op) { agg_fun = sum; break; case AVG: - agg_fun = sum; + agg_fun = sum; // See below why break; case MAX: agg_fun = max; From d8780478bc40d5aa1acac9f6c304f11742afa37a Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Sun, 9 Mar 2025 10:24:23 +0200 Subject: [PATCH 11/29] pass pointers to tpls --- src/executor/executor.c | 9 ++++--- src/executor/tuple.c | 8 ++++++ src/include/executor/tuple.h | 6 ++++- src/include/operators/aggregate.h | 2 +- src/include/operators/filter.h | 2 +- src/include/operators/join.h | 2 +- src/include/operators/project.h | 2 +- src/include/operators/scan.h | 2 +- src/include/operators/scanTDB.h | 2 +- src/include/planner/planner.h | 2 +- src/operators/aggregate.c | 26 ++++++++++-------- src/operators/filter.c | 11 +++----- src/operators/join.c | 44 ++++++++++++++++++------------- src/operators/project.c | 4 +-- src/operators/scan.c | 7 +++-- src/operators/scanTDB.c | 10 +++---- 16 files changed, 78 insertions(+), 61 deletions(-) diff --git a/src/executor/executor.c b/src/executor/executor.c index 2714c4a..e96b5dd 100644 --- a/src/executor/executor.c +++ b/src/executor/executor.c @@ -86,15 +86,16 @@ void execute(Operator* op, bool printColNames, void (*tupleHandler)(Tuple* tpl)) } // Get tuples one by one - Tuple* tpl; + Tuple* tpl = initTupleOfSize(500); // TODO no magic constants for (;;) { - tpl = op->getTuple(op); - if (tpl == NULL) break; + op->getTuple(op, tpl); + if (isTupleEmpty(tpl)) break; tupleHandler(tpl); - freeTuple(tpl); }; + freeTuple(tpl); + free(buffpool->pool); free(buffpool); diff --git a/src/executor/tuple.c b/src/executor/tuple.c index 1667c2e..b313e76 100644 --- a/src/executor/tuple.c +++ b/src/executor/tuple.c @@ -25,4 +25,12 @@ void freeTuple(Tuple* tpl) { free(tpl->data); } free(tpl); +} + +size_t isTupleEmpty(Tuple* tpl) { + return tpl->size == 0 ? 1 : 0; +} + +void markTupleAsEmpty(Tuple* tpl) { + tpl->size = 0; } \ No newline at end of file diff --git a/src/include/executor/tuple.h b/src/include/executor/tuple.h index 91c0dea..935d03b 100644 --- a/src/include/executor/tuple.h +++ b/src/include/executor/tuple.h @@ -16,4 +16,8 @@ Tuple* initTupleOfSize(size_t p_size); void* getTupleCol(Tuple* tpl, size_t colOffset); -void freeTuple(Tuple* tpl); \ No newline at end of file +void freeTuple(Tuple* tpl); + +size_t isTupleEmpty(Tuple* tpl); + +void markTupleAsEmpty(Tuple* tpl); diff --git a/src/include/operators/aggregate.h b/src/include/operators/aggregate.h index fd8f2db..cd58290 100644 --- a/src/include/operators/aggregate.h +++ b/src/include/operators/aggregate.h @@ -6,4 +6,4 @@ #include "../executor/tuple.h" -Tuple* aggregateGetTuple(Operator* op); \ No newline at end of file +void aggregateGetTuple(Operator* op, Tuple* tpl); \ No newline at end of file diff --git a/src/include/operators/filter.h b/src/include/operators/filter.h index f4ff59e..f3bd5d8 100644 --- a/src/include/operators/filter.h +++ b/src/include/operators/filter.h @@ -4,5 +4,5 @@ #include "../planner/planner.h" #include "../executor/tuple.h" -Tuple* filterGetTuple(Operator* op); +void filterGetTuple(Operator* op, Tuple* tpl); bool evaluateTuplesAgainstFilterOps(Tuple* tpl1, Tuple* tpl2, Operator* op); \ No newline at end of file diff --git a/src/include/operators/join.h b/src/include/operators/join.h index 7e2bf7e..e54a6da 100644 --- a/src/include/operators/join.h +++ b/src/include/operators/join.h @@ -6,4 +6,4 @@ #include "../executor/tuplebuffer.h" -Tuple* joinGetTuple(Operator* op); \ No newline at end of file +void joinGetTuple(Operator* op, Tuple* tpl); \ No newline at end of file diff --git a/src/include/operators/project.h b/src/include/operators/project.h index 6a228d9..46bef38 100644 --- a/src/include/operators/project.h +++ b/src/include/operators/project.h @@ -3,4 +3,4 @@ #include "../planner/planner.h" #include "../executor/tuple.h" -Tuple* projectGetTuple(Operator* op); \ No newline at end of file +void projectGetTuple(Operator* op, Tuple* tpl); \ No newline at end of file diff --git a/src/include/operators/scan.h b/src/include/operators/scan.h index f43aa63..10ab0bc 100644 --- a/src/include/operators/scan.h +++ b/src/include/operators/scan.h @@ -4,4 +4,4 @@ #include "../executor/executor.h" #include "../executor/tuple.h" -Tuple* scanGetTuple(Operator* op); \ No newline at end of file +void scanGetTuple(Operator* op, Tuple* tpl); \ No newline at end of file diff --git a/src/include/operators/scanTDB.h b/src/include/operators/scanTDB.h index f5da46a..7f8b6a9 100644 --- a/src/include/operators/scanTDB.h +++ b/src/include/operators/scanTDB.h @@ -6,4 +6,4 @@ #include "../executor/tuplebuffer.h" #include -Tuple* scanTDBGetTuple(Operator* op); \ No newline at end of file +void scanTDBGetTuple(Operator* op, Tuple* tpl); \ No newline at end of file diff --git a/src/include/planner/planner.h b/src/include/planner/planner.h index edbb075..df3efb2 100644 --- a/src/include/planner/planner.h +++ b/src/include/planner/planner.h @@ -128,7 +128,7 @@ typedef struct Operator { ResultSet resultDescription; int iteratorTupleOffset; struct Operator* child; - Tuple* (*getTuple) (struct Operator* op); + void (*getTuple) (struct Operator* op, Tuple* tpl); } Operator; void freeQueryplan(Operator *node); diff --git a/src/operators/aggregate.c b/src/operators/aggregate.c index fe1a814..395b1d7 100644 --- a/src/operators/aggregate.c +++ b/src/operators/aggregate.c @@ -18,13 +18,14 @@ long min(long result, long num) { -Tuple* aggregateGetTuple(Operator* op) { +void aggregateGetTuple(Operator* op, Tuple* tpl) { checkPtrNotNull(op->child, "OP_AGGREGATE has no child."); checkPtrNotNull(op->child->getTuple, "Child of OP_AGGREGATE has no getTuple-method."); if (op->info.aggregate.aggregationDone) { - return NULL; + markTupleAsEmpty(tpl); + return; } // TODO: @@ -65,17 +66,23 @@ Tuple* aggregateGetTuple(Operator* op) { size_t observations = 0; + + Tuple* tmpTpl = initTupleOfSize(500); // TODO no magic + for (;;) { - Tuple* tpl = op->child->getTuple(op->child); - if (tpl == NULL) { + + op->child->getTuple(op->child, tmpTpl); + if (isTupleEmpty(tmpTpl)) { break; } - tmp = *(long*) (tpl->data + colOffset); + tmp = *(long*) (tmpTpl->data + colOffset); result = agg_fun(result, tmp); - freeTuple(tpl); observations++; }; + freeTuple(tmpTpl); + + if (op->info.aggregate.aggtype == AVG) { result = result / observations; } @@ -84,11 +91,8 @@ Tuple* aggregateGetTuple(Operator* op) { op->resultDescription.columnCount = 1; op->resultDescription.pCols[0] = 0; op->info.aggregate.aggregationDone = true; + - Tuple* tpl = initTuple(); - long* res_ptr = malloc(sizeof(result)); - *res_ptr = result; - tpl->data = res_ptr; + *(long*)(tpl->data) = result; - return tpl; } diff --git a/src/operators/filter.c b/src/operators/filter.c index 13d9d54..1d8b587 100644 --- a/src/operators/filter.c +++ b/src/operators/filter.c @@ -176,7 +176,7 @@ bool evaluateTuplesAgainstFilterOps(Tuple* tpl1, Tuple* tpl2, Operator* op) { return rtrnValue; } -Tuple* filterGetTuple(Operator* op) { +void filterGetTuple(Operator* op, Tuple* tpl) { if (op == NULL) { printf("FILTER_OP: Passed a NULL-pointer to filterGetTuple\n"); @@ -197,15 +197,12 @@ Tuple* filterGetTuple(Operator* op) { exit(1); } - - Tuple* tpl = 0; - while (true) { /* Get new tuples until found something that passes the filter */ - tpl = op->child->getTuple(op->child); + op->child->getTuple(op->child, tpl); - if (tpl == NULL) { + if (isTupleEmpty(tpl)) { break; } @@ -213,6 +210,4 @@ Tuple* filterGetTuple(Operator* op) { } - - return tpl; } diff --git a/src/operators/join.c b/src/operators/join.c index eca8e0a..5b839e0 100644 --- a/src/operators/join.c +++ b/src/operators/join.c @@ -17,8 +17,7 @@ void concatTuples(Tuple* returnTpl, Tuple* leftTpl, Tuple* rightTpl, ResultSet* returnTpl->data = address; } -Tuple* joinGetTuple(Operator* op) { - +void joinGetTuple(Operator* op, Tuple* tpl) { if ( op->info.join.left == NULL || op->info.join.right == NULL @@ -45,10 +44,10 @@ Tuple* joinGetTuple(Operator* op) { Tuple* rightTuple; // This is only entered first time the operator is called while (!op->info.join.rightTuplesCollected) { - - rightTuple = op->info.join.right->getTuple(op->info.join.right); + rightTuple = initTupleOfSize(500); // TODO no magic + op->info.join.right->getTuple(op->info.join.right, rightTuple); - if (rightTuple == NULL) { + if (isTupleEmpty(rightTuple)) { op->info.join.rightTuplesCollected = true; continue; } @@ -62,46 +61,53 @@ Tuple* joinGetTuple(Operator* op) { } } + + // Nested join loop // For each tuple if left relation // For each tuple in right relation // if join_predicates(left,right) return tuple(left,right) - if (op->info.join.leftTuple == NULL) { - op->info.join.leftTuple = op->info.join.left->getTuple(op->info.join.left); - } + op->info.join.leftTuple = initTupleOfSize(500); - Tuple* leftTuple = op->info.join.leftTuple; + if (isTupleEmpty(op->info.join.leftTuple)) { + op->info.join.left->getTuple(op->info.join.left, op->info.join.leftTuple); + } do { if (op->info.join.rightTupleIdx >= op->info.join.rightTupleCount) { op->info.join.rightTupleIdx = 0; - freeTuple(leftTuple); - op->info.join.leftTuple = op->info.join.left->getTuple(op->info.join.left); - leftTuple = op->info.join.leftTuple; + op->info.join.left->getTuple(op->info.join.left, op->info.join.leftTuple); + if (isTupleEmpty(op->info.join.leftTuple)) { + break; + } + continue; } rightTuple = getTupleByIndex(op->info.join.rightTuples, op->info.join.rightTupleIdx++); - if (evaluateTuplesAgainstFilterOps(leftTuple, rightTuple, op->info.join.filter)) { - Tuple* newTuple = initTuple(); + if (evaluateTuplesAgainstFilterOps(op->info.join.leftTuple, rightTuple, op->info.join.filter)) { // Create a new tuple by concating the tuples concatTuples( - newTuple, - leftTuple, + tpl, + op->info.join.leftTuple, rightTuple, &op->info.join.left->resultDescription, &op->info.join.right->resultDescription ); - return newTuple; + + return; } - } while(leftTuple != NULL); + } while(!isTupleEmpty(op->info.join.leftTuple)); // Join complete, we can free the buffer and the tuples associated freeTupleBuffer(op->info.join.rightTuples); - return NULL; + // freeTuple(op->info.join.leftTuple); + // freeTuple(rightTuple); + markTupleAsEmpty(tpl); + } diff --git a/src/operators/project.c b/src/operators/project.c index 858c8ab..2a498aa 100644 --- a/src/operators/project.c +++ b/src/operators/project.c @@ -1,6 +1,6 @@ #include "../include/operators/project.h" -Tuple* projectGetTuple(Operator* op) { +void projectGetTuple(Operator* op, Tuple* tpl) { checkPtrNotNull(op->child, "OP_PROJECT has no child"); checkPtrNotNull(op->child->getTuple, "Child of OP_PROJECT has no getTuple-method"); @@ -13,5 +13,5 @@ Tuple* projectGetTuple(Operator* op) { This is an unfortunate extra function call :( */ - return op->child->getTuple(op->child); + op->child->getTuple(op->child, tpl); } \ No newline at end of file diff --git a/src/operators/scan.c b/src/operators/scan.c index abf5a97..c56d4f6 100644 --- a/src/operators/scan.c +++ b/src/operators/scan.c @@ -1,6 +1,6 @@ #include "../include/operators/scan.h" -Tuple* scanGetTuple(Operator* op) { +void scanGetTuple(Operator* op, Tuple* tpl) { checkPtrNotNull(op, "NULL pointer passed to scanGetTuple"); @@ -24,7 +24,8 @@ Tuple* scanGetTuple(Operator* op) { if (line == NULL) { free(lineBuffer); fclose(op->info.scan.tablefile); - return NULL; + markTupleAsEmpty(tpl); + return; } @@ -140,9 +141,7 @@ Tuple* scanGetTuple(Operator* op) { free(lineBuffer); - Tuple* tpl = initTuple(); tpl->data = diskBuffer; - return tpl; } diff --git a/src/operators/scanTDB.c b/src/operators/scanTDB.c index 100e00a..5a44475 100644 --- a/src/operators/scanTDB.c +++ b/src/operators/scanTDB.c @@ -29,26 +29,26 @@ void fillBuffer(Operator* op) { } -Tuple* scanTDBGetTuple(Operator* op) { +void scanTDBGetTuple(Operator* op, Tuple* tpl) { checkPtrNotNull(op, "NULL pointer passed to scanTDBGetTuple"); if (op->info.scan.fileRead && op->info.scan.recordsInBuffer == 0) { free(op->info.scan.buffer); - return NULL; + markTupleAsEmpty(tpl); + return; } if (op->info.scan.recordsInBuffer == 0) { fillBuffer(op); - return scanTDBGetTuple(op); + scanTDBGetTuple(op, tpl); + return; } size_t bufferDataOffset = (op->info.scan.recordsInBuffer - 1) * op->info.scan.recordSize; op->info.scan.recordsInBuffer--; - Tuple* tpl = initTupleOfSize(op->info.scan.recordSize); memcpy(tpl->data, op->info.scan.buffer + bufferDataOffset, op->info.scan.recordSize); - return tpl; } \ No newline at end of file From 500e85fbf656127be9a339b263387d6693f474cd Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Sun, 9 Mar 2025 19:54:40 +0200 Subject: [PATCH 12/29] do not allocate on every call.. --- src/operators/scan.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/operators/scan.c b/src/operators/scan.c index c56d4f6..ea34d39 100644 --- a/src/operators/scan.c +++ b/src/operators/scan.c @@ -45,7 +45,7 @@ void scanGetTuple(Operator* op, Tuple* tpl) { size_t tplSize = 0; - void* diskBuffer = calloc(1, SCANTUPLESIZE); + void* diskBuffer = tpl->data; void* diskBufferCursor = diskBuffer; checkPtrNotNull(diskBuffer, "could not allocate buffer for scan"); @@ -140,8 +140,5 @@ void scanGetTuple(Operator* op, Tuple* tpl) { op->resultDescription.size = tplSize; free(lineBuffer); - - tpl->data = diskBuffer; - } From 52ad7fb503c77114dd223956efd274fadcdcb38c Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Sun, 9 Mar 2025 21:43:47 +0200 Subject: [PATCH 13/29] add EXPLAIN; use single alloc for join buffering --- src/executor/executeStatement.c | 3 ++ src/executor/statements/explain.c | 68 ++++++++++++++++++++++++++++++ src/executor/tuple.c | 4 +- src/executor/tuplebuffer.c | 53 ++++++++++++++++++----- src/include/executor/executor.h | 3 +- src/include/executor/tuplebuffer.h | 10 ++++- src/include/parser/parsetree.h | 1 + src/operators/join.c | 12 +++--- src/parser/parser.c | 13 ++++++ 9 files changed, 144 insertions(+), 23 deletions(-) create mode 100644 src/executor/statements/explain.c diff --git a/src/executor/executeStatement.c b/src/executor/executeStatement.c index ace3cca..e6fbd79 100644 --- a/src/executor/executeStatement.c +++ b/src/executor/executeStatement.c @@ -16,6 +16,9 @@ void executeStatement(Node* node) { case STMTINSERT: executeInsert(node); break; + case STMTEXPLAIN: + executeExplain(node); + break; default: printf("Don't know how execute statement of type %d\n", node->type); exit(1); diff --git a/src/executor/statements/explain.c b/src/executor/statements/explain.c new file mode 100644 index 0000000..01fad91 --- /dev/null +++ b/src/executor/statements/explain.c @@ -0,0 +1,68 @@ +#include "../../include/executor/executor.h" + +void printOp(Operator* op) { + + switch (op->type) { + case OP_SCANTDB: + printf("OP_SCANTDB"); + break; + case OP_SCAN: + printf("OP_SCAN"); + break; + case OP_PROJECT: + printf("OP_PROJECT"); + break; + case OP_FILTER: + printf("OP_FILTER"); + break; + case OP_JOIN: + printf("OP_JOIN"); + break; + case OP_AGGREGATE: + printf("OP_AGGREGATE"); + break; + default: + printf("Unknown operation type"); + break; + } + +} + +void explainOp(Operator* op) { + + if (!op) return; + + printOp(op); + printf(", size: %ld\n", op->resultDescription.size); + if (op->child) { + + if (op->type == OP_FILTER) { + explainOp(op->info.filter.next); + } + + if (op->type == OP_JOIN) { + explainOp(op->info.join.filter); + explainOp(op->info.join.left); + explainOp(op->info.join.right); + } + + explainOp(op->child); + } + +} + + +void executeExplain(Node* node) { + + /* Plan the query */ + Operator* queryplan = planQuery(node->next); + + /* Print the query plan */ + printf("******* EXPLAIN **********\n"); + explainOp(queryplan); + printf("**************************\n"); + + + freeQueryplan(queryplan); +} + diff --git a/src/executor/tuple.c b/src/executor/tuple.c index b313e76..90b29ac 100644 --- a/src/executor/tuple.c +++ b/src/executor/tuple.c @@ -2,14 +2,14 @@ Tuple* initTuple() { - Tuple* tpl = malloc(sizeof(Tuple)); // Heap allocation + Tuple* tpl = malloc(sizeof(Tuple)); tpl->size = 0; return tpl; } Tuple* initTupleOfSize(size_t p_size) { - Tuple* tpl = malloc(sizeof(Tuple)); // Heap allocation + Tuple* tpl = malloc(sizeof(Tuple)); tpl->data = calloc(1, p_size); tpl->size = p_size; return tpl; diff --git a/src/executor/tuplebuffer.c b/src/executor/tuplebuffer.c index c3985f6..aa9dd88 100644 --- a/src/executor/tuplebuffer.c +++ b/src/executor/tuplebuffer.c @@ -1,37 +1,68 @@ #include "../include/executor/tuplebuffer.h" -TupleBuffer* initTupleBuffer(size_t p_capacity) { + +TupleBuffer* initTupleBuffer(size_t p_capacity, size_t p_tuplesize) { TupleBuffer* buff = malloc(sizeof(TupleBuffer)); - buff->capacity = p_capacity; - buff->tuples = malloc(p_capacity * sizeof(Tuple*)); - buff->size = 0; - return buff; + buff->capacity = p_capacity; + buff->tupledatasize = p_tuplesize; + buff->tuples = malloc(p_capacity * sizeof(Tuple)); + buff->data = malloc(p_capacity * p_tuplesize); + buff->size = 0; + buff->cursor = 0; + return buff; } void resizeTupleBuffer(TupleBuffer* buff) { + buff->capacity *= 2; - buff->tuples = realloc(buff->tuples, buff->capacity * sizeof(Tuple)); + + Tuple* tmpTpl = realloc(buff->tuples, buff->capacity * sizeof(Tuple)); + if (tmpTpl == NULL) { + printf("ERROR: Could resize tuplebuffer from %ld to %ld\n", buff->capacity, buff->capacity * 2); + exit(1); + } + + buff->tuples = tmpTpl; + void* tmpData = realloc(buff->data, buff->capacity * buff->tupledatasize); + if (tmpData == NULL) { + printf("ERROR: Could resize tuplebuffer data from %ld to %ld\n", buff->capacity * buff->tupledatasize, buff->capacity * buff->tupledatasize * 2); + exit(1); + } + + buff->data = tmpData; } -void addTupleToBuffer(Tuple* tpl, TupleBuffer* buff) { - if (buff->size == buff->capacity) { + +Tuple* getTupleFromBuffer(TupleBuffer* buff) { + + if (buff->size >= (buff->capacity-1)) { resizeTupleBuffer(buff); } - buff->tuples[buff->size++] = tpl; + Tuple* tpl = &buff->tuples[buff->size++]; + tpl->size = buff->tupledatasize; + tpl->data = buff->data + buff->cursor; + buff->cursor += buff->tupledatasize; + return tpl; } +void updateTupleDataptr(TupleBuffer* buff, Tuple* tpl, size_t idx) { + tpl->data = buff->data + (idx * buff->tupledatasize); +} + + void freeTupleBuffer(TupleBuffer* buff) { - for (size_t i = 0; i < buff->size; i++) freeTuple(buff->tuples[i]); free(buff->tuples); + free(buff->data); free(buff); } Tuple* getTupleByIndex(TupleBuffer* buff, size_t idx) { - return buff->tuples[idx]; + updateTupleDataptr(buff, &buff->tuples[idx], idx); + return &buff->tuples[idx]; } size_t isTupleBufferEmpty(TupleBuffer* buff) { diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index a0abaee..b382411 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -22,4 +22,5 @@ extern Bufferpool* buffpool; void execute(Operator* op, bool printColNames, void (*tupleHandler)(Tuple* tpl)); void executeStatement(Node* node); void executeCreateTable(Node* node); -void executeInsert(Node* node); \ No newline at end of file +void executeInsert(Node* node); +void executeExplain(Node* node); \ No newline at end of file diff --git a/src/include/executor/tuplebuffer.h b/src/include/executor/tuplebuffer.h index 91c72ae..4dc2ce2 100644 --- a/src/include/executor/tuplebuffer.h +++ b/src/include/executor/tuplebuffer.h @@ -1,15 +1,21 @@ #pragma once #include "tuple.h" +#include typedef struct { - Tuple** tuples; + Tuple* tuples; + void* data; + size_t cursor; + size_t tupledatasize; size_t size; size_t capacity; } TupleBuffer; -TupleBuffer* initTupleBuffer(size_t p_capacity); +TupleBuffer* initTupleBuffer(size_t p_capacity, size_t p_tuplesize); void resizeTupleBuffer(TupleBuffer* buff); void addTupleToBuffer(Tuple* tpl, TupleBuffer* buff); void freeTupleBuffer(TupleBuffer* buff); +void updateTupleDataptr(TupleBuffer* buff, Tuple* tpl, size_t idx); Tuple* getTupleByIndex(TupleBuffer* buff, size_t idx); +Tuple* getTupleFromBuffer(TupleBuffer* buff); size_t isTupleBufferEmpty(TupleBuffer* buff); diff --git a/src/include/parser/parsetree.h b/src/include/parser/parsetree.h index 7b6e84f..0482b18 100644 --- a/src/include/parser/parsetree.h +++ b/src/include/parser/parsetree.h @@ -32,6 +32,7 @@ enum nodeType { AND, STMTCREATE, STMTINSERT, + STMTEXPLAIN, TABLE, OR }; diff --git a/src/operators/join.c b/src/operators/join.c index 5b839e0..45332e6 100644 --- a/src/operators/join.c +++ b/src/operators/join.c @@ -38,13 +38,15 @@ void joinGetTuple(Operator* op, Tuple* tpl) { */ if (!op->info.join.rightTuples) { - op->info.join.rightTuples = initTupleBuffer(JOINPTRBUFFER); + op->info.join.rightTuples = initTupleBuffer(JOINPTRBUFFER, 500); // TODO no magic } Tuple* rightTuple; // This is only entered first time the operator is called while (!op->info.join.rightTuplesCollected) { - rightTuple = initTupleOfSize(500); // TODO no magic + + rightTuple = getTupleFromBuffer(op->info.join.rightTuples); + op->info.join.right->getTuple(op->info.join.right, rightTuple); if (isTupleEmpty(rightTuple)) { @@ -52,7 +54,6 @@ void joinGetTuple(Operator* op, Tuple* tpl) { continue; } - addTupleToBuffer(rightTuple, op->info.join.rightTuples); op->info.join.rightTupleCount++; if (op->info.join.rightTupleCount >= JOINPTRBUFFER) { @@ -61,7 +62,6 @@ void joinGetTuple(Operator* op, Tuple* tpl) { } } - // Nested join loop // For each tuple if left relation @@ -88,7 +88,6 @@ void joinGetTuple(Operator* op, Tuple* tpl) { rightTuple = getTupleByIndex(op->info.join.rightTuples, op->info.join.rightTupleIdx++); - if (evaluateTuplesAgainstFilterOps(op->info.join.leftTuple, rightTuple, op->info.join.filter)) { // Create a new tuple by concating the tuples concatTuples( @@ -105,8 +104,7 @@ void joinGetTuple(Operator* op, Tuple* tpl) { // Join complete, we can free the buffer and the tuples associated freeTupleBuffer(op->info.join.rightTuples); - // freeTuple(op->info.join.leftTuple); - // freeTuple(rightTuple); + freeTuple(op->info.join.leftTuple); markTupleAsEmpty(tpl); } diff --git a/src/parser/parser.c b/src/parser/parser.c index 9c85243..b45325d 100644 --- a/src/parser/parser.c +++ b/src/parser/parser.c @@ -442,6 +442,13 @@ void insert() { } +void explain() { + keyword("EXPLAIN", STMTEXPLAIN); + skipWhite(); + query(); +} + + size_t parse(char* input, Node* p_root) { root = p_root; @@ -451,6 +458,12 @@ size_t parse(char* input, Node* p_root) { qsize = strlen(rawSql); getNextChar(); + + if (peekWordMatches("EXPLAIN")) { + explain(); + return nodeCount; + } + if (peekWordMatches("CREATE")) { create(); return nodeCount; From e58c8c2c4d08333bc4d7bbb6b7a62a1819fadab8 Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Mon, 10 Mar 2025 21:01:45 +0200 Subject: [PATCH 14/29] remove bufferpool entirely --- src/bufferpool/bufferpool.c | 84 ----------------------------- src/executor/executor.c | 13 +---- src/include/bufferpool/bufferpool.h | 40 -------------- src/include/const.h | 6 +-- src/include/executor/executor.h | 4 -- src/include/operators/aggregate.h | 1 - src/include/operators/filter.h | 1 - src/include/operators/join.h | 1 - src/include/operators/project.h | 1 - src/include/operators/scan.h | 1 - src/include/operators/scanTDB.h | 1 - src/operators/aggregate.c | 2 +- src/operators/join.c | 9 +--- src/squel.c | 1 - 14 files changed, 8 insertions(+), 157 deletions(-) delete mode 100644 src/bufferpool/bufferpool.c delete mode 100644 src/include/bufferpool/bufferpool.h diff --git a/src/bufferpool/bufferpool.c b/src/bufferpool/bufferpool.c deleted file mode 100644 index d843741..0000000 --- a/src/bufferpool/bufferpool.c +++ /dev/null @@ -1,84 +0,0 @@ -#include "../include/bufferpool/bufferpool.h" -#include "../include/planner/planner.h" - - -void growBufferpoolIfNeedBe(size_t size) { - - if (buffpool->used + (long) size < buffpool->capacity) return; - long oldCapacity = buffpool->capacity; - buffpool->capacity *= 2; - buffpool->pool = realloc(buffpool->pool, buffpool->capacity); - checkPtrNotNull(buffpool->pool, "Could not allocate memory for bufferpool"); - memset(buffpool->pool + oldCapacity, 0, oldCapacity); - -} - -void copyToBufferPool(int destinationoffset, void* source, size_t size) { - growBufferpoolIfNeedBe(size); - void* destination = getTuple(destinationoffset); - memcpy(destination, source, size); -} - -int addToBufferPoolFromOffset(int originOffset, size_t size) { - growBufferpoolIfNeedBe(size); - void* target = getNextFreeSlot(); - memcpy(target, getTuple(originOffset), size); - int offset = buffpool->used; - buffpool->used += size; - return offset; -} - - -int addToBufferPool(void* source, size_t size) { - growBufferpoolIfNeedBe(size); - void* target = getNextFreeSlot(); - memcpy(target, source, size); - int offset = buffpool->used; - buffpool->used += size; - return offset; -} - -void reserveSpaceBufferpool(int offset, size_t size) { - growBufferpoolIfNeedBe(size); - void* from = getTuple(offset); - memset(from, 0, size); - buffpool->used += size; -} - - -int getCurrentOffset() { return buffpool->used; } - -void* getCol(int pooloffset, size_t colOffset) { - return buffpool->pool + pooloffset + colOffset; -} - -void* getTuple(int pooloffset) { - return buffpool->pool + pooloffset; -} - -void* getNextFreeSlot() { - return buffpool->pool + buffpool->used; -} - -void getColAsChar(char* target, int pooloffset, size_t colOffset, Datatype type) { - if (type == DTYPE_STR) { - strcpy(target, getCol(pooloffset, colOffset)); - return; - } - if (type == DTYPE_INT) { - char tmp[CHARMAXSIZE]; - sprintf(tmp, "%d", *(int*) getCol(pooloffset, colOffset)); - memcpy(target, tmp, strlen(tmp)); - return; - } - if (type == DTYPE_LONG) { - char tmp[CHARMAXSIZE]; - sprintf(tmp, "%ld", *(long*) getCol(pooloffset, colOffset)); - memcpy(target, tmp, strlen(tmp)); - return; - } - printf("Don't know how to represent type %d as char\n", type); - exit(1); -} - - diff --git a/src/executor/executor.c b/src/executor/executor.c index e96b5dd..f595b84 100644 --- a/src/executor/executor.c +++ b/src/executor/executor.c @@ -1,7 +1,7 @@ #include "../include/executor/executor.h" #include "../include/executor/tuple.h" -Bufferpool* buffpool; + void assignGetTupleFunction(Operator *op) { @@ -35,9 +35,6 @@ void assignGetTupleFunction(Operator *op) { } - - - void doAssignGetTupleFunction(Operator* p_op) { if (p_op == NULL) { @@ -63,10 +60,6 @@ void execute(Operator* op, bool printColNames, void (*tupleHandler)(Tuple* tpl)) return; } - buffpool = calloc(1, sizeof(Bufferpool)); - buffpool->pool = calloc(BUFFERPOOLSIZE, 1); - buffpool->capacity = BUFFERPOOLSIZE; - buffpool->used = 0; doAssignGetTupleFunction(op); @@ -86,7 +79,7 @@ void execute(Operator* op, bool printColNames, void (*tupleHandler)(Tuple* tpl)) } // Get tuples one by one - Tuple* tpl = initTupleOfSize(500); // TODO no magic constants + Tuple* tpl = initTupleOfSize(TUPLESIZE); for (;;) { op->getTuple(op, tpl); if (isTupleEmpty(tpl)) break; @@ -97,6 +90,4 @@ void execute(Operator* op, bool printColNames, void (*tupleHandler)(Tuple* tpl)) freeTuple(tpl); - free(buffpool->pool); - free(buffpool); } diff --git a/src/include/bufferpool/bufferpool.h b/src/include/bufferpool/bufferpool.h deleted file mode 100644 index abc0797..0000000 --- a/src/include/bufferpool/bufferpool.h +++ /dev/null @@ -1,40 +0,0 @@ -#pragma once -#include -#include "../const.h" -#include "../parser/parsetree.h" - -/* - The bufferpool maintains a pool for tuples - to which tuples can be added and removed (freed) from. - - A tuple is struct with a fixed size despite - the fact that the data is likely not fixed in size. - - A tuple contains it's data as a string. Columns - are pointers to the string. - - The rest of the system passes around pointers to the buffer pool. - -*/ - - -typedef struct { - void* pool; - long capacity; - long used; -} Bufferpool; - -extern Bufferpool* buffpool; - - -void* getNextFreeSlot(); -void getColAsChar(char* target, int pooloffset, size_t colIdx, Datatype type); -void copyToBufferPool(int destinationoffset, void* source, size_t size); -int addToBufferPool(void* source, size_t size); -int addToBufferPoolFromOffset(int offset, size_t size); -void reserveSpaceBufferpool(int offset, size_t size); - -int getCurrentOffset(); - -void* getTuple(int pooloffset); -void* getCol(int pooloffset, size_t colOffset); \ No newline at end of file diff --git a/src/include/const.h b/src/include/const.h index 74e230e..625e12d 100644 --- a/src/include/const.h +++ b/src/include/const.h @@ -14,9 +14,9 @@ #define JOINTUPLESIZE 1000 #define SCANTUPLESIZE 2000 -// Bufferpool -#define BUFFERPOOLSIZE 100000 -#define JOINPTRBUFFER 100000 +// Query execution +#define JOINBUFFSIZE 100000 +#define TUPLESIZE 500 // Define max size (in chars) of expressions and query #define MAXQUERYSIZE 1000 diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index b382411..08e147d 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -4,7 +4,6 @@ #include "../const.h" #include "../parser/parsetree.h" #include "../planner/planner.h" -#include "../bufferpool/bufferpool.h" #include "../operators/join.h" #include "../operators/filter.h" #include "../operators/scan.h" @@ -13,12 +12,9 @@ #include "../operators/aggregate.h" #include "../io/tdb.h" - extern char *buffercache; extern char *bufferscan; -extern Bufferpool* buffpool; - void execute(Operator* op, bool printColNames, void (*tupleHandler)(Tuple* tpl)); void executeStatement(Node* node); void executeCreateTable(Node* node); diff --git a/src/include/operators/aggregate.h b/src/include/operators/aggregate.h index cd58290..f0586a9 100644 --- a/src/include/operators/aggregate.h +++ b/src/include/operators/aggregate.h @@ -1,6 +1,5 @@ #pragma once #include -#include "../bufferpool/bufferpool.h" #include "../planner/planner.h" #include "../executor/executor.h" #include "../executor/tuple.h" diff --git a/src/include/operators/filter.h b/src/include/operators/filter.h index f3bd5d8..f1582ea 100644 --- a/src/include/operators/filter.h +++ b/src/include/operators/filter.h @@ -1,6 +1,5 @@ #pragma once #include -#include "../bufferpool/bufferpool.h" #include "../planner/planner.h" #include "../executor/tuple.h" diff --git a/src/include/operators/join.h b/src/include/operators/join.h index e54a6da..8b38e50 100644 --- a/src/include/operators/join.h +++ b/src/include/operators/join.h @@ -1,5 +1,4 @@ #pragma once -#include "../bufferpool/bufferpool.h" #include "../planner/planner.h" #include "../executor/executor.h" #include "../executor/tuple.h" diff --git a/src/include/operators/project.h b/src/include/operators/project.h index 46bef38..e4b4a10 100644 --- a/src/include/operators/project.h +++ b/src/include/operators/project.h @@ -1,5 +1,4 @@ #pragma once -#include "../bufferpool/bufferpool.h" #include "../planner/planner.h" #include "../executor/tuple.h" diff --git a/src/include/operators/scan.h b/src/include/operators/scan.h index 10ab0bc..da6ad44 100644 --- a/src/include/operators/scan.h +++ b/src/include/operators/scan.h @@ -1,5 +1,4 @@ #pragma once -#include "../bufferpool/bufferpool.h" #include "../planner/planner.h" #include "../executor/executor.h" #include "../executor/tuple.h" diff --git a/src/include/operators/scanTDB.h b/src/include/operators/scanTDB.h index 7f8b6a9..f34c20d 100644 --- a/src/include/operators/scanTDB.h +++ b/src/include/operators/scanTDB.h @@ -1,5 +1,4 @@ #pragma once -#include "../bufferpool/bufferpool.h" #include "../planner/planner.h" #include "../executor/executor.h" #include "../executor/tuple.h" diff --git a/src/operators/aggregate.c b/src/operators/aggregate.c index 395b1d7..1742aa8 100644 --- a/src/operators/aggregate.c +++ b/src/operators/aggregate.c @@ -67,7 +67,7 @@ void aggregateGetTuple(Operator* op, Tuple* tpl) { size_t observations = 0; - Tuple* tmpTpl = initTupleOfSize(500); // TODO no magic + Tuple* tmpTpl = initTupleOfSize(TUPLESIZE); for (;;) { diff --git a/src/operators/join.c b/src/operators/join.c index 45332e6..9ccadc2 100644 --- a/src/operators/join.c +++ b/src/operators/join.c @@ -38,7 +38,7 @@ void joinGetTuple(Operator* op, Tuple* tpl) { */ if (!op->info.join.rightTuples) { - op->info.join.rightTuples = initTupleBuffer(JOINPTRBUFFER, 500); // TODO no magic + op->info.join.rightTuples = initTupleBuffer(JOINBUFFSIZE, TUPLESIZE); } Tuple* rightTuple; @@ -55,11 +55,6 @@ void joinGetTuple(Operator* op, Tuple* tpl) { } op->info.join.rightTupleCount++; - - if (op->info.join.rightTupleCount >= JOINPTRBUFFER) { - printf("Can't fit the right table in the query into joinbuffer. Increase JOINPTRBUFFER\n"); - exit(1); - } } @@ -68,7 +63,7 @@ void joinGetTuple(Operator* op, Tuple* tpl) { // For each tuple in right relation // if join_predicates(left,right) return tuple(left,right) - op->info.join.leftTuple = initTupleOfSize(500); + op->info.join.leftTuple = initTupleOfSize(TUPLESIZE); if (isTupleEmpty(op->info.join.leftTuple)) { op->info.join.left->getTuple(op->info.join.left, op->info.join.leftTuple); diff --git a/src/squel.c b/src/squel.c index 8469934..a4256a1 100644 --- a/src/squel.c +++ b/src/squel.c @@ -2,7 +2,6 @@ #include "./include/parser/parser.h" #include "./include/planner/planner.h" #include "./include/io/tdb.h" -#include "./include/bufferpool/bufferpool.h" #define METADATABUFFSIZE 10 From e6a2927f35792ec6b9fa6229b321aa02cd3447d5 Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Mon, 10 Mar 2025 21:20:05 +0200 Subject: [PATCH 15/29] explain and test --- src/executor/statements/explain.c | 22 ++++++++++------------ test/test-explain.bats | 27 +++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 12 deletions(-) create mode 100644 test/test-explain.bats diff --git a/src/executor/statements/explain.c b/src/executor/statements/explain.c index 01fad91..ccb18f2 100644 --- a/src/executor/statements/explain.c +++ b/src/executor/statements/explain.c @@ -33,22 +33,21 @@ void explainOp(Operator* op) { if (!op) return; printOp(op); - printf(", size: %ld\n", op->resultDescription.size); - if (op->child) { + printf("\n"); - if (op->type == OP_FILTER) { - explainOp(op->info.filter.next); - } + if (op->type == OP_FILTER) { + explainOp(op->info.filter.next); + } - if (op->type == OP_JOIN) { - explainOp(op->info.join.filter); - explainOp(op->info.join.left); - explainOp(op->info.join.right); - } + if (op->type == OP_JOIN) { + explainOp(op->info.join.filter); + explainOp(op->info.join.left); + explainOp(op->info.join.right); + } + if (op->child) { explainOp(op->child); } - } @@ -62,7 +61,6 @@ void executeExplain(Node* node) { explainOp(queryplan); printf("**************************\n"); - freeQueryplan(queryplan); } diff --git a/test/test-explain.bats b/test/test-explain.bats new file mode 100644 index 0000000..003d530 --- /dev/null +++ b/test/test-explain.bats @@ -0,0 +1,27 @@ + +#!/usr/bin/env bats + +setup_file() { + run make +} + +@test "Simple subquery \w WHERE" { + run ./build/squel "EXPLAIN SELECT col3 FROM (SELECT col3,col1 FROM './test/data/small.csv') WHERE col3>100" + [[ $"${lines[0]}" == "******* EXPLAIN **********" ]] + [[ $"${lines[1]}" == "OP_PROJECT" ]] + [[ $"${lines[2]}" == "OP_FILTER" ]] + [[ $"${lines[3]}" == "OP_PROJECT" ]] + [[ $"${lines[4]}" == "OP_SCAN" ]] + [[ $"${lines[5]}" == "**************************" ]] +} + +@test "EXPLAIN a query" { + run ./build/squel "EXPLAIN SELECT col1,col3,int FROM test_small JOIN test_small2 ON col3=int" + [[ $"${lines[0]}" == "******* EXPLAIN **********" ]] + [[ $"${lines[1]}" == "OP_PROJECT" ]] + [[ $"${lines[2]}" == "OP_JOIN" ]] + [[ $"${lines[3]}" == "OP_FILTER" ]] + [[ $"${lines[4]}" == "OP_SCANTDB" ]] + [[ $"${lines[5]}" == "OP_SCANTDB" ]] + [[ $"${lines[6]}" == "**************************" ]] +} From 18803bdfc65725cd20e9c702f9ee191ff81d2ebc Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Tue, 11 Mar 2025 21:08:11 +0200 Subject: [PATCH 16/29] hashjoin started --- Makefile | 2 +- perf/results/count.csv | 8 +-- perf/results/join.csv | 12 ++-- src/executor/executor.c | 7 ++- src/executor/statements/explain.c | 7 ++- src/include/executor/executor.h | 1 + src/include/operators/hashjoin.h | 10 ++++ src/include/operators/join.h | 3 +- src/include/planner/planner.h | 5 +- src/include/util/hashmap.h | 28 ++++++++++ src/operators/hashjoin.c | 92 +++++++++++++++++++++++++++++++ src/operators/join.c | 1 + src/planner/operators/join.c | 27 ++++++++- src/util/hashmap.c | 46 ++++++++++++++++ test/data/animals.csv | 5 ++ test/data/fruits.csv | 5 ++ test/hashmap_test.c | 28 ++++++++++ test/test-explain.bats | 15 ++++- test/test_hashmap.bats | 14 +++++ test/test_join_duplicate.bats | 30 ++++++++++ 20 files changed, 325 insertions(+), 21 deletions(-) create mode 100644 src/include/operators/hashjoin.h create mode 100644 src/include/util/hashmap.h create mode 100644 src/operators/hashjoin.c create mode 100644 src/util/hashmap.c create mode 100644 test/data/animals.csv create mode 100644 test/data/fruits.csv create mode 100644 test/hashmap_test.c create mode 100644 test/test_hashmap.bats create mode 100644 test/test_join_duplicate.bats diff --git a/Makefile b/Makefile index dfea7e7..a5aa287 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ $(ODIR)/%.o: $(SRC)%.c $(CC) -g -c $< -o $@ $(CFLAGS) dirs: - mkdir -p data build/parser build/planner/operators build/binder build/io build/executor build/executor/statements build/bufferpool build/operators + mkdir -p data build/parser build/planner/operators build/binder build/io build/executor build/executor/statements build/operators build/util/hashmap clean: rm -f ./build/squel $(OBJ) diff --git a/perf/results/count.csv b/perf/results/count.csv index aaf89ab..b958cfb 100644 --- a/perf/results/count.csv +++ b/perf/results/count.csv @@ -1,7 +1,7 @@ filetype;records;time -CSV;100000;0:00.05 +CSV;100000;0:00.04 TDB;100000;0:00.00 -CSV;1000000;0:00.46 +CSV;1000000;0:00.30 TDB;1000000;0:00.04 -Command terminated by signal 2 -CSV;10000000;0:01.30 +CSV;10000000;0:02.96 +TDB;10000000;0:00.44 diff --git a/perf/results/join.csv b/perf/results/join.csv index 5d44775..d10b3b1 100644 --- a/perf/results/join.csv +++ b/perf/results/join.csv @@ -1,9 +1,5 @@ filetype;records_left;records_right;time -CSV;10000;100;0:00.06 -TDB;10000;100;0:00.04 -CSV;10000;1000;0:00.40 -TDB;10000;1000;0:00.39 -CSV;100000;100;0:00.45 -TDB;100000;100;0:00.38 -CSV;100000;1000;0:03.93 -TDB;100000;1000;0:03.80 +CSV;10000;100;0:00.02 +TDB;10000;100;0:00.00 +Command terminated by signal 9 +CSV;10000;1000;0:23.16 diff --git a/src/executor/executor.c b/src/executor/executor.c index f595b84..053caa8 100644 --- a/src/executor/executor.c +++ b/src/executor/executor.c @@ -25,11 +25,14 @@ void assignGetTupleFunction(Operator *op) { case (OP_JOIN): op->getTuple = &joinGetTuple; break; + case (OP_HASHJOIN): + op->getTuple = &hashjoinGetTuple; + break; case (OP_AGGREGATE): op->getTuple = &aggregateGetTuple; break; default: - printf("Don't know how to handle op-type %d\n", op->type); + printf("EXECUTOR-error: Don't know how to handle op-type %d\n", op->type); exit(1); } } @@ -47,7 +50,7 @@ void doAssignGetTupleFunction(Operator* p_op) { doAssignGetTupleFunction(p_op->child); } - if (p_op->type == OP_JOIN) { + if (p_op->type == OP_JOIN || p_op->type == OP_HASHJOIN) { doAssignGetTupleFunction(p_op->info.join.left); doAssignGetTupleFunction(p_op->info.join.right); } diff --git a/src/executor/statements/explain.c b/src/executor/statements/explain.c index ccb18f2..8314855 100644 --- a/src/executor/statements/explain.c +++ b/src/executor/statements/explain.c @@ -21,8 +21,11 @@ void printOp(Operator* op) { case OP_AGGREGATE: printf("OP_AGGREGATE"); break; + case OP_HASHJOIN: + printf("OP_HASHJOIN"); + break; default: - printf("Unknown operation type"); + printf("EXPLAIN-error: Unknown operation type"); break; } @@ -39,7 +42,7 @@ void explainOp(Operator* op) { explainOp(op->info.filter.next); } - if (op->type == OP_JOIN) { + if (op->type == OP_JOIN || op->type == OP_HASHJOIN) { explainOp(op->info.join.filter); explainOp(op->info.join.left); explainOp(op->info.join.right); diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 08e147d..58d11da 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -5,6 +5,7 @@ #include "../parser/parsetree.h" #include "../planner/planner.h" #include "../operators/join.h" +#include "../operators/hashjoin.h" #include "../operators/filter.h" #include "../operators/scan.h" #include "../operators/scanTDB.h" diff --git a/src/include/operators/hashjoin.h b/src/include/operators/hashjoin.h new file mode 100644 index 0000000..24a5d86 --- /dev/null +++ b/src/include/operators/hashjoin.h @@ -0,0 +1,10 @@ +#pragma once +#include "../planner/planner.h" +#include "../executor/executor.h" +#include "../executor/tuple.h" +#include "../executor/tuplebuffer.h" +#include "./join.h" +#include "../util/hashmap.h" + + +void hashjoinGetTuple(Operator* op, Tuple* tpl); \ No newline at end of file diff --git a/src/include/operators/join.h b/src/include/operators/join.h index 8b38e50..2a6fbec 100644 --- a/src/include/operators/join.h +++ b/src/include/operators/join.h @@ -5,4 +5,5 @@ #include "../executor/tuplebuffer.h" -void joinGetTuple(Operator* op, Tuple* tpl); \ No newline at end of file +void joinGetTuple(Operator* op, Tuple* tpl); +void concatTuples(Tuple* returnTpl, Tuple* leftTpl, Tuple* rightTpl, ResultSet* left, ResultSet* right); \ No newline at end of file diff --git a/src/include/planner/planner.h b/src/include/planner/planner.h index df3efb2..01f86ea 100644 --- a/src/include/planner/planner.h +++ b/src/include/planner/planner.h @@ -8,6 +8,7 @@ #include "../parser/parsetree.h" #include "../executor/tuple.h" #include "../executor/tuplebuffer.h" +#include "../util/hashmap.h" typedef enum { @@ -16,7 +17,8 @@ typedef enum { OP_PROJECT, OP_FILTER, OP_JOIN, - OP_AGGREGATE + OP_AGGREGATE, + OP_HASHJOIN } OperatorType; typedef enum ComparisonType { @@ -100,6 +102,7 @@ typedef struct { struct Operator* left; struct Operator* right; struct Operator* filter; + Hashmap* hashmap; TupleBuffer* rightTuples; Tuple* leftTuple; size_t rightTupleIdx; diff --git a/src/include/util/hashmap.h b/src/include/util/hashmap.h new file mode 100644 index 0000000..e5e74ae --- /dev/null +++ b/src/include/util/hashmap.h @@ -0,0 +1,28 @@ +#pragma once +#include +#include +#include +#include + +typedef struct { + char key[100]; + size_t value; + size_t obs; + struct MapNode* next; +} MapNode; + + +typedef struct { + MapNode* data; + size_t table_size; +} Hashmap; + + + +Hashmap* initHashmap(size_t table_size); +void insertToHashmap(Hashmap* map, const char* key, size_t value); +size_t isInHashmap(Hashmap* map, const char* value); +void freeHashmap(Hashmap* map); +size_t getValueFromHashmap(Hashmap* map, const char* key); + +unsigned int hash(const char *key, size_t table_size); \ No newline at end of file diff --git a/src/operators/hashjoin.c b/src/operators/hashjoin.c new file mode 100644 index 0000000..b12aad1 --- /dev/null +++ b/src/operators/hashjoin.c @@ -0,0 +1,92 @@ +#include "../include/operators/hashjoin.h" + + + +void hashjoinGetTuple(Operator* op, Tuple* tpl) { + if ( + op->info.join.left == NULL || + op->info.join.right == NULL + ) { + printf("Join left or right operator is NULL\n"); + exit(1); + } + + int joinColIdx = op->info.join.filter->info.filter.boolExprList[2]; + int joinColOffset = op->info.join.right->resultDescription.pCols[joinColIdx]; + + if (!op->info.join.hashmap) { + op->info.join.hashmap = initHashmap(1000); // TODO magic + op->info.join.rightTuples = initTupleBuffer(JOINBUFFSIZE, TUPLESIZE); + } + + + Tuple* rightTuple = initTupleOfSize(TUPLESIZE); + const char* joinValue; + + // This is only entered first time the operator is called + while (!op->info.join.rightTuplesCollected) { + + rightTuple = getTupleFromBuffer(op->info.join.rightTuples); + + op->info.join.right->getTuple(op->info.join.right, rightTuple); + + if (isTupleEmpty(rightTuple)) { + op->info.join.rightTuplesCollected = true; + continue; + } + // Get value of join column + joinValue = (const char*) getTupleCol(rightTuple, joinColOffset); + + insertToHashmap(op->info.join.hashmap, joinValue, op->info.join.rightTupleCount); + + op->info.join.rightTupleCount++; + } + + + + // Nested join loop + // For each tuple if left relation + // For each tuple in right relation + // if join_predicates(left,right) return tuple(left,right) + + op->info.join.leftTuple = initTupleOfSize(TUPLESIZE); + + if (isTupleEmpty(op->info.join.leftTuple)) { + op->info.join.left->getTuple(op->info.join.left, op->info.join.leftTuple); + } + + joinColIdx = op->info.join.filter->info.filter.boolExprList[0]; + joinColOffset = op->info.join.left->resultDescription.pCols[joinColIdx]; + + + do { + joinValue = (const char*) getTupleCol(op->info.join.leftTuple, joinColOffset); + + if (!isInHashmap(op->info.join.hashmap, joinValue)) { + op->info.join.left->getTuple(op->info.join.left, op->info.join.leftTuple); + continue; + } + + rightTuple = getTupleByIndex(op->info.join.rightTuples, getValueFromHashmap(op->info.join.hashmap, joinValue)); + + // Create a new tuple by concating the tuples + concatTuples( + tpl, + op->info.join.leftTuple, + rightTuple, + &op->info.join.left->resultDescription, + &op->info.join.right->resultDescription + ); + + return; + + } while (!isTupleEmpty(op->info.join.leftTuple)); + + // Join complete, we can free the buffer and the tuples associated + freeTupleBuffer(op->info.join.rightTuples); + freeTuple(op->info.join.leftTuple); + markTupleAsEmpty(tpl); + +} + + diff --git a/src/operators/join.c b/src/operators/join.c index 9ccadc2..58247f4 100644 --- a/src/operators/join.c +++ b/src/operators/join.c @@ -104,3 +104,4 @@ void joinGetTuple(Operator* op, Tuple* tpl) { } + diff --git a/src/planner/operators/join.c b/src/planner/operators/join.c index 9e7c452..e586650 100644 --- a/src/planner/operators/join.c +++ b/src/planner/operators/join.c @@ -40,6 +40,28 @@ Operator* makeJoinFilterOps( return filterOps; } +OperatorType deduceJoinType(Operator* filterOp) { + // Atm we can do a hash join + // if and only if: + // - There's only one join condition + // - The condition is an equality comparison + + if (filterOp->info.filter.next) { + return OP_JOIN; + } + + if (filterOp->info.filter.boolExprListSize < 3) { + return OP_JOIN; + } + + if (filterOp->info.filter.boolExprList[1] != -1) { + return OP_JOIN; + } + + return OP_HASHJOIN; + +} + Operator* makeJoinOp(Operator* left, Operator* right, Node* ON) { @@ -54,7 +76,6 @@ Operator* makeJoinOp(Operator* left, Operator* right, Node* ON) { Operator* opJoin = (Operator*) calloc(1, sizeof(Operator)); opJoin->info.join.left = left; opJoin->info.join.right = right; - opJoin->type = OP_JOIN; opJoin->info.join.rightTupleCount = 0; opJoin->info.join.rightTupleIdx = 0; opJoin->info.join.rightTuplesCollected = false; @@ -94,5 +115,9 @@ Operator* makeJoinOp(Operator* left, Operator* right, Node* ON) { Operator* opFilter = makeJoinFilterOps(ON, opJoin, left->resultDescription, right->resultDescription); opJoin->info.join.filter = opFilter; + + opJoin->type = deduceJoinType(opFilter); + + return opJoin; } \ No newline at end of file diff --git a/src/util/hashmap.c b/src/util/hashmap.c new file mode 100644 index 0000000..1b241a2 --- /dev/null +++ b/src/util/hashmap.c @@ -0,0 +1,46 @@ +#include "../include/util/hashmap.h" + + + +Hashmap* initHashmap(size_t table_size) { + Hashmap* map = malloc(sizeof(Hashmap)); + map->data = calloc(table_size, sizeof(MapNode)); + map->table_size = table_size; + return map; +} + +void insertToHashmap(Hashmap* map, const char* key, size_t value) { + unsigned int idx = hash(key, map->table_size); + if (map->data[idx].obs == 0) { + memcpy(map->data[idx].key, key, strlen(key)); + } + map->data[idx].value = value; + map->data[idx].obs++; + // TODO handle collisions +} + +size_t isInHashmap(Hashmap* map, const char* key) { + unsigned int idx = hash(key, map->table_size); + return map->data[idx].obs > 0 ? 1 : 0; +} + +size_t getValueFromHashmap(Hashmap* map, const char* key) { + unsigned int idx = hash(key, map->table_size); + return map->data[idx].value; +} + + + + +void freeHashmap(Hashmap* map) { + free(map->data); // TODO free any adjacent nodes after handling collitions + free(map); +} + +unsigned int hash(const char *key, size_t table_size) { + unsigned long int hashval = 0; + while (*key) { + hashval = (hashval << 5) + *key++; + } + return hashval % table_size; +} \ No newline at end of file diff --git a/test/data/animals.csv b/test/data/animals.csv new file mode 100644 index 0000000..cff6b78 --- /dev/null +++ b/test/data/animals.csv @@ -0,0 +1,5 @@ +animal;size +monkey;small +cat;small +whale;very big +horse;medium \ No newline at end of file diff --git a/test/data/fruits.csv b/test/data/fruits.csv new file mode 100644 index 0000000..98b24cf --- /dev/null +++ b/test/data/fruits.csv @@ -0,0 +1,5 @@ +fruit;size +grape;small +strawberry;small +watermelon;very big +orange;medium \ No newline at end of file diff --git a/test/hashmap_test.c b/test/hashmap_test.c new file mode 100644 index 0000000..ec2df1f --- /dev/null +++ b/test/hashmap_test.c @@ -0,0 +1,28 @@ +#include "../src/util/hashmap.c" +#include + +int main() { + + Hashmap* map = initHashmap(1000); + + insertToHashmap(map, "12345", 1442); + insertToHashmap(map, "12346", 2); + insertToHashmap(map, "12X46", 3); + + if (isInHashmap(map, "12345")) { + printf("12345 in map with value %ld\n", getValueFromHashmap(map, "12345")); + } + + if (isInHashmap(map, "12X46")) { + printf("12X46 in map\n"); + } + + if (isInHashmap(map, "123fASFA")) { + printf("12X46 in map\n"); + } + + + freeHashmap(map); + + return 0; +} \ No newline at end of file diff --git a/test/test-explain.bats b/test/test-explain.bats index 003d530..0c99167 100644 --- a/test/test-explain.bats +++ b/test/test-explain.bats @@ -15,10 +15,23 @@ setup_file() { [[ $"${lines[5]}" == "**************************" ]] } -@test "EXPLAIN a query" { +@test "EXPLAIN - hash join" { run ./build/squel "EXPLAIN SELECT col1,col3,int FROM test_small JOIN test_small2 ON col3=int" [[ $"${lines[0]}" == "******* EXPLAIN **********" ]] [[ $"${lines[1]}" == "OP_PROJECT" ]] + [[ $"${lines[2]}" == "OP_HASHJOIN" ]] + [[ $"${lines[3]}" == "OP_FILTER" ]] + [[ $"${lines[4]}" == "OP_SCANTDB" ]] + [[ $"${lines[5]}" == "OP_SCANTDB" ]] + [[ $"${lines[6]}" == "**************************" ]] +} + + + +@test "EXPLAIN - join with nested loop join" { + run ./build/squel "EXPLAIN SELECT col1,col3,int FROM test_small JOIN test_small2 ON col3>int" + [[ $"${lines[0]}" == "******* EXPLAIN **********" ]] + [[ $"${lines[1]}" == "OP_PROJECT" ]] [[ $"${lines[2]}" == "OP_JOIN" ]] [[ $"${lines[3]}" == "OP_FILTER" ]] [[ $"${lines[4]}" == "OP_SCANTDB" ]] diff --git a/test/test_hashmap.bats b/test/test_hashmap.bats new file mode 100644 index 0000000..fd5eb73 --- /dev/null +++ b/test/test_hashmap.bats @@ -0,0 +1,14 @@ + +#!/usr/bin/env bats + +setup_file() { + run rm ./build/hashmap_test.o + run gcc ./test/hashmap_test.c -o ./build/hashmap_test.o +} + +@test "Hashmap functionality" { + run ./build/hashmap_test.o + [[ $"${lines[0]}" == "12345 in map with value 1442" ]] + [[ $"${lines[1]}" == "12X46 in map" ]] +} + diff --git a/test/test_join_duplicate.bats b/test/test_join_duplicate.bats new file mode 100644 index 0000000..330c652 --- /dev/null +++ b/test/test_join_duplicate.bats @@ -0,0 +1,30 @@ +#!/usr/bin/env bats + +setup_file() { + run make +} + +@test "Join animals to fruits duplicating rows" { + run ./build/squel "SELECT a.size,a.animal,f.fruit FROM './test/data/animals.csv' AS a JOIN './test/data/fruits.csv' AS f ON a.size=f.size" + [[ $"${lines[0]}" == "size;animal;fruit" ]] + [[ $"${lines[1]}" == "small;monkey;grape" ]] + [[ $"${lines[2]}" == "small;monkey;strawberry" ]] + [[ $"${lines[3]}" == "small;cat;grape" ]] + [[ $"${lines[4]}" == "small;cat;strawberry" ]] + [[ $"${lines[5]}" == "very big;whale;watermelon" ]] + [[ $"${lines[6]}" == "medium;horse;orange" ]] + +} + + +@test "Join fruits to animals duplicating rows" { + run ./build/squel "SELECT a.size,a.animal,f.fruit FROM './test/data/fruits.csv' AS f JOIN './test/data/animals.csv' AS a ON a.size=f.size" + [[ $"${lines[0]}" == "size;animal;fruit" ]] + [[ $"${lines[1]}" == "small;monkey;grape" ]] + [[ $"${lines[2]}" == "small;monkey;strawberry" ]] + [[ $"${lines[3]}" == "small;cat;grape" ]] + [[ $"${lines[4]}" == "small;cat;strawberry" ]] + [[ $"${lines[5]}" == "very big;whale;watermelon" ]] + [[ $"${lines[6]}" == "medium;horse;orange" ]] + +} From 442cf93b9c627eb66548041ac6580890c4bde28c Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Tue, 11 Mar 2025 21:38:09 +0200 Subject: [PATCH 17/29] hashjoin sort of works --- src/include/util/hashmap.h | 9 +++++---- src/operators/hashjoin.c | 17 +++++++++++++---- src/operators/join.c | 4 +++- src/util/hashmap.c | 21 ++++++++++++++++----- test/test_join_duplicate.bats | 5 ++--- 5 files changed, 39 insertions(+), 17 deletions(-) diff --git a/src/include/util/hashmap.h b/src/include/util/hashmap.h index e5e74ae..39d1156 100644 --- a/src/include/util/hashmap.h +++ b/src/include/util/hashmap.h @@ -5,9 +5,10 @@ #include typedef struct { - char key[100]; - size_t value; + char key[10000]; // TODO no magic; + size_t values[10000]; size_t obs; + size_t cursor; struct MapNode* next; } MapNode; @@ -18,11 +19,11 @@ typedef struct { } Hashmap; - Hashmap* initHashmap(size_t table_size); void insertToHashmap(Hashmap* map, const char* key, size_t value); size_t isInHashmap(Hashmap* map, const char* value); void freeHashmap(Hashmap* map); -size_t getValueFromHashmap(Hashmap* map, const char* key); +size_t getValueFromHashmap(Hashmap* map, const char* key); +void resetCursor(Hashmap* map, const char* key); unsigned int hash(const char *key, size_t table_size); \ No newline at end of file diff --git a/src/operators/hashjoin.c b/src/operators/hashjoin.c index b12aad1..5ca7975 100644 --- a/src/operators/hashjoin.c +++ b/src/operators/hashjoin.c @@ -12,7 +12,8 @@ void hashjoinGetTuple(Operator* op, Tuple* tpl) { } int joinColIdx = op->info.join.filter->info.filter.boolExprList[2]; - int joinColOffset = op->info.join.right->resultDescription.pCols[joinColIdx]; + int joinColOffset = op->info.join.filter->resultDescription.pCols[joinColIdx]; + // int joinColOffset = op->info.join.right->resultDescription.pCols[joinColIdx]; if (!op->info.join.hashmap) { op->info.join.hashmap = initHashmap(1000); // TODO magic @@ -49,25 +50,33 @@ void hashjoinGetTuple(Operator* op, Tuple* tpl) { // For each tuple in right relation // if join_predicates(left,right) return tuple(left,right) - op->info.join.leftTuple = initTupleOfSize(TUPLESIZE); + if (op->info.join.leftTuple == NULL) { + op->info.join.leftTuple = initTupleOfSize(TUPLESIZE); + } if (isTupleEmpty(op->info.join.leftTuple)) { op->info.join.left->getTuple(op->info.join.left, op->info.join.leftTuple); } joinColIdx = op->info.join.filter->info.filter.boolExprList[0]; - joinColOffset = op->info.join.left->resultDescription.pCols[joinColIdx]; + joinColOffset = op->info.join.filter->resultDescription.pCols[joinColIdx]; + int tupleIdx; do { joinValue = (const char*) getTupleCol(op->info.join.leftTuple, joinColOffset); if (!isInHashmap(op->info.join.hashmap, joinValue)) { + resetCursor(op->info.join.hashmap, joinValue); op->info.join.left->getTuple(op->info.join.left, op->info.join.leftTuple); continue; } - rightTuple = getTupleByIndex(op->info.join.rightTuples, getValueFromHashmap(op->info.join.hashmap, joinValue)); + + tupleIdx = getValueFromHashmap(op->info.join.hashmap, joinValue); + if (tupleIdx < 0) continue; + + rightTuple = getTupleByIndex(op->info.join.rightTuples, tupleIdx); // Create a new tuple by concating the tuples concatTuples( diff --git a/src/operators/join.c b/src/operators/join.c index 58247f4..49bc5f0 100644 --- a/src/operators/join.c +++ b/src/operators/join.c @@ -63,7 +63,9 @@ void joinGetTuple(Operator* op, Tuple* tpl) { // For each tuple in right relation // if join_predicates(left,right) return tuple(left,right) - op->info.join.leftTuple = initTupleOfSize(TUPLESIZE); + if (op->info.join.leftTuple == NULL) { + op->info.join.leftTuple = initTupleOfSize(TUPLESIZE); + } if (isTupleEmpty(op->info.join.leftTuple)) { op->info.join.left->getTuple(op->info.join.left, op->info.join.leftTuple); diff --git a/src/util/hashmap.c b/src/util/hashmap.c index 1b241a2..5599c94 100644 --- a/src/util/hashmap.c +++ b/src/util/hashmap.c @@ -1,7 +1,5 @@ #include "../include/util/hashmap.h" - - Hashmap* initHashmap(size_t table_size) { Hashmap* map = malloc(sizeof(Hashmap)); map->data = calloc(table_size, sizeof(MapNode)); @@ -14,23 +12,36 @@ void insertToHashmap(Hashmap* map, const char* key, size_t value) { if (map->data[idx].obs == 0) { memcpy(map->data[idx].key, key, strlen(key)); } - map->data[idx].value = value; + map->data[idx].values[map->data[idx].obs] = value; map->data[idx].obs++; + if (map->data[idx].obs >= 100) { + printf("OUT OF BOUNDS\n"); // TODO + } // TODO handle collisions } size_t isInHashmap(Hashmap* map, const char* key) { unsigned int idx = hash(key, map->table_size); + if (map->data[idx].cursor == map->data[idx].obs) return 0; return map->data[idx].obs > 0 ? 1 : 0; } -size_t getValueFromHashmap(Hashmap* map, const char* key) { + +void resetCursor(Hashmap* map, const char* key) { unsigned int idx = hash(key, map->table_size); - return map->data[idx].value; + map->data[idx].cursor = 0; } +size_t getValueFromHashmap(Hashmap* map, const char* key) { + unsigned int idx = hash(key, map->table_size); + if (map->data[idx].cursor == map->data[idx].obs) return -1; + size_t rtrn = map->data[idx].values[map->data[idx].cursor++]; + + + return rtrn; +} void freeHashmap(Hashmap* map) { free(map->data); // TODO free any adjacent nodes after handling collitions diff --git a/test/test_join_duplicate.bats b/test/test_join_duplicate.bats index 330c652..7bba3cc 100644 --- a/test/test_join_duplicate.bats +++ b/test/test_join_duplicate.bats @@ -16,13 +16,12 @@ setup_file() { } - @test "Join fruits to animals duplicating rows" { run ./build/squel "SELECT a.size,a.animal,f.fruit FROM './test/data/fruits.csv' AS f JOIN './test/data/animals.csv' AS a ON a.size=f.size" [[ $"${lines[0]}" == "size;animal;fruit" ]] [[ $"${lines[1]}" == "small;monkey;grape" ]] - [[ $"${lines[2]}" == "small;monkey;strawberry" ]] - [[ $"${lines[3]}" == "small;cat;grape" ]] + [[ $"${lines[2]}" == "small;cat;grape" ]] + [[ $"${lines[3]}" == "small;monkey;strawberry" ]] [[ $"${lines[4]}" == "small;cat;strawberry" ]] [[ $"${lines[5]}" == "very big;whale;watermelon" ]] [[ $"${lines[6]}" == "medium;horse;orange" ]] From ef00fbb9f661180af715637f522f43062f57f8d9 Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Tue, 11 Mar 2025 22:16:43 +0200 Subject: [PATCH 18/29] hashmap without collision detection --- src/include/util/hashmap.h | 4 ++-- src/operators/hashjoin.c | 3 +-- src/util/hashmap.c | 6 +++++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/include/util/hashmap.h b/src/include/util/hashmap.h index 39d1156..645b3d3 100644 --- a/src/include/util/hashmap.h +++ b/src/include/util/hashmap.h @@ -5,8 +5,8 @@ #include typedef struct { - char key[10000]; // TODO no magic; - size_t values[10000]; + char key[100]; // TODO no magic; + size_t values[1000]; size_t obs; size_t cursor; struct MapNode* next; diff --git a/src/operators/hashjoin.c b/src/operators/hashjoin.c index 5ca7975..15b99c0 100644 --- a/src/operators/hashjoin.c +++ b/src/operators/hashjoin.c @@ -13,10 +13,9 @@ void hashjoinGetTuple(Operator* op, Tuple* tpl) { int joinColIdx = op->info.join.filter->info.filter.boolExprList[2]; int joinColOffset = op->info.join.filter->resultDescription.pCols[joinColIdx]; - // int joinColOffset = op->info.join.right->resultDescription.pCols[joinColIdx]; if (!op->info.join.hashmap) { - op->info.join.hashmap = initHashmap(1000); // TODO magic + op->info.join.hashmap = initHashmap(300000); // TODO magic op->info.join.rightTuples = initTupleBuffer(JOINBUFFSIZE, TUPLESIZE); } diff --git a/src/util/hashmap.c b/src/util/hashmap.c index 5599c94..ec6b1e6 100644 --- a/src/util/hashmap.c +++ b/src/util/hashmap.c @@ -3,6 +3,10 @@ Hashmap* initHashmap(size_t table_size) { Hashmap* map = malloc(sizeof(Hashmap)); map->data = calloc(table_size, sizeof(MapNode)); + if (map->data == NULL) { + printf("Error: unable to reserve %ld bytes\n", (sizeof(MapNode) * table_size) / 1024); + exit(1); + } map->table_size = table_size; return map; } @@ -14,7 +18,7 @@ void insertToHashmap(Hashmap* map, const char* key, size_t value) { } map->data[idx].values[map->data[idx].obs] = value; map->data[idx].obs++; - if (map->data[idx].obs >= 100) { + if (map->data[idx].obs >= 10000) { printf("OUT OF BOUNDS\n"); // TODO } // TODO handle collisions From 0d7d45983357c101e0cd6eec116ce9bdb0f362a8 Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Thu, 13 Mar 2025 20:48:01 +0200 Subject: [PATCH 19/29] minor changes --- src/executor/tuplebuffer.c | 3 +++ src/include/util/hashmap.h | 2 +- src/util/hashmap.c | 6 ++++-- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/executor/tuplebuffer.c b/src/executor/tuplebuffer.c index aa9dd88..74080fa 100644 --- a/src/executor/tuplebuffer.c +++ b/src/executor/tuplebuffer.c @@ -19,13 +19,16 @@ void resizeTupleBuffer(TupleBuffer* buff) { buff->capacity *= 2; Tuple* tmpTpl = realloc(buff->tuples, buff->capacity * sizeof(Tuple)); + if (tmpTpl == NULL) { printf("ERROR: Could resize tuplebuffer from %ld to %ld\n", buff->capacity, buff->capacity * 2); exit(1); } buff->tuples = tmpTpl; + void* tmpData = realloc(buff->data, buff->capacity * buff->tupledatasize); + if (tmpData == NULL) { printf("ERROR: Could resize tuplebuffer data from %ld to %ld\n", buff->capacity * buff->tupledatasize, buff->capacity * buff->tupledatasize * 2); exit(1); diff --git a/src/include/util/hashmap.h b/src/include/util/hashmap.h index 645b3d3..c2408d5 100644 --- a/src/include/util/hashmap.h +++ b/src/include/util/hashmap.h @@ -6,7 +6,7 @@ typedef struct { char key[100]; // TODO no magic; - size_t values[1000]; + size_t values[10000]; size_t obs; size_t cursor; struct MapNode* next; diff --git a/src/util/hashmap.c b/src/util/hashmap.c index ec6b1e6..67f69ae 100644 --- a/src/util/hashmap.c +++ b/src/util/hashmap.c @@ -17,10 +17,12 @@ void insertToHashmap(Hashmap* map, const char* key, size_t value) { memcpy(map->data[idx].key, key, strlen(key)); } map->data[idx].values[map->data[idx].obs] = value; - map->data[idx].obs++; + if (map->data[idx].obs >= 10000) { - printf("OUT OF BOUNDS\n"); // TODO + // printf("OUT OF BOUNDS\n"); // TODOs + return; } + map->data[idx].obs++; // TODO handle collisions } From c5196f5a62c7ef45e5652680161b542c3e705774 Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Thu, 13 Mar 2025 21:51:01 +0200 Subject: [PATCH 20/29] fix join memleaks; start collision handling --- src/include/util/hashmap.h | 1 + src/operators/hashjoin.c | 12 +++++------- src/operators/join.c | 7 +++---- src/planner/planner.c | 2 +- src/util/hashmap.c | 33 +++++++++++++++++++++++++++------ 5 files changed, 37 insertions(+), 18 deletions(-) diff --git a/src/include/util/hashmap.h b/src/include/util/hashmap.h index c2408d5..6e5420c 100644 --- a/src/include/util/hashmap.h +++ b/src/include/util/hashmap.h @@ -25,5 +25,6 @@ size_t isInHashmap(Hashmap* map, const char* value); void freeHashmap(Hashmap* map); size_t getValueFromHashmap(Hashmap* map, const char* key); void resetCursor(Hashmap* map, const char* key); +void _tryInsert(Hashmap* map, const char* key, size_t value, MapNode* node); unsigned int hash(const char *key, size_t table_size); \ No newline at end of file diff --git a/src/operators/hashjoin.c b/src/operators/hashjoin.c index 15b99c0..2ea4fd7 100644 --- a/src/operators/hashjoin.c +++ b/src/operators/hashjoin.c @@ -15,12 +15,12 @@ void hashjoinGetTuple(Operator* op, Tuple* tpl) { int joinColOffset = op->info.join.filter->resultDescription.pCols[joinColIdx]; if (!op->info.join.hashmap) { - op->info.join.hashmap = initHashmap(300000); // TODO magic + op->info.join.hashmap = initHashmap(30000); // TODO magic op->info.join.rightTuples = initTupleBuffer(JOINBUFFSIZE, TUPLESIZE); } - Tuple* rightTuple = initTupleOfSize(TUPLESIZE); + Tuple* rightTuple; const char* joinValue; // This is only entered first time the operator is called @@ -43,11 +43,7 @@ void hashjoinGetTuple(Operator* op, Tuple* tpl) { } - - // Nested join loop - // For each tuple if left relation - // For each tuple in right relation - // if join_predicates(left,right) return tuple(left,right) + // Join if (op->info.join.leftTuple == NULL) { op->info.join.leftTuple = initTupleOfSize(TUPLESIZE); @@ -93,6 +89,8 @@ void hashjoinGetTuple(Operator* op, Tuple* tpl) { // Join complete, we can free the buffer and the tuples associated freeTupleBuffer(op->info.join.rightTuples); freeTuple(op->info.join.leftTuple); + + freeHashmap(op->info.join.hashmap); markTupleAsEmpty(tpl); } diff --git a/src/operators/join.c b/src/operators/join.c index 49bc5f0..8e629da 100644 --- a/src/operators/join.c +++ b/src/operators/join.c @@ -11,10 +11,9 @@ void concatTuples(Tuple* returnTpl, Tuple* leftTpl, Tuple* rightTpl, ResultSet* exit(1); } - void* address = calloc(1, left->size + right->size); - memcpy(address, leftTpl->data, left->size); - memcpy(address + left->size, rightTpl->data, right->size); - returnTpl->data = address; + + memcpy(returnTpl->data, leftTpl->data, left->size); + memcpy(returnTpl->data + left->size, rightTpl->data, right->size); } void joinGetTuple(Operator* op, Tuple* tpl) { diff --git a/src/planner/planner.c b/src/planner/planner.c index b595a0d..101eeb6 100644 --- a/src/planner/planner.c +++ b/src/planner/planner.c @@ -11,7 +11,7 @@ void freeQueryplan(Operator *node) { } - if (node->type == OP_JOIN) { + if (node->type == OP_JOIN || node->type == OP_JOIN) { freeQueryplan(node->info.join.left); freeQueryplan(node->info.join.right); freeQueryplan(node->info.join.filter); diff --git a/src/util/hashmap.c b/src/util/hashmap.c index 67f69ae..b28abb7 100644 --- a/src/util/hashmap.c +++ b/src/util/hashmap.c @@ -13,17 +13,38 @@ Hashmap* initHashmap(size_t table_size) { void insertToHashmap(Hashmap* map, const char* key, size_t value) { unsigned int idx = hash(key, map->table_size); - if (map->data[idx].obs == 0) { - memcpy(map->data[idx].key, key, strlen(key)); + + MapNode* node = &map->data[idx]; + _tryInsert(map, key, value, node); +} + + + +void _tryInsert(Hashmap* map __attribute__((unused)), const char* key, size_t value, MapNode* node) { + + if (node->obs == 0) { + + memcpy(node->key, key, strlen(key)); + + } else { + + // if (strcmp(key, node->key) == 0) { + // printf("Collision\n"); + // if (!node->next) { + // node->next = calloc(1, sizeof(MapNode)); + // } + + // _tryInsert(map, key, value, node); + // return; + // } } - map->data[idx].values[map->data[idx].obs] = value; + node->values[node->obs] = value; - if (map->data[idx].obs >= 10000) { + if (node->obs >= 10000) { // printf("OUT OF BOUNDS\n"); // TODOs return; } - map->data[idx].obs++; - // TODO handle collisions + node->obs++; } size_t isInHashmap(Hashmap* map, const char* key) { From 699436746dc3f932c3aa049f50c229af250898a3 Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Thu, 13 Mar 2025 22:17:23 +0200 Subject: [PATCH 21/29] actually fix memleak --- src/planner/planner.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/planner/planner.c b/src/planner/planner.c index 101eeb6..1bc1df3 100644 --- a/src/planner/planner.c +++ b/src/planner/planner.c @@ -11,7 +11,7 @@ void freeQueryplan(Operator *node) { } - if (node->type == OP_JOIN || node->type == OP_JOIN) { + if (node->type == OP_JOIN || node->type == OP_HASHJOIN) { freeQueryplan(node->info.join.left); freeQueryplan(node->info.join.right); freeQueryplan(node->info.join.filter); From 55acfc709e8d86846ba2c6d968bf605f28da41b8 Mon Sep 17 00:00:00 2001 From: toppyy <43851547+toppyy@users.noreply.github.com> Date: Fri, 14 Mar 2025 21:52:22 +0200 Subject: [PATCH 22/29] cont hashjoin collisions --- src/include/util/hashmap.h | 4 +++- src/util/hashmap.c | 38 +++++++++++++++++++++++++++++--------- 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/src/include/util/hashmap.h b/src/include/util/hashmap.h index 6e5420c..b62e703 100644 --- a/src/include/util/hashmap.h +++ b/src/include/util/hashmap.h @@ -4,7 +4,9 @@ #include #include -typedef struct { + + +typedef struct MapNode { char key[100]; // TODO no magic; size_t values[10000]; size_t obs; diff --git a/src/util/hashmap.c b/src/util/hashmap.c index b28abb7..1bdb31b 100644 --- a/src/util/hashmap.c +++ b/src/util/hashmap.c @@ -21,6 +21,8 @@ void insertToHashmap(Hashmap* map, const char* key, size_t value) { void _tryInsert(Hashmap* map __attribute__((unused)), const char* key, size_t value, MapNode* node) { + + if (strlen(key) == 0) return; // Where do these come from? if (node->obs == 0) { @@ -28,15 +30,15 @@ void _tryInsert(Hashmap* map __attribute__((unused)), const char* key, size_t va } else { - // if (strcmp(key, node->key) == 0) { - // printf("Collision\n"); - // if (!node->next) { - // node->next = calloc(1, sizeof(MapNode)); - // } + if (strcmp(key, node->key) == 0) { + printf("Collision %s (%ld) vs %s (%ld)\n", key, strlen(key), node->key, strlen(key)); + if (!node->next) { + node->next = calloc(1, sizeof(MapNode)); + } - // _tryInsert(map, key, value, node); - // return; - // } + _tryInsert(map, key, value, node); + return; + } } node->values[node->obs] = value; @@ -70,8 +72,26 @@ size_t getValueFromHashmap(Hashmap* map, const char* key) { return rtrn; } +void freeHashMapNode(MapNode* node) { + if (!node) return; + + if (node->next) { + freeHashMapNode(node->next); + } + free(node); +} + void freeHashmap(Hashmap* map) { - free(map->data); // TODO free any adjacent nodes after handling collitions + MapNode* node; + for (size_t i = 0; i < map->table_size; i++) { + node = &map->data[i]; + + if (node == 0) break; + + freeHashMapNode(node->next); // Only adjacents need to be freed + + } + free(map->data); free(map); } From ac37de8198a0b77961742f955ae1793db9eb43d1 Mon Sep 17 00:00:00 2001 From: toby <43851547+toppyy@users.noreply.github.com> Date: Sun, 16 Mar 2025 08:30:46 +0200 Subject: [PATCH 23/29] introduce options --- src/include/const.h | 3 +- src/include/squel.h | 16 ++++++++++ src/operators/hashjoin.c | 4 +-- src/squel.c | 63 +++++++++++++++++++++++++++++++++++----- src/util/hashmap.c | 2 +- 5 files changed, 77 insertions(+), 11 deletions(-) create mode 100644 src/include/squel.h diff --git a/src/include/const.h b/src/include/const.h index 625e12d..287f302 100644 --- a/src/include/const.h +++ b/src/include/const.h @@ -14,9 +14,10 @@ #define JOINTUPLESIZE 1000 #define SCANTUPLESIZE 2000 -// Query execution +// Defaults for query execution #define JOINBUFFSIZE 100000 #define TUPLESIZE 500 +#define HTSIZE 3000 // Define max size (in chars) of expressions and query #define MAXQUERYSIZE 1000 diff --git a/src/include/squel.h b/src/include/squel.h new file mode 100644 index 0000000..d72a828 --- /dev/null +++ b/src/include/squel.h @@ -0,0 +1,16 @@ +#pragma once + + +typedef enum { + OPT_HTSIZE +} Option; + +typedef struct Options { + size_t htsize; +} Options; + +extern Options* OPTIONS; + +Options* initOptions(); + +size_t getOption(Option opt); \ No newline at end of file diff --git a/src/operators/hashjoin.c b/src/operators/hashjoin.c index 2ea4fd7..eede747 100644 --- a/src/operators/hashjoin.c +++ b/src/operators/hashjoin.c @@ -1,5 +1,5 @@ #include "../include/operators/hashjoin.h" - +#include "../include/squel.h" void hashjoinGetTuple(Operator* op, Tuple* tpl) { @@ -15,7 +15,7 @@ void hashjoinGetTuple(Operator* op, Tuple* tpl) { int joinColOffset = op->info.join.filter->resultDescription.pCols[joinColIdx]; if (!op->info.join.hashmap) { - op->info.join.hashmap = initHashmap(30000); // TODO magic + op->info.join.hashmap = initHashmap(getOption(OPT_HTSIZE)); op->info.join.rightTuples = initTupleBuffer(JOINBUFFSIZE, TUPLESIZE); } diff --git a/src/squel.c b/src/squel.c index a4256a1..24990aa 100644 --- a/src/squel.c +++ b/src/squel.c @@ -2,11 +2,14 @@ #include "./include/parser/parser.h" #include "./include/planner/planner.h" #include "./include/io/tdb.h" +#include "./include/const.h" +#include "./include/squel.h" #define METADATABUFFSIZE 10 - +// Globals :/ ResultSet* resultDescToPrint = NULL; +Options* OPTIONS; void printTree(Node *node) { @@ -49,8 +52,6 @@ void valueToChar(char* target, Tuple* tpl, size_t colOffset, Datatype type) { } - - void printTuple(Tuple* tpl) { if (resultDescToPrint == NULL) { @@ -73,6 +74,26 @@ void printTuple(Tuple* tpl) { } +Options* initOptions() { + OPTIONS = malloc(sizeof(Options)); + OPTIONS->htsize = HTSIZE; + return OPTIONS; +} + +size_t getOption(Option opt) { + printf("Getting opt!\n"); + switch(opt) { + case OPT_HTSIZE: + return OPTIONS->htsize; + } + + printf("getOption: Tried to retrieve an unknown option\n"); + exit(1); +} + + + + int main(int argc, char* argv[]) { if (argc == 1) { @@ -80,16 +101,42 @@ int main(int argc, char* argv[]) { exit(1); } - if (strlen(argv[1]) >= MAXQUERYSIZE) { + Options* opts = initOptions(); + + size_t query_arg = 1; + + // Loop through the arguments + for (int i = 1; i < argc; i++) { + + if (strcmp(argv[i], "--help") == 0) { + printf("Help: See README.md.\n"); + return 0; + } + else if (strcmp(argv[i], "--htsize") == 0) { + i++; + char* endptr; + size_t htsize = strtoull(argv[i], &endptr, 10); + + if (endptr == argv[i]) { + printf("--htsize expects an integer\n"); + exit(1); + } + + opts->htsize = htsize; + + query_arg += 2; + } + } + + + if (strlen(argv[query_arg]) >= MAXQUERYSIZE) { printf("Error: Query length exceeds maximum.\n"); exit(1); } /* Allocate memory for parse tree and parse the raw query */ Node* parsetree = createParsetree(); - parse(argv[1], parsetree); - - // printTree(parsetree); + parse(argv[query_arg], parsetree); // It's either a SELECT or a STMT Operator* queryplan = NULL; @@ -112,4 +159,6 @@ int main(int argc, char* argv[]) { freeQueryplan(queryplan); } + free(opts); + } \ No newline at end of file diff --git a/src/util/hashmap.c b/src/util/hashmap.c index 1bdb31b..6a7f90d 100644 --- a/src/util/hashmap.c +++ b/src/util/hashmap.c @@ -30,7 +30,7 @@ void _tryInsert(Hashmap* map __attribute__((unused)), const char* key, size_t va } else { - if (strcmp(key, node->key) == 0) { + if (strcmp(key, node->key) != 0) { printf("Collision %s (%ld) vs %s (%ld)\n", key, strlen(key), node->key, strlen(key)); if (!node->next) { node->next = calloc(1, sizeof(MapNode)); From 049a1880aae106497c6d8792fa2e08bc047adfa6 Mon Sep 17 00:00:00 2001 From: toby <43851547+toppyy@users.noreply.github.com> Date: Sun, 16 Mar 2025 08:49:20 +0200 Subject: [PATCH 24/29] handle collisions in hashmap --- src/include/util/hashmap.h | 2 ++ src/squel.c | 4 ---- src/util/hashmap.c | 38 ++++++++++++++++++++++++++++++-------- 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src/include/util/hashmap.h b/src/include/util/hashmap.h index b62e703..97a3bff 100644 --- a/src/include/util/hashmap.h +++ b/src/include/util/hashmap.h @@ -24,8 +24,10 @@ typedef struct { Hashmap* initHashmap(size_t table_size); void insertToHashmap(Hashmap* map, const char* key, size_t value); size_t isInHashmap(Hashmap* map, const char* value); +size_t _isInHashmap(Hashmap* map, MapNode* node, const char* key); void freeHashmap(Hashmap* map); size_t getValueFromHashmap(Hashmap* map, const char* key); +size_t _getValueFromHashmap(Hashmap* map, MapNode* node, const char* key); void resetCursor(Hashmap* map, const char* key); void _tryInsert(Hashmap* map, const char* key, size_t value, MapNode* node); diff --git a/src/squel.c b/src/squel.c index 24990aa..945ad14 100644 --- a/src/squel.c +++ b/src/squel.c @@ -81,7 +81,6 @@ Options* initOptions() { } size_t getOption(Option opt) { - printf("Getting opt!\n"); switch(opt) { case OPT_HTSIZE: return OPTIONS->htsize; @@ -91,9 +90,6 @@ size_t getOption(Option opt) { exit(1); } - - - int main(int argc, char* argv[]) { if (argc == 1) { diff --git a/src/util/hashmap.c b/src/util/hashmap.c index 6a7f90d..6acbd02 100644 --- a/src/util/hashmap.c +++ b/src/util/hashmap.c @@ -31,12 +31,11 @@ void _tryInsert(Hashmap* map __attribute__((unused)), const char* key, size_t va } else { if (strcmp(key, node->key) != 0) { - printf("Collision %s (%ld) vs %s (%ld)\n", key, strlen(key), node->key, strlen(key)); if (!node->next) { node->next = calloc(1, sizeof(MapNode)); } - _tryInsert(map, key, value, node); + _tryInsert(map, key, value, node->next); return; } } @@ -49,10 +48,22 @@ void _tryInsert(Hashmap* map __attribute__((unused)), const char* key, size_t va node->obs++; } +size_t _isInHashmap(Hashmap* map, MapNode* node, const char* key) { + if (strcmp(key, node->key) != 0) { + if (!node->next) { + return 0; + } + return _isInHashmap(map, node->next, key); + } + if (node->cursor == node->obs) return 0; + return node->obs > 0 ? 1 : 0; +} + + size_t isInHashmap(Hashmap* map, const char* key) { unsigned int idx = hash(key, map->table_size); - if (map->data[idx].cursor == map->data[idx].obs) return 0; - return map->data[idx].obs > 0 ? 1 : 0; + MapNode* node = &map->data[idx]; + return _isInHashmap(map, node, key); } @@ -63,13 +74,24 @@ void resetCursor(Hashmap* map, const char* key) { size_t getValueFromHashmap(Hashmap* map, const char* key) { unsigned int idx = hash(key, map->table_size); + MapNode* node = &map->data[idx]; + return _getValueFromHashmap(map, node, key); +} - if (map->data[idx].cursor == map->data[idx].obs) return -1; +size_t _getValueFromHashmap(Hashmap* map, MapNode* node, const char* key) { - size_t rtrn = map->data[idx].values[map->data[idx].cursor++]; + if (strcmp(key, node->key) != 0) { + if (!node->next) { + return 0; + } + + return _getValueFromHashmap(map, node->next, key); + } + + if (node->cursor == node->obs) return -1; + + return node->values[node->cursor++]; - - return rtrn; } void freeHashMapNode(MapNode* node) { From c17a722275e73673c2132ae626cdf7a71f17a13c Mon Sep 17 00:00:00 2001 From: toby <43851547+toppyy@users.noreply.github.com> Date: Sun, 16 Mar 2025 08:53:21 +0200 Subject: [PATCH 25/29] hashjoin with small ht-size --- test/test_simple_join.bats | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/test_simple_join.bats b/test/test_simple_join.bats index 57bc7d3..b5dc044 100644 --- a/test/test_simple_join.bats +++ b/test/test_simple_join.bats @@ -30,6 +30,11 @@ setup_file() { [[ $"${lines[5]}" == "" ]] } - +@test "Hashjoin with small hashtable" { + run ./build/squel --htsize 10 "SELECT COUNT(u.unemployed) FROM './test/data/lt_unemployed.csv' AS lt JOIN './test/data/unemployed.csv' AS u ON u.time=lt.time" + [[ $"${lines[0]}" == "unemployed" ]] + [[ $"${lines[1]}" == "213" ]] + [[ $"${lines[2]}" == "" ]] +} From 20e2893de9e4f9a74db4094deb2b078c2e6bf94e Mon Sep 17 00:00:00 2001 From: toby <43851547+toppyy@users.noreply.github.com> Date: Sun, 16 Mar 2025 08:58:02 +0200 Subject: [PATCH 26/29] update perf-stats --- perf/results/count.csv | 6 +++--- perf/results/join.csv | 12 ++++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/perf/results/count.csv b/perf/results/count.csv index b958cfb..5af0628 100644 --- a/perf/results/count.csv +++ b/perf/results/count.csv @@ -1,7 +1,7 @@ filetype;records;time -CSV;100000;0:00.04 +CSV;100000;0:00.03 TDB;100000;0:00.00 CSV;1000000;0:00.30 TDB;1000000;0:00.04 -CSV;10000000;0:02.96 -TDB;10000000;0:00.44 +CSV;10000000;0:03.36 +TDB;10000000;0:00.36 diff --git a/perf/results/join.csv b/perf/results/join.csv index d10b3b1..7760768 100644 --- a/perf/results/join.csv +++ b/perf/results/join.csv @@ -1,5 +1,9 @@ filetype;records_left;records_right;time -CSV;10000;100;0:00.02 -TDB;10000;100;0:00.00 -Command terminated by signal 9 -CSV;10000;1000;0:23.16 +CSV;10000;100;0:00.05 +TDB;10000;100;0:00.05 +CSV;10000;1000;0:00.50 +TDB;10000;1000;0:00.51 +CSV;100000;100;0:00.55 +TDB;100000;100;0:00.50 +CSV;100000;1000;0:05.08 +TDB;100000;1000;0:05.01 From a772d1334fae11b6e4b63f92db15a3bc98ba35b9 Mon Sep 17 00:00:00 2001 From: toby <43851547+toppyy@users.noreply.github.com> Date: Sun, 16 Mar 2025 10:02:00 +0200 Subject: [PATCH 27/29] add bats submodules --- .gitmodules | 6 ++++++ test/bats-core | 2 +- test/test-explain.bats | 19 +++++++++---------- test/test_helper/bats-assert | 1 + test/test_helper/bats-support | 1 + 5 files changed, 18 insertions(+), 11 deletions(-) create mode 160000 test/test_helper/bats-assert create mode 160000 test/test_helper/bats-support diff --git a/.gitmodules b/.gitmodules index 85e69a6..7cda21a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,9 @@ [submodule "bats"] path = test/bats-core url = https://github.com/bats-core/bats-core +[submodule "test/test_helper/bats-assert"] + path = test/test_helper/bats-assert + url = https://github.com/bats-core/bats-assert.git +[submodule "test/test_helper/bats-support"] + path = test/test_helper/bats-support + url = https://github.com/bats-core/bats-support.git diff --git a/test/bats-core b/test/bats-core index de96df0..261b029 160000 --- a/test/bats-core +++ b/test/bats-core @@ -1 +1 @@ -Subproject commit de96df03197ecc51635463fd9e35e26638191a90 +Subproject commit 261b029f3b3957a154f3e69abcbf19fe3e265c0a diff --git a/test/test-explain.bats b/test/test-explain.bats index 0c99167..80d9619 100644 --- a/test/test-explain.bats +++ b/test/test-explain.bats @@ -1,18 +1,15 @@ +#!/usr/bin/env bash -#!/usr/bin/env bats - -setup_file() { +setup() { + load './test_helper/bats-support/load' + load './test_helper/bats-assert/load' run make } -@test "Simple subquery \w WHERE" { +@test "EXPLAIN - subquery \w WHERE" { run ./build/squel "EXPLAIN SELECT col3 FROM (SELECT col3,col1 FROM './test/data/small.csv') WHERE col3>100" - [[ $"${lines[0]}" == "******* EXPLAIN **********" ]] - [[ $"${lines[1]}" == "OP_PROJECT" ]] - [[ $"${lines[2]}" == "OP_FILTER" ]] - [[ $"${lines[3]}" == "OP_PROJECT" ]] - [[ $"${lines[4]}" == "OP_SCAN" ]] - [[ $"${lines[5]}" == "**************************" ]] + expected_output=$(printf "******* EXPLAIN **********\nOP_PROJECT\nOP_FILTER\nOP_PROJECT\nOP_SCAN\n**************************") + assert_output "$expected_output" } @test "EXPLAIN - hash join" { @@ -38,3 +35,5 @@ setup_file() { [[ $"${lines[5]}" == "OP_SCANTDB" ]] [[ $"${lines[6]}" == "**************************" ]] } + + diff --git a/test/test_helper/bats-assert b/test/test_helper/bats-assert new file mode 160000 index 0000000..0ec504e --- /dev/null +++ b/test/test_helper/bats-assert @@ -0,0 +1 @@ +Subproject commit 0ec504eb523fd87af924ad77e1221ee4fb8c1596 diff --git a/test/test_helper/bats-support b/test/test_helper/bats-support new file mode 160000 index 0000000..9bf10e8 --- /dev/null +++ b/test/test_helper/bats-support @@ -0,0 +1 @@ +Subproject commit 9bf10e876dd6b624fe44423f0b35e064225f7556 From a73f4c7c2998fbb15a78657ce5a16bf7c1a14862 Mon Sep 17 00:00:00 2001 From: toby <43851547+toppyy@users.noreply.github.com> Date: Sun, 16 Mar 2025 10:06:01 +0200 Subject: [PATCH 28/29] use assert_output --- test/test-explain.bats | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/test/test-explain.bats b/test/test-explain.bats index 80d9619..996f70d 100644 --- a/test/test-explain.bats +++ b/test/test-explain.bats @@ -14,26 +14,15 @@ setup() { @test "EXPLAIN - hash join" { run ./build/squel "EXPLAIN SELECT col1,col3,int FROM test_small JOIN test_small2 ON col3=int" - [[ $"${lines[0]}" == "******* EXPLAIN **********" ]] - [[ $"${lines[1]}" == "OP_PROJECT" ]] - [[ $"${lines[2]}" == "OP_HASHJOIN" ]] - [[ $"${lines[3]}" == "OP_FILTER" ]] - [[ $"${lines[4]}" == "OP_SCANTDB" ]] - [[ $"${lines[5]}" == "OP_SCANTDB" ]] - [[ $"${lines[6]}" == "**************************" ]] -} - + expected_output=$(printf "******* EXPLAIN **********\nOP_PROJECT\nOP_HASHJOIN\nOP_FILTER\nOP_SCANTDB\nOP_SCANTDB\n**************************\n") + assert_output "$expected_output" +} @test "EXPLAIN - join with nested loop join" { run ./build/squel "EXPLAIN SELECT col1,col3,int FROM test_small JOIN test_small2 ON col3>int" - [[ $"${lines[0]}" == "******* EXPLAIN **********" ]] - [[ $"${lines[1]}" == "OP_PROJECT" ]] - [[ $"${lines[2]}" == "OP_JOIN" ]] - [[ $"${lines[3]}" == "OP_FILTER" ]] - [[ $"${lines[4]}" == "OP_SCANTDB" ]] - [[ $"${lines[5]}" == "OP_SCANTDB" ]] - [[ $"${lines[6]}" == "**************************" ]] + expected_output=$(printf "******* EXPLAIN **********\nOP_PROJECT\nOP_JOIN\nOP_FILTER\nOP_SCANTDB\nOP_SCANTDB\n**************************\n") + assert_output "$expected_output" } From e2bdf66c084196fa3540bcfa7d7060a4c73244ff Mon Sep 17 00:00:00 2001 From: toby <43851547+toppyy@users.noreply.github.com> Date: Sun, 16 Mar 2025 10:09:10 +0200 Subject: [PATCH 29/29] use csv-files for tests --- test/test-explain.bats | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test-explain.bats b/test/test-explain.bats index 996f70d..3b84ca6 100644 --- a/test/test-explain.bats +++ b/test/test-explain.bats @@ -13,15 +13,15 @@ setup() { } @test "EXPLAIN - hash join" { - run ./build/squel "EXPLAIN SELECT col1,col3,int FROM test_small JOIN test_small2 ON col3=int" - expected_output=$(printf "******* EXPLAIN **********\nOP_PROJECT\nOP_HASHJOIN\nOP_FILTER\nOP_SCANTDB\nOP_SCANTDB\n**************************\n") + run ./build/squel "EXPLAIN SELECT col1,col3,int FROM './test/data/small.csv' JOIN './test/data/small2.csv' ON col3=int" + expected_output=$(printf "******* EXPLAIN **********\nOP_PROJECT\nOP_HASHJOIN\nOP_FILTER\nOP_SCAN\nOP_SCAN\n**************************\n") assert_output "$expected_output" } @test "EXPLAIN - join with nested loop join" { - run ./build/squel "EXPLAIN SELECT col1,col3,int FROM test_small JOIN test_small2 ON col3>int" - expected_output=$(printf "******* EXPLAIN **********\nOP_PROJECT\nOP_JOIN\nOP_FILTER\nOP_SCANTDB\nOP_SCANTDB\n**************************\n") + run ./build/squel "EXPLAIN SELECT col1,col3,int FROM './test/data/small.csv' JOIN './test/data/small2.csv' ON col3>int" + expected_output=$(printf "******* EXPLAIN **********\nOP_PROJECT\nOP_JOIN\nOP_FILTER\nOP_SCAN\nOP_SCAN\n**************************\n") assert_output "$expected_output" }