From 41ff0f24c19767137deee39b2390d4410772c1b7 Mon Sep 17 00:00:00 2001
From: toppyy <43851547+toppyy@users.noreply.github.com>
Date: Tue, 4 Mar 2025 23:54:13 +0200
Subject: [PATCH 01/29] operators return a pointer to tuple

---
 src/executor/executor.c           | 10 +++----
 src/executor/statements/insert.c  |  4 +--
 src/executor/tuple.c              |  8 +++++
 src/include/executor/executor.h   |  2 +-
 src/include/executor/tuple.h      | 11 +++++++
 src/include/operators/aggregate.h |  4 ++-
 src/include/operators/filter.h    |  3 +-
 src/include/operators/join.h      |  3 +-
 src/include/operators/project.h   |  3 +-
 src/include/operators/scan.h      |  3 +-
 src/include/operators/scanTDB.h   |  3 +-
 src/include/planner/planner.h     |  3 +-
 src/operators/aggregate.c         | 50 +++++++++++++++++--------------
 src/operators/filter.c            | 15 +++++-----
 src/operators/join.c              |  8 +++--
 src/operators/project.c           | 10 ++-----
 src/operators/scan.c              | 10 ++++---
 src/operators/scanTDB.c           | 11 +++++--
 src/squel.c                       | 28 +++++++++++++++--
 19 files changed, 125 insertions(+), 64 deletions(-)
 create mode 100644 src/executor/tuple.c
 create mode 100644 src/include/executor/tuple.h

diff --git a/src/executor/executor.c b/src/executor/executor.c
index 726676e..a4ed7ca 100644
--- a/src/executor/executor.c
+++ b/src/executor/executor.c
@@ -57,7 +57,7 @@ void doAssignGetTupleFunction(Operator* p_op) {
 }
 
 
-void execute(Operator* op, bool printColNames, void (*tupleHandler)(int pooloffset)) {
+void execute(Operator* op, bool printColNames, void (*tupleHandler)(Tuple* tpl)) {
 
     if (op == NULL) {
         return;
@@ -86,12 +86,12 @@ void execute(Operator* op, bool printColNames, void (*tupleHandler)(int pooloffs
     }
 
     // Get tuples one by one
-    int offset;
+    Tuple* tpl;
     for (;;) {
-        offset = op->getTuple(op);
-        if (offset == -1) break;
+        tpl = op->getTuple(op);
+        if (tpl == NULL) break;
 
-        tupleHandler(offset);
+        tupleHandler(tpl);
     };
 
     free(buffpool->pool);
diff --git a/src/executor/statements/insert.c b/src/executor/statements/insert.c
index c443c70..277725f 100644
--- a/src/executor/statements/insert.c
+++ b/src/executor/statements/insert.c
@@ -5,14 +5,14 @@ size_t tupleSize = 0;
 FILE* f = NULL;
 
 
-void handleTupleInsert(int offset) {
+void handleTupleInsert(Tuple* tpl) {
 
     if (f == NULL) {
         printf("No file to insert to\n");
         exit(1);
     }
 
-    size_t bytesWritten = fwrite(getTuple(offset), tupleSize, 1, f);
+    size_t bytesWritten = fwrite(tpl->data, tupleSize, 1, f);
     assert(bytesWritten > 0);
 }
 
diff --git a/src/executor/tuple.c b/src/executor/tuple.c
new file mode 100644
index 0000000..398931c
--- /dev/null
+++ b/src/executor/tuple.c
@@ -0,0 +1,8 @@
+#include "../include/executor/tuple.h"
+
+
+Tuple* initTuple(size_t size) {
+    Tuple* tpl = malloc(sizeof(Tuple)); // Heap allocation
+    tpl->size = size;
+    return tpl;
+}
\ No newline at end of file
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 6cb1b0c..a0abaee 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -19,7 +19,7 @@ extern char *bufferscan;
 
 extern Bufferpool* buffpool;
 
-void execute(Operator* op, bool printColNames, void (*tupleHandler)(int pooloffset));
+void execute(Operator* op, bool printColNames, void (*tupleHandler)(Tuple* tpl));
 void executeStatement(Node* node);
 void executeCreateTable(Node* node);
 void executeInsert(Node* node);
\ No newline at end of file
diff --git a/src/include/executor/tuple.h b/src/include/executor/tuple.h
new file mode 100644
index 0000000..6170267
--- /dev/null
+++ b/src/include/executor/tuple.h
@@ -0,0 +1,11 @@
+#pragma once
+#include <stddef.h>
+#include <stdlib.h>
+
+typedef struct  {
+    void* data;
+    size_t size;
+} Tuple;
+
+
+Tuple* initTuple(size_t size);
\ No newline at end of file
diff --git a/src/include/operators/aggregate.h b/src/include/operators/aggregate.h
index 82060cc..fd8f2db 100644
--- a/src/include/operators/aggregate.h
+++ b/src/include/operators/aggregate.h
@@ -3,5 +3,7 @@
 #include "../bufferpool/bufferpool.h"
 #include "../planner/planner.h"
 #include "../executor/executor.h"
+#include "../executor/tuple.h"
 
-int aggregateGetTuple(Operator* op);
\ No newline at end of file
+
+Tuple* aggregateGetTuple(Operator* op);
\ No newline at end of file
diff --git a/src/include/operators/filter.h b/src/include/operators/filter.h
index 17c5eb1..c969599 100644
--- a/src/include/operators/filter.h
+++ b/src/include/operators/filter.h
@@ -2,6 +2,7 @@
 #include <stdbool.h>
 #include "../bufferpool/bufferpool.h"
 #include "../planner/planner.h"
+#include "../executor/tuple.h"
 
-int filterGetTuple(Operator* op);
+Tuple* filterGetTuple(Operator* op);
 bool evaluateTuplesAgainstFilterOps(int poolOffset1, int poolOffset2, Operator* op);
\ No newline at end of file
diff --git a/src/include/operators/join.h b/src/include/operators/join.h
index d6e7983..25ffebe 100644
--- a/src/include/operators/join.h
+++ b/src/include/operators/join.h
@@ -2,5 +2,6 @@
 #include "../bufferpool/bufferpool.h"
 #include "../planner/planner.h"
 #include "../executor/executor.h"
+#include "../executor/tuple.h"
 
-int joinGetTuple(Operator* op);
\ No newline at end of file
+Tuple* joinGetTuple(Operator* op);
\ No newline at end of file
diff --git a/src/include/operators/project.h b/src/include/operators/project.h
index ad551fb..6a228d9 100644
--- a/src/include/operators/project.h
+++ b/src/include/operators/project.h
@@ -1,5 +1,6 @@
 #pragma once
 #include "../bufferpool/bufferpool.h"
 #include "../planner/planner.h"
+#include "../executor/tuple.h"
 
-int projectGetTuple(Operator* op);
\ No newline at end of file
+Tuple* projectGetTuple(Operator* op);
\ No newline at end of file
diff --git a/src/include/operators/scan.h b/src/include/operators/scan.h
index 1e223c0..f43aa63 100644
--- a/src/include/operators/scan.h
+++ b/src/include/operators/scan.h
@@ -2,5 +2,6 @@
 #include "../bufferpool/bufferpool.h"
 #include "../planner/planner.h"
 #include "../executor/executor.h"
+#include "../executor/tuple.h"
 
-int scanGetTuple(Operator* op);
\ No newline at end of file
+Tuple* scanGetTuple(Operator* op);
\ No newline at end of file
diff --git a/src/include/operators/scanTDB.h b/src/include/operators/scanTDB.h
index 692df5f..8b97cdb 100644
--- a/src/include/operators/scanTDB.h
+++ b/src/include/operators/scanTDB.h
@@ -2,6 +2,7 @@
 #include "../bufferpool/bufferpool.h"
 #include "../planner/planner.h"
 #include "../executor/executor.h"
+#include "../executor/tuple.h"
 #include <errno.h>
 
-int scanTDBGetTuple(Operator* op);
\ No newline at end of file
+Tuple* scanTDBGetTuple(Operator* op);
\ No newline at end of file
diff --git a/src/include/planner/planner.h b/src/include/planner/planner.h
index d99a45f..620eb25 100644
--- a/src/include/planner/planner.h
+++ b/src/include/planner/planner.h
@@ -6,6 +6,7 @@
 #include "../io/tdb.h"
 #include "../parser/utils.h"
 #include "../parser/parsetree.h"
+#include "../executor/tuple.h"
 
 
 typedef enum {
@@ -127,7 +128,7 @@ typedef struct Operator {
     ResultSet resultDescription;
     int iteratorTupleOffset;
     struct Operator* child;
-    int (*getTuple) (struct Operator* op);
+    Tuple* (*getTuple) (struct Operator* op);
 } Operator;
 
 void freeQueryplan(Operator *node);
diff --git a/src/operators/aggregate.c b/src/operators/aggregate.c
index dcacb28..13deb22 100644
--- a/src/operators/aggregate.c
+++ b/src/operators/aggregate.c
@@ -1,10 +1,10 @@
 #include "../include/operators/aggregate.h"
 
 long doCount(Operator* opToIterate) {
-    int offset = opToIterate->getTuple(opToIterate);
+    Tuple* tpl = opToIterate->getTuple(opToIterate);
     int result = 0;
-    while (offset >= 0) {
-        offset = opToIterate->getTuple(opToIterate);
+    while (tpl != NULL) {
+        tpl = opToIterate->getTuple(opToIterate);
         result++;
     };
     
@@ -14,16 +14,16 @@ long doCount(Operator* opToIterate) {
 long doAverage(Operator* opToIterate, size_t colOffset) {
 
 
-    int offset = 0;
+    Tuple* tpl;
     long sum = 0;
     long count = 0;
 
     for (;;) {
-        offset = opToIterate->getTuple(opToIterate);
-        if (offset == -1) {
+        tpl = opToIterate->getTuple(opToIterate);
+        if (tpl == NULL) {
             break;
         }
-        sum += *(long*) getCol(offset,colOffset);
+        sum += *(long*) tpl->data + colOffset;
         count++;
     };
     long result = 0.0; 
@@ -36,15 +36,15 @@ long doAverage(Operator* opToIterate, size_t colOffset) {
 long doSum(Operator* opToIterate, size_t colOffset) {
 
 
-    int offset = 0;
+    Tuple* tpl ;
     long long result = 0;
 
     for (;;) {
-        offset = opToIterate->getTuple(opToIterate);
-        if (offset == -1) {
+        tpl = opToIterate->getTuple(opToIterate);
+        if (tpl == NULL) {
             break;
         }
-        result += *(long*) getCol(offset,colOffset);
+        result += *(long*) tpl->data + colOffset;
 
     };
 
@@ -54,15 +54,15 @@ long doSum(Operator* opToIterate, size_t colOffset) {
 long doMax(Operator* opToIterate, size_t colOffset) {
 
 
-    int offset = 0;
+    Tuple* tpl;
     long result = 0, tmp = 0;
 
     for (;;) {
-        offset = opToIterate->getTuple(opToIterate);
-        if (offset == -1) {
+        tpl = opToIterate->getTuple(opToIterate);
+        if (tpl == NULL) {
             break;
         }
-        tmp = *(long*) getCol(offset,colOffset);
+        tmp = *(long*) tpl->data + colOffset;
         result = tmp > result ? tmp : result;
 
     };
@@ -73,15 +73,15 @@ long doMax(Operator* opToIterate, size_t colOffset) {
 long doMin(Operator* opToIterate, size_t colOffset) {
 
 
-    int offset = 0;
+    Tuple* tpl;
     long result = __LONG_MAX__, tmp = 0;
 
     for (;;) {
-        offset = opToIterate->getTuple(opToIterate);
-        if (offset == -1) {
+        tpl = opToIterate->getTuple(opToIterate);
+        if (tpl == NULL) {
             break;
         }
-        tmp = *(long*) getCol(offset,colOffset);
+        tmp = *(long*) tpl->data + colOffset;
         result = tmp < result ? tmp : result;
 
     };
@@ -91,13 +91,13 @@ long doMin(Operator* opToIterate, size_t colOffset) {
 
 
 
-int aggregateGetTuple(Operator* op) {
+Tuple* aggregateGetTuple(Operator* op) {
     
     checkPtrNotNull(op->child, "OP_AGGREGATE has no child.");
     checkPtrNotNull(op->child->getTuple, "Child of OP_AGGREGATE has no getTuple-method.");
 
     if (op->info.aggregate.aggregationDone) {
-        return -1;
+        return NULL;
     }
 
     // TODO:
@@ -137,5 +137,11 @@ int aggregateGetTuple(Operator* op) {
     op->info.aggregate.aggregationDone = true;
     
 
-    return addToBufferPool(&result, sizeof(result));
+    Tuple* tpl = initTuple(sizeof(result));
+
+    long* res_ptr = malloc(sizeof(result));
+    
+    *res_ptr = result;
+
+    return tpl;
 }
diff --git a/src/operators/filter.c b/src/operators/filter.c
index 475b1ab..34ccc38 100644
--- a/src/operators/filter.c
+++ b/src/operators/filter.c
@@ -176,7 +176,7 @@ bool evaluateTuplesAgainstFilterOps(int poolOffset1, int poolOffset2, Operator*
     return rtrnValue;
 }
 
-int filterGetTuple(Operator* op) {
+Tuple* filterGetTuple(Operator* op) {
 
     if (op == NULL) {
         printf("FILTER_OP: Passed a NULL-pointer to filterGetTuple\n");
@@ -198,20 +198,21 @@ int filterGetTuple(Operator* op) {
     }
 
 
-    int poolOffset = 0;
+    Tuple* tpl = 0;
 
     while (true) {
         /* Get new tuples until found something that passes the filter */
 
-        poolOffset = op->child->getTuple(op->child);
+        tpl = op->child->getTuple(op->child);
 
-        if (poolOffset == -1) {
-            return -1;
+        if (tpl == NULL) {
+            break;
         }
 
-        if (evaluateTuplesAgainstFilterOps(poolOffset, poolOffset, op)) break;
+        if (evaluateTuplesAgainstFilterOps(0, 0, op)) break;
 
 
     }
-    return poolOffset;
+
+    return tpl;
 }
diff --git a/src/operators/join.c b/src/operators/join.c
index 69b5076..7912cb6 100644
--- a/src/operators/join.c
+++ b/src/operators/join.c
@@ -19,7 +19,7 @@ void concatTuples(int tupleOffset,int leftOffset,int rightOffset, ResultSet* lef
 
 }
 
-int joinGetTuple(Operator* op) {
+Tuple* joinGetTuple(Operator* op) {
 
     if (
         op->info.join.left == NULL ||
@@ -42,8 +42,9 @@ int joinGetTuple(Operator* op) {
         
 
     */
-
-
+    return NULL;
+    
+    /*
     int rightTupleOffset = 0, originalOffset;
     // Reuse this and only create a new tuple if it passes the filter
     int offset = 0;
@@ -115,5 +116,6 @@ int joinGetTuple(Operator* op) {
             return op->iteratorTupleOffset;
         }
     } while(true);
+    */
 
 }
diff --git a/src/operators/project.c b/src/operators/project.c
index 009f6ff..858c8ab 100644
--- a/src/operators/project.c
+++ b/src/operators/project.c
@@ -1,6 +1,6 @@
 #include "../include/operators/project.h"
 
-int projectGetTuple(Operator* op) {
+Tuple* projectGetTuple(Operator* op) {
 
     checkPtrNotNull(op->child, "OP_PROJECT has no child");
     checkPtrNotNull(op->child->getTuple, "Child of OP_PROJECT has no getTuple-method");
@@ -13,11 +13,5 @@ int projectGetTuple(Operator* op) {
         This is an unfortunate extra function call :(
     */
     
-    int pooloffset = op->child->getTuple(op->child);
-
-    if (pooloffset == -1) {
-        return -1;
-    }
-
-    return pooloffset;
+    return op->child->getTuple(op->child);
 }
\ No newline at end of file
diff --git a/src/operators/scan.c b/src/operators/scan.c
index 40082d9..09cd80b 100644
--- a/src/operators/scan.c
+++ b/src/operators/scan.c
@@ -1,6 +1,6 @@
 #include "../include/operators/scan.h"
 
-int scanGetTuple(Operator* op) {
+Tuple* scanGetTuple(Operator* op) {
 
     checkPtrNotNull(op, "NULL pointer passed to scanGetTuple");
 
@@ -24,7 +24,7 @@ int scanGetTuple(Operator* op) {
      if (line == NULL) {
         free(lineBuffer);
         fclose(op->info.scan.tablefile);
-        return -1;
+        return NULL;
     }
 
 
@@ -147,8 +147,10 @@ int scanGetTuple(Operator* op) {
     op->resultDescription.size = tplSize;
 
     free(lineBuffer);
-    free(diskBuffer);
 
-    return op->iteratorTupleOffset;
+    Tuple* tpl = initTuple(tplSize);
+    tpl->data = diskBuffer;
+
+    return tpl;
 }
 
diff --git a/src/operators/scanTDB.c b/src/operators/scanTDB.c
index 88dfd0b..3de5574 100644
--- a/src/operators/scanTDB.c
+++ b/src/operators/scanTDB.c
@@ -29,13 +29,13 @@ void fillBuffer(Operator* op) {
 
 }
 
-int scanTDBGetTuple(Operator* op) {
+Tuple* scanTDBGetTuple(Operator* op) {
 
     checkPtrNotNull(op, "NULL pointer passed to scanTDBGetTuple");
 
     if (op->info.scan.fileRead && op->info.scan.recordsInBuffer == 0) {
         free(op->info.scan.buffer);
-        return -1;
+        return NULL;
     }
 
     if (op->info.scan.recordsInBuffer == 0) {
@@ -52,5 +52,10 @@ int scanTDBGetTuple(Operator* op) {
     } else {
         copyToBufferPool(op->iteratorTupleOffset, op->info.scan.buffer + bufferDataOffset, op->info.scan.recordSize);
     }
-    return op->iteratorTupleOffset;
+
+
+    Tuple* tpl = initTuple(op->info.scan.recordSize);
+    tpl->data = op->info.scan.buffer + bufferDataOffset;
+
+    return tpl;
 }
\ No newline at end of file
diff --git a/src/squel.c b/src/squel.c
index 83571dd..8469934 100644
--- a/src/squel.c
+++ b/src/squel.c
@@ -28,7 +28,31 @@ void printTree(Node *node) {
     }
 }
 
-void printTuple(int offset) {
+void valueToChar(char* target, Tuple* tpl, size_t colOffset, Datatype type) {
+    if (type == DTYPE_STR) {
+        strcpy(target, tpl->data + colOffset);
+        return;
+    }
+    if (type == DTYPE_INT) {
+        char tmp[CHARMAXSIZE];
+        sprintf(tmp, "%d", *(int*) (tpl->data + colOffset));
+        memcpy(target, tmp, strlen(tmp));
+        return;
+    }
+    if (type == DTYPE_LONG) {
+        char tmp[CHARMAXSIZE];
+        sprintf(tmp, "%ld", *(long*) (tpl->data + colOffset));
+        memcpy(target, tmp, strlen(tmp));
+        return;
+    }
+    printf("Don't know how to represent type %d as char\n", type);
+    exit(1);
+}
+
+
+
+
+void printTuple(Tuple* tpl) {
 
     if (resultDescToPrint == NULL) {
         printf("No result set to print?\n");
@@ -39,7 +63,7 @@ void printTuple(int offset) {
 
     for (size_t i = 0; i < resultDescToPrint->columnCount; i++) {
         memset(buff, 0, CHARMAXSIZE);        
-        getColAsChar(buff, offset ,resultDescToPrint->pCols[i], resultDescToPrint->columns[i].type);
+        valueToChar(buff, tpl ,resultDescToPrint->pCols[i], resultDescToPrint->columns[i].type);
 
         if (i == 0) printf("%s",buff);
         else printf(";%s",buff);

From 07407e82745d5e89332b0ef8841e077761a7a730 Mon Sep 17 00:00:00 2001
From: toppyy <43851547+toppyy@users.noreply.github.com>
Date: Wed, 5 Mar 2025 00:02:47 +0200
Subject: [PATCH 02/29] filter uses Tuple

---
 src/executor/tuple.c           |  4 ++++
 src/include/executor/tuple.h   |  4 +++-
 src/include/operators/filter.h |  2 +-
 src/operators/filter.c         | 32 ++++++++++++++++----------------
 4 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/src/executor/tuple.c b/src/executor/tuple.c
index 398931c..ed2143f 100644
--- a/src/executor/tuple.c
+++ b/src/executor/tuple.c
@@ -5,4 +5,8 @@ Tuple* initTuple(size_t size) {
     Tuple* tpl = malloc(sizeof(Tuple)); // Heap allocation
     tpl->size = size;
     return tpl;
+}
+
+void* getTupleCol(Tuple* tpl, size_t colOffset) {
+    return tpl->data + colOffset;
 }
\ No newline at end of file
diff --git a/src/include/executor/tuple.h b/src/include/executor/tuple.h
index 6170267..6e3978f 100644
--- a/src/include/executor/tuple.h
+++ b/src/include/executor/tuple.h
@@ -8,4 +8,6 @@ typedef struct  {
 } Tuple;
 
 
-Tuple* initTuple(size_t size);
\ No newline at end of file
+Tuple* initTuple(size_t size);
+
+void* getTupleCol(Tuple* tpl, size_t colOffset);
\ No newline at end of file
diff --git a/src/include/operators/filter.h b/src/include/operators/filter.h
index c969599..f4ff59e 100644
--- a/src/include/operators/filter.h
+++ b/src/include/operators/filter.h
@@ -5,4 +5,4 @@
 #include "../executor/tuple.h"
 
 Tuple* filterGetTuple(Operator* op);
-bool evaluateTuplesAgainstFilterOps(int poolOffset1, int poolOffset2, Operator* op);
\ No newline at end of file
+bool evaluateTuplesAgainstFilterOps(Tuple* tpl1, Tuple* tpl2, Operator* op);
\ No newline at end of file
diff --git a/src/operators/filter.c b/src/operators/filter.c
index 34ccc38..13d9d54 100644
--- a/src/operators/filter.c
+++ b/src/operators/filter.c
@@ -1,13 +1,13 @@
 #include "../include/operators/filter.h"
 
 
-bool evaluateTupleAgainstFilterOp(int poolOffset1, int poolOffset2, Operator* op) {
+bool evaluateTupleAgainstFilterOp(Tuple* tpl1, Tuple* tpl2, Operator* op) {
 
-    if (poolOffset1 == -1) {
+    if (tpl1 == NULL) {
         return false;
     }
 
-    if (poolOffset2 == -1) {
+    if (tpl2 == NULL) {
         return false;
     }
 
@@ -44,18 +44,18 @@ bool evaluateTupleAgainstFilterOp(int poolOffset1, int poolOffset2, Operator* op
         switch (dtype1)   {
             case DTYPE_STR:
                 cmpRes = strcmp(
-                    (char*) getCol(poolOffset1,idx1Offset),
-                    (char*) getCol(poolOffset2,idx2Offset)
+                    (char*) getTupleCol(tpl1,idx1Offset),
+                    (char*) getTupleCol(tpl2,idx2Offset)
                 );
                 break;
             case DTYPE_INT:
-                int number1 = *(int*) getCol(poolOffset1,idx1Offset);
-                int number2 = *(int*) getCol(poolOffset2,idx2Offset);
+                int number1 = *(int*) getTupleCol(tpl1,idx1Offset);
+                int number2 = *(int*) getTupleCol(tpl2,idx2Offset);
                 cmpRes = number1 - number2;
                 break;
             case DTYPE_LONG:
-                long lnumber1 = *(long*) getCol(poolOffset1,idx1Offset);
-                long lnumber2 = *(long*) getCol(poolOffset2,idx2Offset);
+                long lnumber1 = *(long*) getTupleCol(tpl1,idx1Offset);
+                long lnumber2 = *(long*) getTupleCol(tpl2,idx2Offset);
                 cmpRes = lnumber1 - lnumber2;
                 break;
             default:
@@ -87,14 +87,14 @@ bool evaluateTupleAgainstFilterOp(int poolOffset1, int poolOffset2, Operator* op
         Datatype constDatatype  = dtype2;
         size_t colOffset   = idx1Offset;
         size_t constIdx = 2;
-        int poolOffset = poolOffset1;
+        Tuple* tpl = tpl1;
         
         if (compType == CMP_CONST_COL) {
             // Guess was wrong, fix it
             constDatatype   = dtype1;
             constIdx        = 0;
             colOffset       = idx2Offset;
-            poolOffset      = poolOffset2;
+            tpl             = tpl2;
         }
         // Now we have to only deal with 4 combinations of all the eight possible
         // 'cause datatypes must match
@@ -105,10 +105,10 @@ bool evaluateTupleAgainstFilterOp(int poolOffset1, int poolOffset2, Operator* op
         //      DTYPE_INT vs. IDENT_COL + NUMBER
         switch (constDatatype) {
             case DTYPE_STR:
-                cmpRes = strcmp(op->info.filter.charConstants[constIdx], getCol(poolOffset,colOffset));
+                cmpRes = strcmp(op->info.filter.charConstants[constIdx], getTupleCol(tpl,colOffset));
                 break;
             case DTYPE_LONG:
-                long colNumber = *(long*) getCol(poolOffset,colOffset);
+                long colNumber = *(long*) getTupleCol(tpl,colOffset);
                 long constNumber = (long) op->info.filter.numConstants[constIdx];
                 // Order matters here
                 if (constIdx == 0) {
@@ -145,7 +145,7 @@ bool evaluateTupleAgainstFilterOp(int poolOffset1, int poolOffset2, Operator* op
     return matches;
 }
 
-bool evaluateTuplesAgainstFilterOps(int poolOffset1, int poolOffset2, Operator* op) {
+bool evaluateTuplesAgainstFilterOps(Tuple* tpl1, Tuple* tpl2, Operator* op) {
 
     bool rtrnValue = true,
          result = true;
@@ -155,7 +155,7 @@ bool evaluateTuplesAgainstFilterOps(int poolOffset1, int poolOffset2, Operator*
 
     while (p_op != NULL) {
         
-        result = evaluateTupleAgainstFilterOp(poolOffset1, poolOffset2, p_op);
+        result = evaluateTupleAgainstFilterOp(tpl1, tpl2, p_op);
 
         switch (boolOp) {
             case AND:
@@ -209,7 +209,7 @@ Tuple* filterGetTuple(Operator* op) {
             break;
         }
 
-        if (evaluateTuplesAgainstFilterOps(0, 0, op)) break;
+        if (evaluateTuplesAgainstFilterOps(tpl, tpl, op)) break;
 
 
     }

From ec8bd3684e497c76cda12f5f6e409d9d7dcfd886 Mon Sep 17 00:00:00 2001
From: toppyy <43851547+toppyy@users.noreply.github.com>
Date: Wed, 5 Mar 2025 20:33:47 +0200
Subject: [PATCH 03/29] free tuples after use

---
 src/executor/executor.c      | 4 +++-
 src/executor/tuple.c         | 7 +++++++
 src/include/executor/tuple.h | 4 +++-
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/executor/executor.c b/src/executor/executor.c
index a4ed7ca..2714c4a 100644
--- a/src/executor/executor.c
+++ b/src/executor/executor.c
@@ -1,5 +1,5 @@
 #include "../include/executor/executor.h"
-
+#include "../include/executor/tuple.h"
 
 Bufferpool* buffpool;
 
@@ -92,6 +92,8 @@ void execute(Operator* op, bool printColNames, void (*tupleHandler)(Tuple* tpl))
         if (tpl == NULL) break;
 
         tupleHandler(tpl);
+
+        freeTuple(tpl);
     };
 
     free(buffpool->pool);
diff --git a/src/executor/tuple.c b/src/executor/tuple.c
index ed2143f..99049fb 100644
--- a/src/executor/tuple.c
+++ b/src/executor/tuple.c
@@ -9,4 +9,11 @@ Tuple* initTuple(size_t size) {
 
 void* getTupleCol(Tuple* tpl, size_t colOffset) {
     return tpl->data + colOffset;
+}
+
+void freeTuple(Tuple* tpl) {
+    if (tpl->data) {
+        free(tpl->data);
+    }
+    free(tpl);
 }
\ No newline at end of file
diff --git a/src/include/executor/tuple.h b/src/include/executor/tuple.h
index 6e3978f..acb3320 100644
--- a/src/include/executor/tuple.h
+++ b/src/include/executor/tuple.h
@@ -10,4 +10,6 @@ typedef struct  {
 
 Tuple* initTuple(size_t size);
 
-void* getTupleCol(Tuple* tpl, size_t colOffset);
\ No newline at end of file
+void* getTupleCol(Tuple* tpl, size_t colOffset);
+
+void freeTuple(Tuple* tpl);
\ No newline at end of file

From d2ec05693f3d4ede0de753125f346e3c8a9cd6a7 Mon Sep 17 00:00:00 2001
From: toppyy <43851547+toppyy@users.noreply.github.com>
Date: Wed, 5 Mar 2025 20:38:08 +0200
Subject: [PATCH 04/29] fix aggregate tuple data; drop bufferpool write in scan

---
 src/operators/aggregate.c | 4 +---
 src/operators/scan.c      | 8 --------
 2 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/src/operators/aggregate.c b/src/operators/aggregate.c
index 13deb22..605f52f 100644
--- a/src/operators/aggregate.c
+++ b/src/operators/aggregate.c
@@ -136,12 +136,10 @@ Tuple* aggregateGetTuple(Operator* op) {
     op->resultDescription.pCols[0] = 0;
     op->info.aggregate.aggregationDone = true;
     
-
     Tuple* tpl = initTuple(sizeof(result));
-
     long* res_ptr = malloc(sizeof(result));
-    
     *res_ptr = result;
+    tpl->data = res_ptr;
 
     return tpl;
 }
diff --git a/src/operators/scan.c b/src/operators/scan.c
index 09cd80b..bc93cdd 100644
--- a/src/operators/scan.c
+++ b/src/operators/scan.c
@@ -117,14 +117,6 @@ Tuple* scanGetTuple(Operator* op) {
         i++;
     };
 
-    // Write to bufferpool
-    if (op->iteratorTupleOffset == -1) {
-        op->iteratorTupleOffset = addToBufferPool(diskBuffer, tplSize);
-    } else {
-        copyToBufferPool(op->iteratorTupleOffset, diskBuffer, tplSize);
-    }
-
-
     // // ---------------- Useful for debuggin. Leave it be for a while ------------------
     // tpldata = diskBuffer;
     // printf("tpldata at: ", diskBuffer);

From cf3cb6813b039f0868900ce77e2029b7db1b1d2b Mon Sep 17 00:00:00 2001
From: toppyy <43851547+toppyy@users.noreply.github.com>
Date: Wed, 5 Mar 2025 21:25:26 +0200
Subject: [PATCH 05/29] tuplebuffer added; some joins work now

---
 src/executor/tuple.c               |  4 +-
 src/executor/tuplebuffer.c         | 34 +++++++++++++
 src/include/executor/tuple.h       |  4 +-
 src/include/executor/tuplebuffer.h | 15 ++++++
 src/include/operators/join.h       |  2 +
 src/include/planner/planner.h      |  9 ++--
 src/operators/aggregate.c          |  2 +-
 src/operators/join.c               | 81 ++++++++++++------------------
 src/operators/scan.c               |  2 +-
 src/operators/scanTDB.c            |  2 +-
 src/planner/operators/join.c       |  1 -
 11 files changed, 94 insertions(+), 62 deletions(-)
 create mode 100644 src/executor/tuplebuffer.c
 create mode 100644 src/include/executor/tuplebuffer.h

diff --git a/src/executor/tuple.c b/src/executor/tuple.c
index 99049fb..3436d47 100644
--- a/src/executor/tuple.c
+++ b/src/executor/tuple.c
@@ -1,9 +1,9 @@
 #include "../include/executor/tuple.h"
 
 
-Tuple* initTuple(size_t size) {
+Tuple* initTuple() {
     Tuple* tpl = malloc(sizeof(Tuple)); // Heap allocation
-    tpl->size = size;
+    tpl->size = 0;
     return tpl;
 }
 
diff --git a/src/executor/tuplebuffer.c b/src/executor/tuplebuffer.c
new file mode 100644
index 0000000..c7a0314
--- /dev/null
+++ b/src/executor/tuplebuffer.c
@@ -0,0 +1,34 @@
+#include "../include/executor/tuplebuffer.h"
+
+
+TupleBuffer* initTupleBuffer(size_t p_capacity) {
+    TupleBuffer* buff = malloc(sizeof(TupleBuffer));
+    buff->capacity  = p_capacity;
+    buff->tuples    = malloc(p_capacity * sizeof(Tuple*));
+    buff->size      = 0;
+    return buff;
+}
+
+
+void resizeTupleBuffer(TupleBuffer* buff) {
+    buff->capacity *= 2;
+    buff->tuples = realloc(buff->tuples, buff->capacity * sizeof(Tuple));
+}
+
+void addTupleToBuffer(Tuple* tpl, TupleBuffer* buff) {
+    if (buff->size == buff->capacity) {
+        resizeTupleBuffer(buff);
+    }
+
+    buff->tuples[buff->size++] = tpl;
+}
+
+void freeTupleBuffer(TupleBuffer* buff) {
+    free(buff->tuples);
+    free(buff);
+}
+
+
+Tuple* getTupleByIndex(TupleBuffer* buff, size_t idx) {
+    return buff->tuples[idx];
+}
\ No newline at end of file
diff --git a/src/include/executor/tuple.h b/src/include/executor/tuple.h
index acb3320..2e32012 100644
--- a/src/include/executor/tuple.h
+++ b/src/include/executor/tuple.h
@@ -8,7 +8,9 @@ typedef struct  {
 } Tuple;
 
 
-Tuple* initTuple(size_t size);
+
+
+Tuple* initTuple();
 
 void* getTupleCol(Tuple* tpl, size_t colOffset);
 
diff --git a/src/include/executor/tuplebuffer.h b/src/include/executor/tuplebuffer.h
new file mode 100644
index 0000000..f26839a
--- /dev/null
+++ b/src/include/executor/tuplebuffer.h
@@ -0,0 +1,15 @@
+#pragma once
+#include "tuple.h"
+
+typedef struct {
+    Tuple** tuples;
+    size_t size;
+    size_t capacity;
+} TupleBuffer;
+
+TupleBuffer* initTupleBuffer(size_t p_capacity);
+void resizeTupleBuffer(TupleBuffer* buff);
+void addTupleToBuffer(Tuple* tpl, TupleBuffer* buff);
+void freeTupleBuffer(TupleBuffer* buff);
+Tuple* getTupleByIndex(TupleBuffer* buff, size_t idx);
+
diff --git a/src/include/operators/join.h b/src/include/operators/join.h
index 25ffebe..7e2bf7e 100644
--- a/src/include/operators/join.h
+++ b/src/include/operators/join.h
@@ -3,5 +3,7 @@
 #include "../planner/planner.h"
 #include "../executor/executor.h"
 #include "../executor/tuple.h"
+#include "../executor/tuplebuffer.h"
+
 
 Tuple* joinGetTuple(Operator* op);
\ No newline at end of file
diff --git a/src/include/planner/planner.h b/src/include/planner/planner.h
index 620eb25..893608e 100644
--- a/src/include/planner/planner.h
+++ b/src/include/planner/planner.h
@@ -7,6 +7,7 @@
 #include "../parser/utils.h"
 #include "../parser/parsetree.h"
 #include "../executor/tuple.h"
+#include "../executor/tuplebuffer.h"
 
 
 typedef enum {
@@ -99,11 +100,9 @@ typedef struct {
     struct Operator* left;
     struct Operator* right;
     struct Operator* filter;
-    int lastTupleOffset;
-    int filterTupleOffset;
-    int rightTuples[JOINPTRBUFFER];
-    int rightTupleIdx;
-    int rightTupleCount;
+    TupleBuffer* rightTuples;
+    size_t rightTupleIdx;
+    size_t rightTupleCount;
     bool rightTuplesCollected;
 } JoinInfo;
 
diff --git a/src/operators/aggregate.c b/src/operators/aggregate.c
index 605f52f..9122168 100644
--- a/src/operators/aggregate.c
+++ b/src/operators/aggregate.c
@@ -136,7 +136,7 @@ Tuple* aggregateGetTuple(Operator* op) {
     op->resultDescription.pCols[0] = 0;
     op->info.aggregate.aggregationDone = true;
     
-    Tuple* tpl = initTuple(sizeof(result));
+    Tuple* tpl = initTuple();
     long* res_ptr = malloc(sizeof(result));
     *res_ptr = result;
     tpl->data = res_ptr;
diff --git a/src/operators/join.c b/src/operators/join.c
index 7912cb6..c16d942 100644
--- a/src/operators/join.c
+++ b/src/operators/join.c
@@ -1,7 +1,7 @@
 #include "../include/operators/join.h"
 
 
-void concatTuples(int tupleOffset,int leftOffset,int rightOffset, ResultSet* left, ResultSet* right) {
+void concatTuples(Tuple* returnTpl, Tuple* leftTpl, Tuple* rightTpl, ResultSet* left, ResultSet* right) {
 
     if (
         left == NULL ||
@@ -11,12 +11,10 @@ void concatTuples(int tupleOffset,int leftOffset,int rightOffset, ResultSet* lef
         exit(1);
     }
 
-    void* address = getTuple(tupleOffset);
-
-    memset(address, 0, left->size + right->size);
-    memcpy(address, getTuple(leftOffset), left->size);
-    memcpy(address + left->size, getTuple(rightOffset), right->size);
-
+    void* address = calloc(1, left->size + right->size);
+    memcpy(address, leftTpl->data, left->size);
+    memcpy(address + left->size, rightTpl->data, right->size);
+    returnTpl->data = address;
 }
 
 Tuple* joinGetTuple(Operator* op) {
@@ -42,35 +40,23 @@ Tuple* joinGetTuple(Operator* op) {
         
 
     */
-    return NULL;
     
-    /*
-    int rightTupleOffset = 0, originalOffset;
-    // Reuse this and only create a new tuple if it passes the filter
-    int offset = 0;
-     
-    // Reserve space from the buffer pool so that we can concatenate tuples
-    if (op->info.join.filterTupleOffset == -1) {
-        op->info.join.filterTupleOffset = getCurrentOffset();
-        reserveSpaceBufferpool(op->info.join.filterTupleOffset, JOINTUPLESIZE);
+    if (!op->info.join.rightTuples) {
+        op->info.join.rightTuples = initTupleBuffer(100); // TODO NO MAGIC NUMBERS 
     }
-    
 
+    Tuple* rightTuple;
     // This is only entered first time the operator is called
     while (!op->info.join.rightTuplesCollected) {
         
-        originalOffset = op->info.join.right->getTuple(op->info.join.right);
+        rightTuple = op->info.join.right->getTuple(op->info.join.right);
     
-        if (originalOffset == -1) {
+        if (rightTuple == NULL) {
             op->info.join.rightTuplesCollected = true;
-            op->info.join.lastTupleOffset = -1;
-            op->info.join.rightTupleIdx = 0;            
-            continue;  
+            continue; 
         } 
 
-        rightTupleOffset = addToBufferPoolFromOffset(originalOffset, op->info.join.right->resultDescription.size);
-
-        op->info.join.rightTuples[op->info.join.rightTupleIdx++] = rightTupleOffset;
+        addTupleToBuffer(rightTuple, op->info.join.rightTuples);
         op->info.join.rightTupleCount++;
 
         if (op->info.join.rightTupleCount >= JOINPTRBUFFER) {
@@ -79,43 +65,38 @@ Tuple* joinGetTuple(Operator* op) {
         }
     }
 
-    // Join loop
+    // Nested join loop
+    // For each tuple if left relation
+    //      For each tuple in right relation
+    //          if join_predicates(left,right) return tuple(left,right)
+    op->info.join.rightTupleIdx = 0;
+    Tuple* leftTuple = op->info.join.left->getTuple(op->info.join.left);
     do {
         
         if (op->info.join.rightTupleIdx >= op->info.join.rightTupleCount) {
-            op->info.join.rightTupleIdx = 0;
-            op->info.join.lastTupleOffset = -1;
-        }
-        
-        if (op->info.join.lastTupleOffset == -1) {
-            offset = op->info.join.left->getTuple(op->info.join.left);
-            if (offset == -1) {
-                return -1;
+            op->info.join.rightTupleIdx = 0;        
+            leftTuple = op->info.join.left->getTuple(op->info.join.left);
+            if (leftTuple == NULL) {
+                return NULL;
             }
-            op->info.join.lastTupleOffset = offset;
         }
 
-        rightTupleOffset = op->info.join.rightTuples[op->info.join.rightTupleIdx++];
+        rightTuple = getTupleByIndex(op->info.join.rightTuples,op->info.join.rightTupleIdx++);
 
         
-        if (evaluateTuplesAgainstFilterOps(op->info.join.lastTupleOffset, rightTupleOffset, op->info.join.filter)) {
+        if (evaluateTuplesAgainstFilterOps(leftTuple, rightTuple, op->info.join.filter)) {
+
+            Tuple* newTuple = initTuple();
             // Create a new tuple by concating the tuples
             concatTuples(
-                op->info.join.filterTupleOffset,
-                op->info.join.lastTupleOffset,
-                rightTupleOffset,
+                newTuple,
+                leftTuple,
+                rightTuple,
                 &op->info.join.left->resultDescription,
                 &op->info.join.right->resultDescription
             );
-            if (op->iteratorTupleOffset == -1) {
-                op->iteratorTupleOffset = addToBufferPool(getTuple(op->info.join.filterTupleOffset), op->resultDescription.size);
-            } else {
-                copyToBufferPool(op->iteratorTupleOffset, getTuple(op->info.join.filterTupleOffset),  op->resultDescription.size);
-
-            }
-            return op->iteratorTupleOffset;
+            return newTuple;
         }
     } while(true);
-    */
-
 }
+
diff --git a/src/operators/scan.c b/src/operators/scan.c
index bc93cdd..abf5a97 100644
--- a/src/operators/scan.c
+++ b/src/operators/scan.c
@@ -140,7 +140,7 @@ Tuple* scanGetTuple(Operator* op) {
 
     free(lineBuffer);
 
-    Tuple* tpl = initTuple(tplSize);
+    Tuple* tpl = initTuple();
     tpl->data = diskBuffer;
 
     return tpl;
diff --git a/src/operators/scanTDB.c b/src/operators/scanTDB.c
index 3de5574..cd09fb0 100644
--- a/src/operators/scanTDB.c
+++ b/src/operators/scanTDB.c
@@ -54,7 +54,7 @@ Tuple* scanTDBGetTuple(Operator* op) {
     }
 
 
-    Tuple* tpl = initTuple(op->info.scan.recordSize);
+    Tuple* tpl = initTuple();
     tpl->data = op->info.scan.buffer + bufferDataOffset;
 
     return tpl;
diff --git a/src/planner/operators/join.c b/src/planner/operators/join.c
index 52b7354..9e7c452 100644
--- a/src/planner/operators/join.c
+++ b/src/planner/operators/join.c
@@ -59,7 +59,6 @@ Operator* makeJoinOp(Operator* left, Operator* right, Node* ON) {
         opJoin->info.join.rightTupleIdx = 0;
         opJoin->info.join.rightTuplesCollected = false;
         opJoin->iteratorTupleOffset = -1;
-        opJoin->info.join.filterTupleOffset = -1;
 
 
         copyResultDescription(opJoin->info.join.left, opJoin,     0);

From 386a228e761a93262530f9998282d909d42d6d63 Mon Sep 17 00:00:00 2001
From: toppyy <43851547+toppyy@users.noreply.github.com>
Date: Wed, 5 Mar 2025 22:05:00 +0200
Subject: [PATCH 06/29] fix join memory leaks (and logic)

---
 src/executor/tuplebuffer.c    |  1 +
 src/include/planner/planner.h |  1 +
 src/operators/join.c          | 35 ++++++++++++++++++++---------------
 3 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/src/executor/tuplebuffer.c b/src/executor/tuplebuffer.c
index c7a0314..3a70902 100644
--- a/src/executor/tuplebuffer.c
+++ b/src/executor/tuplebuffer.c
@@ -24,6 +24,7 @@ void addTupleToBuffer(Tuple* tpl, TupleBuffer* buff) {
 }
 
 void freeTupleBuffer(TupleBuffer* buff) {
+    for (size_t i = 0; i < buff->size; i++) freeTuple(buff->tuples[i]);
     free(buff->tuples);
     free(buff);
 }
diff --git a/src/include/planner/planner.h b/src/include/planner/planner.h
index 893608e..edbb075 100644
--- a/src/include/planner/planner.h
+++ b/src/include/planner/planner.h
@@ -101,6 +101,7 @@ typedef struct {
     struct Operator* right;
     struct Operator* filter;
     TupleBuffer* rightTuples;
+    Tuple* leftTuple;
     size_t rightTupleIdx;
     size_t rightTupleCount;
     bool rightTuplesCollected;
diff --git a/src/operators/join.c b/src/operators/join.c
index c16d942..eca8e0a 100644
--- a/src/operators/join.c
+++ b/src/operators/join.c
@@ -35,14 +35,11 @@ Tuple* joinGetTuple(Operator* op) {
 
         We store one of the tables in the join in memory.
         Which is why the tuples from the right table are copied 
-        to the buffer pool. Their original location will be
-        rewritten by child operators iterating over tuples.
-        
-
+        to a buffer.
     */
     
     if (!op->info.join.rightTuples) {
-        op->info.join.rightTuples = initTupleBuffer(100); // TODO NO MAGIC NUMBERS 
+        op->info.join.rightTuples = initTupleBuffer(JOINPTRBUFFER);
     }
 
     Tuple* rightTuple;
@@ -69,21 +66,25 @@ Tuple* joinGetTuple(Operator* op) {
     // For each tuple if left relation
     //      For each tuple in right relation
     //          if join_predicates(left,right) return tuple(left,right)
-    op->info.join.rightTupleIdx = 0;
-    Tuple* leftTuple = op->info.join.left->getTuple(op->info.join.left);
+
+    if (op->info.join.leftTuple == NULL) {
+        op->info.join.leftTuple = op->info.join.left->getTuple(op->info.join.left);
+    }
+
+    Tuple* leftTuple = op->info.join.leftTuple; 
+
     do {
         
         if (op->info.join.rightTupleIdx >= op->info.join.rightTupleCount) {
-            op->info.join.rightTupleIdx = 0;        
-            leftTuple = op->info.join.left->getTuple(op->info.join.left);
-            if (leftTuple == NULL) {
-                return NULL;
-            }
+            op->info.join.rightTupleIdx = 0;
+            freeTuple(leftTuple);
+            op->info.join.leftTuple = op->info.join.left->getTuple(op->info.join.left);
+            leftTuple = op->info.join.leftTuple;
+            continue;
         }
 
-        rightTuple = getTupleByIndex(op->info.join.rightTuples,op->info.join.rightTupleIdx++);
+        rightTuple = getTupleByIndex(op->info.join.rightTuples, op->info.join.rightTupleIdx++);
 
-        
         if (evaluateTuplesAgainstFilterOps(leftTuple, rightTuple, op->info.join.filter)) {
 
             Tuple* newTuple = initTuple();
@@ -97,6 +98,10 @@ Tuple* joinGetTuple(Operator* op) {
             );
             return newTuple;
         }
-    } while(true);
+    } while(leftTuple != NULL);
+    
+    // Join complete, we can free the buffer and the tuples associated
+    freeTupleBuffer(op->info.join.rightTuples);
+    return NULL;
 }
 

From c9356f727f6459a92bc92b0881cb4e0fd5bfe308 Mon Sep 17 00:00:00 2001
From: toppyy <43851547+toppyy@users.noreply.github.com>
Date: Thu, 6 Mar 2025 21:50:45 +0200
Subject: [PATCH 07/29] all tests pass again

---
 src/executor/tuple.c               |   9 ++
 src/executor/tuplebuffer.c         |   7 +
 src/include/executor/tuple.h       |   2 +
 src/include/executor/tuplebuffer.h |   2 +-
 src/include/operators/scanTDB.h    |   1 +
 src/operators/aggregate.c          |   8 +-
 src/operators/scanTDB.c            |  11 +-
 src/planner/operators/scanTDB.c    |   5 +-
 testi.csv                          | 215 +++++++++++++++++++++++++++++
 tmp.csv                            |   3 +
 10 files changed, 246 insertions(+), 17 deletions(-)
 create mode 100644 testi.csv
 create mode 100644 tmp.csv

diff --git a/src/executor/tuple.c b/src/executor/tuple.c
index 3436d47..1667c2e 100644
--- a/src/executor/tuple.c
+++ b/src/executor/tuple.c
@@ -7,6 +7,15 @@ Tuple* initTuple() {
     return tpl;
 }
 
+
+Tuple* initTupleOfSize(size_t p_size) {
+    Tuple* tpl = malloc(sizeof(Tuple)); // Heap allocation
+    tpl->data = calloc(1, p_size);
+    tpl->size = p_size;
+    return tpl;
+}
+
+
 void* getTupleCol(Tuple* tpl, size_t colOffset) {
     return tpl->data + colOffset;
 }
diff --git a/src/executor/tuplebuffer.c b/src/executor/tuplebuffer.c
index 3a70902..c3985f6 100644
--- a/src/executor/tuplebuffer.c
+++ b/src/executor/tuplebuffer.c
@@ -32,4 +32,11 @@ void freeTupleBuffer(TupleBuffer* buff) {
 
 Tuple* getTupleByIndex(TupleBuffer* buff, size_t idx) {
     return buff->tuples[idx];
+}
+
+size_t isTupleBufferEmpty(TupleBuffer* buff) {
+    if (buff->size > 0) {
+        return 0;
+    }
+    return 1;
 }
\ No newline at end of file
diff --git a/src/include/executor/tuple.h b/src/include/executor/tuple.h
index 2e32012..91c0dea 100644
--- a/src/include/executor/tuple.h
+++ b/src/include/executor/tuple.h
@@ -12,6 +12,8 @@ typedef struct  {
 
 Tuple* initTuple();
 
+Tuple* initTupleOfSize(size_t p_size);
+
 void* getTupleCol(Tuple* tpl, size_t colOffset);
 
 void freeTuple(Tuple* tpl);
\ No newline at end of file
diff --git a/src/include/executor/tuplebuffer.h b/src/include/executor/tuplebuffer.h
index f26839a..91c72ae 100644
--- a/src/include/executor/tuplebuffer.h
+++ b/src/include/executor/tuplebuffer.h
@@ -12,4 +12,4 @@ void resizeTupleBuffer(TupleBuffer* buff);
 void addTupleToBuffer(Tuple* tpl, TupleBuffer* buff);
 void freeTupleBuffer(TupleBuffer* buff);
 Tuple* getTupleByIndex(TupleBuffer* buff, size_t idx);
-
+size_t isTupleBufferEmpty(TupleBuffer* buff);
diff --git a/src/include/operators/scanTDB.h b/src/include/operators/scanTDB.h
index 8b97cdb..f5da46a 100644
--- a/src/include/operators/scanTDB.h
+++ b/src/include/operators/scanTDB.h
@@ -3,6 +3,7 @@
 #include "../planner/planner.h"
 #include "../executor/executor.h"
 #include "../executor/tuple.h"
+#include "../executor/tuplebuffer.h"
 #include <errno.h>
 
 Tuple* scanTDBGetTuple(Operator* op);
\ No newline at end of file
diff --git a/src/operators/aggregate.c b/src/operators/aggregate.c
index 9122168..f1089e5 100644
--- a/src/operators/aggregate.c
+++ b/src/operators/aggregate.c
@@ -23,7 +23,7 @@ long doAverage(Operator* opToIterate, size_t colOffset) {
         if (tpl == NULL) {
             break;
         }
-        sum += *(long*) tpl->data + colOffset;
+        sum += *(long*) (tpl->data + colOffset);
         count++;
     };
     long result = 0.0; 
@@ -44,7 +44,7 @@ long doSum(Operator* opToIterate, size_t colOffset) {
         if (tpl == NULL) {
             break;
         }
-        result += *(long*) tpl->data + colOffset;
+        result += *(long*) (tpl->data + colOffset);
 
     };
 
@@ -62,7 +62,7 @@ long doMax(Operator* opToIterate, size_t colOffset) {
         if (tpl == NULL) {
             break;
         }
-        tmp = *(long*) tpl->data + colOffset;
+        tmp = *(long*) (tpl->data + colOffset);
         result = tmp > result ? tmp : result;
 
     };
@@ -81,7 +81,7 @@ long doMin(Operator* opToIterate, size_t colOffset) {
         if (tpl == NULL) {
             break;
         }
-        tmp = *(long*) tpl->data + colOffset;
+        tmp = *(long*) (tpl->data + colOffset);
         result = tmp < result ? tmp : result;
 
     };
diff --git a/src/operators/scanTDB.c b/src/operators/scanTDB.c
index cd09fb0..100e00a 100644
--- a/src/operators/scanTDB.c
+++ b/src/operators/scanTDB.c
@@ -46,16 +46,9 @@ Tuple* scanTDBGetTuple(Operator* op) {
     size_t bufferDataOffset = (op->info.scan.recordsInBuffer - 1) * op->info.scan.recordSize;    
     op->info.scan.recordsInBuffer--;
 
-    // Write to bufferpool
-    if (op->iteratorTupleOffset == -1) {
-        op->iteratorTupleOffset = addToBufferPool(op->info.scan.buffer + bufferDataOffset, op->info.scan.recordSize);
-    } else {
-        copyToBufferPool(op->iteratorTupleOffset, op->info.scan.buffer + bufferDataOffset, op->info.scan.recordSize);
-    }
-
 
-    Tuple* tpl = initTuple();
-    tpl->data = op->info.scan.buffer + bufferDataOffset;
+    Tuple* tpl = initTupleOfSize(op->info.scan.recordSize);
+    memcpy(tpl->data, op->info.scan.buffer + bufferDataOffset, op->info.scan.recordSize);
 
     return tpl;
 }
\ No newline at end of file
diff --git a/src/planner/operators/scanTDB.c b/src/planner/operators/scanTDB.c
index d0005cf..9b569d4 100644
--- a/src/planner/operators/scanTDB.c
+++ b/src/planner/operators/scanTDB.c
@@ -38,7 +38,6 @@ Operator* makeScanTDBOp(Node* node) {
     op->info.scan.fileRead      = false;
     op->info.scan.recordsInBuffer   = 0;
     op->iteratorTupleOffset = -1;
-    
 
 
     op->info.scan.columnOffsets[0] = 0;
@@ -55,8 +54,8 @@ Operator* makeScanTDBOp(Node* node) {
     op->resultDescription.columnCount = tbldef.colCount;
     op->resultDescription.size = op->info.scan.recordSize;
 
-    op->info.scan.bufferSize        = op->info.scan.recordSize * TDBSCANBUFFRECORDS;
-    op->info.scan.buffer = malloc(op->info.scan.bufferSize);
+    op->info.scan.bufferSize    = op->info.scan.recordSize * TDBSCANBUFFRECORDS;
+    op->info.scan.buffer        = malloc(op->info.scan.bufferSize);
 
     if (op->info.scan.buffer == NULL) {
         printf("Failed to allocate memory for scanTDB\n");
diff --git a/testi.csv b/testi.csv
new file mode 100644
index 0000000..c4c625f
--- /dev/null
+++ b/testi.csv
@@ -0,0 +1,215 @@
+unemployed
+8471
+8361
+8119
+7931
+7765
+7808
+7828
+7544
+7130
+6984
+6774
+6683
+6525
+6228
+6018
+5840
+5719
+5649
+5659
+5323
+5027
+4817
+4629
+4572
+4478
+4308
+4197
+4049
+3921
+3958
+3996
+3744
+3660
+3497
+3392
+3459
+3373
+3308
+3243
+3249
+3233
+3383
+3410
+3419
+3493
+3583
+3779
+4045
+4295
+4433
+4607
+4714
+4900
+5159
+5391
+5358
+5351
+5364
+5389
+5467
+5527
+5403
+5364
+5315
+5259
+5351
+5494
+5420
+5341
+5182
+5119
+5194
+5281
+5418
+5420
+5555
+5737
+5748
+5899
+5818
+5668
+5850
+5921
+6137
+6242
+6283
+6315
+6491
+6636
+7013
+7373
+7508
+7600
+7858
+8086
+8573
+9041
+9381
+9644
+9919
+10181
+10732
+11069
+11106
+11453
+11663
+11947
+12598
+13118
+13409
+13800
+14243
+14675
+15583
+16116
+16222
+16356
+15726
+15790
+16350
+16883
+17204
+17503
+17620
+18374
+18778
+18944
+19141
+18852
+18290
+17914
+17893
+17934
+17337
+16466
+16050
+15858
+15983
+15973
+15576
+14888
+13789
+13055
+12698
+12440
+12124
+11758
+11545
+11309
+11372
+11364
+10980
+10714
+10673
+10556
+10671
+10639
+10442
+10328
+10265
+10297
+10572
+10777
+10618
+10565
+10363
+10331
+10546
+10619
+10636
+10843
+11197
+11587
+12439
+12853
+13033
+13535
+13857
+14439
+15406
+15980
+16730
+18785
+20460
+20858
+21261
+21570
+21226
+20678
+20262
+19745
+19655
+19255
+18849
+18038
+17137
+16875
+16871
+16850
+16308
+15919
+15575
+15325
+15450
+15451
+15413
+15454
+15613
+16583
+16814
+17118
+16871
+16678
+2234883
diff --git a/tmp.csv b/tmp.csv
new file mode 100644
index 0000000..788d0f2
--- /dev/null
+++ b/tmp.csv
@@ -0,0 +1,3 @@
+long_term_unemployed;time
+8413;2006-01-01
+8303;2006-02-01

From b1fe932da5661b390909e8fc6537ce7c4e7c934c Mon Sep 17 00:00:00 2001
From: toppyy <43851547+toppyy@users.noreply.github.com>
Date: Thu, 6 Mar 2025 22:42:56 +0200
Subject: [PATCH 08/29] fix memleak on aggregates

---
 src/operators/aggregate.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/operators/aggregate.c b/src/operators/aggregate.c
index f1089e5..d81f0be 100644
--- a/src/operators/aggregate.c
+++ b/src/operators/aggregate.c
@@ -1,11 +1,12 @@
 #include "../include/operators/aggregate.h"
 
 long doCount(Operator* opToIterate) {
-    Tuple* tpl = opToIterate->getTuple(opToIterate);
     int result = 0;
-    while (tpl != NULL) {
-        tpl = opToIterate->getTuple(opToIterate);
+    while (true) {
+        Tuple* tpl = opToIterate->getTuple(opToIterate);
+        if (tpl == NULL) break;
         result++;
+        freeTuple(tpl);
     };
     
     return result;
@@ -14,17 +15,17 @@ long doCount(Operator* opToIterate) {
 long doAverage(Operator* opToIterate, size_t colOffset) {
 
 
-    Tuple* tpl;
     long sum = 0;
     long count = 0;
 
     for (;;) {
-        tpl = opToIterate->getTuple(opToIterate);
+        Tuple* tpl = opToIterate->getTuple(opToIterate);
         if (tpl == NULL) {
             break;
         }
         sum += *(long*) (tpl->data + colOffset);
         count++;
+        freeTuple(tpl);
     };
     long result = 0.0; 
     if (count > 0) {
@@ -36,16 +37,15 @@ long doAverage(Operator* opToIterate, size_t colOffset) {
 long doSum(Operator* opToIterate, size_t colOffset) {
 
 
-    Tuple* tpl ;
     long long result = 0;
 
     for (;;) {
-        tpl = opToIterate->getTuple(opToIterate);
+        Tuple* tpl = opToIterate->getTuple(opToIterate);
         if (tpl == NULL) {
             break;
         }
         result += *(long*) (tpl->data + colOffset);
-
+        freeTuple(tpl);
     };
 
     return result;
@@ -54,16 +54,16 @@ long doSum(Operator* opToIterate, size_t colOffset) {
 long doMax(Operator* opToIterate, size_t colOffset) {
 
 
-    Tuple* tpl;
     long result = 0, tmp = 0;
 
     for (;;) {
-        tpl = opToIterate->getTuple(opToIterate);
+        Tuple* tpl = opToIterate->getTuple(opToIterate);
         if (tpl == NULL) {
             break;
         }
         tmp = *(long*) (tpl->data + colOffset);
         result = tmp > result ? tmp : result;
+        freeTuple(tpl);
 
     };
 
@@ -73,16 +73,16 @@ long doMax(Operator* opToIterate, size_t colOffset) {
 long doMin(Operator* opToIterate, size_t colOffset) {
 
 
-    Tuple* tpl;
     long result = __LONG_MAX__, tmp = 0;
 
     for (;;) {
-        tpl = opToIterate->getTuple(opToIterate);
+        Tuple* tpl = opToIterate->getTuple(opToIterate);
         if (tpl == NULL) {
             break;
         }
         tmp = *(long*) (tpl->data + colOffset);
         result = tmp < result ? tmp : result;
+        freeTuple(tpl);
 
     };
 

From 53bc9f4616f969cba1a409bc37a295989e7626dc Mon Sep 17 00:00:00 2001
From: toppyy <43851547+toppyy@users.noreply.github.com>
Date: Sun, 9 Mar 2025 09:28:39 +0200
Subject: [PATCH 09/29] refactor aggregate-operator

---
 src/operators/aggregate.c | 64 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 63 insertions(+), 1 deletion(-)

diff --git a/src/operators/aggregate.c b/src/operators/aggregate.c
index d81f0be..ab34b54 100644
--- a/src/operators/aggregate.c
+++ b/src/operators/aggregate.c
@@ -89,6 +89,22 @@ long doMin(Operator* opToIterate, size_t colOffset) {
     return result;
 }
 
+long count(long result, long num __attribute__((unused))) {
+    return result + 1;
+}
+
+long max(long result, long num) {
+    return num > result ? num : result;
+}
+
+long sum(long result, long num) {
+    return num + result;
+}
+
+long min(long result, long num) {
+    return num < result ? num : result;
+}
+
 
 
 Tuple* aggregateGetTuple(Operator* op) {
@@ -107,10 +123,12 @@ Tuple* aggregateGetTuple(Operator* op) {
     // }
 
 
+    size_t colOffset = op->child->resultDescription.pCols[op->info.aggregate.colToAggregate];
+
     // Build new tuple to store result
 
-    long result = 0;
 
+    /*
     switch(op->info.aggregate.aggtype) {
         case COUNT:
             result = doCount(op->child);
@@ -131,6 +149,50 @@ Tuple* aggregateGetTuple(Operator* op) {
             printf("Aggregation type (%d) not implemented\n", op->info.aggregate.aggtype);
             exit(1);
     }
+    */
+    long (*agg_fun)(long result, long num);
+    long result = 0, tmp = 0;
+
+
+    switch(op->info.aggregate.aggtype) {
+        case COUNT:
+            agg_fun = count;
+            break;
+        case SUM:
+            agg_fun = sum;
+            break;
+        case AVG:
+            agg_fun = sum;
+            break;
+        case MAX:
+            agg_fun = max;
+            break;
+        case MIN:
+            agg_fun = min;
+            result = __LONG_MAX__;
+            break;
+        default:
+            printf("Aggregation type (%d) not implemented\n", op->info.aggregate.aggtype);
+            exit(1);
+    }
+
+
+    size_t observations = 0;
+    for (;;) {
+        Tuple* tpl = op->child->getTuple(op->child);
+        if (tpl == NULL) {
+            break;
+        }
+        tmp = *(long*) (tpl->data + colOffset);
+        result = agg_fun(result, tmp);
+        freeTuple(tpl);
+        observations++;
+    };
+
+    if (op->info.aggregate.aggtype == AVG) {
+        result = result / observations;
+    }
+
 
     op->resultDescription.columnCount = 1;
     op->resultDescription.pCols[0] = 0;

From 8e375cee22e4b0b3f4913df3e8863bf01bf01fce Mon Sep 17 00:00:00 2001
From: toppyy <43851547+toppyy@users.noreply.github.com>
Date: Sun, 9 Mar 2025 09:29:53 +0200
Subject: [PATCH 10/29] cleanup

---
 src/operators/aggregate.c | 115 +-------------------------------------
 1 file changed, 1 insertion(+), 114 deletions(-)

diff --git a/src/operators/aggregate.c b/src/operators/aggregate.c
index ab34b54..fe1a814 100644
--- a/src/operators/aggregate.c
+++ b/src/operators/aggregate.c
@@ -1,94 +1,5 @@
 #include "../include/operators/aggregate.h"
 
-long doCount(Operator* opToIterate) {
-    int result = 0;
-    while (true) {
-        Tuple* tpl = opToIterate->getTuple(opToIterate);
-        if (tpl == NULL) break;
-        result++;
-        freeTuple(tpl);
-    };
-    
-    return result;
-}
-
-long doAverage(Operator* opToIterate, size_t colOffset) {
-
-
-    long sum = 0;
-    long count = 0;
-
-    for (;;) {
-        Tuple* tpl = opToIterate->getTuple(opToIterate);
-        if (tpl == NULL) {
-            break;
-        }
-        sum += *(long*) (tpl->data + colOffset);
-        count++;
-        freeTuple(tpl);
-    };
-    long result = 0.0; 
-    if (count > 0) {
-        result = sum / (double) count;
-    }
-    return result;
-}
-
-long doSum(Operator* opToIterate, size_t colOffset) {
-
-
-    long long result = 0;
-
-    for (;;) {
-        Tuple* tpl = opToIterate->getTuple(opToIterate);
-        if (tpl == NULL) {
-            break;
-        }
-        result += *(long*) (tpl->data + colOffset);
-        freeTuple(tpl);
-    };
-
-    return result;
-}
-
-long doMax(Operator* opToIterate, size_t colOffset) {
-
-
-    long result = 0, tmp = 0;
-
-    for (;;) {
-        Tuple* tpl = opToIterate->getTuple(opToIterate);
-        if (tpl == NULL) {
-            break;
-        }
-        tmp = *(long*) (tpl->data + colOffset);
-        result = tmp > result ? tmp : result;
-        freeTuple(tpl);
-
-    };
-
-    return result;
-}
-
-long doMin(Operator* opToIterate, size_t colOffset) {
-
-
-    long result = __LONG_MAX__, tmp = 0;
-
-    for (;;) {
-        Tuple* tpl = opToIterate->getTuple(opToIterate);
-        if (tpl == NULL) {
-            break;
-        }
-        tmp = *(long*) (tpl->data + colOffset);
-        result = tmp < result ? tmp : result;
-        freeTuple(tpl);
-
-    };
-
-    return result;
-}
-
 long count(long result, long num __attribute__((unused))) {
     return result + 1;
 }
@@ -125,31 +36,7 @@ Tuple* aggregateGetTuple(Operator* op) {
 
     size_t colOffset = op->child->resultDescription.pCols[op->info.aggregate.colToAggregate];
 
-    // Build new tuple to store result
-
 
-    /*
-    switch(op->info.aggregate.aggtype) {
-        case COUNT:
-            result = doCount(op->child);
-            break;
-        case SUM:
-            result = doSum(op->child, op->child->resultDescription.pCols[op->info.aggregate.colToAggregate]);
-            break;
-        case AVG:
-            result = doAverage(op->child, op->child->resultDescription.pCols[op->info.aggregate.colToAggregate]);
-            break;
-        case MAX:
-            result = doMax(op->child, op->child->resultDescription.pCols[op->info.aggregate.colToAggregate]);
-            break;
-        case MIN:
-            result = doMin(op->child, op->child->resultDescription.pCols[op->info.aggregate.colToAggregate]);
-            break;
-        default:
-            printf("Aggregation type (%d) not implemented\n", op->info.aggregate.aggtype);
-            exit(1);
-    }
-    */
     long (*agg_fun)(long result, long num);
     long result = 0, tmp = 0;
 
@@ -162,7 +49,7 @@ Tuple* aggregateGetTuple(Operator* op) {
             agg_fun = sum;
             break;
         case AVG:
-            agg_fun = sum;
+            agg_fun = sum; // See below why
             break;
         case MAX:
             agg_fun = max;

From d8780478bc40d5aa1acac9f6c304f11742afa37a Mon Sep 17 00:00:00 2001
From: toppyy <43851547+toppyy@users.noreply.github.com>
Date: Sun, 9 Mar 2025 10:24:23 +0200
Subject: [PATCH 11/29] pass pointers to tpls

---
 src/executor/executor.c           |  9 ++++---
 src/executor/tuple.c              |  8 ++++++
 src/include/executor/tuple.h      |  6 ++++-
 src/include/operators/aggregate.h |  2 +-
 src/include/operators/filter.h    |  2 +-
 src/include/operators/join.h      |  2 +-
 src/include/operators/project.h   |  2 +-
 src/include/operators/scan.h      |  2 +-
 src/include/operators/scanTDB.h   |  2 +-
 src/include/planner/planner.h     |  2 +-
 src/operators/aggregate.c         | 26 ++++++++++--------
 src/operators/filter.c            | 11 +++-----
 src/operators/join.c              | 44 ++++++++++++++++++-------------
 src/operators/project.c           |  4 +--
 src/operators/scan.c              |  7 +++--
 src/operators/scanTDB.c           | 10 +++----
 16 files changed, 78 insertions(+), 61 deletions(-)

diff --git a/src/executor/executor.c b/src/executor/executor.c
index 2714c4a..e96b5dd 100644
--- a/src/executor/executor.c
+++ b/src/executor/executor.c
@@ -86,15 +86,16 @@ void execute(Operator* op, bool printColNames, void (*tupleHandler)(Tuple* tpl))
     }
 
     // Get tuples one by one
-    Tuple* tpl;
+    Tuple* tpl = initTupleOfSize(500); // TODO no magic constants
     for (;;) {
-        tpl = op->getTuple(op);
-        if (tpl == NULL) break;
+        op->getTuple(op, tpl);
+        if (isTupleEmpty(tpl)) break;
 
         tupleHandler(tpl);
 
-        freeTuple(tpl);
     };
+    freeTuple(tpl);
+
 
     free(buffpool->pool);
     free(buffpool);
diff --git a/src/executor/tuple.c b/src/executor/tuple.c
index 1667c2e..b313e76 100644
--- a/src/executor/tuple.c
+++ b/src/executor/tuple.c
@@ -25,4 +25,12 @@ void freeTuple(Tuple* tpl) {
         free(tpl->data);
     }
     free(tpl);
+}
+
+size_t isTupleEmpty(Tuple* tpl) {
+    return tpl->size == 0 ? 1 : 0;
+}
+
+void markTupleAsEmpty(Tuple* tpl) {
+    tpl->size = 0;
 }
\ No newline at end of file
diff --git a/src/include/executor/tuple.h b/src/include/executor/tuple.h
index 91c0dea..935d03b 100644
--- a/src/include/executor/tuple.h
+++ b/src/include/executor/tuple.h
@@ -16,4 +16,8 @@ Tuple* initTupleOfSize(size_t p_size);
 
 void* getTupleCol(Tuple* tpl, size_t colOffset);
 
-void freeTuple(Tuple* tpl);
\ No newline at end of file
+void freeTuple(Tuple* tpl);
+
+size_t isTupleEmpty(Tuple* tpl);
+
+void markTupleAsEmpty(Tuple* tpl);
diff --git a/src/include/operators/aggregate.h b/src/include/operators/aggregate.h
index fd8f2db..cd58290 100644
--- a/src/include/operators/aggregate.h
+++ b/src/include/operators/aggregate.h
@@ -6,4 +6,4 @@
 #include "../executor/tuple.h"
 
 
-Tuple* aggregateGetTuple(Operator* op);
\ No newline at end of file
+void aggregateGetTuple(Operator* op, Tuple* tpl);
\ No newline at end of file
diff --git a/src/include/operators/filter.h b/src/include/operators/filter.h
index f4ff59e..f3bd5d8 100644
--- a/src/include/operators/filter.h
+++ b/src/include/operators/filter.h
@@ -4,5 +4,5 @@
 #include "../planner/planner.h"
 #include "../executor/tuple.h"
 
-Tuple* filterGetTuple(Operator* op);
+void filterGetTuple(Operator* op, Tuple* tpl);
 bool evaluateTuplesAgainstFilterOps(Tuple* tpl1, Tuple* tpl2, Operator* op);
\ No newline at end of file
diff --git a/src/include/operators/join.h b/src/include/operators/join.h
index 7e2bf7e..e54a6da 100644
--- a/src/include/operators/join.h
+++ b/src/include/operators/join.h
@@ -6,4 +6,4 @@
 #include "../executor/tuplebuffer.h"
 
 
-Tuple* joinGetTuple(Operator* op);
\ No newline at end of file
+void joinGetTuple(Operator* op, Tuple* tpl);
\ No newline at end of file
diff --git a/src/include/operators/project.h b/src/include/operators/project.h
index 6a228d9..46bef38 100644
--- a/src/include/operators/project.h
+++ b/src/include/operators/project.h
@@ -3,4 +3,4 @@
 #include "../planner/planner.h"
 #include "../executor/tuple.h"
 
-Tuple* projectGetTuple(Operator* op);
\ No newline at end of file
+void projectGetTuple(Operator* op, Tuple* tpl);
\ No newline at end of file
diff --git a/src/include/operators/scan.h b/src/include/operators/scan.h
index f43aa63..10ab0bc 100644
--- a/src/include/operators/scan.h
+++ b/src/include/operators/scan.h
@@ -4,4 +4,4 @@
 #include "../executor/executor.h"
 #include "../executor/tuple.h"
 
-Tuple* scanGetTuple(Operator* op);
\ No newline at end of file
+void scanGetTuple(Operator* op, Tuple* tpl);
\ No newline at end of file
diff --git a/src/include/operators/scanTDB.h b/src/include/operators/scanTDB.h
index f5da46a..7f8b6a9 100644
--- a/src/include/operators/scanTDB.h
+++ b/src/include/operators/scanTDB.h
@@ -6,4 +6,4 @@
 #include "../executor/tuplebuffer.h"
 #include <errno.h>
 
-Tuple* scanTDBGetTuple(Operator* op);
\ No newline at end of file
+void scanTDBGetTuple(Operator* op, Tuple* tpl);
\ No newline at end of file
diff --git a/src/include/planner/planner.h b/src/include/planner/planner.h
index edbb075..df3efb2 100644
--- a/src/include/planner/planner.h
+++ b/src/include/planner/planner.h
@@ -128,7 +128,7 @@ typedef struct Operator {
     ResultSet resultDescription;
     int iteratorTupleOffset;
     struct Operator* child;
-    Tuple* (*getTuple) (struct Operator* op);
+    void (*getTuple) (struct Operator* op, Tuple* tpl);
 } Operator;
 
 void freeQueryplan(Operator *node);
diff --git a/src/operators/aggregate.c b/src/operators/aggregate.c
index fe1a814..395b1d7 100644
--- a/src/operators/aggregate.c
+++ b/src/operators/aggregate.c
@@ -18,13 +18,14 @@ long min(long result, long num) {
 
 
 
-Tuple* aggregateGetTuple(Operator* op) {
+void aggregateGetTuple(Operator* op, Tuple* tpl) {
     
     checkPtrNotNull(op->child, "OP_AGGREGATE has no child.");
     checkPtrNotNull(op->child->getTuple, "Child of OP_AGGREGATE has no getTuple-method.");
 
     if (op->info.aggregate.aggregationDone) {
-        return NULL;
+        markTupleAsEmpty(tpl);
+        return;
     }
 
     // TODO:
@@ -65,17 +66,23 @@ Tuple* aggregateGetTuple(Operator* op) {
 
 
     size_t observations = 0;
+    
+    Tuple* tmpTpl = initTupleOfSize(500); // TODO no magic
+
     for (;;) {
-        Tuple* tpl = op->child->getTuple(op->child);
-        if (tpl == NULL) {
+        
+        op->child->getTuple(op->child, tmpTpl);
+        if (isTupleEmpty(tmpTpl)) {
             break;
         }
-        tmp = *(long*) (tpl->data + colOffset);
+        tmp = *(long*) (tmpTpl->data + colOffset);
         result = agg_fun(result, tmp);
-        freeTuple(tpl);
         observations++;
     };
 
+    freeTuple(tmpTpl);
+
+
     if (op->info.aggregate.aggtype == AVG) {
         result = result / observations;
     }
@@ -84,11 +91,8 @@ Tuple* aggregateGetTuple(Operator* op) {
     op->resultDescription.columnCount = 1;
     op->resultDescription.pCols[0] = 0;
     op->info.aggregate.aggregationDone = true;
+
     
-    Tuple* tpl = initTuple();
-    long* res_ptr = malloc(sizeof(result));
-    *res_ptr = result;
-    tpl->data = res_ptr;
+    *(long*)(tpl->data) = result; 
 
-    return tpl;
 }
diff --git a/src/operators/filter.c b/src/operators/filter.c
index 13d9d54..1d8b587 100644
--- a/src/operators/filter.c
+++ b/src/operators/filter.c
@@ -176,7 +176,7 @@ bool evaluateTuplesAgainstFilterOps(Tuple* tpl1, Tuple* tpl2, Operator* op) {
     return rtrnValue;
 }
 
-Tuple* filterGetTuple(Operator* op) {
+void filterGetTuple(Operator* op, Tuple* tpl) {
 
     if (op == NULL) {
         printf("FILTER_OP: Passed a NULL-pointer to filterGetTuple\n");
@@ -197,15 +197,12 @@ Tuple* filterGetTuple(Operator* op) {
         exit(1);
     }
 
-
-    Tuple* tpl = 0;
-
     while (true) {
         /* Get new tuples until found something that passes the filter */
 
-        tpl = op->child->getTuple(op->child);
+        op->child->getTuple(op->child, tpl);
 
-        if (tpl == NULL) {
+        if (isTupleEmpty(tpl)) {
             break;
         }
 
@@ -213,6 +210,4 @@ Tuple* filterGetTuple(Operator* op) {
 
 
     }
-
-    return tpl;
 }
diff --git a/src/operators/join.c b/src/operators/join.c
index eca8e0a..5b839e0 100644
--- a/src/operators/join.c
+++ b/src/operators/join.c
@@ -17,8 +17,7 @@ void concatTuples(Tuple* returnTpl, Tuple* leftTpl, Tuple* rightTpl, ResultSet*
     returnTpl->data = address;
 }
 
-Tuple* joinGetTuple(Operator* op) {
-
+void joinGetTuple(Operator* op, Tuple* tpl) {
     if (
         op->info.join.left == NULL ||
         op->info.join.right == NULL
@@ -45,10 +44,10 @@ Tuple* joinGetTuple(Operator* op) {
     Tuple* rightTuple;
     // This is only entered first time the operator is called
     while (!op->info.join.rightTuplesCollected) {
-        
-        rightTuple = op->info.join.right->getTuple(op->info.join.right);
+        rightTuple = initTupleOfSize(500); // TODO no magic
+        op->info.join.right->getTuple(op->info.join.right, rightTuple);
     
-        if (rightTuple == NULL) {
+        if (isTupleEmpty(rightTuple)) {
             op->info.join.rightTuplesCollected = true;
             continue; 
         } 
@@ -62,46 +61,53 @@ Tuple* joinGetTuple(Operator* op) {
         }
     }
 
+    
+
     // Nested join loop
     // For each tuple if left relation
     //      For each tuple in right relation
     //          if join_predicates(left,right) return tuple(left,right)
 
-    if (op->info.join.leftTuple == NULL) {
-        op->info.join.leftTuple = op->info.join.left->getTuple(op->info.join.left);
-    }
+    op->info.join.leftTuple = initTupleOfSize(500);
 
-    Tuple* leftTuple = op->info.join.leftTuple; 
+    if (isTupleEmpty(op->info.join.leftTuple)) {
+        op->info.join.left->getTuple(op->info.join.left, op->info.join.leftTuple);
+    }
 
     do {
         
         if (op->info.join.rightTupleIdx >= op->info.join.rightTupleCount) {
             op->info.join.rightTupleIdx = 0;
-            freeTuple(leftTuple);
-            op->info.join.leftTuple = op->info.join.left->getTuple(op->info.join.left);
-            leftTuple = op->info.join.leftTuple;
+            op->info.join.left->getTuple(op->info.join.left, op->info.join.leftTuple);
+            if (isTupleEmpty(op->info.join.leftTuple)) {
+                break;
+            }
+
             continue;
         }
 
         rightTuple = getTupleByIndex(op->info.join.rightTuples, op->info.join.rightTupleIdx++);
 
-        if (evaluateTuplesAgainstFilterOps(leftTuple, rightTuple, op->info.join.filter)) {
 
-            Tuple* newTuple = initTuple();
+        if (evaluateTuplesAgainstFilterOps(op->info.join.leftTuple, rightTuple, op->info.join.filter)) {
             // Create a new tuple by concating the tuples
             concatTuples(
-                newTuple,
-                leftTuple,
+                tpl,
+                op->info.join.leftTuple,
                 rightTuple,
                 &op->info.join.left->resultDescription,
                 &op->info.join.right->resultDescription
             );
-            return newTuple;
+            
+            return;
         }
-    } while(leftTuple != NULL);
+    } while(!isTupleEmpty(op->info.join.leftTuple));
     
     // Join complete, we can free the buffer and the tuples associated
     freeTupleBuffer(op->info.join.rightTuples);
-    return NULL;
+    // freeTuple(op->info.join.leftTuple);
+    // freeTuple(rightTuple);
+    markTupleAsEmpty(tpl);
+    
 }
 
diff --git a/src/operators/project.c b/src/operators/project.c
index 858c8ab..2a498aa 100644
--- a/src/operators/project.c
+++ b/src/operators/project.c
@@ -1,6 +1,6 @@
 #include "../include/operators/project.h"
 
-Tuple* projectGetTuple(Operator* op) {
+void projectGetTuple(Operator* op, Tuple* tpl) {
 
     checkPtrNotNull(op->child, "OP_PROJECT has no child");
     checkPtrNotNull(op->child->getTuple, "Child of OP_PROJECT has no getTuple-method");
@@ -13,5 +13,5 @@ Tuple* projectGetTuple(Operator* op) {
         This is an unfortunate extra function call :(
     */
     
-    return op->child->getTuple(op->child);
+    op->child->getTuple(op->child, tpl);
 }
\ No newline at end of file
diff --git a/src/operators/scan.c b/src/operators/scan.c
index abf5a97..c56d4f6 100644
--- a/src/operators/scan.c
+++ b/src/operators/scan.c
@@ -1,6 +1,6 @@
 #include "../include/operators/scan.h"
 
-Tuple* scanGetTuple(Operator* op) {
+void scanGetTuple(Operator* op, Tuple* tpl) {
 
     checkPtrNotNull(op, "NULL pointer passed to scanGetTuple");
 
@@ -24,7 +24,8 @@ Tuple* scanGetTuple(Operator* op) {
      if (line == NULL) {
         free(lineBuffer);
         fclose(op->info.scan.tablefile);
-        return NULL;
+        markTupleAsEmpty(tpl);
+        return;
     }
 
 
@@ -140,9 +141,7 @@ Tuple* scanGetTuple(Operator* op) {
 
     free(lineBuffer);
 
-    Tuple* tpl = initTuple();
     tpl->data = diskBuffer;
 
-    return tpl;
 }
 
diff --git a/src/operators/scanTDB.c b/src/operators/scanTDB.c
index 100e00a..5a44475 100644
--- a/src/operators/scanTDB.c
+++ b/src/operators/scanTDB.c
@@ -29,26 +29,26 @@ void fillBuffer(Operator* op) {
 
 }
 
-Tuple* scanTDBGetTuple(Operator* op) {
+void scanTDBGetTuple(Operator* op, Tuple* tpl) {
 
     checkPtrNotNull(op, "NULL pointer passed to scanTDBGetTuple");
 
     if (op->info.scan.fileRead && op->info.scan.recordsInBuffer == 0) {
         free(op->info.scan.buffer);
-        return NULL;
+        markTupleAsEmpty(tpl);
+        return;
     }
 
     if (op->info.scan.recordsInBuffer == 0) {
         fillBuffer(op);
-        return scanTDBGetTuple(op);
+        scanTDBGetTuple(op, tpl);
+        return;
     }
 
     size_t bufferDataOffset = (op->info.scan.recordsInBuffer - 1) * op->info.scan.recordSize;    
     op->info.scan.recordsInBuffer--;
 
 
-    Tuple* tpl = initTupleOfSize(op->info.scan.recordSize);
     memcpy(tpl->data, op->info.scan.buffer + bufferDataOffset, op->info.scan.recordSize);
 
-    return tpl;
 }
\ No newline at end of file

From 500e85fbf656127be9a339b263387d6693f474cd Mon Sep 17 00:00:00 2001
From: toppyy <43851547+toppyy@users.noreply.github.com>
Date: Sun, 9 Mar 2025 19:54:40 +0200
Subject: [PATCH 12/29] do not allocate on every call..

---
 src/operators/scan.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/operators/scan.c b/src/operators/scan.c
index c56d4f6..ea34d39 100644
--- a/src/operators/scan.c
+++ b/src/operators/scan.c
@@ -45,7 +45,7 @@ void scanGetTuple(Operator* op, Tuple* tpl) {
 
     size_t tplSize = 0;
 
-    void* diskBuffer = calloc(1, SCANTUPLESIZE);
+    void* diskBuffer = tpl->data;
     void* diskBufferCursor = diskBuffer;
     checkPtrNotNull(diskBuffer, "could not allocate buffer for scan");
 
@@ -140,8 +140,5 @@ void scanGetTuple(Operator* op, Tuple* tpl) {
     op->resultDescription.size = tplSize;
 
     free(lineBuffer);
-
-    tpl->data = diskBuffer;
-
 }
 

From 52ad7fb503c77114dd223956efd274fadcdcb38c Mon Sep 17 00:00:00 2001
From: toppyy <43851547+toppyy@users.noreply.github.com>
Date: Sun, 9 Mar 2025 21:43:47 +0200
Subject: [PATCH 13/29] add EXPLAIN; use single alloc for join buffering

---
 src/executor/executeStatement.c    |  3 ++
 src/executor/statements/explain.c  | 68 ++++++++++++++++++++++++++++++
 src/executor/tuple.c               |  4 +-
 src/executor/tuplebuffer.c         | 53 ++++++++++++++++++-----
 src/include/executor/executor.h    |  3 +-
 src/include/executor/tuplebuffer.h | 10 ++++-
 src/include/parser/parsetree.h     |  1 +
 src/operators/join.c               | 12 +++---
 src/parser/parser.c                | 13 ++++++
 9 files changed, 144 insertions(+), 23 deletions(-)
 create mode 100644 src/executor/statements/explain.c

diff --git a/src/executor/executeStatement.c b/src/executor/executeStatement.c
index ace3cca..e6fbd79 100644
--- a/src/executor/executeStatement.c
+++ b/src/executor/executeStatement.c
@@ -16,6 +16,9 @@ void executeStatement(Node* node) {
         case STMTINSERT:
             executeInsert(node);
             break;
+        case STMTEXPLAIN:
+            executeExplain(node);
+            break;
         default:
             printf("Don't know how execute statement of type %d\n", node->type);
             exit(1);
diff --git a/src/executor/statements/explain.c b/src/executor/statements/explain.c
new file mode 100644
index 0000000..01fad91
--- /dev/null
+++ b/src/executor/statements/explain.c
@@ -0,0 +1,68 @@
+#include "../../include/executor/executor.h"
+
+void printOp(Operator* op) {
+    /*
+        Write the name of the operator's type to stdout.
+        No newline is emitted; the caller finishes the line.
+    */
+    const char* label;
+
+    if (op->type == OP_SCANTDB) {
+        label = "OP_SCANTDB";
+    } else if (op->type == OP_SCAN) {
+        label = "OP_SCAN";
+    } else if (op->type == OP_PROJECT) {
+        label = "OP_PROJECT";
+    } else if (op->type == OP_FILTER) {
+        label = "OP_FILTER";
+    } else if (op->type == OP_JOIN) {
+        label = "OP_JOIN";
+    } else if (op->type == OP_AGGREGATE) {
+        label = "OP_AGGREGATE";
+    } else {
+        // Any type not listed above gets a generic label
+        label = "Unknown operation type";
+    }
+
+    printf("%s", label);
+
+}
+
+void explainOp(Operator* op) {
+
+    if (!op) return;
+
+    printOp(op);
+    printf(", size: %ld\n", op->resultDescription.size);
+    if (op->child) {
+
+        if (op->type == OP_FILTER) {
+            explainOp(op->info.filter.next);
+        }
+
+        if (op->type == OP_JOIN) {
+            explainOp(op->info.join.filter);
+            explainOp(op->info.join.left);
+            explainOp(op->info.join.right);
+        }
+
+        explainOp(op->child);
+    }
+
+}
+
+
+void executeExplain(Node* node) {
+    
+    /* Plan the query */
+    Operator* queryplan = planQuery(node->next);
+
+    /* Print the query plan */
+    printf("******* EXPLAIN **********\n");
+    explainOp(queryplan);
+    printf("**************************\n");
+
+
+    freeQueryplan(queryplan);
+}
+
diff --git a/src/executor/tuple.c b/src/executor/tuple.c
index b313e76..90b29ac 100644
--- a/src/executor/tuple.c
+++ b/src/executor/tuple.c
@@ -2,14 +2,14 @@
 
 
 Tuple* initTuple() {
-    Tuple* tpl = malloc(sizeof(Tuple)); // Heap allocation
+    Tuple* tpl = calloc(1, sizeof(Tuple)); // zero-filled so tpl->data is not left indeterminate
     tpl->size = 0;
     return tpl;
 }
 
 
 Tuple* initTupleOfSize(size_t p_size) {
-    Tuple* tpl = malloc(sizeof(Tuple)); // Heap allocation
+    Tuple* tpl = malloc(sizeof(Tuple));
     tpl->data = calloc(1, p_size);
     tpl->size = p_size;
     return tpl;
diff --git a/src/executor/tuplebuffer.c b/src/executor/tuplebuffer.c
index c3985f6..aa9dd88 100644
--- a/src/executor/tuplebuffer.c
+++ b/src/executor/tuplebuffer.c
@@ -1,37 +1,68 @@
 #include "../include/executor/tuplebuffer.h"
 
 
-TupleBuffer* initTupleBuffer(size_t p_capacity) {
+// Creates a buffer of p_capacity Tuple slots backed by one data block of p_capacity * p_tuplesize bytes; malloc results are not checked here.
+TupleBuffer* initTupleBuffer(size_t p_capacity, size_t p_tuplesize) {
     TupleBuffer* buff = malloc(sizeof(TupleBuffer));
-    buff->capacity  = p_capacity;
-    buff->tuples    = malloc(p_capacity * sizeof(Tuple*));
-    buff->size      = 0;
-    return buff;
+    buff->capacity      = p_capacity;
+    buff->tupledatasize = p_tuplesize;
+    buff->tuples        = malloc(p_capacity * sizeof(Tuple));
+    buff->data          = malloc(p_capacity * p_tuplesize);
+    buff->size          = 0;
+    buff->cursor        = 0;
+    return buff;    
 }
 
 
 void resizeTupleBuffer(TupleBuffer* buff) {
+    // Doubles the capacity of both the Tuple array and the shared data block; exits the process if either realloc fails.
     buff->capacity *= 2;
-    buff->tuples = realloc(buff->tuples, buff->capacity * sizeof(Tuple));
+    // realloc into temporaries so the old blocks are not overwritten with NULL on failure
+    Tuple* tmpTpl   = realloc(buff->tuples, buff->capacity * sizeof(Tuple));
+    if (tmpTpl == NULL) {
+        printf("ERROR: Could not resize tuplebuffer from %zu to %zu\n", buff->capacity / 2, buff->capacity);
+        exit(1);
+    }
+
+    buff->tuples    = tmpTpl;
+    void* tmpData = realloc(buff->data, buff->capacity * buff->tupledatasize);
+    if (tmpData == NULL) {
+        printf("ERROR: Could not resize tuplebuffer data from %zu to %zu\n", (buff->capacity / 2) * buff->tupledatasize, buff->capacity * buff->tupledatasize);
+        exit(1);
+    }
+    // Both blocks may have moved: Tuple pointers and Tuple.data pointers obtained before this call are stale
+    buff->data = tmpData;
 }
 
-void addTupleToBuffer(Tuple* tpl, TupleBuffer* buff) {
-    if (buff->size == buff->capacity) {
+// Hands out the next unused Tuple slot, with its data pointing at the next tupledatasize-byte chunk of buff->data.
+Tuple* getTupleFromBuffer(TupleBuffer* buff) {
+    // Grows the buffer while one slot is still free, i.e. before it is completely full
+    if (buff->size >= (buff->capacity-1)) {
         resizeTupleBuffer(buff);
     }
 
-    buff->tuples[buff->size++] = tpl;
+    Tuple* tpl = &buff->tuples[buff->size++];
+    tpl->size = buff->tupledatasize;
+    tpl->data = buff->data + buff->cursor;
+    buff->cursor += buff->tupledatasize;
+    return tpl;
 }
 
+void updateTupleDataptr(TupleBuffer* buff, Tuple* tpl, size_t idx) {
+    tpl->data = buff->data + (idx * buff->tupledatasize);
+}
+
+
 void freeTupleBuffer(TupleBuffer* buff) {
-    for (size_t i = 0; i < buff->size; i++) freeTuple(buff->tuples[i]);
     free(buff->tuples);
+    free(buff->data);
     free(buff);
 }
 
 
 Tuple* getTupleByIndex(TupleBuffer* buff, size_t idx) {
-    return buff->tuples[idx];
+    updateTupleDataptr(buff, &buff->tuples[idx], idx);
+    return &buff->tuples[idx];
 }
 
 size_t isTupleBufferEmpty(TupleBuffer* buff) {
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index a0abaee..b382411 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -22,4 +22,5 @@ extern Bufferpool* buffpool;
 void execute(Operator* op, bool printColNames, void (*tupleHandler)(Tuple* tpl));
 void executeStatement(Node* node);
 void executeCreateTable(Node* node);
-void executeInsert(Node* node);
\ No newline at end of file
+void executeInsert(Node* node);
+void executeExplain(Node* node);
\ No newline at end of file
diff --git a/src/include/executor/tuplebuffer.h b/src/include/executor/tuplebuffer.h
index 91c72ae..4dc2ce2 100644
--- a/src/include/executor/tuplebuffer.h
+++ b/src/include/executor/tuplebuffer.h
@@ -1,15 +1,21 @@
 #pragma once
 #include "tuple.h"
+#include <stdio.h>
 
 typedef struct {
-    Tuple** tuples;
+    Tuple* tuples;
+    void*  data;
+    size_t cursor;
+    size_t tupledatasize;
     size_t size;
     size_t capacity;
 } TupleBuffer;
 
-TupleBuffer* initTupleBuffer(size_t p_capacity);
+TupleBuffer* initTupleBuffer(size_t p_capacity, size_t p_tuplesize);
 void resizeTupleBuffer(TupleBuffer* buff);
 void addTupleToBuffer(Tuple* tpl, TupleBuffer* buff);
 void freeTupleBuffer(TupleBuffer* buff);
+void updateTupleDataptr(TupleBuffer* buff, Tuple* tpl, size_t idx);
 Tuple* getTupleByIndex(TupleBuffer* buff, size_t idx);
+Tuple* getTupleFromBuffer(TupleBuffer* buff);
 size_t isTupleBufferEmpty(TupleBuffer* buff);
diff --git a/src/include/parser/parsetree.h b/src/include/parser/parsetree.h
index 7b6e84f..0482b18 100644
--- a/src/include/parser/parsetree.h
+++ b/src/include/parser/parsetree.h
@@ -32,6 +32,7 @@ enum nodeType {
     AND,
     STMTCREATE,
     STMTINSERT,
+    STMTEXPLAIN,
     TABLE,
     OR
 };
diff --git a/src/operators/join.c b/src/operators/join.c
index 5b839e0..45332e6 100644
--- a/src/operators/join.c
+++ b/src/operators/join.c
@@ -38,13 +38,15 @@ void joinGetTuple(Operator* op, Tuple* tpl) {
     */
     
     if (!op->info.join.rightTuples) {
-        op->info.join.rightTuples = initTupleBuffer(JOINPTRBUFFER);
+        op->info.join.rightTuples = initTupleBuffer(JOINPTRBUFFER, 500); // TODO no magic
     }
 
     Tuple* rightTuple;
     // This is only entered first time the operator is called
     while (!op->info.join.rightTuplesCollected) {
-        rightTuple = initTupleOfSize(500); // TODO no magic
+
+        rightTuple = getTupleFromBuffer(op->info.join.rightTuples);
+
         op->info.join.right->getTuple(op->info.join.right, rightTuple);
     
         if (isTupleEmpty(rightTuple)) {
@@ -52,7 +54,6 @@ void joinGetTuple(Operator* op, Tuple* tpl) {
             continue; 
         } 
 
-        addTupleToBuffer(rightTuple, op->info.join.rightTuples);
         op->info.join.rightTupleCount++;
 
         if (op->info.join.rightTupleCount >= JOINPTRBUFFER) {
@@ -61,7 +62,6 @@ void joinGetTuple(Operator* op, Tuple* tpl) {
         }
     }
 
-    
 
     // Nested join loop
     // For each tuple if left relation
@@ -88,7 +88,6 @@ void joinGetTuple(Operator* op, Tuple* tpl) {
 
         rightTuple = getTupleByIndex(op->info.join.rightTuples, op->info.join.rightTupleIdx++);
 
-
         if (evaluateTuplesAgainstFilterOps(op->info.join.leftTuple, rightTuple, op->info.join.filter)) {
             // Create a new tuple by concating the tuples
             concatTuples(
@@ -105,8 +104,7 @@ void joinGetTuple(Operator* op, Tuple* tpl) {
     
     // Join complete, we can free the buffer and the tuples associated
     freeTupleBuffer(op->info.join.rightTuples);
-    // freeTuple(op->info.join.leftTuple);
-    // freeTuple(rightTuple);
+    freeTuple(op->info.join.leftTuple);
     markTupleAsEmpty(tpl);
     
 }
diff --git a/src/parser/parser.c b/src/parser/parser.c
index 9c85243..b45325d 100644
--- a/src/parser/parser.c
+++ b/src/parser/parser.c
@@ -442,6 +442,13 @@ void insert() {
 }
 
 
+/* Parses "EXPLAIN <query>": the EXPLAIN keyword (presumably recorded as a STMTEXPLAIN node by keyword() - confirm), then the query. */
+void explain() {
+    keyword("EXPLAIN", STMTEXPLAIN);
+    skipWhite();
+    query();
+}
+
 size_t parse(char* input, Node* p_root) {
 
     root = p_root;
@@ -451,6 +458,12 @@ size_t parse(char* input, Node* p_root) {
     qsize = strlen(rawSql);
     getNextChar();
 
+
+    if (peekWordMatches("EXPLAIN")) {
+        explain();
+        return nodeCount;
+    }
+
     if (peekWordMatches("CREATE")) {
         create();
         return nodeCount;

From e58c8c2c4d08333bc4d7bbb6b7a62a1819fadab8 Mon Sep 17 00:00:00 2001
From: toppyy <43851547+toppyy@users.noreply.github.com>
Date: Mon, 10 Mar 2025 21:01:45 +0200
Subject: [PATCH 14/29] remove bufferpool entirely

---
 src/bufferpool/bufferpool.c         | 84 -----------------------------
 src/executor/executor.c             | 13 +----
 src/include/bufferpool/bufferpool.h | 40 --------------
 src/include/const.h                 |  6 +--
 src/include/executor/executor.h     |  4 --
 src/include/operators/aggregate.h   |  1 -
 src/include/operators/filter.h      |  1 -
 src/include/operators/join.h        |  1 -
 src/include/operators/project.h     |  1 -
 src/include/operators/scan.h        |  1 -
 src/include/operators/scanTDB.h     |  1 -
 src/operators/aggregate.c           |  2 +-
 src/operators/join.c                |  9 +---
 src/squel.c                         |  1 -
 14 files changed, 8 insertions(+), 157 deletions(-)
 delete mode 100644 src/bufferpool/bufferpool.c
 delete mode 100644 src/include/bufferpool/bufferpool.h

diff --git a/src/bufferpool/bufferpool.c b/src/bufferpool/bufferpool.c
deleted file mode 100644
index d843741..0000000
--- a/src/bufferpool/bufferpool.c
+++ /dev/null
@@ -1,84 +0,0 @@
-#include "../include/bufferpool/bufferpool.h"
-#include "../include/planner/planner.h"
-
-
-void growBufferpoolIfNeedBe(size_t size) {
-    
-    if (buffpool->used + (long) size < buffpool->capacity) return;
-    long oldCapacity = buffpool->capacity;
-    buffpool->capacity *= 2;
-    buffpool->pool = realloc(buffpool->pool, buffpool->capacity);
-    checkPtrNotNull(buffpool->pool, "Could not allocate memory for bufferpool");
-    memset(buffpool->pool + oldCapacity, 0, oldCapacity);
-    
-}
-
-void copyToBufferPool(int destinationoffset, void* source, size_t size) {
-    growBufferpoolIfNeedBe(size);
-    void* destination = getTuple(destinationoffset);
-    memcpy(destination, source, size);
-}
-
-int addToBufferPoolFromOffset(int originOffset, size_t size) {
-    growBufferpoolIfNeedBe(size);
-    void* target = getNextFreeSlot();
-    memcpy(target, getTuple(originOffset), size);
-    int offset = buffpool->used;
-    buffpool->used += size;
-    return offset;
-}
-
-
-int addToBufferPool(void* source, size_t size) {
-    growBufferpoolIfNeedBe(size);
-    void* target = getNextFreeSlot();
-    memcpy(target, source, size);
-    int offset = buffpool->used;
-    buffpool->used += size;
-    return offset;
-}
-
-void reserveSpaceBufferpool(int offset, size_t size) {
-    growBufferpoolIfNeedBe(size);
-    void* from = getTuple(offset);
-    memset(from, 0, size);
-    buffpool->used += size;
-}
-
-
-int getCurrentOffset() { return buffpool->used; }
-
-void* getCol(int pooloffset, size_t colOffset) {
-    return buffpool->pool + pooloffset + colOffset;
-}
-
-void* getTuple(int pooloffset) {
-    return buffpool->pool + pooloffset;
-}
-
-void* getNextFreeSlot() {
-    return buffpool->pool + buffpool->used;
-}
-
-void getColAsChar(char* target, int pooloffset, size_t colOffset, Datatype type) {
-    if (type == DTYPE_STR) {
-        strcpy(target, getCol(pooloffset, colOffset));
-        return;
-    }
-    if (type == DTYPE_INT) {
-        char tmp[CHARMAXSIZE];
-        sprintf(tmp, "%d", *(int*) getCol(pooloffset, colOffset));
-        memcpy(target, tmp, strlen(tmp));
-        return;
-    }
-    if (type == DTYPE_LONG) {
-        char tmp[CHARMAXSIZE];
-        sprintf(tmp, "%ld", *(long*) getCol(pooloffset, colOffset));
-        memcpy(target, tmp, strlen(tmp));
-        return;
-    }
-    printf("Don't know how to represent type %d as char\n", type);
-    exit(1);
-}
-
-
diff --git a/src/executor/executor.c b/src/executor/executor.c
index e96b5dd..f595b84 100644
--- a/src/executor/executor.c
+++ b/src/executor/executor.c
@@ -1,7 +1,7 @@
 #include "../include/executor/executor.h"
 #include "../include/executor/tuple.h"
 
-Bufferpool* buffpool;
+
 
 void assignGetTupleFunction(Operator *op) {
 
@@ -35,9 +35,6 @@ void assignGetTupleFunction(Operator *op) {
 }
 
 
-
-
-
 void doAssignGetTupleFunction(Operator* p_op) {
 
     if (p_op == NULL) {
@@ -63,10 +60,6 @@ void execute(Operator* op, bool printColNames, void (*tupleHandler)(Tuple* tpl))
         return;
     }
 
-    buffpool            = calloc(1, sizeof(Bufferpool));
-    buffpool->pool      = calloc(BUFFERPOOLSIZE, 1);
-    buffpool->capacity  = BUFFERPOOLSIZE;
-    buffpool->used      = 0;
  
     doAssignGetTupleFunction(op);
 
@@ -86,7 +79,7 @@ void execute(Operator* op, bool printColNames, void (*tupleHandler)(Tuple* tpl))
     }
 
     // Get tuples one by one
-    Tuple* tpl = initTupleOfSize(500); // TODO no magic constants
+    Tuple* tpl = initTupleOfSize(TUPLESIZE);
     for (;;) {
         op->getTuple(op, tpl);
         if (isTupleEmpty(tpl)) break;
@@ -97,6 +90,4 @@ void execute(Operator* op, bool printColNames, void (*tupleHandler)(Tuple* tpl))
     freeTuple(tpl);
 
 
-    free(buffpool->pool);
-    free(buffpool);
 }
diff --git a/src/include/bufferpool/bufferpool.h b/src/include/bufferpool/bufferpool.h
deleted file mode 100644
index abc0797..0000000
--- a/src/include/bufferpool/bufferpool.h
+++ /dev/null
@@ -1,40 +0,0 @@
-#pragma once
-#include <stdlib.h>
-#include "../const.h"
-#include "../parser/parsetree.h"
-
-/*
-    The bufferpool maintains a pool for tuples
-    to which tuples can be added and removed (freed) from.
-
-    A tuple is struct with a fixed size despite
-    the fact that the data is likely not fixed in size.
-
-    A tuple contains it's data as a string. Columns
-    are pointers to the string.
-
-    The rest of the system passes around pointers to the buffer pool.
-
-*/
-
-
-typedef struct {
-    void* pool;
-    long capacity;
-    long used;
-} Bufferpool;
-
-extern Bufferpool* buffpool;
-
-
-void* getNextFreeSlot();
-void getColAsChar(char* target, int pooloffset, size_t colIdx, Datatype type);
-void copyToBufferPool(int destinationoffset, void* source, size_t size);
-int addToBufferPool(void* source, size_t size);
-int addToBufferPoolFromOffset(int offset, size_t size);
-void reserveSpaceBufferpool(int offset, size_t size);
-
-int getCurrentOffset();
-
-void* getTuple(int pooloffset);
-void* getCol(int pooloffset, size_t colOffset);
\ No newline at end of file
diff --git a/src/include/const.h b/src/include/const.h
index 74e230e..625e12d 100644
--- a/src/include/const.h
+++ b/src/include/const.h
@@ -14,9 +14,9 @@
 #define JOINTUPLESIZE    1000
 #define SCANTUPLESIZE    2000
 
-// Bufferpool
-#define BUFFERPOOLSIZE      100000
-#define JOINPTRBUFFER       100000
+// Query execution
+#define JOINBUFFSIZE    100000
+#define TUPLESIZE       500    
 
 // Define max size (in chars) of expressions and query
 #define MAXQUERYSIZE 1000
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index b382411..08e147d 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -4,7 +4,6 @@
 #include "../const.h"
 #include "../parser/parsetree.h"
 #include "../planner/planner.h"
-#include "../bufferpool/bufferpool.h"
 #include "../operators/join.h"
 #include "../operators/filter.h"
 #include "../operators/scan.h"
@@ -13,12 +12,9 @@
 #include "../operators/aggregate.h"
 #include "../io/tdb.h"
 
-
 extern char *buffercache;
 extern char *bufferscan;
 
-extern Bufferpool* buffpool;
-
 void execute(Operator* op, bool printColNames, void (*tupleHandler)(Tuple* tpl));
 void executeStatement(Node* node);
 void executeCreateTable(Node* node);
diff --git a/src/include/operators/aggregate.h b/src/include/operators/aggregate.h
index cd58290..f0586a9 100644
--- a/src/include/operators/aggregate.h
+++ b/src/include/operators/aggregate.h
@@ -1,6 +1,5 @@
 #pragma once
 #include <stdbool.h>
-#include "../bufferpool/bufferpool.h"
 #include "../planner/planner.h"
 #include "../executor/executor.h"
 #include "../executor/tuple.h"
diff --git a/src/include/operators/filter.h b/src/include/operators/filter.h
index f3bd5d8..f1582ea 100644
--- a/src/include/operators/filter.h
+++ b/src/include/operators/filter.h
@@ -1,6 +1,5 @@
 #pragma once
 #include <stdbool.h>
-#include "../bufferpool/bufferpool.h"
 #include "../planner/planner.h"
 #include "../executor/tuple.h"
 
diff --git a/src/include/operators/join.h b/src/include/operators/join.h
index e54a6da..8b38e50 100644
--- a/src/include/operators/join.h
+++ b/src/include/operators/join.h
@@ -1,5 +1,4 @@
 #pragma once
-#include "../bufferpool/bufferpool.h"
 #include "../planner/planner.h"
 #include "../executor/executor.h"
 #include "../executor/tuple.h"
diff --git a/src/include/operators/project.h b/src/include/operators/project.h
index 46bef38..e4b4a10 100644
--- a/src/include/operators/project.h
+++ b/src/include/operators/project.h
@@ -1,5 +1,4 @@
 #pragma once
-#include "../bufferpool/bufferpool.h"
 #include "../planner/planner.h"
 #include "../executor/tuple.h"
 
diff --git a/src/include/operators/scan.h b/src/include/operators/scan.h
index 10ab0bc..da6ad44 100644
--- a/src/include/operators/scan.h
+++ b/src/include/operators/scan.h
@@ -1,5 +1,4 @@
 #pragma once
-#include "../bufferpool/bufferpool.h"
 #include "../planner/planner.h"
 #include "../executor/executor.h"
 #include "../executor/tuple.h"
diff --git a/src/include/operators/scanTDB.h b/src/include/operators/scanTDB.h
index 7f8b6a9..f34c20d 100644
--- a/src/include/operators/scanTDB.h
+++ b/src/include/operators/scanTDB.h
@@ -1,5 +1,4 @@
 #pragma once
-#include "../bufferpool/bufferpool.h"
 #include "../planner/planner.h"
 #include "../executor/executor.h"
 #include "../executor/tuple.h"
diff --git a/src/operators/aggregate.c b/src/operators/aggregate.c
index 395b1d7..1742aa8 100644
--- a/src/operators/aggregate.c
+++ b/src/operators/aggregate.c
@@ -67,7 +67,7 @@ void aggregateGetTuple(Operator* op, Tuple* tpl) {
 
     size_t observations = 0;
     
-    Tuple* tmpTpl = initTupleOfSize(500); // TODO no magic
+    Tuple* tmpTpl = initTupleOfSize(TUPLESIZE);
 
     for (;;) {
         
diff --git a/src/operators/join.c b/src/operators/join.c
index 45332e6..9ccadc2 100644
--- a/src/operators/join.c
+++ b/src/operators/join.c
@@ -38,7 +38,7 @@ void joinGetTuple(Operator* op, Tuple* tpl) {
     */
     
     if (!op->info.join.rightTuples) {
-        op->info.join.rightTuples = initTupleBuffer(JOINPTRBUFFER, 500); // TODO no magic
+        op->info.join.rightTuples = initTupleBuffer(JOINBUFFSIZE, TUPLESIZE);
     }
 
     Tuple* rightTuple;
@@ -55,11 +55,6 @@ void joinGetTuple(Operator* op, Tuple* tpl) {
         } 
 
         op->info.join.rightTupleCount++;
-
-        if (op->info.join.rightTupleCount >= JOINPTRBUFFER) {
-            printf("Can't fit the right table in the query into joinbuffer. Increase JOINPTRBUFFER\n");
-            exit(1);
-        }
     }
 
 
@@ -68,7 +63,7 @@ void joinGetTuple(Operator* op, Tuple* tpl) {
     //      For each tuple in right relation
     //          if join_predicates(left,right) return tuple(left,right)
 
-    op->info.join.leftTuple = initTupleOfSize(500);
+    op->info.join.leftTuple = initTupleOfSize(TUPLESIZE);
 
     if (isTupleEmpty(op->info.join.leftTuple)) {
         op->info.join.left->getTuple(op->info.join.left, op->info.join.leftTuple);
diff --git a/src/squel.c b/src/squel.c
index 8469934..a4256a1 100644
--- a/src/squel.c
+++ b/src/squel.c
@@ -2,7 +2,6 @@
 #include "./include/parser/parser.h"
 #include "./include/planner/planner.h"
 #include "./include/io/tdb.h"
-#include "./include/bufferpool/bufferpool.h"
 
 #define METADATABUFFSIZE 10
 

From e6a2927f35792ec6b9fa6229b321aa02cd3447d5 Mon Sep 17 00:00:00 2001
From: toppyy <43851547+toppyy@users.noreply.github.com>
Date: Mon, 10 Mar 2025 21:20:05 +0200
Subject: [PATCH 15/29] explain and test

---
 src/executor/statements/explain.c | 22 ++++++++++------------
 test/test-explain.bats            | 27 +++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 12 deletions(-)
 create mode 100644 test/test-explain.bats

diff --git a/src/executor/statements/explain.c b/src/executor/statements/explain.c
index 01fad91..ccb18f2 100644
--- a/src/executor/statements/explain.c
+++ b/src/executor/statements/explain.c
@@ -33,22 +33,21 @@ void explainOp(Operator* op) {
     if (!op) return;
 
     printOp(op);
-    printf(", size: %ld\n", op->resultDescription.size);
-    if (op->child) {
+    printf("\n");
 
-        if (op->type == OP_FILTER) {
-            explainOp(op->info.filter.next);
-        }
+    if (op->type == OP_FILTER) {
+        explainOp(op->info.filter.next);
+    }
 
-        if (op->type == OP_JOIN) {
-            explainOp(op->info.join.filter);
-            explainOp(op->info.join.left);
-            explainOp(op->info.join.right);
-        }
+    if (op->type == OP_JOIN) {
+        explainOp(op->info.join.filter);
+        explainOp(op->info.join.left);
+        explainOp(op->info.join.right);
+    }
 
+    if (op->child) {
         explainOp(op->child);
     }
-
 }
 
 
@@ -62,7 +61,6 @@ void executeExplain(Node* node) {
     explainOp(queryplan);
     printf("**************************\n");
 
-
     freeQueryplan(queryplan);
 }
 
diff --git a/test/test-explain.bats b/test/test-explain.bats
new file mode 100644
index 0000000..003d530
--- /dev/null
+++ b/test/test-explain.bats
@@ -0,0 +1,27 @@
+
+#!/usr/bin/env bats
+
+setup_file() {
+    run make 
+}
+
+@test "Simple subquery \w WHERE" {
+    run ./build/squel "EXPLAIN SELECT col3 FROM (SELECT col3,col1 FROM './test/data/small.csv') WHERE col3>100"
+    [[ $"${lines[0]}" == "******* EXPLAIN **********" ]]
+    [[ $"${lines[1]}" == "OP_PROJECT" ]]
+    [[ $"${lines[2]}" == "OP_FILTER" ]]
+    [[ $"${lines[3]}" == "OP_PROJECT" ]]
+    [[ $"${lines[4]}" == "OP_SCAN" ]]
+    [[ $"${lines[5]}" == "**************************" ]]
+}
+
+@test "EXPLAIN a query" {
+    run ./build/squel "EXPLAIN SELECT col1,col3,int FROM test_small JOIN test_small2 ON col3=int"
+    [[ $"${lines[0]}" == "******* EXPLAIN **********" ]]
+    [[ $"${lines[1]}" == "OP_PROJECT" ]]
+    [[ $"${lines[2]}" == "OP_JOIN" ]]
+    [[ $"${lines[3]}" == "OP_FILTER" ]]
+    [[ $"${lines[4]}" == "OP_SCANTDB" ]]
+    [[ $"${lines[5]}" == "OP_SCANTDB" ]]
+    [[ $"${lines[6]}" == "**************************" ]]
+}

From 18803bdfc65725cd20e9c702f9ee191ff81d2ebc Mon Sep 17 00:00:00 2001
From: toppyy <43851547+toppyy@users.noreply.github.com>
Date: Tue, 11 Mar 2025 21:08:11 +0200
Subject: [PATCH 16/29] hashjoin started

---
 Makefile                          |  2 +-
 perf/results/count.csv            |  8 +--
 perf/results/join.csv             | 12 ++--
 src/executor/executor.c           |  7 ++-
 src/executor/statements/explain.c |  7 ++-
 src/include/executor/executor.h   |  1 +
 src/include/operators/hashjoin.h  | 10 ++++
 src/include/operators/join.h      |  3 +-
 src/include/planner/planner.h     |  5 +-
 src/include/util/hashmap.h        | 28 ++++++++++
 src/operators/hashjoin.c          | 92 +++++++++++++++++++++++++++++++
 src/operators/join.c              |  1 +
 src/planner/operators/join.c      | 27 ++++++++-
 src/util/hashmap.c                | 46 ++++++++++++++++
 test/data/animals.csv             |  5 ++
 test/data/fruits.csv              |  5 ++
 test/hashmap_test.c               | 28 ++++++++++
 test/test-explain.bats            | 15 ++++-
 test/test_hashmap.bats            | 14 +++++
 test/test_join_duplicate.bats     | 30 ++++++++++
 20 files changed, 325 insertions(+), 21 deletions(-)
 create mode 100644 src/include/operators/hashjoin.h
 create mode 100644 src/include/util/hashmap.h
 create mode 100644 src/operators/hashjoin.c
 create mode 100644 src/util/hashmap.c
 create mode 100644 test/data/animals.csv
 create mode 100644 test/data/fruits.csv
 create mode 100644 test/hashmap_test.c
 create mode 100644 test/test_hashmap.bats
 create mode 100644 test/test_join_duplicate.bats

diff --git a/Makefile b/Makefile
index dfea7e7..a5aa287 100644
--- a/Makefile
+++ b/Makefile
@@ -22,7 +22,7 @@ $(ODIR)/%.o: $(SRC)%.c
 	$(CC) -g -c $< -o $@  $(CFLAGS)
 
 dirs:
-	mkdir -p data build/parser build/planner/operators build/binder build/io build/executor build/executor/statements build/bufferpool build/operators
+	mkdir -p data build/parser build/planner/operators build/binder build/io build/executor build/executor/statements build/operators build/util/hashmap
 
 clean:
 	rm -f ./build/squel $(OBJ)
diff --git a/perf/results/count.csv b/perf/results/count.csv
index aaf89ab..b958cfb 100644
--- a/perf/results/count.csv
+++ b/perf/results/count.csv
@@ -1,7 +1,7 @@
 filetype;records;time
-CSV;100000;0:00.05
+CSV;100000;0:00.04
 TDB;100000;0:00.00
-CSV;1000000;0:00.46
+CSV;1000000;0:00.30
 TDB;1000000;0:00.04
-Command terminated by signal 2
-CSV;10000000;0:01.30
+CSV;10000000;0:02.96
+TDB;10000000;0:00.44
diff --git a/perf/results/join.csv b/perf/results/join.csv
index 5d44775..d10b3b1 100644
--- a/perf/results/join.csv
+++ b/perf/results/join.csv
@@ -1,9 +1,5 @@
 filetype;records_left;records_right;time
-CSV;10000;100;0:00.06
-TDB;10000;100;0:00.04
-CSV;10000;1000;0:00.40
-TDB;10000;1000;0:00.39
-CSV;100000;100;0:00.45
-TDB;100000;100;0:00.38
-CSV;100000;1000;0:03.93
-TDB;100000;1000;0:03.80
+CSV;10000;100;0:00.02
+TDB;10000;100;0:00.00
+Command terminated by signal 9
+CSV;10000;1000;0:23.16
diff --git a/src/executor/executor.c b/src/executor/executor.c
index f595b84..053caa8 100644
--- a/src/executor/executor.c
+++ b/src/executor/executor.c
@@ -25,11 +25,14 @@ void assignGetTupleFunction(Operator *op) {
         case (OP_JOIN):
             op->getTuple = &joinGetTuple;
             break;
+        case (OP_HASHJOIN):
+            op->getTuple = &hashjoinGetTuple;
+            break;
         case (OP_AGGREGATE):
             op->getTuple = &aggregateGetTuple;
             break;
         default:
-            printf("Don't know how to handle op-type %d\n", op->type);
+            printf("EXECUTOR-error: Don't know how to handle op-type %d\n", op->type);
             exit(1);
     }
 }
@@ -47,7 +50,7 @@ void doAssignGetTupleFunction(Operator* p_op) {
         doAssignGetTupleFunction(p_op->child);
     }
 
-    if (p_op->type == OP_JOIN) {
+    if (p_op->type == OP_JOIN || p_op->type == OP_HASHJOIN) {
         doAssignGetTupleFunction(p_op->info.join.left);
         doAssignGetTupleFunction(p_op->info.join.right);
     }
diff --git a/src/executor/statements/explain.c b/src/executor/statements/explain.c
index ccb18f2..8314855 100644
--- a/src/executor/statements/explain.c
+++ b/src/executor/statements/explain.c
@@ -21,8 +21,11 @@ void printOp(Operator* op) {
         case OP_AGGREGATE:
             printf("OP_AGGREGATE");
             break;
+        case OP_HASHJOIN:
+            printf("OP_HASHJOIN");
+            break;
         default:
-            printf("Unknown operation type");
+            printf("EXPLAIN-error: Unknown operation type");
             break;
     }
 
@@ -39,7 +42,7 @@ void explainOp(Operator* op) {
         explainOp(op->info.filter.next);
     }
 
-    if (op->type == OP_JOIN) {
+    if (op->type == OP_JOIN || op->type == OP_HASHJOIN) {
         explainOp(op->info.join.filter);
         explainOp(op->info.join.left);
         explainOp(op->info.join.right);
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 08e147d..58d11da 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -5,6 +5,7 @@
 #include "../parser/parsetree.h"
 #include "../planner/planner.h"
 #include "../operators/join.h"
+#include "../operators/hashjoin.h"
 #include "../operators/filter.h"
 #include "../operators/scan.h"
 #include "../operators/scanTDB.h"
diff --git a/src/include/operators/hashjoin.h b/src/include/operators/hashjoin.h
new file mode 100644
index 0000000..24a5d86
--- /dev/null
+++ b/src/include/operators/hashjoin.h
@@ -0,0 +1,10 @@
+#pragma once
+#include "../planner/planner.h"
+#include "../executor/executor.h"
+#include "../executor/tuple.h"
+#include "../executor/tuplebuffer.h"
+#include "./join.h"
+#include "../util/hashmap.h"
+
+
+void hashjoinGetTuple(Operator* op, Tuple* tpl);
\ No newline at end of file
diff --git a/src/include/operators/join.h b/src/include/operators/join.h
index 8b38e50..2a6fbec 100644
--- a/src/include/operators/join.h
+++ b/src/include/operators/join.h
@@ -5,4 +5,5 @@
 #include "../executor/tuplebuffer.h"
 
 
-void joinGetTuple(Operator* op, Tuple* tpl);
\ No newline at end of file
+void joinGetTuple(Operator* op, Tuple* tpl);
+void concatTuples(Tuple* returnTpl, Tuple* leftTpl, Tuple* rightTpl, ResultSet* left, ResultSet* right);
\ No newline at end of file
diff --git a/src/include/planner/planner.h b/src/include/planner/planner.h
index df3efb2..01f86ea 100644
--- a/src/include/planner/planner.h
+++ b/src/include/planner/planner.h
@@ -8,6 +8,7 @@
 #include "../parser/parsetree.h"
 #include "../executor/tuple.h"
 #include "../executor/tuplebuffer.h"
+#include "../util/hashmap.h"
 
 
 typedef enum {
@@ -16,7 +17,8 @@ typedef enum {
     OP_PROJECT,
     OP_FILTER,
     OP_JOIN,
-    OP_AGGREGATE
+    OP_AGGREGATE,
+    OP_HASHJOIN
 } OperatorType;
 
 typedef enum ComparisonType {
@@ -100,6 +102,7 @@ typedef struct {
     struct Operator* left;
     struct Operator* right;
     struct Operator* filter;
+    Hashmap* hashmap;
     TupleBuffer* rightTuples;
     Tuple* leftTuple;
     size_t rightTupleIdx;
diff --git a/src/include/util/hashmap.h b/src/include/util/hashmap.h
new file mode 100644
index 0000000..e5e74ae
--- /dev/null
+++ b/src/include/util/hashmap.h
@@ -0,0 +1,28 @@
+#pragma once
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+typedef struct {
+    char key[100];
+    size_t value;
+    size_t obs;
+    struct MapNode* next;
+} MapNode;
+
+
+typedef struct  {
+    MapNode* data;
+    size_t table_size;
+} Hashmap;
+
+
+
+Hashmap*    initHashmap(size_t table_size);
+void        insertToHashmap(Hashmap* map, const char* key, size_t value);
+size_t      isInHashmap(Hashmap* map, const char* value);
+void        freeHashmap(Hashmap* map);
+size_t getValueFromHashmap(Hashmap* map, const char* key);
+
+unsigned int hash(const char *key, size_t table_size);
\ No newline at end of file
diff --git a/src/operators/hashjoin.c b/src/operators/hashjoin.c
new file mode 100644
index 0000000..b12aad1
--- /dev/null
+++ b/src/operators/hashjoin.c
@@ -0,0 +1,92 @@
+#include "../include/operators/hashjoin.h"
+
+
+
+void hashjoinGetTuple(Operator* op, Tuple* tpl) {
+    if (
+        op->info.join.left == NULL ||
+        op->info.join.right == NULL
+        ) {
+        printf("Join left or right operator is NULL\n");
+        exit(1);
+    }
+
+    int joinColIdx     = op->info.join.filter->info.filter.boolExprList[2];
+    int joinColOffset  = op->info.join.right->resultDescription.pCols[joinColIdx];
+    
+    if (!op->info.join.hashmap) {
+        op->info.join.hashmap = initHashmap(1000); // TODO magic
+        op->info.join.rightTuples = initTupleBuffer(JOINBUFFSIZE, TUPLESIZE);
+    }
+
+
+    Tuple* rightTuple = initTupleOfSize(TUPLESIZE);
+    const char* joinValue;
+
+    // This is only entered first time the operator is called
+    while (!op->info.join.rightTuplesCollected) {
+
+        rightTuple = getTupleFromBuffer(op->info.join.rightTuples);
+
+        op->info.join.right->getTuple(op->info.join.right, rightTuple);
+    
+        if (isTupleEmpty(rightTuple)) {
+            op->info.join.rightTuplesCollected = true;
+            continue; 
+        } 
+        // Get value of join column
+        joinValue = (const char*) getTupleCol(rightTuple, joinColOffset);
+
+        insertToHashmap(op->info.join.hashmap, joinValue, op->info.join.rightTupleCount);
+
+        op->info.join.rightTupleCount++;
+    }
+
+
+
+    // Nested join loop
+    // For each tuple if left relation
+    //      For each tuple in right relation
+    //          if join_predicates(left,right) return tuple(left,right)
+
+    op->info.join.leftTuple = initTupleOfSize(TUPLESIZE);
+
+    if (isTupleEmpty(op->info.join.leftTuple)) {
+        op->info.join.left->getTuple(op->info.join.left, op->info.join.leftTuple);
+    }
+
+    joinColIdx     = op->info.join.filter->info.filter.boolExprList[0];
+    joinColOffset  = op->info.join.left->resultDescription.pCols[joinColIdx];
+
+
+    do {
+        joinValue = (const char*) getTupleCol(op->info.join.leftTuple, joinColOffset);
+
+        if (!isInHashmap(op->info.join.hashmap, joinValue)) {
+            op->info.join.left->getTuple(op->info.join.left, op->info.join.leftTuple);
+            continue;
+        }
+
+        rightTuple = getTupleByIndex(op->info.join.rightTuples, getValueFromHashmap(op->info.join.hashmap, joinValue));
+
+        // Create a new tuple by concating the tuples
+        concatTuples(
+            tpl,
+            op->info.join.leftTuple,
+            rightTuple,
+            &op->info.join.left->resultDescription,
+            &op->info.join.right->resultDescription
+        );
+            
+        return;
+
+    } while (!isTupleEmpty(op->info.join.leftTuple));
+    
+    // Join complete, we can free the buffer and the tuples associated
+    freeTupleBuffer(op->info.join.rightTuples);
+    freeTuple(op->info.join.leftTuple);
+    markTupleAsEmpty(tpl);
+    
+}
+
+
diff --git a/src/operators/join.c b/src/operators/join.c
index 9ccadc2..58247f4 100644
--- a/src/operators/join.c
+++ b/src/operators/join.c
@@ -104,3 +104,4 @@ void joinGetTuple(Operator* op, Tuple* tpl) {
     
 }
 
+
diff --git a/src/planner/operators/join.c b/src/planner/operators/join.c
index 9e7c452..e586650 100644
--- a/src/planner/operators/join.c
+++ b/src/planner/operators/join.c
@@ -40,6 +40,28 @@ Operator* makeJoinFilterOps(
     return filterOps;
 }
 
+OperatorType deduceJoinType(Operator* filterOp) {
+    // Atm we can do a hash join
+    // if and only if:
+    //  - There's only one join condition
+    //  - The condition is an equality comparison
+
+    if (filterOp->info.filter.next) {
+        return OP_JOIN;
+    }
+
+    if (filterOp->info.filter.boolExprListSize < 3) {
+        return OP_JOIN;
+    }
+
+    if (filterOp->info.filter.boolExprList[1] != -1) {
+        return OP_JOIN;
+    }
+
+    return OP_HASHJOIN;
+
+}
+
 
 Operator* makeJoinOp(Operator* left, Operator* right, Node* ON) {
     
@@ -54,7 +76,6 @@ Operator* makeJoinOp(Operator* left, Operator* right, Node* ON) {
         Operator* opJoin = (Operator*) calloc(1, sizeof(Operator));
         opJoin->info.join.left     = left;
         opJoin->info.join.right    = right;
-        opJoin->type = OP_JOIN;
         opJoin->info.join.rightTupleCount = 0;
         opJoin->info.join.rightTupleIdx = 0;
         opJoin->info.join.rightTuplesCollected = false;
@@ -94,5 +115,9 @@ Operator* makeJoinOp(Operator* left, Operator* right, Node* ON) {
         Operator* opFilter = makeJoinFilterOps(ON, opJoin, left->resultDescription, right->resultDescription);
         opJoin->info.join.filter = opFilter;
 
+
+        opJoin->type = deduceJoinType(opFilter);
+
+
         return opJoin;
 }
\ No newline at end of file
diff --git a/src/util/hashmap.c b/src/util/hashmap.c
new file mode 100644
index 0000000..1b241a2
--- /dev/null
+++ b/src/util/hashmap.c
@@ -0,0 +1,46 @@
+#include "../include/util/hashmap.h"
+
+
+
+Hashmap* initHashmap(size_t table_size) {
+    Hashmap* map = malloc(sizeof(Hashmap));
+    map->data = calloc(table_size, sizeof(MapNode));
+    map->table_size = table_size;
+    return map;
+}
+
+void insertToHashmap(Hashmap* map, const char* key, size_t value) {
+    unsigned int idx = hash(key, map->table_size);
+    if (map->data[idx].obs == 0) {
+        memcpy(map->data[idx].key, key, strlen(key));
+    }
+    map->data[idx].value = value;
+    map->data[idx].obs++;
+    // TODO handle collisions
+}
+
+size_t isInHashmap(Hashmap* map, const char* key) {
+    unsigned int idx = hash(key, map->table_size);
+    return map->data[idx].obs > 0 ? 1 : 0;
+}
+
+size_t getValueFromHashmap(Hashmap* map, const char* key) {
+    unsigned int idx = hash(key, map->table_size);
+    return map->data[idx].value;
+}
+
+
+
+
+void freeHashmap(Hashmap* map) {
+    free(map->data); // TODO free any adjacent nodes after handling collitions
+    free(map);
+}
+
+unsigned int hash(const char *key, size_t table_size) {
+    unsigned long int hashval = 0;
+    while (*key) {
+        hashval = (hashval << 5) + *key++;
+    }
+    return hashval % table_size;
+}
\ No newline at end of file
diff --git a/test/data/animals.csv b/test/data/animals.csv
new file mode 100644
index 0000000..cff6b78
--- /dev/null
+++ b/test/data/animals.csv
@@ -0,0 +1,5 @@
+animal;size
+monkey;small
+cat;small
+whale;very big
+horse;medium
\ No newline at end of file
diff --git a/test/data/fruits.csv b/test/data/fruits.csv
new file mode 100644
index 0000000..98b24cf
--- /dev/null
+++ b/test/data/fruits.csv
@@ -0,0 +1,5 @@
+fruit;size
+grape;small
+strawberry;small
+watermelon;very big
+orange;medium
\ No newline at end of file
diff --git a/test/hashmap_test.c b/test/hashmap_test.c
new file mode 100644
index 0000000..ec2df1f
--- /dev/null
+++ b/test/hashmap_test.c
@@ -0,0 +1,28 @@
+#include "../src/util/hashmap.c"
+#include <stdio.h>
+
+int main() {
+
+    Hashmap* map = initHashmap(1000);
+
+    insertToHashmap(map, "12345", 1442);
+    insertToHashmap(map, "12346", 2);
+    insertToHashmap(map, "12X46", 3);
+
+    if (isInHashmap(map, "12345")) {
+        printf("12345 in map with value %ld\n", getValueFromHashmap(map, "12345"));
+    }
+
+    if (isInHashmap(map, "12X46")) {
+        printf("12X46 in map\n");
+    }
+
+    if (isInHashmap(map, "123fASFA")) {
+        printf("12X46 in map\n");
+    }
+
+
+    freeHashmap(map);
+
+    return 0;
+}
\ No newline at end of file
diff --git a/test/test-explain.bats b/test/test-explain.bats
index 003d530..0c99167 100644
--- a/test/test-explain.bats
+++ b/test/test-explain.bats
@@ -15,10 +15,23 @@ setup_file() {
     [[ $"${lines[5]}" == "**************************" ]]
 }
 
-@test "EXPLAIN a query" {
+@test "EXPLAIN - hash join" {
     run ./build/squel "EXPLAIN SELECT col1,col3,int FROM test_small JOIN test_small2 ON col3=int"
     [[ $"${lines[0]}" == "******* EXPLAIN **********" ]]
     [[ $"${lines[1]}" == "OP_PROJECT" ]]
+    [[ $"${lines[2]}" == "OP_HASHJOIN" ]]
+    [[ $"${lines[3]}" == "OP_FILTER" ]]
+    [[ $"${lines[4]}" == "OP_SCANTDB" ]]
+    [[ $"${lines[5]}" == "OP_SCANTDB" ]]
+    [[ $"${lines[6]}" == "**************************" ]]
+}
+
+
+
+@test "EXPLAIN - join with nested loop join" {
+    run ./build/squel "EXPLAIN SELECT col1,col3,int FROM test_small JOIN test_small2 ON col3>int"
+    [[ $"${lines[0]}" == "******* EXPLAIN **********" ]]
+    [[ $"${lines[1]}" == "OP_PROJECT" ]]
     [[ $"${lines[2]}" == "OP_JOIN" ]]
     [[ $"${lines[3]}" == "OP_FILTER" ]]
     [[ $"${lines[4]}" == "OP_SCANTDB" ]]
diff --git a/test/test_hashmap.bats b/test/test_hashmap.bats
new file mode 100644
index 0000000..fd5eb73
--- /dev/null
+++ b/test/test_hashmap.bats
@@ -0,0 +1,14 @@
+
+#!/usr/bin/env bats
+
+setup_file() {
+    run rm ./build/hashmap_test.o
+    run gcc ./test/hashmap_test.c -o ./build/hashmap_test.o
+}
+
+@test "Hashmap functionality" {
+    run ./build/hashmap_test.o
+    [[ $"${lines[0]}" == "12345 in map with value 1442" ]]
+    [[ $"${lines[1]}" == "12X46 in map" ]]
+}
+
diff --git a/test/test_join_duplicate.bats b/test/test_join_duplicate.bats
new file mode 100644
index 0000000..330c652
--- /dev/null
+++ b/test/test_join_duplicate.bats
@@ -0,0 +1,30 @@
+#!/usr/bin/env bats
+
+setup_file() {
+    run make 
+}
+
+@test "Join animals to fruits duplicating rows" {
+    run ./build/squel "SELECT a.size,a.animal,f.fruit FROM './test/data/animals.csv' AS a JOIN './test/data/fruits.csv' AS f ON a.size=f.size"
+    [[ $"${lines[0]}" == "size;animal;fruit" ]]
+    [[ $"${lines[1]}" == "small;monkey;grape" ]]
+    [[ $"${lines[2]}" == "small;monkey;strawberry" ]]
+    [[ $"${lines[3]}" == "small;cat;grape" ]]
+    [[ $"${lines[4]}" == "small;cat;strawberry" ]]
+    [[ $"${lines[5]}" == "very big;whale;watermelon" ]]
+    [[ $"${lines[6]}" == "medium;horse;orange" ]]
+
+}
+
+
+@test "Join fruits to animals duplicating rows" {
+    run ./build/squel "SELECT a.size,a.animal,f.fruit FROM './test/data/fruits.csv' AS f JOIN './test/data/animals.csv' AS a ON a.size=f.size"
+    [[ $"${lines[0]}" == "size;animal;fruit" ]]
+    [[ $"${lines[1]}" == "small;monkey;grape" ]]
+    [[ $"${lines[2]}" == "small;monkey;strawberry" ]]
+    [[ $"${lines[3]}" == "small;cat;grape" ]]
+    [[ $"${lines[4]}" == "small;cat;strawberry" ]]
+    [[ $"${lines[5]}" == "very big;whale;watermelon" ]]
+    [[ $"${lines[6]}" == "medium;horse;orange" ]]
+
+}

From 442cf93b9c627eb66548041ac6580890c4bde28c Mon Sep 17 00:00:00 2001
From: toppyy <43851547+toppyy@users.noreply.github.com>
Date: Tue, 11 Mar 2025 21:38:09 +0200
Subject: [PATCH 17/29] hashjoin sort of works

---
 src/include/util/hashmap.h    |  9 +++++----
 src/operators/hashjoin.c      | 17 +++++++++++++----
 src/operators/join.c          |  4 +++-
 src/util/hashmap.c            | 21 ++++++++++++++++-----
 test/test_join_duplicate.bats |  5 ++---
 5 files changed, 39 insertions(+), 17 deletions(-)

diff --git a/src/include/util/hashmap.h b/src/include/util/hashmap.h
index e5e74ae..39d1156 100644
--- a/src/include/util/hashmap.h
+++ b/src/include/util/hashmap.h
@@ -5,9 +5,10 @@
 #include <stdio.h>
 
 typedef struct {
-    char key[100];
-    size_t value;
+    char key[10000]; // TODO no magic;
+    size_t values[10000];
     size_t obs;
+    size_t cursor;
     struct MapNode* next;
 } MapNode;
 
@@ -18,11 +19,11 @@ typedef struct  {
 } Hashmap;
 
 
-
 Hashmap*    initHashmap(size_t table_size);
 void        insertToHashmap(Hashmap* map, const char* key, size_t value);
 size_t      isInHashmap(Hashmap* map, const char* value);
 void        freeHashmap(Hashmap* map);
-size_t getValueFromHashmap(Hashmap* map, const char* key);
+size_t      getValueFromHashmap(Hashmap* map, const char* key);
+void        resetCursor(Hashmap* map, const char* key);
 
 unsigned int hash(const char *key, size_t table_size);
\ No newline at end of file
diff --git a/src/operators/hashjoin.c b/src/operators/hashjoin.c
index b12aad1..5ca7975 100644
--- a/src/operators/hashjoin.c
+++ b/src/operators/hashjoin.c
@@ -12,7 +12,8 @@ void hashjoinGetTuple(Operator* op, Tuple* tpl) {
     }
 
     int joinColIdx     = op->info.join.filter->info.filter.boolExprList[2];
-    int joinColOffset  = op->info.join.right->resultDescription.pCols[joinColIdx];
+    int joinColOffset  = op->info.join.filter->resultDescription.pCols[joinColIdx];
+    // int joinColOffset  = op->info.join.right->resultDescription.pCols[joinColIdx];
     
     if (!op->info.join.hashmap) {
         op->info.join.hashmap = initHashmap(1000); // TODO magic
@@ -49,25 +50,33 @@ void hashjoinGetTuple(Operator* op, Tuple* tpl) {
     //      For each tuple in right relation
     //          if join_predicates(left,right) return tuple(left,right)
 
-    op->info.join.leftTuple = initTupleOfSize(TUPLESIZE);
+    if (op->info.join.leftTuple == NULL) {
+        op->info.join.leftTuple = initTupleOfSize(TUPLESIZE);
+    }
 
     if (isTupleEmpty(op->info.join.leftTuple)) {
         op->info.join.left->getTuple(op->info.join.left, op->info.join.leftTuple);
     }
 
     joinColIdx     = op->info.join.filter->info.filter.boolExprList[0];
-    joinColOffset  = op->info.join.left->resultDescription.pCols[joinColIdx];
+    joinColOffset  = op->info.join.filter->resultDescription.pCols[joinColIdx];
 
 
+    int tupleIdx;
     do {
         joinValue = (const char*) getTupleCol(op->info.join.leftTuple, joinColOffset);
 
         if (!isInHashmap(op->info.join.hashmap, joinValue)) {
+            resetCursor(op->info.join.hashmap, joinValue);
             op->info.join.left->getTuple(op->info.join.left, op->info.join.leftTuple);
             continue;
         }
 
-        rightTuple = getTupleByIndex(op->info.join.rightTuples, getValueFromHashmap(op->info.join.hashmap, joinValue));
+
+        tupleIdx = getValueFromHashmap(op->info.join.hashmap, joinValue);
+        if (tupleIdx < 0) continue;
+
+        rightTuple = getTupleByIndex(op->info.join.rightTuples, tupleIdx);
 
         // Create a new tuple by concating the tuples
         concatTuples(
diff --git a/src/operators/join.c b/src/operators/join.c
index 58247f4..49bc5f0 100644
--- a/src/operators/join.c
+++ b/src/operators/join.c
@@ -63,7 +63,9 @@ void joinGetTuple(Operator* op, Tuple* tpl) {
     //      For each tuple in right relation
     //          if join_predicates(left,right) return tuple(left,right)
 
-    op->info.join.leftTuple = initTupleOfSize(TUPLESIZE);
+    if (op->info.join.leftTuple == NULL) {
+        op->info.join.leftTuple = initTupleOfSize(TUPLESIZE);
+    }
 
     if (isTupleEmpty(op->info.join.leftTuple)) {
         op->info.join.left->getTuple(op->info.join.left, op->info.join.leftTuple);
diff --git a/src/util/hashmap.c b/src/util/hashmap.c
index 1b241a2..5599c94 100644
--- a/src/util/hashmap.c
+++ b/src/util/hashmap.c
@@ -1,7 +1,5 @@
 #include "../include/util/hashmap.h"
 
-
-
 Hashmap* initHashmap(size_t table_size) {
     Hashmap* map = malloc(sizeof(Hashmap));
     map->data = calloc(table_size, sizeof(MapNode));
@@ -14,23 +12,36 @@ void insertToHashmap(Hashmap* map, const char* key, size_t value) {
     if (map->data[idx].obs == 0) {
         memcpy(map->data[idx].key, key, strlen(key));
     }
-    map->data[idx].value = value;
+    map->data[idx].values[map->data[idx].obs] = value;
     map->data[idx].obs++;
+    if (map->data[idx].obs >= 100) {
+        printf("OUT OF BOUNDS\n"); // TODO
+    }
     // TODO handle collisions
 }
 
 size_t isInHashmap(Hashmap* map, const char* key) {
     unsigned int idx = hash(key, map->table_size);
+    if (map->data[idx].cursor == map->data[idx].obs) return 0;
     return map->data[idx].obs > 0 ? 1 : 0;
 }
 
-size_t getValueFromHashmap(Hashmap* map, const char* key) {
+
+void resetCursor(Hashmap* map, const char* key) {
     unsigned int idx = hash(key, map->table_size);
-    return map->data[idx].value;
+    map->data[idx].cursor = 0;
 }
 
+size_t getValueFromHashmap(Hashmap* map, const char* key) {
+    unsigned int idx = hash(key, map->table_size);
 
+    if (map->data[idx].cursor == map->data[idx].obs)  return -1;
 
+    size_t rtrn = map->data[idx].values[map->data[idx].cursor++];
+
+    
+    return rtrn;
+}
 
 void freeHashmap(Hashmap* map) {
     free(map->data); // TODO free any adjacent nodes after handling collitions
diff --git a/test/test_join_duplicate.bats b/test/test_join_duplicate.bats
index 330c652..7bba3cc 100644
--- a/test/test_join_duplicate.bats
+++ b/test/test_join_duplicate.bats
@@ -16,13 +16,12 @@ setup_file() {
 
 }
 
-
 @test "Join fruits to animals duplicating rows" {
     run ./build/squel "SELECT a.size,a.animal,f.fruit FROM './test/data/fruits.csv' AS f JOIN './test/data/animals.csv' AS a ON a.size=f.size"
     [[ $"${lines[0]}" == "size;animal;fruit" ]]
     [[ $"${lines[1]}" == "small;monkey;grape" ]]
-    [[ $"${lines[2]}" == "small;monkey;strawberry" ]]
-    [[ $"${lines[3]}" == "small;cat;grape" ]]
+    [[ $"${lines[2]}" == "small;cat;grape" ]]
+    [[ $"${lines[3]}" == "small;monkey;strawberry" ]]
     [[ $"${lines[4]}" == "small;cat;strawberry" ]]
     [[ $"${lines[5]}" == "very big;whale;watermelon" ]]
     [[ $"${lines[6]}" == "medium;horse;orange" ]]

From ef00fbb9f661180af715637f522f43062f57f8d9 Mon Sep 17 00:00:00 2001
From: toppyy <43851547+toppyy@users.noreply.github.com>
Date: Tue, 11 Mar 2025 22:16:43 +0200
Subject: [PATCH 18/29] hashmap without collision detection

---
 src/include/util/hashmap.h | 4 ++--
 src/operators/hashjoin.c   | 3 +--
 src/util/hashmap.c         | 6 +++++-
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/include/util/hashmap.h b/src/include/util/hashmap.h
index 39d1156..645b3d3 100644
--- a/src/include/util/hashmap.h
+++ b/src/include/util/hashmap.h
@@ -5,8 +5,8 @@
 #include <stdio.h>
 
 typedef struct {
-    char key[10000]; // TODO no magic;
-    size_t values[10000];
+    char key[100]; // TODO no magic;
+    size_t values[1000];
     size_t obs;
     size_t cursor;
     struct MapNode* next;
diff --git a/src/operators/hashjoin.c b/src/operators/hashjoin.c
index 5ca7975..15b99c0 100644
--- a/src/operators/hashjoin.c
+++ b/src/operators/hashjoin.c
@@ -13,10 +13,9 @@ void hashjoinGetTuple(Operator* op, Tuple* tpl) {
 
     int joinColIdx     = op->info.join.filter->info.filter.boolExprList[2];
     int joinColOffset  = op->info.join.filter->resultDescription.pCols[joinColIdx];
-    // int joinColOffset  = op->info.join.right->resultDescription.pCols[joinColIdx];
     
     if (!op->info.join.hashmap) {
-        op->info.join.hashmap = initHashmap(1000); // TODO magic
+        op->info.join.hashmap = initHashmap(300000); // TODO magic
         op->info.join.rightTuples = initTupleBuffer(JOINBUFFSIZE, TUPLESIZE);
     }
 
diff --git a/src/util/hashmap.c b/src/util/hashmap.c
index 5599c94..ec6b1e6 100644
--- a/src/util/hashmap.c
+++ b/src/util/hashmap.c
@@ -3,6 +3,10 @@
 Hashmap* initHashmap(size_t table_size) {
     Hashmap* map = malloc(sizeof(Hashmap));
     map->data = calloc(table_size, sizeof(MapNode));
+    if (map->data == NULL) {
+        printf("Error: unable to reserve %ld bytes\n", (sizeof(MapNode) * table_size) / 1024);
+        exit(1);
+    }
     map->table_size = table_size;
     return map;
 }
@@ -14,7 +18,7 @@ void insertToHashmap(Hashmap* map, const char* key, size_t value) {
     }
     map->data[idx].values[map->data[idx].obs] = value;
     map->data[idx].obs++;
-    if (map->data[idx].obs >= 100) {
+    if (map->data[idx].obs >= 10000) {
         printf("OUT OF BOUNDS\n"); // TODO
     }
     // TODO handle collisions

From 0d7d45983357c101e0cd6eec116ce9bdb0f362a8 Mon Sep 17 00:00:00 2001
From: toppyy <43851547+toppyy@users.noreply.github.com>
Date: Thu, 13 Mar 2025 20:48:01 +0200
Subject: [PATCH 19/29] minor changes

---
 src/executor/tuplebuffer.c | 3 +++
 src/include/util/hashmap.h | 2 +-
 src/util/hashmap.c         | 6 ++++--
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/executor/tuplebuffer.c b/src/executor/tuplebuffer.c
index aa9dd88..74080fa 100644
--- a/src/executor/tuplebuffer.c
+++ b/src/executor/tuplebuffer.c
@@ -19,13 +19,16 @@ void resizeTupleBuffer(TupleBuffer* buff) {
     buff->capacity *= 2;
 
     Tuple* tmpTpl   = realloc(buff->tuples, buff->capacity * sizeof(Tuple));
+
     if (tmpTpl == NULL) {
         printf("ERROR: Could resize tuplebuffer from %ld to %ld\n", buff->capacity, buff->capacity * 2);
         exit(1);
     }
 
     buff->tuples    = tmpTpl;
+
     void* tmpData = realloc(buff->data, buff->capacity * buff->tupledatasize);
+    
     if (tmpData == NULL) {
         printf("ERROR: Could resize tuplebuffer data from %ld to %ld\n", buff->capacity * buff->tupledatasize, buff->capacity * buff->tupledatasize * 2);
         exit(1);
diff --git a/src/include/util/hashmap.h b/src/include/util/hashmap.h
index 645b3d3..c2408d5 100644
--- a/src/include/util/hashmap.h
+++ b/src/include/util/hashmap.h
@@ -6,7 +6,7 @@
 
 typedef struct {
     char key[100]; // TODO no magic;
-    size_t values[1000];
+    size_t values[10000];
     size_t obs;
     size_t cursor;
     struct MapNode* next;
diff --git a/src/util/hashmap.c b/src/util/hashmap.c
index ec6b1e6..67f69ae 100644
--- a/src/util/hashmap.c
+++ b/src/util/hashmap.c
@@ -17,10 +17,12 @@ void insertToHashmap(Hashmap* map, const char* key, size_t value) {
         memcpy(map->data[idx].key, key, strlen(key));
     }
     map->data[idx].values[map->data[idx].obs] = value;
-    map->data[idx].obs++;
+    
     if (map->data[idx].obs >= 10000) {
-        printf("OUT OF BOUNDS\n"); // TODO
+        // printf("OUT OF BOUNDS\n"); // TODOs
+        return;
     }
+    map->data[idx].obs++;
     // TODO handle collisions
 }
 

From c5196f5a62c7ef45e5652680161b542c3e705774 Mon Sep 17 00:00:00 2001
From: toppyy <43851547+toppyy@users.noreply.github.com>
Date: Thu, 13 Mar 2025 21:51:01 +0200
Subject: [PATCH 20/29] fix join memleaks; start collision handling

---
 src/include/util/hashmap.h |  1 +
 src/operators/hashjoin.c   | 12 +++++-------
 src/operators/join.c       |  7 +++----
 src/planner/planner.c      |  2 +-
 src/util/hashmap.c         | 33 +++++++++++++++++++++++++++------
 5 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/src/include/util/hashmap.h b/src/include/util/hashmap.h
index c2408d5..6e5420c 100644
--- a/src/include/util/hashmap.h
+++ b/src/include/util/hashmap.h
@@ -25,5 +25,6 @@ size_t      isInHashmap(Hashmap* map, const char* value);
 void        freeHashmap(Hashmap* map);
 size_t      getValueFromHashmap(Hashmap* map, const char* key);
 void        resetCursor(Hashmap* map, const char* key);
+void        _tryInsert(Hashmap* map, const char* key, size_t value, MapNode* node);
 
 unsigned int hash(const char *key, size_t table_size);
\ No newline at end of file
diff --git a/src/operators/hashjoin.c b/src/operators/hashjoin.c
index 15b99c0..2ea4fd7 100644
--- a/src/operators/hashjoin.c
+++ b/src/operators/hashjoin.c
@@ -15,12 +15,12 @@ void hashjoinGetTuple(Operator* op, Tuple* tpl) {
     int joinColOffset  = op->info.join.filter->resultDescription.pCols[joinColIdx];
     
     if (!op->info.join.hashmap) {
-        op->info.join.hashmap = initHashmap(300000); // TODO magic
+        op->info.join.hashmap = initHashmap(30000); // TODO magic
         op->info.join.rightTuples = initTupleBuffer(JOINBUFFSIZE, TUPLESIZE);
     }
 
 
-    Tuple* rightTuple = initTupleOfSize(TUPLESIZE);
+    Tuple* rightTuple;
     const char* joinValue;
 
     // This is only entered first time the operator is called
@@ -43,11 +43,7 @@ void hashjoinGetTuple(Operator* op, Tuple* tpl) {
     }
 
 
-
-    // Nested join loop
-    // For each tuple if left relation
-    //      For each tuple in right relation
-    //          if join_predicates(left,right) return tuple(left,right)
+    // Join
 
     if (op->info.join.leftTuple == NULL) {
         op->info.join.leftTuple = initTupleOfSize(TUPLESIZE);
@@ -93,6 +89,8 @@ void hashjoinGetTuple(Operator* op, Tuple* tpl) {
     // Join complete, we can free the buffer and the tuples associated
     freeTupleBuffer(op->info.join.rightTuples);
     freeTuple(op->info.join.leftTuple);
+    
+    freeHashmap(op->info.join.hashmap);
     markTupleAsEmpty(tpl);
     
 }
diff --git a/src/operators/join.c b/src/operators/join.c
index 49bc5f0..8e629da 100644
--- a/src/operators/join.c
+++ b/src/operators/join.c
@@ -11,10 +11,9 @@ void concatTuples(Tuple* returnTpl, Tuple* leftTpl, Tuple* rightTpl, ResultSet*
         exit(1);
     }
 
-    void* address = calloc(1, left->size + right->size);
-    memcpy(address, leftTpl->data, left->size);
-    memcpy(address + left->size, rightTpl->data, right->size);
-    returnTpl->data = address;
+
+    memcpy(returnTpl->data, leftTpl->data, left->size);
+    memcpy(returnTpl->data + left->size, rightTpl->data, right->size);
 }
 
 void joinGetTuple(Operator* op, Tuple* tpl) {
diff --git a/src/planner/planner.c b/src/planner/planner.c
index b595a0d..101eeb6 100644
--- a/src/planner/planner.c
+++ b/src/planner/planner.c
@@ -11,7 +11,7 @@ void freeQueryplan(Operator *node) {
     }
 
 
-    if (node->type == OP_JOIN) {
+    if (node->type == OP_JOIN || node->type == OP_JOIN) {
         freeQueryplan(node->info.join.left);
         freeQueryplan(node->info.join.right);
         freeQueryplan(node->info.join.filter);
diff --git a/src/util/hashmap.c b/src/util/hashmap.c
index 67f69ae..b28abb7 100644
--- a/src/util/hashmap.c
+++ b/src/util/hashmap.c
@@ -13,17 +13,38 @@ Hashmap* initHashmap(size_t table_size) {
 
 void insertToHashmap(Hashmap* map, const char* key, size_t value) {
     unsigned int idx = hash(key, map->table_size);
-    if (map->data[idx].obs == 0) {
-        memcpy(map->data[idx].key, key, strlen(key));
+
+    MapNode* node = &map->data[idx];
+    _tryInsert(map, key, value, node);
+}
+
+
+
+void _tryInsert(Hashmap* map __attribute__((unused)), const char* key, size_t value, MapNode* node) {
+
+    if (node->obs == 0) {
+
+        memcpy(node->key, key, strlen(key));
+
+    } else {
+
+        // if (strcmp(key, node->key) == 0) {
+        //     printf("Collision\n");
+        //     if (!node->next) {
+        //         node->next = calloc(1, sizeof(MapNode));
+        //     }
+
+        //     _tryInsert(map, key, value, node);
+        //     return;
+        // }
     }
-    map->data[idx].values[map->data[idx].obs] = value;
+    node->values[node->obs] = value;
     
-    if (map->data[idx].obs >= 10000) {
+    if (node->obs >= 10000) {
         // printf("OUT OF BOUNDS\n"); // TODOs
         return;
     }
-    map->data[idx].obs++;
-    // TODO handle collisions
+    node->obs++;
 }
 
 size_t isInHashmap(Hashmap* map, const char* key) {

From 699436746dc3f932c3aa049f50c229af250898a3 Mon Sep 17 00:00:00 2001
From: toppyy <43851547+toppyy@users.noreply.github.com>
Date: Thu, 13 Mar 2025 22:17:23 +0200
Subject: [PATCH 21/29] actually fix memleak

---
 src/planner/planner.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/planner/planner.c b/src/planner/planner.c
index 101eeb6..1bc1df3 100644
--- a/src/planner/planner.c
+++ b/src/planner/planner.c
@@ -11,7 +11,7 @@ void freeQueryplan(Operator *node) {
     }
 
 
-    if (node->type == OP_JOIN || node->type == OP_JOIN) {
+    if (node->type == OP_JOIN || node->type == OP_HASHJOIN) {
         freeQueryplan(node->info.join.left);
         freeQueryplan(node->info.join.right);
         freeQueryplan(node->info.join.filter);

From 55acfc709e8d86846ba2c6d968bf605f28da41b8 Mon Sep 17 00:00:00 2001
From: toppyy <43851547+toppyy@users.noreply.github.com>
Date: Fri, 14 Mar 2025 21:52:22 +0200
Subject: [PATCH 22/29] continue hashjoin collision handling

---
 src/include/util/hashmap.h |  4 +++-
 src/util/hashmap.c         | 38 +++++++++++++++++++++++++++++---------
 2 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/src/include/util/hashmap.h b/src/include/util/hashmap.h
index 6e5420c..b62e703 100644
--- a/src/include/util/hashmap.h
+++ b/src/include/util/hashmap.h
@@ -4,7 +4,9 @@
 #include <string.h>
 #include <stdio.h>
 
-typedef struct {
+
+
+typedef struct MapNode {
     char key[100]; // TODO no magic;
     size_t values[10000];
     size_t obs;
diff --git a/src/util/hashmap.c b/src/util/hashmap.c
index b28abb7..1bdb31b 100644
--- a/src/util/hashmap.c
+++ b/src/util/hashmap.c
@@ -21,6 +21,8 @@ void insertToHashmap(Hashmap* map, const char* key, size_t value) {
 
 
 void _tryInsert(Hashmap* map __attribute__((unused)), const char* key, size_t value, MapNode* node) {
+    
+    if (strlen(key) == 0) return; // Where do these come from?
 
     if (node->obs == 0) {
 
@@ -28,15 +30,15 @@ void _tryInsert(Hashmap* map __attribute__((unused)), const char* key, size_t va
 
     } else {
 
-        // if (strcmp(key, node->key) == 0) {
-        //     printf("Collision\n");
-        //     if (!node->next) {
-        //         node->next = calloc(1, sizeof(MapNode));
-        //     }
+        if (strcmp(key, node->key) == 0) {
+            printf("Collision %s (%ld) vs %s (%ld)\n", key, strlen(key), node->key, strlen(key));
+            if (!node->next) {
+                node->next = calloc(1, sizeof(MapNode));
+            }
 
-        //     _tryInsert(map, key, value, node);
-        //     return;
-        // }
+            _tryInsert(map, key, value, node);
+            return;
+        }
     }
     node->values[node->obs] = value;
     
@@ -70,8 +72,26 @@ size_t getValueFromHashmap(Hashmap* map, const char* key) {
     return rtrn;
 }
 
+void freeHashMapNode(MapNode* node) {
+    if (!node) return;
+
+    if (node->next) {
+        freeHashMapNode(node->next);
+    }
+    free(node);
+}
+
 void freeHashmap(Hashmap* map) {
-    free(map->data); // TODO free any adjacent nodes after handling collitions
+    MapNode* node;
+    for (size_t i = 0; i < map->table_size; i++) {
+        node = &map->data[i];
+
+        if (node == 0) break;
+
+        freeHashMapNode(node->next); // Only adjacents need to be freed
+        
+    }
+    free(map->data);
     free(map);
 }
 

From ac37de8198a0b77961742f955ae1793db9eb43d1 Mon Sep 17 00:00:00 2001
From: toby <43851547+toppyy@users.noreply.github.com>
Date: Sun, 16 Mar 2025 08:30:46 +0200
Subject: [PATCH 23/29] introduce options

---
 src/include/const.h      |  3 +-
 src/include/squel.h      | 16 ++++++++++
 src/operators/hashjoin.c |  4 +--
 src/squel.c              | 63 +++++++++++++++++++++++++++++++++++-----
 src/util/hashmap.c       |  2 +-
 5 files changed, 77 insertions(+), 11 deletions(-)
 create mode 100644 src/include/squel.h

diff --git a/src/include/const.h b/src/include/const.h
index 625e12d..287f302 100644
--- a/src/include/const.h
+++ b/src/include/const.h
@@ -14,9 +14,10 @@
 #define JOINTUPLESIZE    1000
 #define SCANTUPLESIZE    2000
 
-// Query execution
+// Defaults for query execution
 #define JOINBUFFSIZE    100000
 #define TUPLESIZE       500    
+#define HTSIZE          3000
 
 // Define max size (in chars) of expressions and query
 #define MAXQUERYSIZE 1000
diff --git a/src/include/squel.h b/src/include/squel.h
new file mode 100644
index 0000000..d72a828
--- /dev/null
+++ b/src/include/squel.h
@@ -0,0 +1,16 @@
+#pragma once
+
+
+typedef enum {
+    OPT_HTSIZE
+} Option;
+
+typedef struct Options {
+    size_t htsize;
+} Options;
+
+extern Options* OPTIONS;
+
+Options* initOptions();
+
+size_t getOption(Option opt);
\ No newline at end of file
diff --git a/src/operators/hashjoin.c b/src/operators/hashjoin.c
index 2ea4fd7..eede747 100644
--- a/src/operators/hashjoin.c
+++ b/src/operators/hashjoin.c
@@ -1,5 +1,5 @@
 #include "../include/operators/hashjoin.h"
-
+#include "../include/squel.h"
 
 
 void hashjoinGetTuple(Operator* op, Tuple* tpl) {
@@ -15,7 +15,7 @@ void hashjoinGetTuple(Operator* op, Tuple* tpl) {
     int joinColOffset  = op->info.join.filter->resultDescription.pCols[joinColIdx];
     
     if (!op->info.join.hashmap) {
-        op->info.join.hashmap = initHashmap(30000); // TODO magic
+        op->info.join.hashmap = initHashmap(getOption(OPT_HTSIZE));
         op->info.join.rightTuples = initTupleBuffer(JOINBUFFSIZE, TUPLESIZE);
     }
 
diff --git a/src/squel.c b/src/squel.c
index a4256a1..24990aa 100644
--- a/src/squel.c
+++ b/src/squel.c
@@ -2,11 +2,14 @@
 #include "./include/parser/parser.h"
 #include "./include/planner/planner.h"
 #include "./include/io/tdb.h"
+#include "./include/const.h"
+#include "./include/squel.h"
 
 #define METADATABUFFSIZE 10
 
-
+// Globals :/
 ResultSet* resultDescToPrint = NULL;
+Options* OPTIONS;
 
 void printTree(Node *node) {
 
@@ -49,8 +52,6 @@ void valueToChar(char* target, Tuple* tpl, size_t colOffset, Datatype type) {
 }
 
 
-
-
 void printTuple(Tuple* tpl) {
 
     if (resultDescToPrint == NULL) {
@@ -73,6 +74,26 @@ void printTuple(Tuple* tpl) {
 
 }
 
+Options* initOptions() {
+    OPTIONS = malloc(sizeof(Options));
+    OPTIONS->htsize = HTSIZE;
+    return OPTIONS;
+}
+
+size_t getOption(Option opt) {
+    printf("Getting opt!\n");
+    switch(opt) {
+        case OPT_HTSIZE:
+            return OPTIONS->htsize;
+    }
+
+    printf("getOption: Tried to retrieve an unknown option\n");
+    exit(1);
+}
+
+
+
+
 int main(int argc, char* argv[]) {
 
     if (argc == 1) {
@@ -80,16 +101,42 @@ int main(int argc, char* argv[]) {
         exit(1);
     }
 
-    if (strlen(argv[1]) >= MAXQUERYSIZE) {
+    Options* opts = initOptions();
+
+    size_t query_arg = 1;
+
+    // Loop through the arguments
+    for (int i = 1; i < argc; i++) {
+       
+        if (strcmp(argv[i], "--help") == 0) {
+            printf("Help: See README.md.\n");
+            return 0;
+        } 
+        else if (strcmp(argv[i], "--htsize") == 0) {
+            i++;
+            char*  endptr;
+            size_t htsize = strtoull(argv[i], &endptr, 10);
+
+            if (endptr == argv[i]) {
+                printf("--htsize expects an integer\n");
+                exit(1);
+            }
+
+            opts->htsize = htsize;
+
+            query_arg += 2;
+        }
+    }
+
+
+    if (strlen(argv[query_arg]) >= MAXQUERYSIZE) {
         printf("Error: Query length exceeds maximum.\n");
         exit(1);
     }
 
     /* Allocate memory for parse tree and parse the raw query */
     Node* parsetree = createParsetree();
-    parse(argv[1], parsetree);
-
-    // printTree(parsetree);
+    parse(argv[query_arg], parsetree);
 
     // It's either a SELECT or a STMT
     Operator* queryplan = NULL;
@@ -112,4 +159,6 @@ int main(int argc, char* argv[]) {
         freeQueryplan(queryplan);
     }
 
+    free(opts);
+
 }
\ No newline at end of file
diff --git a/src/util/hashmap.c b/src/util/hashmap.c
index 1bdb31b..6a7f90d 100644
--- a/src/util/hashmap.c
+++ b/src/util/hashmap.c
@@ -30,7 +30,7 @@ void _tryInsert(Hashmap* map __attribute__((unused)), const char* key, size_t va
 
     } else {
 
-        if (strcmp(key, node->key) == 0) {
+        if (strcmp(key, node->key) != 0) {
             printf("Collision %s (%ld) vs %s (%ld)\n", key, strlen(key), node->key, strlen(key));
             if (!node->next) {
                 node->next = calloc(1, sizeof(MapNode));

From 049a1880aae106497c6d8792fa2e08bc047adfa6 Mon Sep 17 00:00:00 2001
From: toby <43851547+toppyy@users.noreply.github.com>
Date: Sun, 16 Mar 2025 08:49:20 +0200
Subject: [PATCH 24/29] handle collisions in hashmap

---
 src/include/util/hashmap.h |  2 ++
 src/squel.c                |  4 ----
 src/util/hashmap.c         | 38 ++++++++++++++++++++++++++++++--------
 3 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/src/include/util/hashmap.h b/src/include/util/hashmap.h
index b62e703..97a3bff 100644
--- a/src/include/util/hashmap.h
+++ b/src/include/util/hashmap.h
@@ -24,8 +24,10 @@ typedef struct  {
 Hashmap*    initHashmap(size_t table_size);
 void        insertToHashmap(Hashmap* map, const char* key, size_t value);
 size_t      isInHashmap(Hashmap* map, const char* value);
+size_t      _isInHashmap(Hashmap* map, MapNode* node, const char* key);
 void        freeHashmap(Hashmap* map);
 size_t      getValueFromHashmap(Hashmap* map, const char* key);
+size_t      _getValueFromHashmap(Hashmap* map, MapNode* node, const char* key);
 void        resetCursor(Hashmap* map, const char* key);
 void        _tryInsert(Hashmap* map, const char* key, size_t value, MapNode* node);
 
diff --git a/src/squel.c b/src/squel.c
index 24990aa..945ad14 100644
--- a/src/squel.c
+++ b/src/squel.c
@@ -81,7 +81,6 @@ Options* initOptions() {
 }
 
 size_t getOption(Option opt) {
-    printf("Getting opt!\n");
     switch(opt) {
         case OPT_HTSIZE:
             return OPTIONS->htsize;
@@ -91,9 +90,6 @@ size_t getOption(Option opt) {
     exit(1);
 }
 
-
-
-
 int main(int argc, char* argv[]) {
 
     if (argc == 1) {
diff --git a/src/util/hashmap.c b/src/util/hashmap.c
index 6a7f90d..6acbd02 100644
--- a/src/util/hashmap.c
+++ b/src/util/hashmap.c
@@ -31,12 +31,11 @@ void _tryInsert(Hashmap* map __attribute__((unused)), const char* key, size_t va
     } else {
 
         if (strcmp(key, node->key) != 0) {
-            printf("Collision %s (%ld) vs %s (%ld)\n", key, strlen(key), node->key, strlen(key));
             if (!node->next) {
                 node->next = calloc(1, sizeof(MapNode));
             }
 
-            _tryInsert(map, key, value, node);
+            _tryInsert(map, key, value, node->next);
             return;
         }
     }
@@ -49,10 +48,22 @@ void _tryInsert(Hashmap* map __attribute__((unused)), const char* key, size_t va
     node->obs++;
 }
 
+size_t _isInHashmap(Hashmap* map, MapNode* node, const char* key) {
+    if (strcmp(key, node->key) != 0) {
+        if (!node->next) {
+            return 0;
+        }
+        return _isInHashmap(map, node->next, key);
+    }
+    if (node->cursor == node->obs) return 0;
+    return node->obs > 0 ? 1 : 0;
+}
+
+
 size_t isInHashmap(Hashmap* map, const char* key) {
     unsigned int idx = hash(key, map->table_size);
-    if (map->data[idx].cursor == map->data[idx].obs) return 0;
-    return map->data[idx].obs > 0 ? 1 : 0;
+    MapNode* node = &map->data[idx];
+    return _isInHashmap(map, node, key);
 }
 
 
@@ -63,13 +74,24 @@ void resetCursor(Hashmap* map, const char* key) {
 
 size_t getValueFromHashmap(Hashmap* map, const char* key) {
     unsigned int idx = hash(key, map->table_size);
+    MapNode* node = &map->data[idx];
+    return _getValueFromHashmap(map, node, key);
+}
 
-    if (map->data[idx].cursor == map->data[idx].obs)  return -1;
+size_t _getValueFromHashmap(Hashmap* map, MapNode* node, const char* key) {
 
-    size_t rtrn = map->data[idx].values[map->data[idx].cursor++];
+    if (strcmp(key, node->key) != 0) {
+        if (!node->next) {
+            return 0;
+        }
+
+        return _getValueFromHashmap(map, node->next, key);
+    }
+
+    if (node->cursor == node->obs)  return -1;
+
+    return node->values[node->cursor++];
 
-    
-    return rtrn;
 }
 
 void freeHashMapNode(MapNode* node) {

From c17a722275e73673c2132ae626cdf7a71f17a13c Mon Sep 17 00:00:00 2001
From: toby <43851547+toppyy@users.noreply.github.com>
Date: Sun, 16 Mar 2025 08:53:21 +0200
Subject: [PATCH 25/29] add test for hashjoin with small ht-size

---
 test/test_simple_join.bats | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/test/test_simple_join.bats b/test/test_simple_join.bats
index 57bc7d3..b5dc044 100644
--- a/test/test_simple_join.bats
+++ b/test/test_simple_join.bats
@@ -30,6 +30,11 @@ setup_file() {
     [[ $"${lines[5]}" == "" ]]
 }
 
-
+@test "Hashjoin with small hashtable" {
+    run ./build/squel --htsize 10 "SELECT COUNT(u.unemployed) FROM './test/data/lt_unemployed.csv' AS lt JOIN './test/data/unemployed.csv' AS u ON u.time=lt.time"
+    [[ $"${lines[0]}" == "unemployed" ]]
+    [[ $"${lines[1]}" == "213" ]]
+    [[ $"${lines[2]}" == "" ]]
+}
 
 

From 20e2893de9e4f9a74db4094deb2b078c2e6bf94e Mon Sep 17 00:00:00 2001
From: toby <43851547+toppyy@users.noreply.github.com>
Date: Sun, 16 Mar 2025 08:58:02 +0200
Subject: [PATCH 26/29] update perf-stats

---
 perf/results/count.csv |  6 +++---
 perf/results/join.csv  | 12 ++++++++----
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/perf/results/count.csv b/perf/results/count.csv
index b958cfb..5af0628 100644
--- a/perf/results/count.csv
+++ b/perf/results/count.csv
@@ -1,7 +1,7 @@
 filetype;records;time
-CSV;100000;0:00.04
+CSV;100000;0:00.03
 TDB;100000;0:00.00
 CSV;1000000;0:00.30
 TDB;1000000;0:00.04
-CSV;10000000;0:02.96
-TDB;10000000;0:00.44
+CSV;10000000;0:03.36
+TDB;10000000;0:00.36
diff --git a/perf/results/join.csv b/perf/results/join.csv
index d10b3b1..7760768 100644
--- a/perf/results/join.csv
+++ b/perf/results/join.csv
@@ -1,5 +1,9 @@
 filetype;records_left;records_right;time
-CSV;10000;100;0:00.02
-TDB;10000;100;0:00.00
-Command terminated by signal 9
-CSV;10000;1000;0:23.16
+CSV;10000;100;0:00.05
+TDB;10000;100;0:00.05
+CSV;10000;1000;0:00.50
+TDB;10000;1000;0:00.51
+CSV;100000;100;0:00.55
+TDB;100000;100;0:00.50
+CSV;100000;1000;0:05.08
+TDB;100000;1000;0:05.01

From a772d1334fae11b6e4b63f92db15a3bc98ba35b9 Mon Sep 17 00:00:00 2001
From: toby <43851547+toppyy@users.noreply.github.com>
Date: Sun, 16 Mar 2025 10:02:00 +0200
Subject: [PATCH 27/29] add bats submodules

---
 .gitmodules                   |  6 ++++++
 test/bats-core                |  2 +-
 test/test-explain.bats        | 19 +++++++++----------
 test/test_helper/bats-assert  |  1 +
 test/test_helper/bats-support |  1 +
 5 files changed, 18 insertions(+), 11 deletions(-)
 create mode 160000 test/test_helper/bats-assert
 create mode 160000 test/test_helper/bats-support

diff --git a/.gitmodules b/.gitmodules
index 85e69a6..7cda21a 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,9 @@
 [submodule "bats"]
 	path = test/bats-core
 	url = https://github.com/bats-core/bats-core
+[submodule "test/test_helper/bats-assert"]
+	path = test/test_helper/bats-assert
+	url = https://github.com/bats-core/bats-assert.git
+[submodule "test/test_helper/bats-support"]
+	path = test/test_helper/bats-support
+	url = https://github.com/bats-core/bats-support.git
diff --git a/test/bats-core b/test/bats-core
index de96df0..261b029 160000
--- a/test/bats-core
+++ b/test/bats-core
@@ -1 +1 @@
-Subproject commit de96df03197ecc51635463fd9e35e26638191a90
+Subproject commit 261b029f3b3957a154f3e69abcbf19fe3e265c0a
diff --git a/test/test-explain.bats b/test/test-explain.bats
index 0c99167..80d9619 100644
--- a/test/test-explain.bats
+++ b/test/test-explain.bats
@@ -1,18 +1,15 @@
+#!/usr/bin/env bash
 
-#!/usr/bin/env bats
-
-setup_file() {
+setup() {
+    load './test_helper/bats-support/load'
+    load './test_helper/bats-assert/load'
     run make 
 }
 
-@test "Simple subquery \w WHERE" {
+@test "EXPLAIN - subquery \w WHERE" {
     run ./build/squel "EXPLAIN SELECT col3 FROM (SELECT col3,col1 FROM './test/data/small.csv') WHERE col3>100"
-    [[ $"${lines[0]}" == "******* EXPLAIN **********" ]]
-    [[ $"${lines[1]}" == "OP_PROJECT" ]]
-    [[ $"${lines[2]}" == "OP_FILTER" ]]
-    [[ $"${lines[3]}" == "OP_PROJECT" ]]
-    [[ $"${lines[4]}" == "OP_SCAN" ]]
-    [[ $"${lines[5]}" == "**************************" ]]
+    expected_output=$(printf "******* EXPLAIN **********\nOP_PROJECT\nOP_FILTER\nOP_PROJECT\nOP_SCAN\n**************************")
+    assert_output "$expected_output"
 }
 
 @test "EXPLAIN - hash join" {
@@ -38,3 +35,5 @@ setup_file() {
     [[ $"${lines[5]}" == "OP_SCANTDB" ]]
     [[ $"${lines[6]}" == "**************************" ]]
 }
+
+
diff --git a/test/test_helper/bats-assert b/test/test_helper/bats-assert
new file mode 160000
index 0000000..0ec504e
--- /dev/null
+++ b/test/test_helper/bats-assert
@@ -0,0 +1 @@
+Subproject commit 0ec504eb523fd87af924ad77e1221ee4fb8c1596
diff --git a/test/test_helper/bats-support b/test/test_helper/bats-support
new file mode 160000
index 0000000..9bf10e8
--- /dev/null
+++ b/test/test_helper/bats-support
@@ -0,0 +1 @@
+Subproject commit 9bf10e876dd6b624fe44423f0b35e064225f7556

From a73f4c7c2998fbb15a78657ce5a16bf7c1a14862 Mon Sep 17 00:00:00 2001
From: toby <43851547+toppyy@users.noreply.github.com>
Date: Sun, 16 Mar 2025 10:06:01 +0200
Subject: [PATCH 28/29] use assert_output

---
 test/test-explain.bats | 21 +++++----------------
 1 file changed, 5 insertions(+), 16 deletions(-)

diff --git a/test/test-explain.bats b/test/test-explain.bats
index 80d9619..996f70d 100644
--- a/test/test-explain.bats
+++ b/test/test-explain.bats
@@ -14,26 +14,15 @@ setup() {
 
 @test "EXPLAIN - hash join" {
     run ./build/squel "EXPLAIN SELECT col1,col3,int FROM test_small JOIN test_small2 ON col3=int"
-    [[ $"${lines[0]}" == "******* EXPLAIN **********" ]]
-    [[ $"${lines[1]}" == "OP_PROJECT" ]]
-    [[ $"${lines[2]}" == "OP_HASHJOIN" ]]
-    [[ $"${lines[3]}" == "OP_FILTER" ]]
-    [[ $"${lines[4]}" == "OP_SCANTDB" ]]
-    [[ $"${lines[5]}" == "OP_SCANTDB" ]]
-    [[ $"${lines[6]}" == "**************************" ]]
-}
-
+    expected_output=$(printf "******* EXPLAIN **********\nOP_PROJECT\nOP_HASHJOIN\nOP_FILTER\nOP_SCANTDB\nOP_SCANTDB\n**************************\n")
+    assert_output "$expected_output"
 
+}
 
 @test "EXPLAIN - join with nested loop join" {
     run ./build/squel "EXPLAIN SELECT col1,col3,int FROM test_small JOIN test_small2 ON col3>int"
-    [[ $"${lines[0]}" == "******* EXPLAIN **********" ]]
-    [[ $"${lines[1]}" == "OP_PROJECT" ]]
-    [[ $"${lines[2]}" == "OP_JOIN" ]]
-    [[ $"${lines[3]}" == "OP_FILTER" ]]
-    [[ $"${lines[4]}" == "OP_SCANTDB" ]]
-    [[ $"${lines[5]}" == "OP_SCANTDB" ]]
-    [[ $"${lines[6]}" == "**************************" ]]
+    expected_output=$(printf "******* EXPLAIN **********\nOP_PROJECT\nOP_JOIN\nOP_FILTER\nOP_SCANTDB\nOP_SCANTDB\n**************************\n")
+    assert_output "$expected_output"
 }
 
 

From e2bdf66c084196fa3540bcfa7d7060a4c73244ff Mon Sep 17 00:00:00 2001
From: toby <43851547+toppyy@users.noreply.github.com>
Date: Sun, 16 Mar 2025 10:09:10 +0200
Subject: [PATCH 29/29] use csv-files for tests

---
 test/test-explain.bats | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/test-explain.bats b/test/test-explain.bats
index 996f70d..3b84ca6 100644
--- a/test/test-explain.bats
+++ b/test/test-explain.bats
@@ -13,15 +13,15 @@ setup() {
 }
 
 @test "EXPLAIN - hash join" {
-    run ./build/squel "EXPLAIN SELECT col1,col3,int FROM test_small JOIN test_small2 ON col3=int"
-    expected_output=$(printf "******* EXPLAIN **********\nOP_PROJECT\nOP_HASHJOIN\nOP_FILTER\nOP_SCANTDB\nOP_SCANTDB\n**************************\n")
+    run ./build/squel "EXPLAIN SELECT col1,col3,int FROM './test/data/small.csv' JOIN './test/data/small2.csv' ON col3=int"
+    expected_output=$(printf "******* EXPLAIN **********\nOP_PROJECT\nOP_HASHJOIN\nOP_FILTER\nOP_SCAN\nOP_SCAN\n**************************\n")
     assert_output "$expected_output"
 
 }
 
 @test "EXPLAIN - join with nested loop join" {
-    run ./build/squel "EXPLAIN SELECT col1,col3,int FROM test_small JOIN test_small2 ON col3>int"
-    expected_output=$(printf "******* EXPLAIN **********\nOP_PROJECT\nOP_JOIN\nOP_FILTER\nOP_SCANTDB\nOP_SCANTDB\n**************************\n")
+    run ./build/squel "EXPLAIN SELECT col1,col3,int FROM './test/data/small.csv' JOIN './test/data/small2.csv' ON col3>int"
+    expected_output=$(printf "******* EXPLAIN **********\nOP_PROJECT\nOP_JOIN\nOP_FILTER\nOP_SCAN\nOP_SCAN\n**************************\n")
     assert_output "$expected_output"
 }