diff --git a/.gitignore b/.gitignore index 44f6f7b..79dd818 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,5 @@ # debug information files *.dwo .cache +build/ +build*/ \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..9a4ae10 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,25 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Launch bo-sql (lldb)", + "type": "lldb", + "request": "launch", + "program": "${workspaceFolder}/build/cli/bo-sql", + "args": [], + "cwd": "${workspaceFolder}", + "preLaunchTask": "Meson: Build all targets", + "stopOnEntry": false, + "terminal": "integrated" + }, + { + "type": "lldb", + "request": "attach", + "name": "Attach", + "program": "${workspaceFolder}/build/cli/bo-sql" + } + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..373bb54 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,9 @@ +{ + "clangd.arguments": [ + "--compile-commands-dir=build-dev", + "--background-index", + "--clang-tidy", + "--header-insertion=iwyu", + "--completion-style=detailed" + ] +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..b9d81f7 --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,17 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "type": "meson", + "mode": "build", + "problemMatcher": [ + "$meson-gcc" + ], + "group": { + "kind": "build", + "isDefault": true + }, + "label": "Meson: Build all targets" + } + ] +} \ No newline at end of file diff --git a/catalog/catalog.cpp b/catalog/catalog.cpp new file mode 100644 index 0000000..8696520 --- /dev/null +++ b/catalog/catalog.cpp @@ -0,0 +1,3 @@ +#include "catalog/catalog.h" + +// Catalog implementation - currently header-only, but can be extended \ No newline at end of file diff --git a/catalog/meson.build b/catalog/meson.build deleted file mode 100644 index d44399e..0000000 --- a/catalog/meson.build +++ /dev/null @@ -1 +0,0 @@ -# Catalog module meson.build \ No newline at end of file diff --git a/cli/main.cpp b/cli/main.cpp new file mode 100644 index 0000000..2366534 --- /dev/null +++ b/cli/main.cpp @@ -0,0 +1,111 @@ +#include +#include +#include +#include +#include +#include +#include "catalog/catalog.h" +#include "storage/csv_loader.h" +#include "types.h" + +template +void print_info(std::string_view fmt, Args&&... args) { fmt::print("{}\n", fmt::vformat(fmt, fmt::make_format_args(std::forward(args)...))); } + +template +void print_success(std::string_view fmt, Args&&... args) { fmt::print(fg(fmt::color::green), "{}\n", fmt::vformat(fmt, fmt::make_format_args(std::forward(args)...))); } + +template +void print_warning(std::string_view fmt, Args&&... args) { fmt::print(fg(fmt::color::yellow), "{}\n", fmt::vformat(fmt, fmt::make_format_args(std::forward(args)...))); } + +template +void print_error(std::string_view fmt, Args&&... args) { fmt::print(fg(fmt::color::red), "{}\n", fmt::vformat(fmt, fmt::make_format_args(std::forward(args)...))); } + +std::string type_name(TypeId type) { + switch (type) { + case TypeId::INT64: return "INT64"; + case TypeId::DOUBLE: return "DOUBLE"; + case TypeId::STRING: return "STRING"; + case TypeId::DATE32: return "DATE32"; + default: return "UNKNOWN"; + } +} + +int main() { + Catalog catalog; + std::string line; + + fmt::print("bo-sql CLI\n> "); + + while (std::getline(std::cin, line)) { + std::istringstream iss(line); + std::string command; + iss >> command; + + if (command == "LOAD") { + std::string table_keyword, table_name, from_keyword, filename; + iss >> table_keyword >> table_name >> from_keyword >> filename; + if (table_keyword != "TABLE" || from_keyword != "FROM") { + print_warning("Syntax: LOAD TABLE FROM 'file.csv'"); + } else { + // Remove quotes from filename + if (!filename.empty() && filename.front() == '\'' && filename.back() == '\'') { + filename = filename.substr(1, filename.size() - 2); + } + try { + std::pair result = load_csv(filename); + result.first.name = table_name; + result.second.name = table_name; + catalog.register_table(std::move(result.second)); + + print_success("Loaded table '{}' with {} rows", table_name, result.second.row_count); + } catch (const std::exception& e) { + print_error("Error loading CSV: {}", e.what()); + } + } + } else if (command == "SHOW") { + std::string tables_keyword; + iss >> tables_keyword; + if (tables_keyword == "TABLES") { + auto tables = catalog.list_tables(); + if (tables.empty()) { + print_info("No tables loaded"); + } else { + for (const auto& table : tables) { + fmt::print("{}\n", table); + } + } + } else { + print_warning("Unknown command"); + } + } else if (command == "DESCRIBE") { + std::string table_name; + iss >> table_name; + const TableMeta* meta = catalog.get_table(table_name); + if (!meta) { + print_error("Table '{}' not found", table_name); + } else { + fmt::print("Table: {} ({} rows)\n", meta->name, meta->row_count); + fmt::print("Columns:\n"); + for (const auto& col : meta->columns) { + fmt::print(" {} {} (ndv: {}", col.name, type_name(col.type), col.stats.ndv); + if (col.type == TypeId::INT64) { + fmt::print(", min: {}, max: {}", col.stats.min_i64, col.stats.max_i64); + } else if (col.type == TypeId::DOUBLE) { + fmt::print(", min: {}, max: {}", col.stats.min_f64, col.stats.max_f64); + } else if (col.type == TypeId::DATE32) { + fmt::print(", min: {}, max: {}", col.stats.min_date, col.stats.max_date); + } + fmt::print(")\n"); + } + } + } else if (command == "EXIT" || command == "QUIT") { + break; + } else { + print_warning("Unknown command. Available: LOAD TABLE, SHOW TABLES, DESCRIBE , EXIT"); + } + + fmt::print("> "); + } + + return 0; +} \ No newline at end of file diff --git a/cli/meson.build b/cli/meson.build index 5d81ab1..310fb85 100644 --- a/cli/meson.build +++ b/cli/meson.build @@ -1 +1,10 @@ -# CLI module meson.build \ No newline at end of file +# CLI module meson.build + +cli_sources = files('main.cpp') + +cli_exe = executable('bo-sql', + sources: cli_sources, + include_directories: inc, + link_with: libcore, + dependencies: [dependency('fmt')] +) \ No newline at end of file diff --git a/engine/meson.build b/engine/meson.build deleted file mode 100644 index 911daba..0000000 --- a/engine/meson.build +++ /dev/null @@ -1 +0,0 @@ -# Engine module meson.build \ No newline at end of file diff --git a/exec/meson.build b/exec/meson.build deleted file mode 100644 index af97e68..0000000 --- a/exec/meson.build +++ /dev/null @@ -1 +0,0 @@ -# Exec module meson.build \ No newline at end of file diff --git a/include/catalog/catalog.h b/include/catalog/catalog.h new file mode 100644 index 0000000..3328fc8 --- /dev/null +++ b/include/catalog/catalog.h @@ -0,0 +1,67 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include "types.h" + +// Column statistics +struct ColumnStats { + i64 min_i64 = 0, max_i64 = 0; + f64 min_f64 = 0.0, max_f64 = 0.0; + Date32 min_date = 0, max_date = 0; + size_t ndv = 0; +}; + +// Column metadata with statistics +struct ColumnMeta { + std::string name; + TypeId type; + ColumnStats stats; + + ColumnMeta(std::string n, TypeId t, size_t ndv = 0) + : name(std::move(n)), type(t) { stats.ndv = ndv; } +}; + +// Table metadata +struct TableMeta { + std::string name; + std::vector columns; + size_t row_count; + // Additional stats could go here if needed + + TableMeta() = default; + TableMeta(std::string n, std::vector cols, size_t rows) + : name(std::move(n)), columns(std::move(cols)), row_count(rows) {} +}; + +// Catalog for managing table metadata +class Catalog { +private: + std::unordered_map tables_; + +public: + // Register a table in the catalog + void register_table(TableMeta table_meta) { + tables_[table_meta.name] = std::move(table_meta); + } + + // Get table metadata by name + const TableMeta* get_table(const std::string& name) const { + auto it = tables_.find(name); + return it != tables_.end() ? &it->second : nullptr; + } + + // List all table names + std::vector list_tables() const { + std::vector names; + names.reserve(tables_.size()); + for (std::unordered_map::const_iterator it = tables_.begin(); it != tables_.end(); ++it) { + names.push_back(it->first); + } + return names; + } +}; \ No newline at end of file diff --git a/parser/ast.h b/include/parser/ast.h similarity index 73% rename from parser/ast.h rename to include/parser/ast.h index 0311486..6f812a6 100644 --- a/parser/ast.h +++ b/include/parser/ast.h @@ -4,8 +4,9 @@ #include #include #include -#include "../include/types.h" +#include "types.h" +// Types of expressions in the AST enum class ExprType { COLUMN_REF, LITERAL_INT, @@ -14,36 +15,46 @@ enum class ExprType { BINARY_OP }; +// Binary operators enum class BinaryOp { EQ, NE, LT, LE, GT, GE, ADD, SUB, MUL, DIV }; +// Base expression node struct Expr { ExprType type; - std::variant value; // for literals + // For literals: use separate fields since no variant + std::string str_val; + i64 i64_val; + f64 f64_val; BinaryOp op; // for binary std::unique_ptr left, right; // for binary }; +// Item in SELECT list with optional alias struct SelectItem { std::string alias; std::unique_ptr expr; }; +// Aggregate functions enum class AggFunc { NONE, SUM, COUNT, AVG }; +// GROUP BY clause struct GroupByClause { std::vector > columns; }; +// Item in ORDER BY clause struct OrderByItem { std::unique_ptr expr; bool asc = true; }; +// SELECT statement AST node struct SelectStmt { std::vector select_list; std::string from_table; diff --git a/include/storage/csv_loader.h b/include/storage/csv_loader.h new file mode 100644 index 0000000..b11fdc3 --- /dev/null +++ b/include/storage/csv_loader.h @@ -0,0 +1,179 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "types.h" +#include "storage/table.h" +#include "catalog/catalog.h" + +std::pair load_csv(const std::string& filename) { + std::ifstream file(filename); + if (!file.is_open()) { + throw std::runtime_error("Cannot open file: " + filename); + } + + Table table; + table.dict = std::make_shared(); + std::vector column_metas; + + std::string line; + std::vector headers; + std::vector > rows; + + // Read headers + if (std::getline(file, line)) { + std::stringstream ss(line); + std::string token; + while (std::getline(ss, token, ',')) { + headers.push_back(token); + } + } + + // Read data rows + while (std::getline(file, line)) { + std::stringstream ss(line); + std::string token; + std::vector row; + while (std::getline(ss, token, ',')) { + row.push_back(token); + } + if (row.size() != headers.size()) { + throw std::runtime_error("Row size mismatch"); + } + rows.push_back(row); + } + + // Infer types and create columns + size_t num_rows = rows.size(); + for (size_t col = 0; col < headers.size(); ++col) { + std::string col_name = headers[col]; + TableColumn column; + column.name = col_name; + ColumnMeta meta(col_name, TypeId::STRING); // default to string + + // Check if all values are date (8 digits) + bool all_date = true; + Date32 min_date = std::numeric_limits::max(); + Date32 max_date = std::numeric_limits::min(); + for (const auto& row : rows) { + if (row[col].size() != 8) { + all_date = false; + break; + } + try { + Date32 d = std::stoi(row[col]); + if (d < 19000000 || d > 21000000) all_date = false; + min_date = std::min(min_date, d); + max_date = std::max(max_date, d); + } catch (...) { + all_date = false; + break; + } + } + if (all_date && !rows.empty()) { + std::vector data; + data.reserve(num_rows); + for (const auto& row : rows) { + data.push_back(std::stoi(row[col])); + } + std::set uniques(data.begin(), data.end()); + column.data.reset(new ColumnVector(std::move(data))); + meta.type = TypeId::DATE32; + meta.stats.min_date = min_date; + meta.stats.max_date = max_date; + meta.stats.ndv = uniques.size(); + table.columns.push_back(std::move(column)); + column_metas.push_back(std::move(meta)); + continue; + } + + // Check if all values are i64 + bool all_i64 = true; + i64 min_i64 = std::numeric_limits::max(); + i64 max_i64 = std::numeric_limits::min(); + for (const auto& row : rows) { + try { + f64 val = std::stod(row[col]); + if (val != std::floor(val) || val < std::numeric_limits::min() || val > std::numeric_limits::max()) { + all_i64 = false; + break; + } + i64 ival = static_cast(val); + min_i64 = std::min(min_i64, ival); + max_i64 = std::max(max_i64, ival); + } catch (...) { + all_i64 = false; + break; + } + } + if (all_i64 && !rows.empty()) { + std::vector data; + data.reserve(num_rows); + for (const auto& row : rows) { + data.push_back(static_cast(std::stod(row[col]))); + } + std::set uniques(data.begin(), data.end()); + column.data.reset(new ColumnVector(std::move(data))); + meta.type = TypeId::INT64; + meta.stats.min_i64 = min_i64; + meta.stats.max_i64 = max_i64; + meta.stats.ndv = uniques.size(); + table.columns.push_back(std::move(column)); + column_metas.push_back(std::move(meta)); + continue; + } + + // Check if all values are f64 + bool all_f64 = true; + f64 min_f64 = std::numeric_limits::max(); + f64 max_f64 = std::numeric_limits::lowest(); + for (const auto& row : rows) { + try { + f64 val = std::stod(row[col]); + min_f64 = std::min(min_f64, val); + max_f64 = std::max(max_f64, val); + } catch (...) { + all_f64 = false; + break; + } + } + if (all_f64 && !rows.empty()) { + std::vector data; + data.reserve(num_rows); + for (const auto& row : rows) { + data.push_back(std::stod(row[col])); + } + std::set uniques(data.begin(), data.end()); + column.data.reset(new ColumnVector(std::move(data))); + meta.type = TypeId::DOUBLE; + meta.stats.min_f64 = min_f64; + meta.stats.max_f64 = max_f64; + meta.stats.ndv = uniques.size(); + table.columns.push_back(std::move(column)); + column_metas.push_back(std::move(meta)); + continue; + } + + // Else, string + std::vector data; + data.reserve(num_rows); + for (const auto& row : rows) { + data.push_back(table.dict->get_or_add(row[col])); + } + std::set uniques(data.begin(), data.end()); + column.data.reset(new ColumnVector(std::move(data))); + meta.stats.ndv = uniques.size(); // NDV for strings + table.columns.push_back(std::move(column)); + column_metas.push_back(std::move(meta)); + } + + TableMeta table_meta("", std::move(column_metas), num_rows); + return std::make_pair(std::move(table), std::move(table_meta)); +} \ No newline at end of file diff --git a/storage/dictionary.h b/include/storage/dictionary.h similarity index 86% rename from storage/dictionary.h rename to include/storage/dictionary.h index 42717a8..b957a74 100644 --- a/storage/dictionary.h +++ b/include/storage/dictionary.h @@ -3,8 +3,9 @@ #include #include #include -#include "../include/types.h" +#include "types.h" +// Dictionary for encoding strings to IDs and vice versa class Dictionary { public: std::vector strings; diff --git a/storage/table.h b/include/storage/table.h similarity index 59% rename from storage/table.h rename to include/storage/table.h index af4f65a..eb90ea6 100644 --- a/storage/table.h +++ b/include/storage/table.h @@ -5,19 +5,21 @@ #include #include #include -#include "../include/types.h" -#include "dictionary.h" +#include "types.h" +#include "storage/dictionary.h" -using ColumnData = std::variant, ColumnVector, ColumnVector, ColumnVector >; +// Removed variant, using unique_ptr instead -struct Column { +// Represents a column in a table with name and data +struct TableColumn { std::string name; - ColumnData data; + std::unique_ptr data; }; +// Represents a table with columns and a shared dictionary for strings struct Table { std::string name; - std::vector columns; + std::vector columns; std::shared_ptr dict; // Helper to get column index by name @@ -29,7 +31,7 @@ struct Table { } // Get column data - const ColumnData& get_column_data(const std::string& col_name) const { + const std::unique_ptr& get_column_data(const std::string& col_name) const { return columns[get_column_index(col_name)].data; } }; \ No newline at end of file diff --git a/include/types.h b/include/types.h index e58f821..99a3bfe 100644 --- a/include/types.h +++ b/include/types.h @@ -1,6 +1,8 @@ #pragma once #include +#include +#include #include using i64 = int64_t; @@ -8,6 +10,7 @@ using f64 = double; using StrId = uint32_t; // Dictionary-encoded string ID using Date32 = int32_t; // YYYYMMDD format +// Enumeration of supported data types enum class TypeId { INT64, DOUBLE, STRING, DATE32 }; // Datum union for type-safe value storage @@ -18,6 +21,7 @@ union DatumValue { Date32 date32_val; }; +// Type-safe wrapper for a single value with its type struct Datum { TypeId type; DatumValue value; @@ -91,9 +95,12 @@ TypeId type_id_for() { return TypeId::DOUBLE; } template<> TypeId type_id_for() { return TypeId::DATE32; } +template<> +TypeId type_id_for() { return TypeId::STRING; } + // Base class struct Column { - virtual ~Column() = default; + virtual ~Column() {} virtual TypeId type() const = 0; virtual size_t size() const = 0; }; @@ -104,6 +111,7 @@ struct ColumnVector : public Column { std::vector data; explicit ColumnVector(size_t reserve = 0) { data.reserve(reserve); } + explicit ColumnVector(std::vector d) : data(std::move(d)) {} TypeId type() const override { return type_id_for(); } size_t size() const override { return data.size(); } @@ -114,7 +122,7 @@ struct ColumnVector : public Column { // RecordBatch abstraction struct RecordBatch { std::vector schema; - std::vector> columns; + std::vector > columns; RecordBatch(std::vector s) : schema(std::move(s)) { columns.reserve(schema.size()); @@ -130,7 +138,7 @@ struct RecordBatch { // Add a column to the batch template - void add_column(std::unique_ptr> col) { + void add_column(std::unique_ptr > col) { columns.push_back(std::move(col)); } diff --git a/meson.build b/meson.build index fd27f89..6bdca2d 100644 --- a/meson.build +++ b/meson.build @@ -1,17 +1,22 @@ project('bo-sql', 'cpp', - version : '0.1', - default_options : ['cpp_std=c++20'] + version : '0.1', + default_options : ['cpp_std=c++20'] ) # Include directories inc = include_directories('include') -# Subdirectories -subdir('engine') -subdir('parser') -subdir('planner') -subdir('exec') -subdir('storage') -subdir('catalog') -subdir('cli') +# Core library +core_sources = files( + 'storage/dictionary.cpp', + 'storage/table.cpp', + 'catalog/catalog.cpp' +) + +libcore = static_library('core', + sources: core_sources, + include_directories: inc +) + + subdir('tests') \ No newline at end of file diff --git a/parser/parser.h b/parser/parser.h index fc23597..b7ff3f2 100644 --- a/parser/parser.h +++ b/parser/parser.h @@ -4,7 +4,7 @@ #include #include #include -#include "ast.h" +#include "parser/ast.h" enum class TokenType { SELECT, FROM, WHERE, INNER, JOIN, ON, GROUP, BY, ORDER, ASC, DESC, LIMIT, diff --git a/planner/meson.build b/planner/meson.build deleted file mode 100644 index 857dd6f..0000000 --- a/planner/meson.build +++ /dev/null @@ -1 +0,0 @@ -# Planner module meson.build \ No newline at end of file diff --git a/storage/csv_loader.h b/storage/csv_loader.h deleted file mode 100644 index b556586..0000000 --- a/storage/csv_loader.h +++ /dev/null @@ -1,134 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include "../include/types.h" -#include "table.h" - -Table load_csv(const std::string& filename) { - std::ifstream file(filename); - if (!file.is_open()) { - throw std::runtime_error("Cannot open file: " + filename); - } - - Table table; - table.dict = std::make_shared(); - - std::string line; - std::vector headers; - std::vector> rows; - - // Read headers - if (std::getline(file, line)) { - std::stringstream ss(line); - std::string token; - while (std::getline(ss, token, ',')) { - headers.push_back(token); - } - } - - // Read data rows - while (std::getline(file, line)) { - std::stringstream ss(line); - std::string token; - std::vector row; - while (std::getline(ss, token, ',')) { - row.push_back(token); - } - if (row.size() != headers.size()) { - throw std::runtime_error("Row size mismatch"); - } - rows.push_back(row); - } - - // Infer types and create columns - size_t num_rows = rows.size(); - for (size_t col = 0; col < headers.size(); ++col) { - std::string col_name = headers[col]; - Column column; - column.name = col_name; - - // Check if all values are i64 - bool all_i64 = true; - for (const auto& row : rows) { - try { - std::stoll(row[col]); - } catch (...) { - all_i64 = false; - break; - } - } - if (all_i64 && !rows.empty()) { - ColumnVector vec; - vec.data.reserve(num_rows); - for (const auto& row : rows) { - vec.data.push_back(std::stoll(row[col])); - } - column.data = std::move(vec); - table.columns.push_back(std::move(column)); - continue; - } - - // Check if all values are f64 - bool all_f64 = true; - for (const auto& row : rows) { - try { - std::stod(row[col]); - } catch (...) { - all_f64 = false; - break; - } - } - if (all_f64 && !rows.empty()) { - ColumnVector vec; - vec.data.reserve(num_rows); - for (const auto& row : rows) { - vec.data.push_back(std::stod(row[col])); - } - column.data = std::move(vec); - table.columns.push_back(std::move(column)); - continue; - } - - // Check if all values are date (8 digits) - bool all_date = true; - for (const auto& row : rows) { - if (row[col].size() != 8) { - all_date = false; - break; - } - try { - int d = std::stoi(row[col]); - if (d < 19000000 || d > 21000000) all_date = false; - } catch (...) { - all_date = false; - break; - } - } - if (all_date && !rows.empty()) { - ColumnVector vec; - vec.data.reserve(num_rows); - for (const auto& row : rows) { - vec.data.push_back(std::stoi(row[col])); - } - column.data = std::move(vec); - table.columns.push_back(std::move(column)); - continue; - } - - // Else, string - ColumnVector vec; - vec.data.reserve(num_rows); - for (const auto& row : rows) { - vec.data.push_back(table.dict->get_or_add(row[col])); - } - column.data = std::move(vec); - table.columns.push_back(std::move(column)); - } - - return table; -} \ No newline at end of file diff --git a/storage/meson.build b/storage/meson.build deleted file mode 100644 index 33bbe33..0000000 --- a/storage/meson.build +++ /dev/null @@ -1,7 +0,0 @@ -storage_inc = include_directories('.') - -storage_lib = static_library('storage', - 'dictionary.cpp', - 'table.cpp', - include_directories : [inc, storage_inc] -) \ No newline at end of file diff --git a/subprojects/fmt.wrap b/subprojects/fmt.wrap new file mode 100644 index 0000000..8f0a27e --- /dev/null +++ b/subprojects/fmt.wrap @@ -0,0 +1,13 @@ +[wrap-file] +directory = fmt-11.2.0 +source_url = https://github.com/fmtlib/fmt/archive/11.2.0.tar.gz +source_filename = fmt-11.2.0.tar.gz +source_hash = bc23066d87ab3168f27cef3e97d545fa63314f5c79df5ea444d41d56f962c6af +patch_filename = fmt_11.2.0-2_patch.zip +patch_url = https://wrapdb.mesonbuild.com/v2/fmt_11.2.0-2/get_patch +patch_hash = cc555cbfc9e334d5b670763894586ad6fbaf7f85eb5e67221cfe519b919c6542 +source_fallback_url = https://github.com/mesonbuild/wrapdb/releases/download/fmt_11.2.0-2/fmt-11.2.0.tar.gz +wrapdb_version = 11.2.0-2 + +[provide] +dependency_names = fmt \ No newline at end of file diff --git a/test.csv b/test.csv new file mode 100644 index 0000000..ce35876 --- /dev/null +++ b/test.csv @@ -0,0 +1,4 @@ +id,name,value +1,Alice,100 +2,Bob,200 +3,Charlie,300 \ No newline at end of file diff --git a/tests/meson.build b/tests/meson.build index 232942a..cf167c9 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -1,9 +1,11 @@ catch2_dep = dependency('catch2-with-main') -test_exe = executable('tests', - 'test_main.cpp', # Assuming a test file exists - dependencies : [catch2_dep], - include_directories : inc +tests_sources = files('test_main.cpp') +tests_exe = executable('tests', + sources: tests_sources, + include_directories: inc, + link_with: libcore, + dependencies: [catch2_dep] ) -test('unit tests', test_exe) \ No newline at end of file +test('unit tests', tests_exe) \ No newline at end of file diff --git a/tests/test_main.cpp b/tests/test_main.cpp index 0a768b5..af56e90 100644 --- a/tests/test_main.cpp +++ b/tests/test_main.cpp @@ -1,4 +1,8 @@ -#include +#define CATCH_CONFIG_MAIN +#include +#include "storage/csv_loader.h" +#include "catalog/catalog.h" +#include #include "types.h" TEST_CASE("Basic test", "[basic]") { @@ -7,10 +11,10 @@ TEST_CASE("Basic test", "[basic]") { TEST_CASE("ColumnVector smoke test", "[columnar]") { // Instantiate ColumnVector - ColumnVector col(10); + ColumnVector col(10); // Fill with values - for (int64_t i = 0; i < 5; ++i) { + for (i64 i = 0; i < 5; ++i) { col.append(i * 10); } @@ -30,22 +34,22 @@ TEST_CASE("ColumnVector smoke test", "[columnar]") { TEST_CASE("Datum union test", "[types]") { // Test int64_t datum - auto d1 = Datum::from_i64(42); + Datum d1 = Datum::from_i64(42); REQUIRE(d1.type == TypeId::INT64); REQUIRE(d1.as_i64() == 42); // Test double datum - auto d2 = Datum::from_f64(3.14); + Datum d2 = Datum::from_f64(3.14); REQUIRE(d2.type == TypeId::DOUBLE); REQUIRE(d2.as_f64() == 3.14); // Test string ID datum - auto d3 = Datum::from_str(123); + Datum d3 = Datum::from_str(123); REQUIRE(d3.type == TypeId::STRING); REQUIRE(d3.as_str() == 123); // Test date32 datum - auto d4 = Datum::from_date32(20231225); + Datum d4 = Datum::from_date32(20231225); REQUIRE(d4.type == TypeId::DATE32); REQUIRE(d4.as_date32() == 20231225); @@ -83,12 +87,12 @@ TEST_CASE("RecordBatch test", "[columnar]") { REQUIRE(batch.num_rows() == 0); // Add columns - auto col1 = std::make_unique>(); + std::unique_ptr > col1(new ColumnVector()); col1->append(1); col1->append(2); col1->append(3); - auto col2 = std::make_unique>(); + std::unique_ptr > col2(new ColumnVector()); col2->append(1.1); col2->append(2.2); col2->append(3.3); @@ -101,8 +105,8 @@ TEST_CASE("RecordBatch test", "[columnar]") { REQUIRE(batch.num_rows() == 3); // Check column access - auto* c1 = dynamic_cast*>(batch.get_column(0)); - auto* c2 = dynamic_cast*>(batch.get_column(1)); + ColumnVector* c1 = dynamic_cast*>(batch.get_column(0)); + ColumnVector* c2 = dynamic_cast*>(batch.get_column(1)); REQUIRE(c1 != nullptr); REQUIRE(c2 != nullptr); @@ -112,4 +116,92 @@ TEST_CASE("RecordBatch test", "[columnar]") { // Check schema access REQUIRE(batch.get_column_type(0).name == "id"); REQUIRE(batch.get_column_type(1).type_id == TypeId::DOUBLE); +} + +TEST_CASE("CSV load test", "[csv]") { + // Create a temporary CSV file + std::ofstream csv_file("test_load.csv"); + csv_file << "id,name,value\n"; + csv_file << "1,Alice,100.5\n"; + csv_file << "2,Bob,200.25\n"; + csv_file << "3,Charlie,300.75\n"; + csv_file.close(); + + // Load CSV + std::pair result = load_csv("test_load.csv"); + Table& table = result.first; + TableMeta& meta = result.second; + + // Check table metadata + REQUIRE(meta.name.empty()); // Not set yet + REQUIRE(meta.row_count == 3); + REQUIRE(meta.columns.size() == 3); + + // Check columns + REQUIRE(meta.columns[0].name == "id"); + REQUIRE(meta.columns[0].type == TypeId::INT64); + REQUIRE(meta.columns[0].stats.min_i64 == 1); + REQUIRE(meta.columns[0].stats.max_i64 == 3); + + REQUIRE(meta.columns[1].name == "name"); + REQUIRE(meta.columns[1].type == TypeId::STRING); + REQUIRE(meta.columns[1].stats.ndv == 3); + + REQUIRE(meta.columns[2].name == "value"); + REQUIRE(meta.columns[2].type == TypeId::DOUBLE); + REQUIRE(meta.columns[2].stats.min_f64 == 100.5); + REQUIRE(meta.columns[2].stats.max_f64 == 300.75); + + // Check table data + REQUIRE(table.columns.size() == 3); + REQUIRE(table.columns[0].name == "id"); + REQUIRE(table.columns[1].name == "name"); + REQUIRE(table.columns[2].name == "value"); + + // Check dictionary + REQUIRE(table.dict->get(0) == "Alice"); + REQUIRE(table.dict->get(1) == "Bob"); + REQUIRE(table.dict->get(2) == "Charlie"); + + // Clean up + std::remove("test_load.csv"); +} + +TEST_CASE("Catalog roundtrip test", "[catalog]") { + // Create a temporary CSV file + std::ofstream csv_file("test_catalog.csv"); + csv_file << "id,value\n"; + csv_file << "10,1.1\n"; + csv_file << "20,2.2\n"; + csv_file.close(); + + // Load and register + std::pair result2 = load_csv("test_catalog.csv"); + Table& table = result2.first; + TableMeta& meta = result2.second; + table.name = "mytable"; + meta.name = "mytable"; + + Catalog catalog; + catalog.register_table(std::move(meta)); + + // Retrieve from catalog + const TableMeta* retrieved = catalog.get_table("mytable"); + REQUIRE(retrieved != nullptr); + REQUIRE(retrieved->name == "mytable"); + REQUIRE(retrieved->row_count == 2); + REQUIRE(retrieved->columns.size() == 2); + + REQUIRE(retrieved->columns[0].name == "id"); + REQUIRE(retrieved->columns[0].type == TypeId::INT64); + REQUIRE(retrieved->columns[1].name == "value"); + REQUIRE(retrieved->columns[1].type == TypeId::DOUBLE); + + // Check table list + auto tables = catalog.list_tables(); + REQUIRE(tables.size() == 1); + REQUIRE(tables[0] == "mytable"); + + // Clean up + std::remove("test_catalog.csv"); } \ No newline at end of file