From 1b86a66739b42cbb560e66425af30435ea1aac7a Mon Sep 17 00:00:00 2001 From: IamNailong Date: Wed, 29 Oct 2025 19:58:30 +0800 Subject: [PATCH 1/5] Test --- .gitignore | 4 +- CMakeLists.txt | 11 ++++ README | 2 +- include/chibcc.h | 136 +++++++++++++++++++++++++++++++++++++++++++++++ main.cpp | 11 ++++ 5 files changed, 161 insertions(+), 3 deletions(-) create mode 100644 CMakeLists.txt create mode 100644 include/chibcc.h create mode 100644 main.cpp diff --git a/.gitignore b/.gitignore index 4cef982..eb7be4d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ -build -.cache +build +.cache install \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..1df2169 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,11 @@ +cmake_minimum_required(VERSION 3.22) +project(chibcpp VERSION 0.1 + LANGUAGES CXX) + +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin") +set(CMAKE_RUNTIME_OUTPUT_LIB_DIRECTORY "${CMAKE_BINARY_DIR}/lib") + +add_executable(chibcpp + main.cpp +) \ No newline at end of file diff --git a/README b/README index ae4184c..1b6ac03 100644 --- a/README +++ b/README @@ -1 +1 @@ -## A CPP implementation for [chibcc](https://github.com/rui314/chibicc.git) \ No newline at end of file +## A CPP implementation for (chibcc)[https://github.com/rui314/chibicc.git] \ No newline at end of file diff --git a/include/chibcc.h b/include/chibcc.h new file mode 100644 index 0000000..6f9d982 --- /dev/null +++ b/include/chibcc.h @@ -0,0 +1,136 @@ +#include +#include +#include +#include +#include +#include +#include + +// +// tokenize.c +// + +typedef enum { + TK_PUNCT, // Keywords or punctuators + TK_NUM, // Numeric literals + TK_EOF, // End-of-file markers +} TokenKind; + +// Token type +typedef struct Token Token; +struct Token { + TokenKind kind; // Token kind + Token *next; // Next token + int val; // If kind is TK_NUM, its value + char *loc; // Token location + int len; // Token 
length +}; + +void error(char *fmt, ...); +void error_at(char *loc, char *fmt, ...); +void error_tok(Token *tok, char *fmt, ...); +bool equal(Token *tok, char *op); +Token *skip(Token *tok, char *op); +Token *tokenize(char *input); + +// +// parse.c +// + +typedef enum { + ND_ADD, // + + ND_SUB, // - + ND_MUL, // * + ND_DIV, // / + ND_NEG, // unary - + ND_EQ, // == + ND_NE, // != + ND_LT, // < + ND_LE, // <= + ND_NUM, // Integer +} NodeKind; + +// AST node type +typedef struct Node Node; +struct Node { + NodeKind kind; // Node kind + Node *lhs; // Left-hand side + Node *rhs; // Right-hand side + int val; // Used if kind == ND_NUM +}; + +Node *parse(Token *tok); + +// +// codegen.c +// + +void codegen(Node *node); +#include +#include +#include +#include +#include +#include +#include + +// +// tokenize.c +// + +typedef enum { + TK_PUNCT, // Keywords or punctuators + TK_NUM, // Numeric literals + TK_EOF, // End-of-file markers +} TokenKind; + +// Token type +typedef struct Token Token; +struct Token { + TokenKind kind; // Token kind + Token *next; // Next token + int val; // If kind is TK_NUM, its value + char *loc; // Token location + int len; // Token length +}; + +void error(char *fmt, ...); +void error_at(char *loc, char *fmt, ...); +void error_tok(Token *tok, char *fmt, ...); +bool equal(Token *tok, char *op); +Token *skip(Token *tok, char *op); +Token *tokenize(char *input); + +// +// parse.c +// + +typedef enum { + ND_ADD, // + + ND_SUB, // - + ND_MUL, // * + ND_DIV, // / + ND_NEG, // unary - + ND_EQ, // == + ND_NE, // != + ND_LT, // < + ND_LE, // <= + ND_NUM, // Integer +} NodeKind; + +// AST node type +typedef struct Node Node; +struct Node { + NodeKind kind; // Node kind + Node *lhs; // Left-hand side + Node *rhs; // Right-hand side + int val; // Used if kind == ND_NUM +}; + +Node *parse(Token *tok); + +// +// codegen.c +// + +void codegen(Node *node); diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..f60159e --- /dev/null +++ b/main.cpp 
@@ -0,0 +1,11 @@ +// #include "chibicc.h" + +int main(int argc, char **argv) { + // if (argc != 2) + // error("%s: invalid number of arguments", argv[0]); + + // Token *tok = tokenize(argv[1]); + // Node *node = parse(tok); + // codegen(node); + return 0; +} \ No newline at end of file From 95ea4a67ea179b69b9ed6252362ad53dba04aacf Mon Sep 17 00:00:00 2001 From: IamNailong Date: Fri, 31 Oct 2025 15:30:43 +0800 Subject: [PATCH 2/5] Update --- CMakeLists.txt | 36 ++++- a.out | Bin 0 -> 15744 bytes include/AST.h | 37 +++++ include/CodeGenerator.h | 28 ++++ include/Common.h | 32 ++++ include/Diagnostic.h | 194 ++++++++++++++++++++++ include/DiagnosticKinds.def | 87 ++++++++++ include/Parser.h | 39 +++++ include/Token.h | 136 ++++++++++++++++ include/TokenKinds.def | 143 ++++++++++++++++ include/Tokenizer.h | 81 ++++++++++ include/chibcc.h | 136 ---------------- main.cpp | 64 ++++++-- main.s | 16 ++ src/CodeGenerator.cpp | 85 ++++++++++ src/Diagnostic.cpp | 196 ++++++++++++++++++++++ src/Parser.cpp | 165 +++++++++++++++++++ src/TokenKinds.cpp | 38 +++++ src/Tokenizer.cpp | 314 ++++++++++++++++++++++++++++++++++++ 19 files changed, 1674 insertions(+), 153 deletions(-) create mode 100644 a.out create mode 100644 include/AST.h create mode 100644 include/CodeGenerator.h create mode 100644 include/Common.h create mode 100644 include/Diagnostic.h create mode 100644 include/DiagnosticKinds.def create mode 100644 include/Parser.h create mode 100644 include/Token.h create mode 100644 include/TokenKinds.def create mode 100644 include/Tokenizer.h delete mode 100644 include/chibcc.h create mode 100644 main.s create mode 100644 src/CodeGenerator.cpp create mode 100644 src/Diagnostic.cpp create mode 100644 src/Parser.cpp create mode 100644 src/TokenKinds.cpp create mode 100644 src/Tokenizer.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 1df2169..a606663 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,11 +1,33 @@ -cmake_minimum_required(VERSION 3.22) -project(chibcpp 
VERSION 0.1 - LANGUAGES CXX) +cmake_minimum_required(VERSION 3.16) +project(ChibCC VERSION 1.0.0 LANGUAGES CXX) -set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin") -set(CMAKE_RUNTIME_OUTPUT_LIB_DIRECTORY "${CMAKE_BINARY_DIR}/lib") +# Set C++ standard +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) -add_executable(chibcpp +# Set compiler flags following LLVM style +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wpedantic") +set(CMAKE_CXX_FLAGS_DEBUG "-g -O0") +set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG") + +# Include directories +include_directories(include) + +# Source files +set(SOURCES + src/Diagnostic.cpp + src/TokenKinds.cpp + src/Tokenizer.cpp + src/Parser.cpp + src/CodeGenerator.cpp main.cpp +) + +# Create executable +add_executable(chibcc ${SOURCES}) + +# Set output directory +set_target_properties(chibcc PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin + LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib ) \ No newline at end of file diff --git a/a.out b/a.out new file mode 100644 index 0000000000000000000000000000000000000000..df2a7def4bd8c172cb452ff8d9f4ec2ed0f3f50e GIT binary patch literal 15744 zcmeHOTWlOx89uwV8;1n1<2H>+LNfvv5~#J_2}GVK5)Yjj=bZV^nb|YmGasLro=heZ3aLfCSFx4uG)!6w(!-J=NUNjj&2)UL z+OKXSyH#_hJZ*qVO?oN08QX<#A!J=QXPV%CLsJVTp@)pD+bXTJnG&Fq^I)AW5*s|w zmj-~~8|_)rVgx@gG%&2=*sS&%ht-kIXh~cS3PQ&w>y&N$X*f(0tVutCh$qK{KSw2= zQM61g3K-R05M30w@u3a8^AE1mG-t8 zhfaw?1yz6wo=h5_sjn z&3oB`Vw>YRtWHc%9UpTC?LqrUBY${Exl=PI-Hc!M7qXSWFVCDD%M}a$j5nL(>K5|F zf>h@k*QSmf!G!)Z6FfmS_ZRb{x;ML>qkfNY-+b(G3UiX~a3>4 zYi*xpc+Xm}o0@)r^VIbA`bz4+C3bvXko>dS|Hv%Ep>^lVs`Kn0jycb+wK$0voSW;x z9aO>HQbButWj=Lad_&LuyEMYZs`~Ep&gGFLXPqmr1UsFpBe#)p>rlO3zm=iG3vK_R zr|!gusO?7ktpDx9ROq}|<8}BMLSDbSPGXJoyk>Wu)Yo4O7f|RLacxAB9OD#6AdWyB zfj9zj1mXz95r`uYM<9+s9Dz6jaRmNP5#Zm$68l>2v#5Q3zTLlCug?(PLwJ$!e!}|+ z`L|3?iAT<=#8PKs-_C9A*Xco+dH#)l?bUky9qcf^P5%_yZMV^sG})UO}f6z%gi>4NNge2duF7r9StJiF}`8~Y>q*^m>;|FT{$ 
zvVDUeHYs7R;sx2vX!f`&^#dt?lW|j@5!@lzEx20{ATm{x{gBu*Y)M1tM@;cu`;>{C54Hg>Od z@ceAG5LB&!0ei^q?;WUWbKudzLA!s*9y}xk|EWoiN2~f{J?g{pPEg5335X>Qw-Yw= z=kZ9Y4pox*DDKJ<{zim9EIj5n{HG|BP~ECCZ5WIf@48OVe=#s$O{r2BDZ}mJdI8V# zFrjW&nE&9n>i<`?f0z241>(kG$rY|M9*{)RHm z_IMSm6rLS{})9?9}$0yJ5zC#_7`x^L< z5zqaJ`hS{u3r-e zjot2Ukay>Jzk7u|GevhHSDf{7ZYC&}E3Q{vQgg+8Ddz`%#_n(K!F%tsu2(L5%dTGt z%FAlL?B#toQ_bg>NfNSL@(eb)y6)uJ(UTMI#Hn$Yw%3n;;MC~JsWDQ~dkXG^Bc;yx zS>>LXK7D+2+C4ovd2V9Hof$noJwb}-cK zDY}bZA;Yf&Or0jLOt#=wD}F}N`O?9ityDy-UpR1GdW8UI#P1d~ePLkpdkc+j^ScQ; zO!Q_>WjP;svxGs};39I_0==kEQg)#j`1V4fYM08zl3xy%L)2_FOK+8AGeV6XpX%jV zpxqa}%A&F}%LQt}U{E$C5BcRvwpiF?xKvj5a~=zn&10zSQqCOgg(77Fe~FGf5y-Qs zBWwGMGW3fX(!(-(GY5>Xz>X zAl463tbLJZd^>&kJ0AL@q6b=37EbtH2gk3qh4mLi5A1=XAnrq;`Fka-$M+i0f}>zm z{~+;yr8NTjk983c?PFaO?Y~EQUK2r&brbNqw99?uI75&1PL?=6W$3XE1M>G>IB^}} z^ieW+4Fo;bJwU7{(SP^>A0<7nsbFJW1?-o`qWwo7u2F_#3O&|&z?wLs{pk5WLFIf- zC~w@a<-U#etyH7iLZ0!9^a<-N(E-J5NMHk>is<{L0Eqsg0@%Q>Mf7RW1MiFs7}`aB z--zh(eGbI>AO6w#`)$!T)*6<03P$~51HUJFj33rFz#gd%03GZfNAw3RQvxi(Q4nzh z{WHpR&_^!&#<~vg3r7861Aj%@R7j6?~+d8hxV~<>J)v{KQbLv z3lYqTJE~RCOBfqG^kKx@X*f|PEfw}l$M`ztr5^f&``>or(09r Lhs; + std::unique_ptr Rhs; + int Val; + + explicit Node(NodeKind K) : Kind(K), Lhs(nullptr), Rhs(nullptr), Val(0) {} +}; + +} // namespace chibcc + +#endif // CHIBCC_AST_H \ No newline at end of file diff --git a/include/CodeGenerator.h b/include/CodeGenerator.h new file mode 100644 index 0000000..89431b5 --- /dev/null +++ b/include/CodeGenerator.h @@ -0,0 +1,28 @@ +#ifndef CHIBCC_CODEGENERATOR_H +#define CHIBCC_CODEGENERATOR_H + +#include "AST.h" + +namespace chibcc { + +//===----------------------------------------------------------------------===// +// Code Generator +//===----------------------------------------------------------------------===// + +class CodeGenerator { +private: + int Depth; + + void push(); + void pop(const char *Arg); + void genExpr(Node *N); + +public: + CodeGenerator() : Depth(0) {} + + void codegen(Node *N); +}; + +} // namespace 
chibcc + +#endif // CHIBCC_CODEGENERATOR_H \ No newline at end of file diff --git a/include/Common.h b/include/Common.h new file mode 100644 index 0000000..babd4a1 --- /dev/null +++ b/include/Common.h @@ -0,0 +1,32 @@ +#ifndef CHIBCC_COMMON_H +#define CHIBCC_COMMON_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace chibcc { + +//===----------------------------------------------------------------------===// +// Forward Declarations +//===----------------------------------------------------------------------===// + +class DiagnosticEngine; +class SourceLocation; + +//===----------------------------------------------------------------------===// +// Legacy Error Handling (for compatibility) +//===----------------------------------------------------------------------===// + +void error(const char *Fmt, ...); +void errorAt(const char *Loc, const char *Fmt, ...); + +} // namespace chibcc + +#endif // CHIBCC_COMMON_H \ No newline at end of file diff --git a/include/Diagnostic.h b/include/Diagnostic.h new file mode 100644 index 0000000..7aabe27 --- /dev/null +++ b/include/Diagnostic.h @@ -0,0 +1,194 @@ +#ifndef CHIBCC_DIAGNOSTIC_H +#define CHIBCC_DIAGNOSTIC_H + +#include "Common.h" + +namespace chibcc { + +//===----------------------------------------------------------------------===// +// Diagnostic Levels +//===----------------------------------------------------------------------===// + +enum class DiagnosticLevel { + Ignored = 0, + Note, + Remark, + Warning, + Error, + Fatal +}; + +//===----------------------------------------------------------------------===// +// Diagnostic IDs +//===----------------------------------------------------------------------===// + +namespace diag { +enum { +#define DIAG(ENUM, LEVEL, DESC) ENUM, +#include "DiagnosticKinds.def" + NUM_BUILTIN_DIAGNOSTICS +}; +} // namespace diag + +//===----------------------------------------------------------------------===// +// Source Location 
+//===----------------------------------------------------------------------===// + +class SourceLocation { +private: + const char *Ptr; + +public: + SourceLocation() : Ptr(nullptr) {} + explicit SourceLocation(const char *Loc) : Ptr(Loc) {} + + bool isValid() const { return Ptr != nullptr; } + bool isInvalid() const { return Ptr == nullptr; } + + const char *getPointer() const { return Ptr; } + + bool operator==(const SourceLocation &RHS) const { return Ptr == RHS.Ptr; } + bool operator!=(const SourceLocation &RHS) const { return Ptr != RHS.Ptr; } +}; + +//===----------------------------------------------------------------------===// +// Source Range +//===----------------------------------------------------------------------===// + +class SourceRange { +private: + SourceLocation Begin, End; + +public: + SourceRange() = default; + SourceRange(SourceLocation Loc) : Begin(Loc), End(Loc) {} + SourceRange(SourceLocation Begin, SourceLocation End) + : Begin(Begin), End(End) {} + + SourceLocation getBegin() const { return Begin; } + SourceLocation getEnd() const { return End; } + + void setBegin(SourceLocation Loc) { Begin = Loc; } + void setEnd(SourceLocation Loc) { End = Loc; } + + bool isValid() const { return Begin.isValid() && End.isValid(); } + bool isInvalid() const { return !isValid(); } +}; + +//===----------------------------------------------------------------------===// +// Diagnostic Engine +//===----------------------------------------------------------------------===// + +class DiagnosticEngine { +private: + const char *SourceBuffer; + std::string FileName; + unsigned NumWarnings; + unsigned NumErrors; + bool SuppressAllDiagnostics; + bool WarningsAsErrors; + + void emitDiagnostic(SourceLocation Loc, DiagnosticLevel Level, + const std::string &Message); + void printSourceLine(SourceLocation Loc); + void printCaretDiagnostic(SourceLocation Loc, SourceRange Range); + +public: + DiagnosticEngine(const char *Buffer, const std::string &File = "") + : 
SourceBuffer(Buffer), FileName(File), NumWarnings(0), NumErrors(0), + SuppressAllDiagnostics(false), WarningsAsErrors(false) {} + + /// \brief Report a diagnostic at the given location. + void report(SourceLocation Loc, unsigned DiagID, const std::string &Message); + + /// \brief Report a diagnostic with a source range. + void report(SourceRange Range, unsigned DiagID, const std::string &Message); + + /// \brief Convenience methods for common diagnostic levels + void reportError(SourceLocation Loc, const std::string &Message); + void reportWarning(SourceLocation Loc, const std::string &Message); + void reportNote(SourceLocation Loc, const std::string &Message); + void reportFatal(SourceLocation Loc, const std::string &Message); + + /// \brief Get diagnostic counts + unsigned getNumWarnings() const { return NumWarnings; } + unsigned getNumErrors() const { return NumErrors; } + bool hasErrorOccurred() const { return NumErrors > 0; } + + /// \brief Control diagnostic behavior + void setSuppressAllDiagnostics(bool Val = true) { + SuppressAllDiagnostics = Val; + } + void setWarningsAsErrors(bool Val = true) { WarningsAsErrors = Val; } + + /// \brief Get the diagnostic level for a given diagnostic ID + static DiagnosticLevel getDiagnosticLevel(unsigned DiagID); + + /// \brief Get the diagnostic description for a given diagnostic ID + static const char *getDiagnosticText(unsigned DiagID); +}; + +//===----------------------------------------------------------------------===// +// Diagnostic Builder +//===----------------------------------------------------------------------===// + +class DiagnosticBuilder { +private: + DiagnosticEngine *Engine; + SourceLocation Loc; + SourceRange Range; + unsigned DiagID; + std::string Message; + bool IsActive; + +public: + DiagnosticBuilder(DiagnosticEngine *Engine, SourceLocation Loc, + unsigned DiagID) + : Engine(Engine), Loc(Loc), Range(Loc), DiagID(DiagID), IsActive(true) {} + + DiagnosticBuilder(DiagnosticBuilder &&Other) + : 
Engine(Other.Engine), Loc(Other.Loc), Range(Other.Range), + DiagID(Other.DiagID), Message(std::move(Other.Message)), + IsActive(Other.IsActive) { + Other.IsActive = false; + } + + ~DiagnosticBuilder() { + if (IsActive && Engine) { + Engine->report(Range, DiagID, Message); + } + } + + /// \brief Add a string to the diagnostic message + DiagnosticBuilder &operator<<(const std::string &Str) { + Message += Str; + return *this; + } + + DiagnosticBuilder &operator<<(const char *Str) { + Message += Str; + return *this; + } + + DiagnosticBuilder &operator<<(int Val) { + Message += std::to_string(Val); + return *this; + } + + /// \brief Add a source range to highlight + DiagnosticBuilder &addRange(SourceRange R) { + Range = R; + return *this; + } + + /// \brief Add a fix-it hint + DiagnosticBuilder &addFixItHint(SourceRange, const std::string &Text) { + // For now, just add to message - could be enhanced later + Message += " (fix: replace with '" + Text + "')"; + return *this; + } +}; + +} // namespace chibcc + +#endif // CHIBCC_DIAGNOSTIC_H \ No newline at end of file diff --git a/include/DiagnosticKinds.def b/include/DiagnosticKinds.def new file mode 100644 index 0000000..09f8925 --- /dev/null +++ b/include/DiagnosticKinds.def @@ -0,0 +1,87 @@ +//===--- DiagnosticKinds.def - C Family Diagnostic Kind Database -*- C++ -*-===// +// +// Part of the ChibCC Project +// +//===----------------------------------------------------------------------===// +// +// This file defines the diagnostic kind database. 
+// +//===----------------------------------------------------------------------===// + +#ifndef DIAG +#define DIAG(ENUM, LEVEL, DESC) +#endif + +//===----------------------------------------------------------------------===// +// Lexical Analysis (Tokenizer) Diagnostics +//===----------------------------------------------------------------------===// + +DIAG(err_invalid_character, Error, "invalid character '%0' in source file") +DIAG(err_unterminated_string, Error, "unterminated string literal") +DIAG(err_unterminated_char, Error, "unterminated character constant") +DIAG(err_empty_character, Error, "empty character constant") +DIAG(err_multichar_character, Error, "multi-character character constant") +DIAG(err_invalid_escape_sequence, Error, "invalid escape sequence '\\%0'") +DIAG(err_invalid_numeric_literal, Error, "invalid numeric literal") +DIAG(err_numeric_literal_too_large, Error, "numeric literal is too large") + +DIAG(warn_trigraph, Warning, "trigraph converted to '%0' character") +DIAG(warn_multichar_character_literal, Warning, "multi-character character constant") + +//===----------------------------------------------------------------------===// +// Parsing Diagnostics +//===----------------------------------------------------------------------===// + +DIAG(err_expected_token, Error, "expected '%0'") +DIAG(err_expected_expression, Error, "expected expression") +DIAG(err_expected_statement, Error, "expected statement") +DIAG(err_expected_declaration, Error, "expected declaration") +DIAG(err_expected_identifier, Error, "expected identifier") +DIAG(err_expected_type, Error, "expected type name") + +DIAG(err_unexpected_token, Error, "unexpected token '%0'") +DIAG(err_extra_tokens, Error, "extra tokens at end of directive") +DIAG(err_missing_semicolon, Error, "expected ';' after %0") +DIAG(err_missing_comma, Error, "expected ',' between %0") + +DIAG(err_unmatched_paren, Error, "expected ')' to match this '('") +DIAG(err_unmatched_brace, Error, "expected '}' 
to match this '{'") +DIAG(err_unmatched_bracket, Error, "expected ']' to match this '['") + +//===----------------------------------------------------------------------===// +// Semantic Analysis Diagnostics +//===----------------------------------------------------------------------===// + +DIAG(err_undeclared_identifier, Error, "use of undeclared identifier '%0'") +DIAG(err_redefinition, Error, "redefinition of '%0'") +DIAG(err_conflicting_types, Error, "conflicting types for '%0'") +DIAG(err_incompatible_types, Error, "incompatible types: '%0' and '%1'") +DIAG(err_invalid_operands, Error, "invalid operands to binary expression ('%0' and '%1')") +DIAG(err_invalid_unary_operand, Error, "invalid operand to unary expression ('%0')") + +DIAG(err_division_by_zero, Error, "division by zero") +DIAG(err_modulo_by_zero, Error, "modulo by zero") + +DIAG(warn_unused_variable, Warning, "unused variable '%0'") +DIAG(warn_uninitialized_variable, Warning, "variable '%0' is uninitialized when used here") +DIAG(warn_implicit_conversion, Warning, "implicit conversion from '%0' to '%1'") + +//===----------------------------------------------------------------------===// +// Code Generation Diagnostics +//===----------------------------------------------------------------------===// + +DIAG(err_unsupported_feature, Error, "unsupported feature: %0") +DIAG(err_internal_error, Error, "internal compiler error: %0") + +DIAG(note_previous_declaration, Note, "previous declaration is here") +DIAG(note_previous_definition, Note, "previous definition is here") +DIAG(note_to_match_this, Note, "to match this '%0'") + +//===----------------------------------------------------------------------===// +// General Diagnostics +//===----------------------------------------------------------------------===// + +DIAG(fatal_too_many_errors, Fatal, "too many errors emitted, stopping now") +DIAG(note_include_translation_unit, Note, "in file included from %0:%1:") + +#undef DIAG \ No newline at end of file 
diff --git a/include/Parser.h b/include/Parser.h new file mode 100644 index 0000000..98120b0 --- /dev/null +++ b/include/Parser.h @@ -0,0 +1,39 @@ +#ifndef CHIBCC_PARSER_H +#define CHIBCC_PARSER_H + +#include "AST.h" +#include "Tokenizer.h" + +namespace chibcc { + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +class Parser { +private: + std::unique_ptr newNode(NodeKind Kind); + std::unique_ptr newBinary(NodeKind Kind, std::unique_ptr Lhs, + std::unique_ptr Rhs); + std::unique_ptr newUnary(NodeKind Kind, std::unique_ptr Expr); + std::unique_ptr newNum(int Val); + + std::unique_ptr expr(Token **Rest, Token *Tok); + std::unique_ptr equality(Token **Rest, Token *Tok); + std::unique_ptr relational(Token **Rest, Token *Tok); + std::unique_ptr add(Token **Rest, Token *Tok); + std::unique_ptr mul(Token **Rest, Token *Tok); + std::unique_ptr unary(Token **Rest, Token *Tok); + std::unique_ptr primary(Token **Rest, Token *Tok); + + Lexer &Lex; + +public: + explicit Parser(Lexer &L) : Lex(L) {} + + std::unique_ptr parse(Token *Tok); +}; + +} // namespace chibcc + +#endif // CHIBCC_PARSER_H \ No newline at end of file diff --git a/include/Token.h b/include/Token.h new file mode 100644 index 0000000..24be49c --- /dev/null +++ b/include/Token.h @@ -0,0 +1,136 @@ +#ifndef CHIBCC_TOKEN_H +#define CHIBCC_TOKEN_H + +#include "Common.h" + +namespace chibcc { + +//===----------------------------------------------------------------------===// +// Token Types +//===----------------------------------------------------------------------===// + +namespace tok { + enum TokenKind : unsigned short { +#define TOK(X) X, +#include "TokenKinds.def" + NUM_TOKENS + }; + + /// \brief Determines the name of a token as used within the front end. 
+ /// + /// The name of a token will be an internal name (such as "l_square") + /// and should not be used as part of diagnostic messages. + const char *getTokenName(TokenKind Kind); + + /// \brief Determines the spelling of simple punctuator tokens like + /// '!' or '%', and returns NULL for literal and annotation tokens. + /// + /// This routine only retrieves the "simple" spelling of the token, + /// and will not produce any alternative spellings (e.g., a + /// digraph spelling, an escaped newline, etc.). For the actual + /// spelling of a given Token, use Preprocessor::getSpelling(). + const char *getPunctuatorSpelling(TokenKind Kind); + + /// \brief Determines the spelling of simple keyword and contextual keyword + /// tokens like 'int' and 'dynamic_cast'. Returns NULL for other token kinds. + const char *getKeywordSpelling(TokenKind Kind); + + /// \brief Return true if this is a raw identifier or an identifier kind. + inline bool isAnyIdentifier(TokenKind K) { + return (K == tok::identifier); + } + + /// \brief Return true if this is a C or C++ string-literal (or + /// C++11 user-defined-string-literal) token. + inline bool isStringLiteral(TokenKind K) { + return K == tok::string_literal; + } + + /// \brief Return true if this is a "literal" kind, like a numeric + /// constant, string, etc. + inline bool isLiteral(TokenKind K) { + return K == tok::numeric_constant || K == tok::char_constant || + isStringLiteral(K); + } + + /// \brief Return true if this is any of tok::annot_* kinds. + inline bool isAnnotation(TokenKind) { + return false; // No annotations in this simple implementation + } +} + +class Token { +public: + tok::TokenKind Kind; + std::unique_ptr Next; + + /// The location of the token. This is actually a pointer into the original + /// source buffer. + const char *Loc; + + /// The length of the token. + unsigned Len; + + union { + /// The actual value of a numeric constant token. 
+ uint64_t IntegerValue; + + /// A pointer to the start of the literal data for string literals. + const char *LiteralData; + }; + + Token() : Kind(tok::unknown), Next(nullptr), Loc(nullptr), Len(0) { + IntegerValue = 0; + } + + Token(tok::TokenKind K, const char *Location, unsigned Length) + : Kind(K), Next(nullptr), Loc(Location), Len(Length) { + IntegerValue = 0; + } + + /// \brief Return true if this token is a literal value. + bool isLiteral() const { + return tok::isLiteral(Kind); + } + + /// \brief Return true if this token is an identifier. + bool isAnyIdentifier() const { + return tok::isAnyIdentifier(Kind); + } + + /// \brief Return a source location identifier for the specified + /// offset in the current file. + const char *getLocation() const { return Loc; } + + /// \brief Return the length of the token. + unsigned getLength() const { return Len; } + + /// \brief Return the actual spelling of this token. + std::string getSpelling() const { + return std::string(Loc, Len); + } + + /// \brief Given a token representing an identifier, return true if it has a + /// specific spelling. + bool is(tok::TokenKind K) const { return Kind == K; } + bool isNot(tok::TokenKind K) const { return Kind != K; } + bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const { + return is(K1) || is(K2); + } + template + bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, Ts... 
Ks) const { + return is(K1) || isOneOf(K2, Ks...); + } +}; + +class DiagnosticEngine; + +void errorTok(Token *Tok, const char *Fmt, ...); + +/// \brief Create a diagnostic builder for token-based diagnostics +class DiagnosticBuilder; +DiagnosticBuilder diagnoseTok(DiagnosticEngine &Diags, Token *Tok, unsigned DiagID); + +} // namespace chibcc + +#endif // CHIBCC_TOKEN_H \ No newline at end of file diff --git a/include/TokenKinds.def b/include/TokenKinds.def new file mode 100644 index 0000000..b2677b3 --- /dev/null +++ b/include/TokenKinds.def @@ -0,0 +1,143 @@ +//===--- TokenKinds.def - C Family Token Kind Database ---------*- C++ -*-===// +// +// Part of the ChibCC Project +// +//===----------------------------------------------------------------------===// +// +// This file defines the TokenKind database. This includes normal tokens like +// identifiers, string literals, etc. as well as keywords for various languages. +// +//===----------------------------------------------------------------------===// + +#ifndef TOK +#define TOK(X) +#endif +#ifndef PUNCTUATOR +#define PUNCTUATOR(X,Y) TOK(X) +#endif +#ifndef KEYWORD +#define KEYWORD(X,Y) TOK(kw_ ## X) +#endif + +//===----------------------------------------------------------------------===// +// Preprocessor tokens. +//===----------------------------------------------------------------------===// + +// These define members of the tok::* namespace. + +TOK(unknown) // Not a token. +TOK(eof) // End of file. +TOK(eod) // End of preprocessing directive (end of line inside a + // directive). +TOK(code_completion) // Code completion marker + +//===----------------------------------------------------------------------===// +// Language keywords. +//===----------------------------------------------------------------------===// + +// These define members of the tok::kw_* namespace. Note that keywords are +// always first-class tokens. 
+KEYWORD(auto , KEYALL) +KEYWORD(break , KEYALL) +KEYWORD(case , KEYALL) +KEYWORD(char , KEYALL) +KEYWORD(const , KEYALL) +KEYWORD(continue , KEYALL) +KEYWORD(default , KEYALL) +KEYWORD(do , KEYALL) +KEYWORD(double , KEYALL) +KEYWORD(else , KEYALL) +KEYWORD(enum , KEYALL) +KEYWORD(extern , KEYALL) +KEYWORD(float , KEYALL) +KEYWORD(for , KEYALL) +KEYWORD(goto , KEYALL) +KEYWORD(if , KEYALL) +KEYWORD(int , KEYALL) +KEYWORD(long , KEYALL) +KEYWORD(register , KEYALL) +KEYWORD(return , KEYALL) +KEYWORD(short , KEYALL) +KEYWORD(signed , KEYALL) +KEYWORD(sizeof , KEYALL) +KEYWORD(static , KEYALL) +KEYWORD(struct , KEYALL) +KEYWORD(switch , KEYALL) +KEYWORD(typedef , KEYALL) +KEYWORD(union , KEYALL) +KEYWORD(unsigned , KEYALL) +KEYWORD(void , KEYALL) +KEYWORD(volatile , KEYALL) +KEYWORD(while , KEYALL) + +//===----------------------------------------------------------------------===// +// Literals +//===----------------------------------------------------------------------===// + +TOK(numeric_constant) // 0x123 +TOK(char_constant) // 'a' +TOK(string_literal) // "foo" + +//===----------------------------------------------------------------------===// +// Identifiers. +//===----------------------------------------------------------------------===// + +TOK(identifier) // abcde123 + +//===----------------------------------------------------------------------===// +// C/C++ Punctuators. 
+//===----------------------------------------------------------------------===// + +PUNCTUATOR(l_square, "[") +PUNCTUATOR(r_square, "]") +PUNCTUATOR(l_paren, "(") +PUNCTUATOR(r_paren, ")") +PUNCTUATOR(l_brace, "{") +PUNCTUATOR(r_brace, "}") +PUNCTUATOR(period, ".") +PUNCTUATOR(ellipsis, "...") +PUNCTUATOR(amp, "&") +PUNCTUATOR(ampamp, "&&") +PUNCTUATOR(ampequal, "&=") +PUNCTUATOR(star, "*") +PUNCTUATOR(starequal, "*=") +PUNCTUATOR(plus, "+") +PUNCTUATOR(plusplus, "++") +PUNCTUATOR(plusequal, "+=") +PUNCTUATOR(minus, "-") +PUNCTUATOR(arrow, "->") +PUNCTUATOR(minusminus, "--") +PUNCTUATOR(minusequal, "-=") +PUNCTUATOR(tilde, "~") +PUNCTUATOR(exclaim, "!") +PUNCTUATOR(exclaimequal, "!=") +PUNCTUATOR(slash, "/") +PUNCTUATOR(slashequal, "/=") +PUNCTUATOR(percent, "%") +PUNCTUATOR(percentequal, "%=") +PUNCTUATOR(less, "<") +PUNCTUATOR(lessless, "<<") +PUNCTUATOR(lessequal, "<=") +PUNCTUATOR(lesslessequal, "<<=") +PUNCTUATOR(greater, ">") +PUNCTUATOR(greatergreater, ">>") +PUNCTUATOR(greaterequal, ">=") +PUNCTUATOR(greatergreaterequal, ">>=") +PUNCTUATOR(caret, "^") +PUNCTUATOR(caretequal, "^=") +PUNCTUATOR(pipe, "|") +PUNCTUATOR(pipepipe, "||") +PUNCTUATOR(pipeequal, "|=") +PUNCTUATOR(question, "?") +PUNCTUATOR(colon, ":") +PUNCTUATOR(semi, ";") +PUNCTUATOR(equal, "=") +PUNCTUATOR(equalequal, "==") +PUNCTUATOR(comma, ",") +PUNCTUATOR(hash, "#") +PUNCTUATOR(hashhash, "##") +PUNCTUATOR(hashat, "#@") + +#undef KEYWORD +#undef PUNCTUATOR +#undef TOK \ No newline at end of file diff --git a/include/Tokenizer.h b/include/Tokenizer.h new file mode 100644 index 0000000..a8ff40d --- /dev/null +++ b/include/Tokenizer.h @@ -0,0 +1,81 @@ +#ifndef CHIBCC_TOKENIZER_H +#define CHIBCC_TOKENIZER_H + +#include "Token.h" +#include "Diagnostic.h" + +namespace chibcc { + +//===----------------------------------------------------------------------===// +// Lexer - This provides a simple interface that turns a text buffer into a +// stream of tokens. 
class Lexer {
private:
  const char *BufferStart; // Start of the input buffer.
  const char *BufferPtr;   // Current lexing position within the buffer.
  const char *BufferEnd;   // One past the last character of the buffer.
  DiagnosticEngine &Diags; // Diagnostic engine used to report lexing errors.

  /// \brief Create a new token of kind \p Kind spanning [TokStart, BufferPtr).
  std::unique_ptr<Token> formToken(tok::TokenKind Kind, const char *TokStart);

  /// \brief Skip whitespace and comments. Returns true if the end of the
  /// buffer was reached while skipping (i.e. only an EOF token remains).
  bool skipWhitespace();

  /// \brief We have just read the "//" characters; skip until the newline
  /// character that terminates the comment, then update BufferPtr.
  bool skipLineComment();

  /// \brief We have just read the "/*" characters; skip until the "*/"
  /// characters that terminate the comment, then update BufferPtr.
  bool skipBlockComment();

  /// \brief Lex a number: integer-constant, floating-constant.
  void lexNumericConstant(Token &Result);

  /// \brief Lex a string literal or character constant.
  void lexStringLiteral(Token &Result, const char *CurPtr);

  /// \brief Lex an identifier or keyword starting at \p CurPtr.
  void lexIdentifier(Token &Result, const char *CurPtr);

  /// \brief Return true if \p c may appear in the body of an identifier.
  static bool isIdentifierBody(unsigned char c) {
    return isalnum(c) || c == '_';
  }

  /// \brief Return true if \p c may start an identifier.
  static bool isIdentifierHead(unsigned char c) {
    return isalpha(c) || c == '_';
  }

  /// \brief Try to match a punctuator at \p CurPtr. On success returns its
  /// token kind and sets \p Size to its length in characters; otherwise
  /// returns tok::unknown with Size set to 0.
  tok::TokenKind tryMatchPunctuator(const char *CurPtr, unsigned &Size);

public:
  /// \brief Construct a Lexer for the half-open buffer [InputStart, InputEnd).
  Lexer(const char *InputStart, const char *InputEnd, DiagnosticEngine &Diags);

  /// \brief Lex the next token and return it. Only forward lexing is
  /// supported; after end of input every call returns a tok::eof token.
  std::unique_ptr<Token> lex();

  /// \brief Return true if the specified token kind is a literal (like a
  /// numeric constant, string, etc).
  static bool isLiteral(tok::TokenKind K) {
    return tok::isLiteral(K);
  }

  /// \brief Utility functions for token matching.
  static bool equal(Token *Tok, const char *Op);
  static Token *skip(Token *Tok, const char *Op);
  static bool equal(Token *Tok, tok::TokenKind Kind);
};

} // namespace chibcc

#endif // CHIBCC_TOKENIZER_H
kind == ND_NUM -}; - -Node *parse(Token *tok); - -// -// codegen.c -// - -void codegen(Node *node); -#include -#include -#include -#include -#include -#include -#include - -// -// tokenize.c -// - -typedef enum { - TK_PUNCT, // Keywords or punctuators - TK_NUM, // Numeric literals - TK_EOF, // End-of-file markers -} TokenKind; - -// Token type -typedef struct Token Token; -struct Token { - TokenKind kind; // Token kind - Token *next; // Next token - int val; // If kind is TK_NUM, its value - char *loc; // Token location - int len; // Token length -}; - -void error(char *fmt, ...); -void error_at(char *loc, char *fmt, ...); -void error_tok(Token *tok, char *fmt, ...); -bool equal(Token *tok, char *op); -Token *skip(Token *tok, char *op); -Token *tokenize(char *input); - -// -// parse.c -// - -typedef enum { - ND_ADD, // + - ND_SUB, // - - ND_MUL, // * - ND_DIV, // / - ND_NEG, // unary - - ND_EQ, // == - ND_NE, // != - ND_LT, // < - ND_LE, // <= - ND_NUM, // Integer -} NodeKind; - -// AST node type -typedef struct Node Node; -struct Node { - NodeKind kind; // Node kind - Node *lhs; // Left-hand side - Node *rhs; // Right-hand side - int val; // Used if kind == ND_NUM -}; - -Node *parse(Token *tok); - -// -// codegen.c -// - -void codegen(Node *node); diff --git a/main.cpp b/main.cpp index f60159e..aa9f6dc 100644 --- a/main.cpp +++ b/main.cpp @@ -1,11 +1,55 @@ -// #include "chibicc.h" - -int main(int argc, char **argv) { - // if (argc != 2) - // error("%s: invalid number of arguments", argv[0]); - - // Token *tok = tokenize(argv[1]); - // Node *node = parse(tok); - // codegen(node); - return 0; +#include "CodeGenerator.h" +#include "Diagnostic.h" +#include "Parser.h" +#include "Tokenizer.h" +#include + +using namespace chibcc; + +int main(int Argc, char **Argv) { + if (Argc != 2) { + std::cerr << "Usage: " << Argv[0] << " " << std::endl; + return 1; + } + + const char *Input = Argv[1]; + + // Create diagnostic engine + DiagnosticEngine Diags(Input, ""); + + // Create 
lexer + Lexer Lex(Input, Input + strlen(Input), Diags); + + // Tokenize all input into a linked list + std::unique_ptr Head = std::make_unique(); + Token *Current = Head.get(); + + while (true) { + auto Tok = Lex.lex(); + Current->Next = std::move(Tok); + Current = Current->Next.get(); + if (Current->Kind == tok::eof) { + break; + } + } + + // Check for lexical errors + if (Diags.hasErrorOccurred()) { + return 1; + } + + // Parse tokens into AST + Parser P(Lex); + auto Ast = P.parse(Head->Next.get()); + + // Check for parse errors + if (Diags.hasErrorOccurred()) { + return 1; + } + + // Generate assembly code + CodeGenerator CG; + CG.codegen(Ast.get()); + + return 0; } \ No newline at end of file diff --git a/main.s b/main.s new file mode 100644 index 0000000..2eefdf2 --- /dev/null +++ b/main.s @@ -0,0 +1,16 @@ + .globl main +main: + mov $3, %rax + push %rax + mov $1, %rax + pop %rdi + add %rdi, %rax + push %rax + mov $2, %rax + pop %rdi + imul %rdi, %rax + push %rax + mov $1, %rax + pop %rdi + add %rdi, %rax + ret \ No newline at end of file diff --git a/src/CodeGenerator.cpp b/src/CodeGenerator.cpp new file mode 100644 index 0000000..2592628 --- /dev/null +++ b/src/CodeGenerator.cpp @@ -0,0 +1,85 @@ +#include "CodeGenerator.h" + +namespace chibcc { + +//===----------------------------------------------------------------------===// +// Code Generator Implementation +//===----------------------------------------------------------------------===// + +void CodeGenerator::push() { + printf(" push %%rax\n"); + Depth++; +} + +void CodeGenerator::pop(const char *Arg) { + printf(" pop %s\n", Arg); + Depth--; +} + +void CodeGenerator::genExpr(Node *N) { + switch (N->Kind) { + case NodeKind::Num: + printf(" mov $%d, %%rax\n", N->Val); + return; + case NodeKind::Neg: + genExpr(N->Lhs.get()); + printf(" neg %%rax\n"); + return; + default: + break; + } + + genExpr(N->Rhs.get()); + push(); + genExpr(N->Lhs.get()); + pop("%rdi"); + + switch (N->Kind) { + case NodeKind::Add: + 
printf(" add %%rdi, %%rax\n"); + return; + case NodeKind::Sub: + printf(" sub %%rdi, %%rax\n"); + return; + case NodeKind::Mul: + printf(" imul %%rdi, %%rax\n"); + return; + case NodeKind::Div: + printf(" cqo\n"); + printf(" idiv %%rdi\n"); + return; + case NodeKind::Eq: + case NodeKind::Ne: + case NodeKind::Lt: + case NodeKind::Le: + printf(" cmp %%rdi, %%rax\n"); + + if (N->Kind == NodeKind::Eq) + printf(" sete %%al\n"); + else if (N->Kind == NodeKind::Ne) + printf(" setne %%al\n"); + else if (N->Kind == NodeKind::Lt) + printf(" setl %%al\n"); + else if (N->Kind == NodeKind::Le) + printf(" setle %%al\n"); + + printf(" movzb %%al, %%rax\n"); + return; + default: + break; + } + + error("invalid expression"); +} + +void CodeGenerator::codegen(Node *N) { + printf(" .globl main\n"); + printf("main:\n"); + + genExpr(N); + printf(" ret\n"); + + assert(Depth == 0); +} + +} // namespace chibcc \ No newline at end of file diff --git a/src/Diagnostic.cpp b/src/Diagnostic.cpp new file mode 100644 index 0000000..ad9fed4 --- /dev/null +++ b/src/Diagnostic.cpp @@ -0,0 +1,196 @@ +#include "Diagnostic.h" +#include + +namespace chibcc { + +//===----------------------------------------------------------------------===// +// Diagnostic Level and Text Tables +//===----------------------------------------------------------------------===// + +static const DiagnosticLevel DiagnosticLevels[] = { +#define DIAG(ENUM, LEVEL, DESC) DiagnosticLevel::LEVEL, +#include "DiagnosticKinds.def" +}; + +static const char *DiagnosticTexts[] = { +#define DIAG(ENUM, LEVEL, DESC) DESC, +#include "DiagnosticKinds.def" +}; + +//===----------------------------------------------------------------------===// +// DiagnosticEngine Implementation +//===----------------------------------------------------------------------===// + +DiagnosticLevel DiagnosticEngine::getDiagnosticLevel(unsigned DiagID) { + if (DiagID >= diag::NUM_BUILTIN_DIAGNOSTICS) + return DiagnosticLevel::Error; + return 
// Return the message template for \p DiagID, or a fallback for bad IDs.
const char *DiagnosticEngine::getDiagnosticText(unsigned DiagID) {
  if (DiagID >= diag::NUM_BUILTIN_DIAGNOSTICS)
    return "unknown diagnostic";
  return DiagnosticTexts[DiagID];
}

// Central sink for every diagnostic: applies suppression and
// warnings-as-errors policy, updates the warning/error counters, prints a
// "<file>:<line>:<col>: <level>: <message>" header plus source context,
// and exits the process on fatal errors.
void DiagnosticEngine::emitDiagnostic(SourceLocation Loc, DiagnosticLevel Level,
                                      const std::string &Message) {
  if (SuppressAllDiagnostics)
    return;

  // Treat warnings as errors if requested.
  if (Level == DiagnosticLevel::Warning && WarningsAsErrors)
    Level = DiagnosticLevel::Error;

  // Update counters (notes/remarks are not counted).
  switch (Level) {
  case DiagnosticLevel::Warning:
    NumWarnings++;
    break;
  case DiagnosticLevel::Error:
  case DiagnosticLevel::Fatal:
    NumErrors++;
    break;
  default:
    break;
  }

  // Map the level to its printable name.
  const char *LevelStr = "";
  switch (Level) {
  case DiagnosticLevel::Note:
    LevelStr = "note";
    break;
  case DiagnosticLevel::Remark:
    LevelStr = "remark";
    break;
  case DiagnosticLevel::Warning:
    LevelStr = "warning";
    break;
  case DiagnosticLevel::Error:
    LevelStr = "error";
    break;
  case DiagnosticLevel::Fatal:
    LevelStr = "fatal error";
    break;
  case DiagnosticLevel::Ignored:
    return; // Don't print ignored diagnostics.
  }

  // Compute 1-based line/column by scanning the buffer up to Loc.
  // O(buffer) per diagnostic; acceptable for a single-expression compiler.
  int Line = 1, Column = 1;
  if (Loc.isValid() && SourceBuffer) {
    const char *Ptr = SourceBuffer;
    while (Ptr < Loc.getPointer()) {
      if (*Ptr == '\n') {
        Line++;
        Column = 1;
      } else {
        Column++;
      }
      Ptr++;
    }
  }

  // Print the diagnostic header.
  std::cerr << FileName << ":" << Line << ":" << Column << ": " << LevelStr
            << ": " << Message << std::endl;

  // Print the offending source line with a caret if the location is valid.
  if (Loc.isValid()) {
    printSourceLine(Loc);
    printCaretDiagnostic(Loc, SourceRange(Loc));
  }

  // Fatal errors terminate the process immediately.
  if (Level == DiagnosticLevel::Fatal) {
    std::exit(1);
  }
}

// Echo the full source line containing \p Loc to stderr.
void DiagnosticEngine::printSourceLine(SourceLocation Loc) {
  if (!SourceBuffer || Loc.isInvalid())
    return;

  const char *LineStart = Loc.getPointer();
  const char *LineEnd = Loc.getPointer();

  // Find the start of the line (character after the previous newline).
  while (LineStart > SourceBuffer && LineStart[-1] != '\n')
    LineStart--;

  // Find the end of the line (relies on the buffer being NUL-terminated).
  while (*LineEnd && *LineEnd != '\n' && *LineEnd != '\r')
    LineEnd++;

  // Print the source line.
  std::cerr << std::string(LineStart, LineEnd) << std::endl;
}

// Print a caret under the diagnosed column, extended with '~' across Range.
void DiagnosticEngine::printCaretDiagnostic(SourceLocation Loc,
                                            SourceRange Range) {
  if (!SourceBuffer || Loc.isInvalid())
    return;

  const char *LineStart = Loc.getPointer();
  while (LineStart > SourceBuffer && LineStart[-1] != '\n')
    LineStart--;

  // Calculate the column position (0-based offset within the line).
  int Column = Loc.getPointer() - LineStart;

  // Pad up to the caret; tabs are echoed as tabs so the caret stays aligned
  // with the line printed by printSourceLine.
  for (int i = 0; i < Column; ++i) {
    if (LineStart[i] == '\t')
      std::cerr << '\t';
    else
      std::cerr << ' ';
  }

  // Print the caret.
  std::cerr << '^';

  // If we have a multi-character range, underline the rest with tildes.
  if (Range.isValid() && Range.getEnd().getPointer() > Loc.getPointer()) {
    int RangeLen = Range.getEnd().getPointer() - Loc.getPointer();
    for (int i = 1; i < RangeLen; ++i) {
      std::cerr << '~';
    }
  }

  std::cerr << std::endl;
}

// Report a diagnostic by ID at a single location; the level comes from the
// DiagnosticKinds.def table.
void DiagnosticEngine::report(SourceLocation Loc, unsigned DiagID,
                              const std::string &Message) {
  DiagnosticLevel Level = getDiagnosticLevel(DiagID);
  emitDiagnostic(Loc, Level, Message);
}

// Report a diagnostic by ID over a source range, anchored at its begin.
void DiagnosticEngine::report(SourceRange Range, unsigned DiagID,
                              const std::string &Message) {
  DiagnosticLevel Level = getDiagnosticLevel(DiagID);
  emitDiagnostic(Range.getBegin(), Level, Message);
}

// Convenience wrapper: emit with an explicit Error level.
void DiagnosticEngine::reportError(SourceLocation Loc,
                                   const std::string &Message) {
  emitDiagnostic(Loc, DiagnosticLevel::Error, Message);
}

// Convenience wrapper: emit with an explicit Warning level.
void DiagnosticEngine::reportWarning(SourceLocation Loc,
                                     const std::string &Message) {
  emitDiagnostic(Loc, DiagnosticLevel::Warning, Message);
}
&Message) { + emitDiagnostic(Loc, DiagnosticLevel::Note, Message); +} + +void DiagnosticEngine::reportFatal(SourceLocation Loc, + const std::string &Message) { + emitDiagnostic(Loc, DiagnosticLevel::Fatal, Message); +} + +} // namespace chibcc \ No newline at end of file diff --git a/src/Parser.cpp b/src/Parser.cpp new file mode 100644 index 0000000..1581d71 --- /dev/null +++ b/src/Parser.cpp @@ -0,0 +1,165 @@ +#include "Parser.h" + +namespace chibcc { + +//===----------------------------------------------------------------------===// +// Parser Implementation +//===----------------------------------------------------------------------===// + +std::unique_ptr Parser::newNode(NodeKind Kind) { + return std::make_unique(Kind); +} + +std::unique_ptr Parser::newBinary(NodeKind Kind, std::unique_ptr Lhs, + std::unique_ptr Rhs) { + auto N = newNode(Kind); + N->Lhs = std::move(Lhs); + N->Rhs = std::move(Rhs); + return N; +} + +std::unique_ptr Parser::newUnary(NodeKind Kind, std::unique_ptr Expr) { + auto N = newNode(Kind); + N->Lhs = std::move(Expr); + return N; +} + +std::unique_ptr Parser::newNum(int Val) { + auto N = newNode(NodeKind::Num); + N->Val = Val; + return N; +} + +// expr = equality +std::unique_ptr Parser::expr(Token **Rest, Token *Tok) { + return equality(Rest, Tok); +} + +// equality = relational ("==" relational | "!=" relational)* +std::unique_ptr Parser::equality(Token **Rest, Token *Tok) { + auto N = relational(&Tok, Tok); + + for (;;) { + if (Lexer::equal(Tok, "==")) { + N = newBinary(NodeKind::Eq, std::move(N), relational(&Tok, Tok->Next.get())); + continue; + } + + if (Lexer::equal(Tok, "!=")) { + N = newBinary(NodeKind::Ne, std::move(N), relational(&Tok, Tok->Next.get())); + continue; + } + + *Rest = Tok; + return N; + } +} + +// relational = add ("<" add | "<=" add | ">" add | ">=" add)* +std::unique_ptr Parser::relational(Token **Rest, Token *Tok) { + auto N = add(&Tok, Tok); + + for (;;) { + if (Lexer::equal(Tok, "<")) { + N = 
newBinary(NodeKind::Lt, std::move(N), add(&Tok, Tok->Next.get())); + continue; + } + + if (Lexer::equal(Tok, "<=")) { + N = newBinary(NodeKind::Le, std::move(N), add(&Tok, Tok->Next.get())); + continue; + } + + if (Lexer::equal(Tok, ">")) { + N = newBinary(NodeKind::Lt, add(&Tok, Tok->Next.get()), std::move(N)); + continue; + } + + if (Lexer::equal(Tok, ">=")) { + N = newBinary(NodeKind::Le, add(&Tok, Tok->Next.get()), std::move(N)); + continue; + } + + *Rest = Tok; + return N; + } +} + +// add = mul ("+" mul | "-" mul)* +std::unique_ptr Parser::add(Token **Rest, Token *Tok) { + auto N = mul(&Tok, Tok); + + for (;;) { + if (Lexer::equal(Tok, "+")) { + N = newBinary(NodeKind::Add, std::move(N), mul(&Tok, Tok->Next.get())); + continue; + } + + if (Lexer::equal(Tok, "-")) { + N = newBinary(NodeKind::Sub, std::move(N), mul(&Tok, Tok->Next.get())); + continue; + } + + *Rest = Tok; + return N; + } +} + +// mul = unary ("*" unary | "/" unary)* +std::unique_ptr Parser::mul(Token **Rest, Token *Tok) { + auto N = unary(&Tok, Tok); + + for (;;) { + if (Lexer::equal(Tok, "*")) { + N = newBinary(NodeKind::Mul, std::move(N), unary(&Tok, Tok->Next.get())); + continue; + } + + if (Lexer::equal(Tok, "/")) { + N = newBinary(NodeKind::Div, std::move(N), unary(&Tok, Tok->Next.get())); + continue; + } + + *Rest = Tok; + return N; + } +} + +// unary = ("+" | "-") unary +// | primary +std::unique_ptr Parser::unary(Token **Rest, Token *Tok) { + if (Lexer::equal(Tok, "+")) + return unary(Rest, Tok->Next.get()); + + if (Lexer::equal(Tok, "-")) + return newUnary(NodeKind::Neg, unary(Rest, Tok->Next.get())); + + return primary(Rest, Tok); +} + +// primary = "(" expr ")" | num +std::unique_ptr Parser::primary(Token **Rest, Token *Tok) { + if (Lexer::equal(Tok, "(")) { + auto N = expr(&Tok, Tok->Next.get()); + *Rest = Lexer::skip(Tok, ")"); + return N; + } + + if (Tok->Kind == tok::numeric_constant) { + auto N = newNum(Tok->IntegerValue); + *Rest = Tok->Next.get(); + return N; + } + + 
errorTok(Tok, "expected an expression"); + return nullptr; // Never reached +} + +std::unique_ptr Parser::parse(Token *Tok) { + auto N = expr(&Tok, Tok); + if (Tok->Kind != tok::eof) + errorTok(Tok, "extra token"); + return N; +} + +} // namespace chibcc \ No newline at end of file diff --git a/src/TokenKinds.cpp b/src/TokenKinds.cpp new file mode 100644 index 0000000..826c1e9 --- /dev/null +++ b/src/TokenKinds.cpp @@ -0,0 +1,38 @@ +#include "Token.h" + +namespace chibcc { +namespace tok { + +static const char * const TokNames[] = { +#define TOK(X) #X, +#define KEYWORD(X,Y) #X, +#include "TokenKinds.def" + nullptr +}; + +const char *getTokenName(TokenKind Kind) { + if (Kind < NUM_TOKENS) + return TokNames[Kind]; + return nullptr; +} + +const char *getPunctuatorSpelling(TokenKind Kind) { + switch (Kind) { +#define PUNCTUATOR(X,Y) case X: return Y; +#include "TokenKinds.def" + default: break; + } + return nullptr; +} + +const char *getKeywordSpelling(TokenKind Kind) { + switch (Kind) { +#define KEYWORD(X,Y) case kw_ ## X: return #X; +#include "TokenKinds.def" + default: break; + } + return nullptr; +} + +} // namespace tok +} // namespace chibcc \ No newline at end of file diff --git a/src/Tokenizer.cpp b/src/Tokenizer.cpp new file mode 100644 index 0000000..7556fe5 --- /dev/null +++ b/src/Tokenizer.cpp @@ -0,0 +1,314 @@ +#include "Tokenizer.h" + +namespace chibcc { + +//===----------------------------------------------------------------------===// +// Error Handling Implementation +//===----------------------------------------------------------------------===// + +static const char *CurrentInput = nullptr; + +void error(const char *Fmt, ...) { + va_list Ap; + va_start(Ap, Fmt); + vfprintf(stderr, Fmt, Ap); + fprintf(stderr, "\n"); + exit(1); +} + +static void verrorAt(const char *Loc, const char *Fmt, va_list Ap) { + int Pos = Loc - CurrentInput; + fprintf(stderr, "%s\n", CurrentInput); + fprintf(stderr, "%*s", Pos, ""); // print Pos spaces. 
+ fprintf(stderr, "^ "); + vfprintf(stderr, Fmt, Ap); + fprintf(stderr, "\n"); + exit(1); +} + +void errorAt(const char *Loc, const char *Fmt, ...) { + va_list Ap; + va_start(Ap, Fmt); + verrorAt(Loc, Fmt, Ap); +} + +void errorTok(Token *Tok, const char *Fmt, ...) { + va_list Ap; + va_start(Ap, Fmt); + verrorAt(Tok->Loc, Fmt, Ap); +} + +//===----------------------------------------------------------------------===// +// Lexer Implementation +//===----------------------------------------------------------------------===// + +Lexer::Lexer(const char *InputStart, const char *InputEnd, DiagnosticEngine &Diags) + : BufferStart(InputStart), BufferPtr(InputStart), BufferEnd(InputEnd), Diags(Diags) { + CurrentInput = InputStart; +} + +std::unique_ptr Lexer::formToken(tok::TokenKind Kind, const char *TokStart) { + auto Tok = std::make_unique(Kind, TokStart, BufferPtr - TokStart); + return Tok; +} + +bool Lexer::skipWhitespace() { + while (BufferPtr != BufferEnd) { + switch (*BufferPtr) { + case ' ': + case '\t': + case '\f': + case '\v': + case '\r': + case '\n': + ++BufferPtr; + break; + default: + return false; + } + } + return true; +} + +void Lexer::lexNumericConstant(Token &Result) { + const char *CurPtr = BufferPtr; + + // Lex the number + while (BufferPtr != BufferEnd && isdigit(*BufferPtr)) + ++BufferPtr; + + Result.Kind = tok::numeric_constant; + Result.Loc = CurPtr; + Result.Len = BufferPtr - CurPtr; + + // Convert to integer value + std::string NumStr(CurPtr, BufferPtr - CurPtr); + Result.IntegerValue = std::stoull(NumStr); +} + +void Lexer::lexIdentifier(Token &Result, const char *CurPtr) { + // Match [a-zA-Z_][a-zA-Z0-9_]* + while (BufferPtr != BufferEnd && isIdentifierBody(*BufferPtr)) + ++BufferPtr; + + Result.Kind = tok::identifier; + Result.Loc = CurPtr; + Result.Len = BufferPtr - CurPtr; + + // Check if this is a keyword + std::string Spelling(CurPtr, BufferPtr - CurPtr); + + // Simple keyword lookup - in a real implementation this would use a hash table + 
if (Spelling == "auto") Result.Kind = tok::kw_auto; + else if (Spelling == "break") Result.Kind = tok::kw_break; + else if (Spelling == "case") Result.Kind = tok::kw_case; + else if (Spelling == "char") Result.Kind = tok::kw_char; + else if (Spelling == "const") Result.Kind = tok::kw_const; + else if (Spelling == "continue") Result.Kind = tok::kw_continue; + else if (Spelling == "default") Result.Kind = tok::kw_default; + else if (Spelling == "do") Result.Kind = tok::kw_do; + else if (Spelling == "double") Result.Kind = tok::kw_double; + else if (Spelling == "else") Result.Kind = tok::kw_else; + else if (Spelling == "enum") Result.Kind = tok::kw_enum; + else if (Spelling == "extern") Result.Kind = tok::kw_extern; + else if (Spelling == "float") Result.Kind = tok::kw_float; + else if (Spelling == "for") Result.Kind = tok::kw_for; + else if (Spelling == "goto") Result.Kind = tok::kw_goto; + else if (Spelling == "if") Result.Kind = tok::kw_if; + else if (Spelling == "int") Result.Kind = tok::kw_int; + else if (Spelling == "long") Result.Kind = tok::kw_long; + else if (Spelling == "register") Result.Kind = tok::kw_register; + else if (Spelling == "return") Result.Kind = tok::kw_return; + else if (Spelling == "short") Result.Kind = tok::kw_short; + else if (Spelling == "signed") Result.Kind = tok::kw_signed; + else if (Spelling == "sizeof") Result.Kind = tok::kw_sizeof; + else if (Spelling == "static") Result.Kind = tok::kw_static; + else if (Spelling == "struct") Result.Kind = tok::kw_struct; + else if (Spelling == "switch") Result.Kind = tok::kw_switch; + else if (Spelling == "typedef") Result.Kind = tok::kw_typedef; + else if (Spelling == "union") Result.Kind = tok::kw_union; + else if (Spelling == "unsigned") Result.Kind = tok::kw_unsigned; + else if (Spelling == "void") Result.Kind = tok::kw_void; + else if (Spelling == "volatile") Result.Kind = tok::kw_volatile; + else if (Spelling == "while") Result.Kind = tok::kw_while; +} + +tok::TokenKind 
Lexer::tryMatchPunctuator(const char *CurPtr, unsigned &Size) { + switch (*CurPtr) { + case '[': Size = 1; return tok::l_square; + case ']': Size = 1; return tok::r_square; + case '(': Size = 1; return tok::l_paren; + case ')': Size = 1; return tok::r_paren; + case '{': Size = 1; return tok::l_brace; + case '}': Size = 1; return tok::r_brace; + case '.': + if (CurPtr + 2 < BufferEnd && CurPtr[1] == '.' && CurPtr[2] == '.') { + Size = 3; return tok::ellipsis; + } + Size = 1; return tok::period; + case '&': + if (CurPtr + 1 < BufferEnd && CurPtr[1] == '&') { + Size = 2; return tok::ampamp; + } + if (CurPtr + 1 < BufferEnd && CurPtr[1] == '=') { + Size = 2; return tok::ampequal; + } + Size = 1; return tok::amp; + case '*': + if (CurPtr + 1 < BufferEnd && CurPtr[1] == '=') { + Size = 2; return tok::starequal; + } + Size = 1; return tok::star; + case '+': + if (CurPtr + 1 < BufferEnd && CurPtr[1] == '+') { + Size = 2; return tok::plusplus; + } + if (CurPtr + 1 < BufferEnd && CurPtr[1] == '=') { + Size = 2; return tok::plusequal; + } + Size = 1; return tok::plus; + case '-': + if (CurPtr + 1 < BufferEnd && CurPtr[1] == '>') { + Size = 2; return tok::arrow; + } + if (CurPtr + 1 < BufferEnd && CurPtr[1] == '-') { + Size = 2; return tok::minusminus; + } + if (CurPtr + 1 < BufferEnd && CurPtr[1] == '=') { + Size = 2; return tok::minusequal; + } + Size = 1; return tok::minus; + case '~': Size = 1; return tok::tilde; + case '!': + if (CurPtr + 1 < BufferEnd && CurPtr[1] == '=') { + Size = 2; return tok::exclaimequal; + } + Size = 1; return tok::exclaim; + case '/': + if (CurPtr + 1 < BufferEnd && CurPtr[1] == '=') { + Size = 2; return tok::slashequal; + } + Size = 1; return tok::slash; + case '%': + if (CurPtr + 1 < BufferEnd && CurPtr[1] == '=') { + Size = 2; return tok::percentequal; + } + Size = 1; return tok::percent; + case '<': + if (CurPtr + 1 < BufferEnd && CurPtr[1] == '<') { + if (CurPtr + 2 < BufferEnd && CurPtr[2] == '=') { + Size = 3; return tok::lesslessequal; + 
} + Size = 2; return tok::lessless; + } + if (CurPtr + 1 < BufferEnd && CurPtr[1] == '=') { + Size = 2; return tok::lessequal; + } + Size = 1; return tok::less; + case '>': + if (CurPtr + 1 < BufferEnd && CurPtr[1] == '>') { + if (CurPtr + 2 < BufferEnd && CurPtr[2] == '=') { + Size = 3; return tok::greatergreaterequal; + } + Size = 2; return tok::greatergreater; + } + if (CurPtr + 1 < BufferEnd && CurPtr[1] == '=') { + Size = 2; return tok::greaterequal; + } + Size = 1; return tok::greater; + case '^': + if (CurPtr + 1 < BufferEnd && CurPtr[1] == '=') { + Size = 2; return tok::caretequal; + } + Size = 1; return tok::caret; + case '|': + if (CurPtr + 1 < BufferEnd && CurPtr[1] == '|') { + Size = 2; return tok::pipepipe; + } + if (CurPtr + 1 < BufferEnd && CurPtr[1] == '=') { + Size = 2; return tok::pipeequal; + } + Size = 1; return tok::pipe; + case '?': Size = 1; return tok::question; + case ':': Size = 1; return tok::colon; + case ';': Size = 1; return tok::semi; + case '=': + if (CurPtr + 1 < BufferEnd && CurPtr[1] == '=') { + Size = 2; return tok::equalequal; + } + Size = 1; return tok::equal; + case ',': Size = 1; return tok::comma; + case '#': + if (CurPtr + 1 < BufferEnd && CurPtr[1] == '#') { + Size = 2; return tok::hashhash; + } + if (CurPtr + 1 < BufferEnd && CurPtr[1] == '@') { + Size = 2; return tok::hashat; + } + Size = 1; return tok::hash; + default: + Size = 0; + return tok::unknown; + } +} + +std::unique_ptr Lexer::lex() { + // Skip whitespace + if (skipWhitespace()) { + return formToken(tok::eof, BufferPtr); + } + + const char *TokStart = BufferPtr; + + // Handle end of file + if (BufferPtr >= BufferEnd) { + return formToken(tok::eof, BufferPtr); + } + + unsigned char Char = *BufferPtr; + + // Identifier: [a-zA-Z_] + if (isIdentifierHead(Char)) { + auto Result = std::make_unique(); + lexIdentifier(*Result, TokStart); + return Result; + } + + // Numeric constant: [0-9] + if (isdigit(Char)) { + auto Result = std::make_unique(); + 
lexNumericConstant(*Result); + return Result; + } + + // Punctuator + unsigned Size; + tok::TokenKind Kind = tryMatchPunctuator(TokStart, Size); + if (Kind != tok::unknown) { + BufferPtr += Size; + return formToken(Kind, TokStart); + } + + // Unknown character - report diagnostic + SourceLocation Loc(TokStart); + Diags.report(Loc, diag::err_invalid_character, + std::string("invalid character '") + char(*TokStart) + "'"); + ++BufferPtr; + return formToken(tok::unknown, TokStart); +} + +bool Lexer::equal(Token *Tok, const char *Op) { + return Tok->getSpelling() == Op; +} + +bool Lexer::equal(Token *Tok, tok::TokenKind Kind) { + return Tok->Kind == Kind; +} + +Token *Lexer::skip(Token *Tok, const char *Op) { + if (!equal(Tok, Op)) + errorTok(Tok, "expected '%s'", Op); + return Tok->Next.get(); +} + +} // namespace chibcc \ No newline at end of file From 685f61f6369552c7719606b1c8b91fc9ab75decc Mon Sep 17 00:00:00 2001 From: IamNailong Date: Tue, 4 Nov 2025 17:04:45 +0800 Subject: [PATCH 3/5] Update --- .github/workflows/test.yml | 65 +++++++++++++++++++++++++++ .gitignore | 5 ++- CMakeLists.txt | 6 --- test_compiler.sh | 91 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 160 insertions(+), 7 deletions(-) create mode 100644 .github/workflows/test.yml create mode 100644 test_compiler.sh diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..1eaf7de --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,65 @@ +name: CI + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build-and-test: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y build-essential cmake gcc g++ + + - name: Setup build directory + run: mkdir -p build + + - name: Configure CMake + run: | + cd build + cmake .. 
-DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ + + - name: Build + run: | + cd build + make -j$(nproc) + + - name: Verify binary exists + run: | + ls -la build/bin/ + test -f build/bin/chibcc + echo "Binary size: $(stat -c%s build/bin/chibcc) bytes" + + - name: Run basic compiler test + run: | + ./build/bin/chibcc "1+1" > test_output.s + echo "Generated assembly:" + cat test_output.s + + # Try to assemble and run + gcc -o test_program test_output.s + ./test_program + echo "Exit code: $?" + + - name: Run comprehensive tests + run: | + chmod +x test_compiler.sh + ./test_compiler.sh + + - name: Upload test results + if: always() + uses: actions/upload-artifact@v4 + with: + name: test-results-gcc + path: | + test_results/ + test_output.s + retention-days: 7 \ No newline at end of file diff --git a/.gitignore b/.gitignore index eb7be4d..f32b71f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ build .cache -install \ No newline at end of file +install +test_cases +test_results +a.out \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index a606663..fd95a24 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,12 +5,6 @@ project(ChibCC VERSION 1.0.0 LANGUAGES CXX) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) -# Set compiler flags following LLVM style -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wpedantic") -set(CMAKE_CXX_FLAGS_DEBUG "-g -O0") -set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG") - -# Include directories include_directories(include) # Source files diff --git a/test_compiler.sh b/test_compiler.sh new file mode 100644 index 0000000..6f79945 --- /dev/null +++ b/test_compiler.sh @@ -0,0 +1,91 @@ +#!/bin/bash + +# Test script for chibcc compiler +# Usage: ./test_compiler.sh + +# Don't exit on error, we want to capture and report them + +COMPILER="./build/bin/chibcc" +TEST_DIR="test_cases" +RESULTS_DIR="test_results" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' 
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Create directories
mkdir -p "$TEST_DIR" "$RESULTS_DIR"

# Compile, assemble and run one expression, then compare its exit code.
#   $1 - test name, $2 - input expression, $3 - expected exit code (default 0)
run_test() {
    local test_name="$1"
    local input="$2"
    local expected_exit_code="${3:-0}"

    echo -e "${YELLOW}Testing: $test_name${NC}"
    echo "Input: $input"

    # Generate assembly
    if $COMPILER "$input" > "$RESULTS_DIR/${test_name}.s" 2> "$RESULTS_DIR/${test_name}.err"; then
        echo -e "${GREEN}✓ Compilation successful${NC}"

        # Add GNU stack note to fix linker warning
        echo ".section .note.GNU-stack,\"\",@progbits" >> "$RESULTS_DIR/${test_name}.s"

        # Show generated assembly
        echo "Generated assembly:"
        cat "$RESULTS_DIR/${test_name}.s"

        # Try to assemble and link
        if gcc -o "$RESULTS_DIR/${test_name}" "$RESULTS_DIR/${test_name}.s" 2>/dev/null; then
            # BUG FIX: previously exit_code=$? was read inside the success
            # branch of `if ./prog; then`, where $? is always 0, so tests
            # expecting a non-zero exit code were never actually verified.
            # Run the program first, then capture and compare its status.
            ./"$RESULTS_DIR/${test_name}"
            exit_code=$?
            echo -e "${GREEN}✓ Execution finished (exit code: $exit_code)${NC}"
            if [ "$exit_code" -eq "$expected_exit_code" ]; then
                echo -e "${GREEN}✓ Expected exit code matched${NC}"
            else
                echo -e "${RED}✗ Expected exit code $expected_exit_code, got $exit_code${NC}"
            fi
        else
            echo -e "${RED}✗ Assembly/linking failed${NC}"
        fi
    else
        echo -e "${RED}✗ Compilation failed${NC}"
        if [ -s "$RESULTS_DIR/${test_name}.err" ]; then
            echo "Error output:"
            cat "$RESULTS_DIR/${test_name}.err"
        fi
    fi
    echo "----------------------------------------"
}

# Test cases
echo -e "${YELLOW}Starting compiler tests...${NC}"
echo "========================================"

# Basic arithmetic tests
run_test "simple_addition" "1+1" 2
run_test "simple_subtraction" "5-3" 2
run_test "simple_multiplication" "3*4" 12
run_test "simple_division" "8/2" 4

# More complex expressions
run_test "complex_expr1" "1+2*3" 7
run_test "complex_expr2" "(1+2)*3" 9
run_test "complex_expr3" "10-2*3" 4

# Edge cases
run_test "single_number" "42" 42
run_test "zero" "0" 0
run_test "negative" "-5" 251 # -5 as unsigned byte = 251

# Parentheses tests
run_test "nested_parens" "((1+2)*3)+4" 13
run_test "multiple_parens" "(1+2)*(3+4)" 21

echo -e "${GREEN}All tests completed!${NC}"
echo "Check $RESULTS_DIR/ for detailed results."
\ No newline at end of file From b7d43681a47a6a414e18c4cb579f88421b98951a Mon Sep 17 00:00:00 2001 From: IamNailong Date: Tue, 4 Nov 2025 17:19:51 +0800 Subject: [PATCH 4/5] Update --- .github/workflows/test.yml | 3 +++ test_compiler.sh | 3 +++ 2 files changed, 6 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1eaf7de..820dd5b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -44,6 +44,9 @@ jobs: echo "Generated assembly:" cat test_output.s + # Add GNU stack note to fix linker warning + echo ".section .note.GNU-stack,\"\",@progbits" >> test_output.s + # Try to assemble and run gcc -o test_program test_output.s ./test_program diff --git a/test_compiler.sh b/test_compiler.sh index 6f79945..2cfca5f 100644 --- a/test_compiler.sh +++ b/test_compiler.sh @@ -31,6 +31,9 @@ run_test() { if $COMPILER "$input" > "$RESULTS_DIR/${test_name}.s" 2> "$RESULTS_DIR/${test_name}.err"; then echo -e "${GREEN}✓ Compilation successful${NC}" + # Add GNU stack note to fix linker warning + echo ".section .note.GNU-stack,\"\",@progbits" >> "$RESULTS_DIR/${test_name}.s" + # Show generated assembly echo "Generated assembly:" cat "$RESULTS_DIR/${test_name}.s" From 6a38c63ee535afd6c04e49cb85e5eef466026e7a Mon Sep 17 00:00:00 2001 From: IamNailong Date: Tue, 4 Nov 2025 17:30:58 +0800 Subject: [PATCH 5/5] Update --- .github/workflows/test.yml | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 820dd5b..37cbb9e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -29,28 +29,7 @@ jobs: - name: Build run: | - cd build - make -j$(nproc) - - - name: Verify binary exists - run: | - ls -la build/bin/ - test -f build/bin/chibcc - echo "Binary size: $(stat -c%s build/bin/chibcc) bytes" - - - name: Run basic compiler test - run: | - ./build/bin/chibcc "1+1" > test_output.s - echo "Generated assembly:" - cat 
test_output.s - - # Add GNU stack note to fix linker warning - echo ".section .note.GNU-stack,\"\",@progbits" >> test_output.s - - # Try to assemble and run - gcc -o test_program test_output.s - ./test_program - echo "Exit code: $?" + cmake --build build -j32 - name: Run comprehensive tests run: | @@ -64,5 +43,4 @@ jobs: name: test-results-gcc path: | test_results/ - test_output.s - retention-days: 7 \ No newline at end of file + retention-days: 14 \ No newline at end of file