From 1bbddb7d3de68adb993240e31c748ffcce154d1b Mon Sep 17 00:00:00 2001 From: Rob Hand <146272+sinon@users.noreply.github.com> Date: Sun, 3 Aug 2025 17:36:58 +0100 Subject: [PATCH] Move ast to seperate lib crate --- Cargo.lock | 9 ++ Cargo.toml | 2 +- crates/ast/Cargo.toml | 13 +++ crates/ast/src/lib.rs | 168 +++++++++++++++++++++++++++++++ crates/loxide/Cargo.toml | 1 + crates/loxide/src/builtins.rs | 8 +- crates/loxide/src/interpreter.rs | 124 +++++++---------------- crates/loxide/src/lib.rs | 1 - crates/loxide/src/value.rs | 59 ----------- crates/parser/Cargo.toml | 1 + crates/parser/src/lib.rs | 90 +---------------- 11 files changed, 233 insertions(+), 243 deletions(-) create mode 100644 crates/ast/Cargo.toml create mode 100644 crates/ast/src/lib.rs delete mode 100644 crates/loxide/src/value.rs diff --git a/Cargo.lock b/Cargo.lock index 22ee2f9..0a21ac9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -92,6 +92,13 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "ast" +version = "0.1.0" +dependencies = [ + "lexer", +] + [[package]] name = "backtrace" version = "0.3.74" @@ -410,6 +417,7 @@ name = "loxide" version = "0.1.0" dependencies = [ "assert_cmd", + "ast", "clap", "insta", "lexer", @@ -495,6 +503,7 @@ checksum = "fb37767f6569cd834a413442455e0f066d0d522de8630436e2a1761d9726ba56" name = "parser" version = "0.1.0" dependencies = [ + "ast", "insta", "lexer", "rstest", diff --git a/Cargo.toml b/Cargo.toml index 8d58a72..6b1c6e1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [workspace] resolver = "3" -members = ["crates/lexer", "./", "crates/parser", "crates/loxide"] +members = ["crates/lexer", "crates/parser", "crates/loxide", "crates/ast"] [workspace.package] diff --git a/crates/ast/Cargo.toml b/crates/ast/Cargo.toml new file mode 100644 index 0000000..3140f59 --- /dev/null +++ b/crates/ast/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "ast" +version.workspace = true +authors.workspace = true +edition.workspace = true +rust-version.workspace = true +license.workspace = true +keywords.workspace = true +homepage.workspace = true +repository.workspace = true + +[dependencies] +lexer = { path = "../lexer" } diff --git a/crates/ast/src/lib.rs b/crates/ast/src/lib.rs new file mode 100644 index 0000000..070a0d8 --- /dev/null +++ b/crates/ast/src/lib.rs @@ -0,0 +1,168 @@ +use std::fmt::Display; + +use lexer::Token; + +/// The value that an expression has evaluated too, this can be a literal. +#[derive(Clone, Debug)] +pub enum EvaluatedValue { + /// String value `"hello"` + String(String), + /// Number value. Note Lox only supports double precision floating point + Number(f64), + /// nil, the unset/null value + Nil, + /// Boolean value `true`/`false` + Bool(bool), + /// builtin fn + NativeFunction(NativeFunction), + /// fn + LoxFunction { name: String, func_id: u64 }, +} + +impl From for bool { + fn from(val: EvaluatedValue) -> Self { + match val { + EvaluatedValue::String(_) + | EvaluatedValue::Number(_) + | EvaluatedValue::NativeFunction(_) + | EvaluatedValue::LoxFunction { .. } => true, + EvaluatedValue::Nil => false, + EvaluatedValue::Bool(b) => b, + } + } +} + +impl Display for EvaluatedValue { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::String(s) => write!(f, "{s}"), + Self::Number(n) => write!(f, "{n}"), + Self::Nil => write!(f, "nil"), + Self::Bool(b) => write!(f, "{b:}"), + Self::NativeFunction(native_fn) => write!(f, "{native_fn:?}"), + Self::LoxFunction { name, func_id } => write!(f, "{name:?}-{func_id:?}"), + } + } +} + +/// A user defined lox function +#[derive(Debug, Clone)] +pub struct LoxFunction<'de> { + /// The identifier + pub name: Token<'de>, + /// The parameter values + pub parameters: Vec>, + /// The body of the function + pub body: Vec>, +} + +/// `NativeFunction` is used to represent builtin native functions +#[derive(Clone, Debug)] +pub struct NativeFunction { + /// `name` of the native function + pub name: String, + /// Numbers of arguments that should be passed to `callable` + pub arity: u8, + /// A function to be run + pub callable: fn(&[EvaluatedValue]) -> Result, +} + +impl NativeFunction { + #[must_use] + pub const fn arity(&self) -> u8 { + self.arity + } + #[allow(clippy::missing_errors_doc)] + pub fn call(&self, args: &[EvaluatedValue]) -> Result { + (self.callable)(args) + } +} + +#[derive(Debug, Clone)] +/// `Stmt` represents the possible statements supported +pub enum Stmt<'de> { + /// A print statement + Print(Expr<'de>), + /// An expression statement + ExpressionStatement(Expr<'de>), + /// Var statement + // var = expr; + Var(&'de str, Option>), + /// Block + Block(Vec>), + /// If statement + If(Expr<'de>, Box>, Option>>), + /// While statement + While { + /// The condition that must be `true` for the body to be run + condition: Expr<'de>, + /// The statements that will be executed repreatedly if `condition` + body: Box>, + }, + /// Func statement + Function { + name: Token<'de>, + parameters: Vec>, + body: Vec>, + }, +} + +#[derive(Debug, Clone)] +/// `Expr` represents a unit of an AST +pub enum Expr<'de> { + /// `Binary` is a binary expression such as `1 * 2` + Binary { + /// The left item `Expr` in an expression + left: Box>, + /// The operator to be applied on the `left` and `right` `Expr` + operator: Token<'de>, + /// The right item `Expr` in an expression. + right: Box>, + }, + /// `Unary` is a unary expression such as `!true` + Unary { + /// The operator to be applied on the `right` `Expr` + operator: Token<'de>, + /// The expression the unary operator will be applied to + right: Box>, + }, + /// `Literal` is a value + Literal(LiteralAtom<'de>), + /// `Grouping` holds other `Expr` such as `(1 * 2)` + Grouping(Box>), + /// `Variable` + Variable(Token<'de>), + /// `Assign` + Assign(&'de str, Box>), + /// `Logical` - `or` and `and` + Logical { + /// The left expression of a Logical expression + left: Box>, + /// The operator of a Logical expression + operator: Token<'de>, + /// The right expression of a Logical expression + right: Box>, + }, + /// Function `Call` + Call { + /// function to be called + callee: Box>, + /// paren token + paren: Token<'de>, + /// arguments to be passed to function call + arguments: Vec>, + }, +} + +#[derive(Debug, Clone, PartialEq)] +/// `LiteralAtom` represents the types of literals supported by Lox +pub enum LiteralAtom<'de> { + /// `String` literal for example `"foo"` + String(&'de str), + /// Number literal for example `123.1` + Number(f64), + /// Nil literal + Nil, + /// Bool literals `false` or `true` + Bool(bool), +} diff --git a/crates/loxide/Cargo.toml b/crates/loxide/Cargo.toml index dcc12ab..7ddb02b 100644 --- a/crates/loxide/Cargo.toml +++ b/crates/loxide/Cargo.toml @@ -13,6 +13,7 @@ repository.workspace = true clap = { version = "4.5.23", features = ["derive"] } lexer = { path = "../lexer" } parser = { path = "../parser" } +ast = { path = "../ast" } miette = { workspace = true } [dev-dependencies] diff --git a/crates/loxide/src/builtins.rs b/crates/loxide/src/builtins.rs index 147a4ec..58a4d13 100644 --- a/crates/loxide/src/builtins.rs +++ b/crates/loxide/src/builtins.rs @@ -1,12 +1,8 @@ use std::time::{SystemTime, UNIX_EPOCH}; -use crate::interpreter; -use crate::value::EvaluatedValue; +use ast::EvaluatedValue; -pub fn clock( - _interpreter: &mut interpreter::Interpreter, - _args: &[EvaluatedValue], -) -> Result { +pub fn clock(_args: &[EvaluatedValue]) -> Result { let start = SystemTime::now(); #[allow(clippy::cast_precision_loss)] start.duration_since(UNIX_EPOCH).map_or_else( diff --git a/crates/loxide/src/interpreter.rs b/crates/loxide/src/interpreter.rs index eb19c40..9bb80b4 100644 --- a/crates/loxide/src/interpreter.rs +++ b/crates/loxide/src/interpreter.rs @@ -3,77 +3,37 @@ //! Responsible for running the AST and returning the computed values //! -use std::{cell::RefCell, collections::HashMap, fmt, rc::Rc}; +use std::{cell::RefCell, collections::HashMap, rc::Rc}; -use crate::{builtins, value::EvaluatedValue}; -use lexer::{Token, TokenType}; -use parser::{Expr, LiteralAtom, Parser, Stmt}; +use crate::builtins; +use ast::{EvaluatedValue, Expr, LiteralAtom, LoxFunction, NativeFunction, Stmt}; +use lexer::TokenType; +use parser::Parser; -/// `NativeFunction` is used to represent builtin native functions -#[derive(Clone)] -pub struct NativeFunction { - /// `name` of the native function - pub name: String, - /// Numbers of arguments that should be passed to `callable` - pub arity: u8, - /// A function to be run - pub callable: fn(&mut Interpreter, &[EvaluatedValue]) -> Result, -} - -impl fmt::Debug for NativeFunction { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "NativeFunction({})", self.name) - } -} - -impl NativeFunction { - const fn arity(&self, _interpreter: &Interpreter) -> u8 { - self.arity - } - fn call( - &self, - interpreter: &mut Interpreter, - args: &[EvaluatedValue], - ) -> Result { - (self.callable)(interpreter, args) - } -} - -/// A user defined lox function -#[derive(Debug, Clone)] -pub struct LoxFunction<'de> { - /// The identifier - pub name: Token<'de>, - /// The parameter values - pub parameters: Vec>, - /// The body of the function - pub body: Vec>, -} - -impl<'de> LoxFunction<'de> { - #[allow(dead_code)] - fn arity(&self) -> u8 { - u8::try_from(self.parameters.len()).expect("arity < 255 is enforced by parser") - } - #[allow(dead_code)] - fn call( - &self, - interpreter: &mut Interpreter<'de>, - args: &[EvaluatedValue], - ) -> Result { - let _args_env: HashMap<_, _> = self - .parameters - .iter() - .zip(args.iter()) - .map(|(param, arg)| (param.origin, (Some(arg.clone()),))) - .collect(); - let saved_env = interpreter.environment.clone(); - let block = Stmt::Block(self.body.clone()); - evaluate_statement(&block, interpreter)?; - interpreter.environment = saved_env; - todo!("Implement the `call` fn for `LoxFunction`") - } -} +// impl<'de> LoxFunction<'de> { +// #[allow(dead_code)] +// fn arity(&self) -> u8 { +// u8::try_from(self.parameters.len()).expect("arity < 255 is enforced by parser") +// } +// #[allow(dead_code)] +// fn call( +// &self, +// interpreter: &mut Interpreter<'de>, +// args: &[EvaluatedValue], +// ) -> Result { +// let _args_env: HashMap<_, _> = self +// .parameters +// .iter() +// .zip(args.iter()) +// .map(|(param, arg)| (param.origin, (Some(arg.clone()),))) +// .collect(); +// let saved_env = interpreter.environment.clone(); +// let block = Stmt::Block(self.body.clone()); +// evaluate_statement(&block, interpreter)?; +// interpreter.environment = saved_env; +// todo!("Implement the `call` fn for `LoxFunction`") +// } +// } /// `Interpreter` /// responsible for iterating over the rusults of parser @@ -247,7 +207,8 @@ fn evaluate_statement<'de>( .clone(); } Stmt::While { condition, body } => loop { - if !(evaluate_expression(condition, interpreter)?.is_truthy()) { + let cond_val: bool = evaluate_expression(condition, interpreter)?.into(); + if !(cond_val) { break; } evaluate_statement(body, interpreter)?; @@ -363,7 +324,7 @@ fn evaluate_expression<'de>( _ => panic!("{operator:?} is not for suppoer Bool / Bool binary"), } } - (l, r, op) => todo!("Add handling for {l} {r} {op:?}"), + (l, r, op) => todo!("Add handling for {l:?} {r:?} {op:?}"), } } Expr::Unary { operator, right } => { @@ -388,25 +349,14 @@ fn evaluate_expression<'de>( true => Ok(EvaluatedValue::Bool(false)), false => Ok(EvaluatedValue::Bool(true)), }, - EvaluatedValue::NativeFunction(_f) => todo!(), - EvaluatedValue::LoxFunction { - name: _, - func_id: _, - } => todo!(), + _ => panic!("! should not be called with {v}"), }, ), TokenType::Minus => r.as_ref().map_or_else( |_| todo!(), |v| match v { - EvaluatedValue::String(_) => todo!(), EvaluatedValue::Number(n) => Ok(EvaluatedValue::Number(-n)), - EvaluatedValue::Nil => todo!(), - EvaluatedValue::Bool(_) => todo!(), - EvaluatedValue::NativeFunction(_f) => todo!(), - EvaluatedValue::LoxFunction { - name: _, - func_id: _, - } => todo!(), + _ => panic!("Minus should not be called with non-Number such as {v}"), }, ), // TODO: Make unrepresentable by narrowing `operator` to `UnaryOperator:Not|Negate` @@ -449,7 +399,7 @@ fn evaluate_expression<'de>( right, } => { let left_val = evaluate_expression(left, interpreter)?; - let left_truth: bool = left_val.is_truthy(); + let left_truth: bool = left_val.clone().into(); if operator.token_type == TokenType::Or { if left_truth { return Ok(left_val); @@ -472,11 +422,11 @@ fn evaluate_expression<'de>( match &callee_fn { EvaluatedValue::NativeFunction(f) => { - if f.arity(interpreter) as usize != args.len() { + if f.arity() as usize != args.len() { eprintln!("Expected {} arguments but got {}.", f.arity, args.len()); return Err("Incorrect arity".to_string()); } - f.call(interpreter, &args) + f.call(&args) } EvaluatedValue::LoxFunction { func_id, .. } => { let _ = interpreter.get_lox_fn(*func_id); diff --git a/crates/loxide/src/lib.rs b/crates/loxide/src/lib.rs index 248b449..7f9798e 100644 --- a/crates/loxide/src/lib.rs +++ b/crates/loxide/src/lib.rs @@ -9,4 +9,3 @@ #![warn(missing_docs)] mod builtins; pub mod interpreter; -mod value; diff --git a/crates/loxide/src/value.rs b/crates/loxide/src/value.rs deleted file mode 100644 index 711a101..0000000 --- a/crates/loxide/src/value.rs +++ /dev/null @@ -1,59 +0,0 @@ -use std::fmt::Display; - -use crate::interpreter::NativeFunction; - -/// The value that an expression has evaluated too, this can be a literal. -#[derive(Clone, Debug)] -pub enum EvaluatedValue { - /// String value `"hello"` - String(String), - /// Number value. Note Lox only supports double precision floating point - Number(f64), - /// nil, the unset/null value - Nil, - /// Boolean value `true`/`false` - Bool(bool), - /// builtin fn - NativeFunction(NativeFunction), - /// fn - LoxFunction { name: String, func_id: u64 }, -} - -impl EvaluatedValue { - pub(crate) const fn is_truthy(&self) -> bool { - match self { - Self::String(_) - | Self::Number(_) - | Self::NativeFunction(_) - | Self::LoxFunction { .. } => true, - Self::Nil => false, - Self::Bool(b) => *b, - } - } -} - -impl From for bool { - fn from(val: EvaluatedValue) -> Self { - match val { - EvaluatedValue::String(_) - | EvaluatedValue::Number(_) - | EvaluatedValue::NativeFunction(_) - | EvaluatedValue::LoxFunction { .. } => true, - EvaluatedValue::Nil => false, - EvaluatedValue::Bool(b) => b, - } - } -} - -impl Display for EvaluatedValue { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::String(s) => write!(f, "{s}"), - Self::Number(n) => write!(f, "{n}"), - Self::Nil => write!(f, "nil"), - Self::Bool(b) => write!(f, "{b:}"), - Self::NativeFunction(native_fn) => write!(f, "{native_fn:?}"), - Self::LoxFunction { name, func_id } => write!(f, "{name:?}-{func_id:?}"), - } - } -} diff --git a/crates/parser/Cargo.toml b/crates/parser/Cargo.toml index fa05b58..42a9ba7 100644 --- a/crates/parser/Cargo.toml +++ b/crates/parser/Cargo.toml @@ -11,6 +11,7 @@ repository.workspace = true [dependencies] lexer = { path = "../lexer" } +ast = { path = "../ast" } [dev-dependencies] diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs index 1c95951..2dab99e 100644 --- a/crates/parser/src/lib.rs +++ b/crates/parser/src/lib.rs @@ -4,6 +4,7 @@ //! //! Uses a recursive desecent parser. To transform the token stream into //! `Expr` +use ast::{Expr, LiteralAtom, Stmt}; use lexer::{Lexer, Token, TokenType}; /// `Parser` is responsible for iterating over the token stream from `Lexer` @@ -14,95 +15,6 @@ pub struct Parser<'de> { parse_failed: bool, } -#[derive(Debug, Clone, PartialEq)] -/// `LiteralAtom` represents the types of literals supported by Lox -pub enum LiteralAtom<'de> { - /// `String` literal for example `"foo"` - String(&'de str), - /// Number literal for example `123.1` - Number(f64), - /// Nil literal - Nil, - /// Bool literals `false` or `true` - Bool(bool), -} - -#[derive(Debug, Clone)] -/// `Expr` represents a unit of an AST -pub enum Expr<'de> { - /// `Binary` is a binary expression such as `1 * 2` - Binary { - /// The left item `Expr` in an expression - left: Box>, - /// The operator to be applied on the `left` and `right` `Expr` - operator: Token<'de>, - /// The right item `Expr` in an expression. - right: Box>, - }, - /// `Unary` is a unary expression such as `!true` - Unary { - /// The operator to be applied on the `right` `Expr` - operator: Token<'de>, - /// The expression the unary operator will be applied to - right: Box>, - }, - /// `Literal` is a value - Literal(LiteralAtom<'de>), - /// `Grouping` holds other `Expr` such as `(1 * 2)` - Grouping(Box>), - /// `Variable` - Variable(Token<'de>), - /// `Assign` - Assign(&'de str, Box>), - /// `Logical` - `or` and `and` - Logical { - /// The left expression of a Logical expression - left: Box>, - /// The operator of a Logical expression - operator: Token<'de>, - /// The right expression of a Logical expression - right: Box>, - }, - /// Function `Call` - Call { - /// function to be called - callee: Box>, - /// paren token - paren: Token<'de>, - /// arguments to be passed to function call - arguments: Vec>, - }, -} - -#[derive(Debug, Clone)] -/// `Stmt` represents the possible statements supported -pub enum Stmt<'de> { - /// A print statement - Print(Expr<'de>), - /// An expression statement - ExpressionStatement(Expr<'de>), - /// Var statement - // var = expr; - Var(&'de str, Option>), - /// Block - Block(Vec>), - /// If statement - If(Expr<'de>, Box>, Option>>), - /// While statement - While { - /// The condition that must be `true` for the body to be run - condition: Expr<'de>, - /// The statements that will be executed repreatedly if `condition` - body: Box>, - }, - /// Func statement - Function { - name: Token<'de>, - parameters: Vec>, - body: Vec>, - }, -} - impl<'de> Iterator for Parser<'de> { type Item = Result, String>;