diff --git a/src/b.rs b/src/b.rs index 121ef5ce..03a8fe45 100644 --- a/src/b.rs +++ b/src/b.rs @@ -28,7 +28,22 @@ use crust::libc::*; use crust::assoc_lookup_cstr; use arena::Arena; use targets::*; -use lexer::{Lexer, Loc, Token}; +use lexer::{Lexer, Loc, Token, ErrorKind}; + +pub unsafe fn get_token(l: *mut Lexer, c: *mut Compiler) -> Option<()> { + match lexer::get_token(l) { + Ok(()) => Some(()), + Err(ErrorKind::Error) => bump_error_count(c), + Err(ErrorKind::Fatal) => bump_error_count(c).and(None), + } +} + +pub unsafe fn peek_token(l: *mut Lexer, c: *mut Compiler) -> Option { + match lexer::peek_token(l) { + Some(token) => Some(token), + None => bump_error_count(c).and(None), + } +} pub unsafe fn expect_tokens(l: *mut Lexer, tokens: *const [Token]) -> Option<()> { for i in 0..tokens.len() { @@ -60,25 +75,23 @@ pub unsafe fn expect_token(l: *mut Lexer, token: Token) -> Option<()> { expect_tokens(l, &[token]) } -pub unsafe fn get_and_expect_token(l: *mut Lexer, token: Token) -> Option<()> { - lexer::get_token(l)?; +pub unsafe fn get_and_expect_token(l: *mut Lexer, c: *mut Compiler, token: Token) -> Option<()> { + get_token(l, c)?; expect_token(l, token) } pub unsafe fn get_and_expect_token_but_continue(l: *mut Lexer, c: *mut Compiler, token: Token) -> Option<()> { - let saved_point = (*l).parse_point; - lexer::get_token(l)?; - if expect_token(l, token).is_none() { - (*l).parse_point = saved_point; + peek_token(l, c)?; + if let None = expect_token(l, token) { bump_error_count(c) } else { - Some(()) + get_token(l, c) } } -pub unsafe fn get_and_expect_tokens(l: *mut Lexer, clexes: *const [Token]) -> Option<()> { - lexer::get_token(l)?; - expect_tokens(l, clexes) +pub unsafe fn get_and_expect_tokens(l: *mut Lexer, c: *mut Compiler, tokens: *const [Token]) -> Option<()> { + get_token(l, c)?; + expect_tokens(l, tokens) } pub unsafe fn expect_token_id(l: *mut Lexer, id: *const c_char) -> Option<()> { @@ -90,8 +103,8 @@ pub unsafe fn expect_token_id(l: *mut Lexer, id: *const c_char) -> Option<()> { Some(()) } -pub unsafe fn get_and_expect_token_id(l: *mut Lexer, id: *const c_char) -> Option<()> { - lexer::get_token(l)?; +pub unsafe fn get_and_expect_token_id(l: *mut Lexer, c: *mut Compiler, id: *const c_char) -> Option<()> { + get_token(l, c)?; expect_token_id(l, id) } @@ -385,7 +398,7 @@ pub unsafe fn compile_string(string: *const c_char, c: *mut Compiler) -> usize { } pub unsafe fn compile_primary_expression(l: *mut Lexer, c: *mut Compiler) -> Option<(Arg, bool)> { - lexer::get_token(l)?; + get_token(l, c)?; let arg = match (*l).token { Token::OParen => { let result = compile_expression(l, c)?; @@ -483,12 +496,13 @@ pub unsafe fn compile_primary_expression(l: *mut Lexer, c: *mut Compiler) -> Opt let (mut arg, mut is_lvalue) = arg?; loop { - let saved_point = (*l).parse_point; - lexer::get_token(l)?; - - (arg, is_lvalue) = match (*l).token { - Token::OParen => Some((compile_function_call(l, c, arg)?, false)), + (arg, is_lvalue) = match peek_token(l, c)? { + Token::OParen => { + get_token(l, c)?; + Some((compile_function_call(l, c, arg)?, false)) + } Token::OBracket => { + get_token(l, c)?; let (offset, _) = compile_expression(l, c)?; get_and_expect_token_but_continue(l, c, Token::CBracket)?; @@ -498,6 +512,7 @@ pub unsafe fn compile_primary_expression(l: *mut Lexer, c: *mut Compiler) -> Opt Some((Arg::Deref(result), true)) } Token::PlusPlus => { + get_token(l, c)?; let loc = (*l).loc; if !is_lvalue { diagf!(loc, c!("ERROR: cannot increment an rvalue\n")); @@ -511,6 +526,7 @@ pub unsafe fn compile_primary_expression(l: *mut Lexer, c: *mut Compiler) -> Opt Some((Arg::AutoVar(pre), false)) } Token::MinusMinus => { + get_token(l, c)?; let loc = (*l).loc; if !is_lvalue { diagf!(loc, c!("ERROR: cannot decrement an rvalue\n")); @@ -524,7 +540,6 @@ pub unsafe fn compile_primary_expression(l: *mut Lexer, c: *mut Compiler) -> Opt Some((Arg::AutoVar(pre), false)) } _ => { - (*l).parse_point = saved_point; return Some((arg, is_lvalue)); } }?; @@ -561,39 +576,31 @@ pub unsafe fn compile_binop_expression(l: *mut Lexer, c: *mut Compiler, preceden let (mut lhs, mut lvalue) = compile_binop_expression(l, c, precedence + 1)?; - let mut saved_point = (*l).parse_point; - lexer::get_token(l)?; - - if let Some(binop) = Binop::from_token((*l).token) { + if let Some(binop) = Binop::from_token(peek_token(l, c)?) { if binop.precedence() == precedence { - while let Some(binop) = Binop::from_token((*l).token) { + while let Some(binop) = Binop::from_token(peek_token(l, c)?) { if binop.precedence() != precedence { break; } + get_token(l, c)?; let (rhs, _) = compile_binop_expression(l, c, precedence + 1)?; let index = allocate_auto_var(&mut (*c).auto_vars_ator); push_opcode(Op::Binop {binop, index, lhs, rhs}, (*l).loc, c); - lhs = Arg::AutoVar(index); + lhs = Arg::AutoVar(index); lvalue = false; - - saved_point = (*l).parse_point; - lexer::get_token(l)?; } } } - (*l).parse_point = saved_point; Some((lhs, lvalue)) } pub unsafe fn compile_assign_expression(l: *mut Lexer, c: *mut Compiler) -> Option<(Arg, bool)> { let (lhs, mut lvalue) = compile_binop_expression(l, c, 0)?; - let mut saved_point = (*l).parse_point; - lexer::get_token(l)?; - - while let Some(binop) = Binop::from_assign_token((*l).token) { + while let Some(binop) = Binop::from_assign_token(peek_token(l, c)?) { + get_token(l, c)?; let binop_loc = (*l).loc; let (rhs, _) = compile_assign_expression(l, c)?; @@ -623,12 +630,10 @@ pub unsafe fn compile_assign_expression(l: *mut Lexer, c: *mut Compiler) -> Opti } lvalue = false; - - saved_point = (*l).parse_point; - lexer::get_token(l)?; } - if (*l).token == Token::Question { + if peek_token(l, c)? == Token::Question { + get_token(l, c)?; let result = allocate_auto_var(&mut (*c).auto_vars_ator); let else_label = allocate_label_index(c); @@ -648,7 +653,6 @@ pub unsafe fn compile_assign_expression(l: *mut Lexer, c: *mut Compiler) -> Opti Some((Arg::AutoVar(result), false)) } else { - (*l).parse_point = saved_point; Some((lhs, lvalue)) } } @@ -658,25 +662,21 @@ pub unsafe fn compile_expression(l: *mut Lexer, c: *mut Compiler) -> Option<(Arg } pub unsafe fn compile_block(l: *mut Lexer, c: *mut Compiler) -> Option<()> { - loop { - let saved_point = (*l).parse_point; - lexer::get_token(l)?; - if (*l).token == Token::CCurly { return Some(()); } - (*l).parse_point = saved_point; - - compile_statement(l, c)? + while peek_token(l, c)? != Token::CCurly { + compile_statement(l, c)?; } + get_and_expect_token(l, c, Token::CCurly) } - unsafe fn compile_function_call(l: *mut Lexer, c: *mut Compiler, fun: Arg) -> Option { + +unsafe fn compile_function_call(l: *mut Lexer, c: *mut Compiler, fun: Arg) -> Option { let mut args: Array = zeroed(); - let saved_point = (*l).parse_point; - lexer::get_token(l)?; - if (*l).token != Token::CParen { - (*l).parse_point = saved_point; + if peek_token(l, c)? == Token::CParen { + get_token(l, c)?; + } else { loop { let (expr, _) = compile_expression(l, c)?; da_append(&mut args, expr); - get_and_expect_tokens(l, &[Token::CParen, Token::Comma])?; + get_and_expect_tokens(l, c, &[Token::CParen, Token::Comma])?; match (*l).token { Token::CParen => break, Token::Comma => continue, @@ -701,12 +701,11 @@ pub unsafe fn name_declare_if_not_exists(names: *mut Array<*const c_char>, name: pub unsafe fn compile_asm_stmts(l: *mut Lexer, c: *mut Compiler, stmts: *mut Array) -> Option<()> { get_and_expect_token_but_continue(l, c, Token::OParen)?; - let saved_point = (*l).parse_point; - lexer::get_token(l)?; - if (*l).token != Token::CParen { - (*l).parse_point = saved_point; + if peek_token(l, c)? == Token::CParen { + get_token(l, c)?; + } else { loop { - get_and_expect_token(l, Token::String)?; + get_and_expect_token(l, c, Token::String)?; match (*l).token { Token::String => { let line = arena::strdup(&mut (*c).arena, (*l).string); @@ -716,7 +715,7 @@ pub unsafe fn compile_asm_stmts(l: *mut Lexer, c: *mut Compiler, stmts: *mut Arr _ => unreachable!(), } - get_and_expect_tokens(l, &[Token::Comma, Token::CParen])?; + get_and_expect_tokens(l, c, &[Token::Comma, Token::CParen])?; match (*l).token { Token::Comma => {} Token::CParen => break, @@ -729,14 +728,13 @@ pub unsafe fn compile_asm_stmts(l: *mut Lexer, c: *mut Compiler, stmts: *mut Arr } pub unsafe fn compile_statement(l: *mut Lexer, c: *mut Compiler) -> Option<()> { - let saved_point = (*l).parse_point; - lexer::get_token(l)?; - - match (*l).token { + match peek_token(l, c)? { Token::SemiColon => { + get_token(l, c)?; Some(()) }, Token::OCurly => { + get_token(l, c)?; scope_push(&mut (*c).vars); let saved_auto_vars_count = (*c).auto_vars_ator.count; compile_block(l, c)?; @@ -745,22 +743,24 @@ pub unsafe fn compile_statement(l: *mut Lexer, c: *mut Compiler) -> Option<()> { Some(()) } Token::Extrn => { + get_token(l, c)?; while (*l).token != Token::SemiColon { - get_and_expect_token(l, Token::ID)?; + get_and_expect_token(l, c, Token::ID)?; let name = arena::strdup(&mut (*c).arena, (*l).string); name_declare_if_not_exists(&mut (*c).extrns, name); declare_var(c, name, (*l).loc, Storage::External {name})?; - get_and_expect_tokens(l, &[Token::SemiColon, Token::Comma])?; + get_and_expect_tokens(l, c, &[Token::SemiColon, Token::Comma])?; } compile_statement(l, c) } Token::Auto => { + get_token(l, c)?; while (*l).token != Token::SemiColon { - get_and_expect_token(l, Token::ID)?; + get_and_expect_token(l, c, Token::ID)?; let name = arena::strdup(&mut (*c).arena, (*l).string); let index = allocate_auto_var(&mut (*c).auto_vars_ator); declare_var(c, name, (*l).loc, Storage::Auto {index})?; - get_and_expect_tokens(l, &[Token::SemiColon, Token::Comma, Token::IntLit, Token::CharLit])?; + get_and_expect_tokens(l, c, &[Token::SemiColon, Token::Comma, Token::IntLit, Token::CharLit])?; if (*l).token == Token::IntLit || (*l).token == Token::CharLit { let size = (*l).int_number as usize; if size == 0 { @@ -774,12 +774,13 @@ pub unsafe fn compile_statement(l: *mut Lexer, c: *mut Compiler) -> Option<()> { // See TODO(2025-06-05 17:45:36) let arg = Arg::RefAutoVar(index + size); push_opcode(Op::AutoAssign {index, arg}, (*l).loc, c); - get_and_expect_tokens(l, &[Token::SemiColon, Token::Comma])?; + get_and_expect_tokens(l, c, &[Token::SemiColon, Token::Comma])?; } } compile_statement(l, c) } Token::If => { + get_token(l, c)?; get_and_expect_token_but_continue(l, c, Token::OParen)?; let saved_auto_vars_count = (*c).auto_vars_ator.count; let (cond, _) = compile_expression(l, c)?; @@ -790,22 +791,21 @@ pub unsafe fn compile_statement(l: *mut Lexer, c: *mut Compiler) -> Option<()> { compile_statement(l, c)?; - let saved_point = (*l).parse_point; - lexer::get_token(l)?; - if (*l).token == Token::Else { + if peek_token(l, c)? == Token::Else { + get_token(l, c)?; let out_label = allocate_label_index(c); push_opcode(Op::JmpLabel{label: out_label}, (*l).loc, c); push_opcode(Op::Label{label: else_label}, (*l).loc, c); compile_statement(l, c)?; push_opcode(Op::Label{label: out_label}, (*l).loc, c); } else { - (*l).parse_point = saved_point; push_opcode(Op::Label{label: else_label}, (*l).loc, c); } Some(()) } Token::While => { + get_token(l, c)?; let cond_label = allocate_label_index(c); push_opcode(Op::Label {label: cond_label}, (*l).loc, c); @@ -825,7 +825,8 @@ pub unsafe fn compile_statement(l: *mut Lexer, c: *mut Compiler) -> Option<()> { Some(()) } Token::Return => { - get_and_expect_tokens(l, &[Token::SemiColon, Token::OParen])?; + get_token(l, c)?; + get_and_expect_tokens(l, c, &[Token::SemiColon, Token::OParen])?; if (*l).token == Token::SemiColon { push_opcode(Op::Return {arg: None}, (*l).loc, c); } else if (*l).token == Token::OParen { @@ -839,7 +840,8 @@ pub unsafe fn compile_statement(l: *mut Lexer, c: *mut Compiler) -> Option<()> { Some(()) } Token::Goto => { - get_and_expect_token(l, Token::ID)?; + get_token(l, c)?; + get_and_expect_token(l, c, Token::ID)?; let name = arena::strdup(&mut (*c).arena, (*l).string); let loc = (*l).loc; let addr = (*c).func_body.count; @@ -849,6 +851,7 @@ pub unsafe fn compile_statement(l: *mut Lexer, c: *mut Compiler) -> Option<()> { Some(()) } Token::Asm => { + get_token(l, c)?; let loc = (*l).loc; let mut stmts: Array = zeroed(); compile_asm_stmts(l, c, &mut stmts)?; @@ -856,8 +859,9 @@ pub unsafe fn compile_statement(l: *mut Lexer, c: *mut Compiler) -> Option<()> { Some(()) } Token::Case => { + get_token(l, c)?; let case_loc = (*l).loc; - lexer::get_token(l); + get_token(l, c)?; expect_tokens(l, &[Token::IntLit, Token::CharLit])?; // TODO: String ??! let case_value = (*l).int_number; get_and_expect_token_but_continue(l, c, Token::Colon)?; @@ -893,6 +897,7 @@ pub unsafe fn compile_statement(l: *mut Lexer, c: *mut Compiler) -> Option<()> { } } Token::Switch => { + get_token(l, c)?; let saved_auto_vars_count = (*c).auto_vars_ator.count; let switch_loc = (*l).loc; @@ -913,18 +918,20 @@ pub unsafe fn compile_statement(l: *mut Lexer, c: *mut Compiler) -> Option<()> { Some(()) } _ => { - if (*l).token == Token::ID { - let name = arena::strdup(&mut (*c).arena, (*l).string); - let name_loc = (*l).loc; - lexer::get_token(l)?; + if peek_token(l, c)? == Token::ID { + let saved_point = (*l).parse_point; + get_token(l, c)?; + get_token(l, c)?; if (*l).token == Token::Colon { + let name = arena::strdup(&mut (*c).arena, (*l).string); + let name_loc = (*l).loc; let label = allocate_label_index(c); push_opcode(Op::Label{label}, name_loc, c); define_goto_label(c, name, name_loc, label)?; return Some(()); } + (*l).parse_point = saved_point; } - (*l).parse_point = saved_point; let saved_auto_vars_count = (*c).auto_vars_ator.count; compile_expression(l, c)?; (*c).auto_vars_ator.count = saved_auto_vars_count; @@ -1037,7 +1044,7 @@ pub unsafe fn bump_error_count(c: *mut Compiler) -> Option<()> { pub unsafe fn compile_program(l: *mut Lexer, c: *mut Compiler) -> Option<()> { 'def: loop { - lexer::get_token(l)?; + get_token(l, c)?; match (*l).token { Token::EOF => break 'def, Token::Variadic => { @@ -1066,11 +1073,11 @@ pub unsafe fn compile_program(l: *mut Lexer, c: *mut Compiler) -> Option<()> { } Token::Extrn => { while (*l).token != Token::SemiColon { - get_and_expect_token(l, Token::ID)?; + get_and_expect_token(l, c, Token::ID)?; let name = arena::strdup(&mut (*c).arena, (*l).string); name_declare_if_not_exists(&mut (*c).extrns, name); declare_var(c, name, (*l).loc, Storage::External {name})?; - get_and_expect_tokens(l, &[Token::SemiColon, Token::Comma])?; + get_and_expect_tokens(l, c, &[Token::SemiColon, Token::Comma])?; } } _ => { @@ -1079,25 +1086,22 @@ pub unsafe fn compile_program(l: *mut Lexer, c: *mut Compiler) -> Option<()> { let name_loc = (*l).loc; declare_var(c, name, name_loc, Storage::External{name})?; - let saved_point = (*l).parse_point; - lexer::get_token(l)?; - - match (*l).token { + match peek_token(l, c)? { Token::OParen => { // Function definition + get_token(l, c)?; scope_push(&mut (*c).vars); // begin function scope let mut params_count = 0; - let saved_point = (*l).parse_point; - lexer::get_token(l)?; - if (*l).token != Token::CParen { - (*l).parse_point = saved_point; + if peek_token(l, c)? == Token::CParen { + get_token(l, c)?; + } else { 'params: loop { - get_and_expect_token(l, Token::ID)?; + get_and_expect_token(l, c, Token::ID)?; let name = arena::strdup(&mut (*c).arena, (*l).string); let name_loc = (*l).loc; let index = allocate_auto_var(&mut (*c).auto_vars_ator); declare_var(c, name, name_loc, Storage::Auto{index})?; params_count += 1; - get_and_expect_tokens(l, &[Token::CParen, Token::Comma])?; + get_and_expect_tokens(l, c, &[Token::CParen, Token::Comma])?; match (*l).token { Token::CParen => break 'params, Token::Comma => continue 'params, @@ -1133,13 +1137,12 @@ pub unsafe fn compile_program(l: *mut Lexer, c: *mut Compiler) -> Option<()> { (*c).op_label_count = 0; } Token::Asm => { // Assembly function definition + get_token(l, c)?; let mut body: Array = zeroed(); compile_asm_stmts(l, c, &mut body)?; da_append(&mut (*c).asm_funcs, AsmFunc {name, name_loc, body}); } _ => { // Variable definition - (*l).parse_point = saved_point; - let mut global = Global { name, values: zeroed(), @@ -1149,22 +1152,22 @@ pub unsafe fn compile_program(l: *mut Lexer, c: *mut Compiler) -> Option<()> { // TODO: This code is ugly // couldn't find a better way to write it while keeping accurate error messages - get_and_expect_tokens(l, &[Token::Minus, Token::IntLit, Token::CharLit, Token::String, Token::ID, Token::SemiColon, Token::OBracket])?; + get_and_expect_tokens(l, c, &[Token::Minus, Token::IntLit, Token::CharLit, Token::String, Token::ID, Token::SemiColon, Token::OBracket])?; if (*l).token == Token::OBracket { global.is_vec = true; - get_and_expect_tokens(l, &[Token::IntLit, Token::CBracket])?; + get_and_expect_tokens(l, c, &[Token::IntLit, Token::CBracket])?; if (*l).token == Token::IntLit { global.minimum_size = (*l).int_number as usize; get_and_expect_token_but_continue(l, c, Token::CBracket)?; } - get_and_expect_tokens(l, &[Token::Minus, Token::IntLit, Token::CharLit, Token::String, Token::ID, Token::SemiColon])?; + get_and_expect_tokens(l, c, &[Token::Minus, Token::IntLit, Token::CharLit, Token::String, Token::ID, Token::SemiColon])?; } while (*l).token != Token::SemiColon { let value = match (*l).token { Token::Minus => { - get_and_expect_token(l, Token::IntLit)?; + get_and_expect_token(l, c, Token::IntLit)?; ImmediateValue::Literal(!(*l).int_number + 1) } Token::IntLit | Token::CharLit => ImmediateValue::Literal((*l).int_number), @@ -1183,9 +1186,9 @@ pub unsafe fn compile_program(l: *mut Lexer, c: *mut Compiler) -> Option<()> { }; da_append(&mut global.values, value); - get_and_expect_tokens(l, &[Token::SemiColon, Token::Comma])?; + get_and_expect_tokens(l, c, &[Token::SemiColon, Token::Comma])?; if (*l).token == Token::Comma { - get_and_expect_tokens(l, &[Token::Minus, Token::IntLit, Token::CharLit, Token::String, Token::ID])?; + get_and_expect_tokens(l, c, &[Token::Minus, Token::IntLit, Token::CharLit, Token::String, Token::ID])?; } else { break; } diff --git a/src/lexer.rs b/src/lexer.rs index 926d6472..bc0e937f 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -29,11 +29,18 @@ macro_rules! missingf { }} } +pub type Result = core::result::Result<(), ErrorKind>; + +#[derive(Clone, Copy)] +pub enum ErrorKind { + Error, // An (invalid) token is available; compilation can continue. + Fatal, // No token is available; compilation must be terminated. +} + #[derive(Clone, Copy, PartialEq)] pub enum Token { // Terminal EOF, - ParseError, // Values ID, @@ -99,7 +106,6 @@ pub unsafe fn display_token(token: Token) -> *const c_char { match token { // Terminal Token::EOF => c!("end of file"), - Token::ParseError => c!("parse error"), // Values Token::ID => c!("identifier"), @@ -286,6 +292,9 @@ pub struct Lexer { pub string: *const c_char, pub int_number: u64, pub loc: Loc, + + pub next_result: Option, + pub next_point: Parse_Point, } pub unsafe fn new(input_path: *const c_char, input_stream: *const c_char, eof: *const c_char, historical: bool) -> Lexer { @@ -297,6 +306,7 @@ pub unsafe fn new(input_path: *const c_char, input_stream: *const c_char, eof: * l.parse_point.line_start = input_stream; l.parse_point.line_number = 1; l.historical = historical; + l.next_result = None; l } @@ -372,17 +382,19 @@ pub unsafe fn loc(l: *mut Lexer) -> Loc { } } -pub unsafe fn parse_string_into_storage(l: *mut Lexer, delim: c_char) -> Option<()> { +pub unsafe fn parse_string_into_storage(l: *mut Lexer, delim: c_char) -> Result { let escape_char = if !(*l).historical { '\\' } else {'*'} as c_char; + skip_char(l); + let mut result = Ok(()); while let Some(x) = peek_char(l) { match x { x if x == escape_char => { skip_char(l); let Some(x) = peek_char(l) else { - (*l).token = Token::ParseError; diagf!(loc(l), c!("LEXER ERROR: Unfinished escape sequence\n")); - return None; + result = Err(ErrorKind::Fatal); + break; }; let x = match x { x if x == '0' as c_char => '\0' as c_char, @@ -392,9 +404,9 @@ pub unsafe fn parse_string_into_storage(l: *mut Lexer, delim: c_char) -> Option< x if x == delim => delim, x if x == escape_char => escape_char, x => { - (*l).token = Token::ParseError; diagf!(loc(l), c!("LEXER ERROR: Unknown escape sequence starting with `%c`\n"), x as c_int); - return None; + result = Err(ErrorKind::Error); + continue; } }; da_append(&mut (*l).string_storage, x); @@ -407,7 +419,8 @@ pub unsafe fn parse_string_into_storage(l: *mut Lexer, delim: c_char) -> Option< }, } } - Some(()) + if !is_eof(l) { skip_char(l); } + result } #[repr(u8)] @@ -439,44 +452,71 @@ unsafe fn parse_digit(c: c_char, radix: Radix) -> Option { return None; } -unsafe fn parse_number(l: *mut Lexer, radix: Radix, report_point: Parse_Point) -> Option<()> { +unsafe fn parse_number(l: *mut Lexer, radix: Radix) -> Result { + let mut overflow = false; while let Some(x) = peek_char(l) { - let Some(d) = parse_digit(x, radix) else { + let Some(digit) = parse_digit(x, radix) else { break; }; + skip_char(l); let Some(r) = (*l).int_number.checked_mul(radix as u64) else { - (*l).parse_point = report_point; - diagf!(loc(l), c!("LEXER ERROR: Constant integer overflow\n")); - return None; + overflow = true; + continue; }; (*l).int_number = r; - let Some(r) = (*l).int_number.checked_add(d as u64) else { - (*l).parse_point = report_point; - diagf!(loc(l), c!("LEXER ERROR: Constant integer overflow.\n")); - return None; + let Some(r) = (*l).int_number.checked_add(digit as u64) else { + overflow = true; + continue; }; (*l).int_number = r; - skip_char(l); - }; + } + if (*l).historical && matches!(radix, Radix::Hex) { + diagf!((*l).loc, c!("LEXER ERROR: Hex literals are not available in historical mode\n")); + return Err(ErrorKind::Error); + } + if overflow { + diagf!((*l).loc, c!("LEXER ERROR: Integer literal overflow\n")); + return Err(ErrorKind::Error); + } + Ok(()) +} - return Some(()); +pub unsafe fn peek_token(l: *mut Lexer) -> Option { + let Some(result) = (*l).next_result else { + let saved_point = (*l).parse_point; + (*l).next_result = Some(get_token(l)); + (*l).next_point = (*l).parse_point; + (*l).parse_point = saved_point; + return peek_token(l); + }; + match result { + Ok(()) => Some((*l).token), + Err(ErrorKind::Error) => Some((*l).token), + Err(ErrorKind::Fatal) => None, + } } -pub unsafe fn get_token(l: *mut Lexer) -> Option<()> { +pub unsafe fn get_token(l: *mut Lexer) -> Result { + if let Some(result) = (*l).next_result { + (*l).next_result = None; + (*l).parse_point = (*l).next_point; + return result; + } + 'comments: loop { skip_whitespaces(l); let saved_point = (*l).parse_point; if skip_prefix(l, c!("//")) { + skip_until(l, c!("\n")); if (*l).historical { (*l).parse_point = saved_point; diagf!(loc(l), c!("LEXER ERROR: C++ style comments are not available in the historical mode.\n")); - (*l).token = Token::ParseError; - return None; + // TODO: Convert to recoverable error. The problem is we don't yet have a token at this point, so can't return non-fatal here. + return Err(ErrorKind::Fatal); } - skip_until(l, c!("\n")); continue 'comments; } @@ -492,15 +532,15 @@ pub unsafe fn get_token(l: *mut Lexer) -> Option<()> { let Some(x) = peek_char(l) else { (*l).token = Token::EOF; - return Some(()) + return Ok(()); }; - let puncs = if !(*l).historical { PUNCTS } else { HISTORICAL_PUNCTS }; - for i in 0..puncs.len() { - let (prefix, token) = (*puncs)[i]; + let puncts = if !(*l).historical { PUNCTS } else { HISTORICAL_PUNCTS }; + for i in 0..puncts.len() { + let (prefix, token) = (*puncts)[i]; if skip_prefix(l, prefix) { (*l).token = token; - return Some(()) + return Ok(()); } } @@ -522,89 +562,71 @@ pub unsafe fn get_token(l: *mut Lexer) -> Option<()> { let (id, token) = (*KEYWORDS)[i]; if strcmp((*l).string, id) == 0 { (*l).token = token; - return Some(()); + return Ok(()); } } - return Some(()) + return Ok(()); } - let start_of_number = (*l).parse_point; if skip_prefix(l, c!("0x")) { - if (*l).historical { - (*l).parse_point = start_of_number; - diagf!(loc(l), c!("LEXER ERROR: hex literals are not available in the historical mode.\n")); - (*l).token = Token::ParseError; - return None; - } - (*l).token = Token::IntLit; (*l).int_number = 0; - return parse_number(l, Radix::Hex, start_of_number); + return parse_number(l, Radix::Hex); } if skip_prefix(l, c!("0")) { (*l).token = Token::IntLit; (*l).int_number = 0; - return parse_number(l, Radix::Oct, start_of_number); + return parse_number(l, Radix::Oct); } if isdigit(x as c_int) != 0 { (*l).token = Token::IntLit; (*l).int_number = 0; - return parse_number(l, Radix::Dec, start_of_number); + return parse_number(l, Radix::Dec); } if x == '"' as c_char { - skip_char(l); (*l).token = Token::String; (*l).string_storage.count = 0; parse_string_into_storage(l, '"' as c_char)?; if is_eof(l) { diagf!(loc(l), c!("LEXER ERROR: Unfinished string literal\n")); diagf!((*l).loc, c!("LEXER INFO: Literal starts here\n")); - (*l).token = Token::ParseError; - return None; + return Err(ErrorKind::Fatal); } - skip_char(l); da_append(&mut (*l).string_storage, 0); (*l).string = (*l).string_storage.items; - return Some(()); + return Ok(()); } if x == '\'' as c_char { - skip_char(l); (*l).token = Token::CharLit; (*l).string_storage.count = 0; parse_string_into_storage(l, '\'' as c_char)?; if is_eof(l) { diagf!(loc(l), c!("LEXER ERROR: Unfinished character literal\n")); diagf!((*l).loc, c!("LEXER INFO: Literal starts here\n")); - (*l).token = Token::ParseError; - return None; + return Err(ErrorKind::Fatal); } - skip_char(l); if (*l).string_storage.count == 0 { diagf!((*l).loc, c!("LEXER ERROR: Empty character literal\n")); - (*l).token = Token::ParseError; - return None; + return Err(ErrorKind::Error); } if (*l).string_storage.count > 2 { // TODO: maybe we should allow more on targets with 64 bits? - // TODO: such error should not terminate the compilation diagf!((*l).loc, c!("LEXER ERROR: Character literal contains more than two characters\n")); - (*l).token = Token::ParseError; - return None; + return Err(ErrorKind::Error); } (*l).int_number = 0; for i in 0..(*l).string_storage.count { (*l).int_number *= 0x100; (*l).int_number += *(*l).string_storage.items.add(i) as u64; } - return Some(()); + return Ok(()); } diagf!((*l).loc, c!("LEXER ERROR: Unknown token %c\n"), *(*l).parse_point.current as c_int); - (*l).token = Token::ParseError; - None + Err(ErrorKind::Fatal) } diff --git a/tests.json b/tests.json index d854ee2f..21028848 100644 --- a/tests.json +++ b/tests.json @@ -1878,5 +1878,52 @@ "state": "Enabled", "comment": "" } + }, + "lexer_errors": { + "gas-x86_64-windows": { + "expected_stdout": "", + "state": "Disabled", + "comment": "TODO: Expected to not compile." + }, + "gas-x86_64-linux": { + "expected_stdout": "", + "state": "Disabled", + "comment": "TODO: Expected to not compile." + }, + "gas-x86_64-darwin": { + "expected_stdout": "", + "state": "Disabled", + "comment": "TODO: Expected to not compile." + }, + "gas-aarch64-linux": { + "expected_stdout": "", + "state": "Disabled", + "comment": "TODO: Expected to not compile." + }, + "gas-aarch64-darwin": { + "expected_stdout": "", + "state": "Disabled", + "comment": "TODO: Expected to not compile." + }, + "uxn": { + "expected_stdout": "", + "state": "Disabled", + "comment": "TODO: Expected to not compile." + }, + "6502": { + "expected_stdout": "", + "state": "Disabled", + "comment": "TODO: Expected to not compile." + }, + "fasm-x86_64-windows": { + "expected_stdout": "", + "state": "Disabled", + "comment": "TODO: Expected to not compile." + }, + "fasm-x86_64-linux": { + "expected_stdout": "", + "state": "Disabled", + "comment": "TODO: Expected to not compile." + } } -} +} \ No newline at end of file diff --git a/tests/lexer_errors.b b/tests/lexer_errors.b new file mode 100644 index 00000000..29873693 --- /dev/null +++ b/tests/lexer_errors.b @@ -0,0 +1,14 @@ +// TODO: Enable in test suite once negative testing is supported. + +main() { + // Invalid character literals + ''; + 'EEE'; + + // Literal overflow + 0xfffffffffffffffffffff; + 07777777777777777777777; + + // Unknown escape sequences + "\foo\bar\baz"; +}