diff --git a/src/yaml/parser.mojo b/src/yaml/parser.mojo
index 4c762e3..35bd439 100644
--- a/src/yaml/parser.mojo
+++ b/src/yaml/parser.mojo
@@ -32,8 +32,9 @@ allowing it to determine when to stop collecting items for a mapping or sequence
 - Scalars: Convert tokens to appropriate YamlValue types
 """
 
-from collections import List
+from collections import List, Dict
 from .lexer import Token, TokenKind, Position
+from .value import YamlValue
 
 
 struct Parser:
@@ -55,13 +56,13 @@ struct Parser:
     var tokens: List[Token]
     var pos: Int  # Current position in token stream
 
-    fn __init__(out self, tokens: List[Token]):
+    fn __init__(out self, var tokens: List[Token]):
         """Initialise parser with token stream.
 
         Args:
             tokens: List of tokens from lexer.
         """
-        self.tokens = tokens
+        self.tokens = tokens^
         self.pos = 0
 
     fn current(self) -> Token:
@@ -72,7 +73,7 @@ struct Parser:
         """
         if self.pos >= len(self.tokens):
             return Token(TokenKind.EOF(), "", Position(0, 0))
-        return self.tokens[self.pos]
+        return self.tokens[self.pos].copy()
 
     fn peek(self, offset: Int = 1) -> Token:
         """Look ahead at token without consuming it.
@@ -86,7 +87,7 @@ struct Parser:
         var peek_pos = self.pos + offset
         if peek_pos >= len(self.tokens):
             return Token(TokenKind.EOF(), "", Position(0, 0))
-        return self.tokens[peek_pos]
+        return self.tokens[peek_pos].copy()
 
     fn advance(mut self) -> Token:
         """Consume and return current token.
@@ -97,9 +98,9 @@ struct Parser:
         if self.pos >= len(self.tokens):
             return Token(TokenKind.EOF(), "", Position(0, 0))
 
-        var token = self.tokens[self.pos]
+        var token = self.tokens[self.pos].copy()
         self.pos += 1
-        return token
+        return token^
 
     fn expect(mut self, expected: TokenKind) raises -> Token:
         """Consume token and verify it matches expected kind.
@@ -119,3 +120,211 @@ struct Parser:
                 ", column " + String(token.pos.column))
 
         return self.advance()
+
+    fn skip_newlines(mut self):
+        """Skip over NEWLINE tokens."""
+        while self.current().kind == TokenKind.NEWLINE():
+            _ = self.advance()
+
+    fn parse(mut self) raises -> YamlValue:
+        """Parse the token stream into a YamlValue.
+
+        Returns:
+            Root YamlValue (typically a mapping or sequence).
+
+        Raises:
+            Error: If parsing fails.
+        """
+        self.skip_newlines()
+
+        if self.current().kind == TokenKind.EOF():
+            # Empty document
+            return YamlValue.null()
+
+        return self.parse_value()
+
+    fn parse_value(mut self) raises -> YamlValue:
+        """Parse a value (scalar, mapping, or sequence).
+
+        Dispatches to appropriate parsing method based on token type.
+
+        Returns:
+            Parsed YamlValue.
+        """
+        var token = self.current()
+
+        # Check if this is a sequence (starts with dash)
+        if token.kind == TokenKind.DASH():
+            return self.parse_sequence()
+
+        # Check if this is a mapping (key followed by colon)
+        # Look for pattern: STRING/KEY COLON
+        if token.kind == TokenKind.STRING():
+            var next_token = self.peek()
+            if next_token.kind == TokenKind.COLON():
+                return self.parse_mapping()
+
+        # Otherwise it's a scalar
+        return self.parse_scalar()
+
+    fn parse_scalar(mut self) raises -> YamlValue:
+        """Parse a scalar value.
+
+        Returns:
+            YamlValue containing the scalar.
+
+        Raises:
+            Error: If the current token is not a scalar token.
+        """
+        var token = self.advance()
+
+        if token.kind == TokenKind.NULL():
+            return YamlValue.null()
+        elif token.kind == TokenKind.BOOLEAN():
+            # Compare case-insensitively so spellings such as "True" or
+            # "YES" are not silently read as False.
+            var text = token.value.lower()
+            if text == "true" or text == "yes" or text == "on":
+                return YamlValue.bool(True)
+            else:
+                return YamlValue.bool(False)
+        elif token.kind == TokenKind.INTEGER():
+            return YamlValue.integer(atol(token.value))
+        elif token.kind == TokenKind.FLOAT():
+            return YamlValue.float(atof(token.value))
+        elif token.kind == TokenKind.STRING():
+            return YamlValue.string(token.value)
+        else:
+            raise Error("Unexpected token kind for scalar at line " + String(token.pos.line))
+
+    fn parse_mapping(mut self) raises -> YamlValue:
+        """Parse a mapping (dictionary).
+
+        A key whose colon is followed by nothing (end of input, a DEDENT, or
+        a sibling key on the next line) is stored with a null value.
+
+        Returns:
+            YamlValue containing mapping.
+
+        Raises:
+            Error: If a key is not a STRING token or its colon is missing.
+        """
+        var result = Dict[String, YamlValue]()
+
+        # Keep parsing key:value pairs until we hit DEDENT or EOF
+        while True:
+            self.skip_newlines()
+
+            var token = self.current()
+
+            # Stop at DEDENT or EOF
+            if token.kind == TokenKind.DEDENT() or token.kind == TokenKind.EOF():
+                break
+
+            # Check for dash (sequence at same level - stop here)
+            if token.kind == TokenKind.DASH():
+                break
+
+            # Parse key
+            if token.kind != TokenKind.STRING():
+                raise Error("Expected key at line " + String(token.pos.line))
+
+            var key = token.value
+            _ = self.advance()
+
+            # Expect colon
+            _ = self.expect(TokenKind.COLON())
+
+            # Note whether the key's own line continues after the colon; once
+            # newlines are skipped, a same-line value and a sibling key on the
+            # next line both show up as a plain STRING token.
+            var same_line = self.current().kind != TokenKind.NEWLINE()
+
+            self.skip_newlines()
+
+            var next_token = self.current()
+
+            if next_token.kind == TokenKind.INDENT():
+                # Value is on the next line (indented)
+                _ = self.advance()
+                var value = self.parse_value()
+                result[key] = value^
+
+                # Consume DEDENT after indented value
+                if self.current().kind == TokenKind.DEDENT():
+                    _ = self.advance()
+            elif next_token.kind == TokenKind.EOF() or next_token.kind == TokenKind.DEDENT():
+                # Nothing left in this block: the key has no value
+                result[key] = YamlValue.null()
+            elif same_line or next_token.kind == TokenKind.DASH():
+                # Value on same line, or a sequence written at the key's own
+                # indentation level
+                var value = self.parse_value()
+                result[key] = value^
+            else:
+                # The next line starts a sibling key, so this key is empty
+                result[key] = YamlValue.null()
+
+            self.skip_newlines()
+
+        return YamlValue.mapping(result^)
+
+    fn parse_sequence(mut self) raises -> YamlValue:
+        """Parse a sequence (list).
+
+        A dash with nothing after it (end of input, a DEDENT, or the next
+        dash on the following line) yields a null item.
+
+        Returns:
+            YamlValue containing sequence.
+
+        Raises:
+            Error: If an item cannot be parsed.
+        """
+        var result = List[YamlValue]()
+
+        # Keep parsing list items until we hit DEDENT or EOF
+        while True:
+            self.skip_newlines()
+
+            var token = self.current()
+
+            # Stop at DEDENT or EOF
+            if token.kind == TokenKind.DEDENT() or token.kind == TokenKind.EOF():
+                break
+
+            # Expect dash
+            if token.kind != TokenKind.DASH():
+                break
+
+            _ = self.advance()  # consume dash
+
+            # Note whether the dash's own line continues; once newlines are
+            # skipped, an inline nested sequence and the next sibling item
+            # both show up as a DASH token.
+            var same_line = self.current().kind != TokenKind.NEWLINE()
+
+            self.skip_newlines()
+
+            var next_token = self.current()
+
+            if next_token.kind == TokenKind.INDENT():
+                # Item is on the next line (indented)
+                _ = self.advance()
+                var item = self.parse_value()
+                result.append(item^)
+
+                # Consume DEDENT after indented item
+                if self.current().kind == TokenKind.DEDENT():
+                    _ = self.advance()
+            elif same_line and next_token.kind != TokenKind.EOF() and next_token.kind != TokenKind.DEDENT():
+                # Item on same line
+                var item = self.parse_value()
+                result.append(item^)
+            else:
+                # Bare dash: the item is empty
+                result.append(YamlValue.null())
+
+            self.skip_newlines()
+
+        return YamlValue.sequence(result^)
diff --git a/src/yaml/value.mojo b/src/yaml/value.mojo
index 4809fad..dc12c79 100644
--- a/src/yaml/value.mojo
+++ b/src/yaml/value.mojo
@@ -123,6 +123,42 @@ struct YamlValue(Copyable, Movable):
         self.sequence_value = List[YamlValue]()
         self.mapping_value = value^
 
+    # Static factory methods
+    @staticmethod
+    fn null() -> YamlValue:
+        """Create a null value."""
+        return YamlValue()
+
+    @staticmethod
+    fn bool(value: Bool) -> YamlValue:
+        """Create a boolean value."""
+        return YamlValue(value)
+
+    @staticmethod
+    fn integer(value: Int) -> YamlValue:
+        """Create an integer value."""
+        return YamlValue(value)
+
+    @staticmethod
+    fn float(value: Float64) -> YamlValue:
+        """Create a float value."""
+        return YamlValue(value)
+
+    @staticmethod
+    fn string(value: String) -> YamlValue:
+        """Create a string value."""
+        return YamlValue(value)
+
+    @staticmethod
+    fn sequence(var value: List[YamlValue]) -> YamlValue:
+        """Create a sequence value."""
+        return YamlValue(value^)
+
+    @staticmethod
+    fn mapping(var value: Dict[String, YamlValue]) -> YamlValue:
+        """Create a mapping value."""
+        return YamlValue(value^)
+
     # Type checking methods
     fn is_null(self) -> Bool:
         """Check if value is null."""
@@ -208,14 +244,18 @@ struct YamlValue(Copyable, Movable):
         return seq_copy^
 
     fn as_mapping(self) raises -> Dict[String, YamlValue]:
-        """Get mapping (dict) value.
+        """Get mapping (dict) value (returns a copy).
 
         Raises:
             Error: If value is not a mapping.
         """
         if not self.is_mapping():
             raise Error("Value is not a mapping")
-        return self.mapping_value
+        # Return a copy
+        var map_copy = Dict[String, YamlValue]()
+        for entry in self.mapping_value.items():
+            map_copy[entry.key] = entry.value.copy()
+        return map_copy^
 
     fn get(self, key: String) raises -> YamlValue:
         """Get value by key from mapping (returns a copy).
diff --git a/tests/test_parser_basic.mojo b/tests/test_parser_basic.mojo
new file mode 100644
index 0000000..6d90901
--- /dev/null
+++ b/tests/test_parser_basic.mojo
@@ -0,0 +1,143 @@
+"""Tests for YAML parser basic functionality."""
+
+from testing import assert_equal, assert_true, TestSuite
+from yaml.lexer import Lexer
+from yaml.parser import Parser
+from yaml.value import YamlValue
+
+
+def test_parse_empty():
+    """Test parsing empty input."""
+    var lexer = Lexer("")
+    var tokens = lexer.tokenize()
+    var parser = Parser(tokens^)
+    var result = parser.parse()
+
+    assert_true(result.is_null())
+
+
+def test_parse_simple_string():
+    """Test parsing a simple string value."""
+    var lexer = Lexer("hello")
+    var tokens = lexer.tokenize()
+    var parser = Parser(tokens^)
+    var result = parser.parse()
+
+    assert_true(result.is_string())
+    assert_equal(result.as_string(), "hello")
+
+
+def test_parse_integer():
+    """Test parsing integer value."""
+    var lexer = Lexer("42")
+    var tokens = lexer.tokenize()
+    var parser = Parser(tokens^)
+    var result = parser.parse()
+
+    assert_true(result.is_int())
+    assert_equal(result.as_int(), 42)
+
+
+def test_parse_float():
+    """Test parsing float value."""
+    var lexer = Lexer("3.14")
+    var tokens = lexer.tokenize()
+    var parser = Parser(tokens^)
+    var result = parser.parse()
+
+    assert_true(result.is_float())
+    # Note: Float comparison with tolerance
+    var val = result.as_float()
+    assert_true(val > 3.13 and val < 3.15)
+
+
+def test_parse_boolean_true():
+    """Test parsing boolean true."""
+    var lexer = Lexer("true")
+    var tokens = lexer.tokenize()
+    var parser = Parser(tokens^)
+    var result = parser.parse()
+
+    assert_true(result.is_bool())
+    assert_equal(result.as_bool(), True)
+
+
+def test_parse_boolean_false():
+    """Test parsing boolean false."""
+    var lexer = Lexer("false")
+    var tokens = lexer.tokenize()
+    var parser = Parser(tokens^)
+    var result = parser.parse()
+
+    assert_true(result.is_bool())
+    assert_equal(result.as_bool(), False)
+
+
+def test_parse_null():
+    """Test parsing null value."""
+    var lexer = Lexer("null")
+    var tokens = lexer.tokenize()
+    var parser = Parser(tokens^)
+    var result = parser.parse()
+
+    assert_true(result.is_null())
+
+
+def test_parse_simple_mapping():
+    """Test parsing a simple key-value mapping."""
+    var lexer = Lexer("name: Alice")
+    var tokens = lexer.tokenize()
+    var parser = Parser(tokens^)
+    var result = parser.parse()
+
+    assert_true(result.is_mapping())
+    var mapping = result.as_mapping()
+    assert_true("name" in mapping)
+    assert_equal(mapping["name"].as_string(), "Alice")
+
+
+def test_parse_multiple_keys():
+    """Test parsing multiple key-value pairs."""
+    var lexer = Lexer("name: Alice\nage: 30")
+    var tokens = lexer.tokenize()
+    var parser = Parser(tokens^)
+    var result = parser.parse()
+
+    assert_true(result.is_mapping())
+    var mapping = result.as_mapping()
+    assert_equal(len(mapping), 2)
+    assert_equal(mapping["name"].as_string(), "Alice")
+    assert_equal(mapping["age"].as_int(), 30)
+
+
+def test_parse_empty_mapping_value():
+    """Test that a key without a value does not swallow the next key."""
+    var lexer = Lexer("first:\nsecond: 2")
+    var tokens = lexer.tokenize()
+    var parser = Parser(tokens^)
+    var result = parser.parse()
+
+    assert_true(result.is_mapping())
+    var mapping = result.as_mapping()
+    assert_equal(len(mapping), 2)
+    assert_true(mapping["first"].is_null())
+    assert_equal(mapping["second"].as_int(), 2)
+
+
+def test_parse_simple_sequence():
+    """Test parsing a simple list."""
+    var lexer = Lexer("- apple\n- banana")
+    var tokens = lexer.tokenize()
+    var parser = Parser(tokens^)
+    var result = parser.parse()
+
+    assert_true(result.is_sequence())
+    var seq = result.as_sequence()
+    assert_equal(len(seq), 2)
+    assert_equal(seq[0].as_string(), "apple")
+    assert_equal(seq[1].as_string(), "banana")
+
+
+def main():
+    """Run all parser basic tests."""
+    TestSuite.discover_tests[__functions_in_module()]().run()