diff --git a/parser_edsl.py b/parser_edsl.py index efb8fc5..394103c 100644 --- a/parser_edsl.py +++ b/parser_edsl.py @@ -1155,15 +1155,55 @@ def predict(self, state, pos, coords, states): next_sym = state.next_symbol() if isinstance(next_sym, NonTerminal): for prod, fold, _ in next_sym.enum_rules(): - new_state = EarleyState((next_sym, tuple(prod), fold), + if not prod: + attrs = state.attrs + if not attrs: + attrs = [] + res_coord = Fragment(coords[0].start, coords[-1].following) + attrs = attrs+[fold.callee([], coords, res_coord)] + new_state = EarleyState(state.rule, + state.dot + 1, + state.start, + pos, + attrs, + coords) + + if new_state.is_complete(): + _, _, fold1 = new_state.rule + coords = new_state.coords + res_coord = Fragment(coords[0].start, coords[-1].following) + res_attr = fold1.callee(attrs, coords, res_coord) + new_state = dataclasses.replace(new_state, attrs=[res_attr]) + + if new_state not in self.chart[pos] and new_state not in states: + states.append(new_state) + self.chart[pos].add(new_state) + + new_state = EarleyState((next_sym, tuple(prod), fold), 0, pos, pos, attrs=[], coords=coords) - if new_state not in self.chart[pos] and new_state not in states: - states.append(new_state) - self.predict(new_state, pos, coords, states) + if new_state.is_complete(): + _, _, fold = new_state.rule + coords = new_state.coords + res_coord = Fragment(coords[0].start, coords[-1].following) + res_attr = fold.callee([], coords, res_coord) + new_state = dataclasses.replace(new_state, attrs=[res_attr]) + + if new_state not in self.chart[pos] and new_state not in states: + states.append(new_state) + else: + new_state = EarleyState((next_sym, tuple(prod), fold), + 0, + pos, + pos, + attrs=[], + coords=coords) + if new_state not in self.chart[pos] and new_state not in states: + states.append(new_state) + self.predict(new_state, pos, coords, states) def scan(self, state, token, pos): next_sym = state.next_symbol() @@ -1195,6 +1235,9 @@ def scan(self, state, token, pos): self.chart[pos + 1].add(new_state) def complete(self, state: EarleyState, pos, states: list[EarleyState]): + _, rhs, _ = state.rule + if not rhs: + return for prev_state in self.chart[state.start]: next_sym = prev_state.next_symbol() if next_sym == state.rule[0]: @@ -1273,7 +1316,10 @@ def parse(self, tokens): # print(next_sym) if isinstance(next_sym, NonTerminal): # print('a', next_sym) - self.predict(state, pos, tokens[pos].pos, states) + coords = Fragment(Position(), Position()) + if len(tokens) > 0: + coords = tokens[min(pos, len(tokens)-1)].pos + self.predict(state, pos, coords, states) elif pos < len(tokens): self.scan(state, tokens[pos], pos) else: @@ -1286,6 +1332,7 @@ def parse(self, tokens): if (state.rule[0] == self.grammar.start and state.is_complete() and state.start == 0)] + if len(final_states) > 1: raise ParseError(pos=Position(), expected="", diff --git a/tests/earley_test.py b/tests/earley_test.py index d791c4d..83c80a9 100644 --- a/tests/earley_test.py +++ b/tests/earley_test.py @@ -22,4 +22,37 @@ def test_undefiened_grammar(): parser = pe.Parser(expr) with pytest.raises(pe.ParseError): - result = parser.parse_earley("42+3-5") \ No newline at end of file + result = parser.parse_earley("42+3-5") + + +def test_epsilon_rule_empty_grammar(): + expr = pe.NonTerminal('expr') + expr |= () + + parser = pe.Parser(expr) + + assert parser.parse_earley("") == None + + +def test_epsilon_rule(): + expr = pe.NonTerminal('expr') + expr |= ('a', expr, lambda _: None) + expr |= (lambda: None) + + parser = pe.Parser(expr) + + assert parser.parse_earley("aaa") == None + + +def test_epsilon_rule_attrs(): + NAr = pe.NonTerminal("NAr") + ARRAY = pe.Terminal("array", "array", lambda _: None, priority=10) + + NAr |= ARRAY, NAr, lambda x: x + 1 + NAr |= lambda: 0 + + p = pe.Parser(NAr) + p.add_skipped_domain("\\s") + + assert p.parse_earley("array array") == 2 + assert p.parse_earley(" ") == 0