From 7a721d5ab7cae2296c1ebd1aa2d1d413081c72dd Mon Sep 17 00:00:00 2001 From: Alexandr Starovoytov Date: Mon, 12 May 2025 14:39:48 +0300 Subject: [PATCH 1/3] Fix --- parser_edsl.py | 7 +++++-- tests/earley_test.py | 23 ++++++++++++++++++++++- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/parser_edsl.py b/parser_edsl.py index efb8fc5..4ae9448 100644 --- a/parser_edsl.py +++ b/parser_edsl.py @@ -1273,7 +1273,10 @@ def parse(self, tokens): # print(next_sym) if isinstance(next_sym, NonTerminal): # print('a', next_sym) - self.predict(state, pos, tokens[pos].pos, states) + coords = Fragment(Position(), Position()) + if len(tokens) > 0: + coords = tokens[min(pos, len(tokens)-1)].pos + self.predict(state, pos, coords, states) elif pos < len(tokens): self.scan(state, tokens[pos], pos) else: @@ -1291,7 +1294,7 @@ def parse(self, tokens): expected="", unexpected="", _text=f"Неопределенная грамматика: найдено {len(final_states)} путей разбора") - if final_states: + if final_states and final_states[0].attrs: return final_states[0].attrs[0] def print_chart(self): diff --git a/tests/earley_test.py b/tests/earley_test.py index d791c4d..439e006 100644 --- a/tests/earley_test.py +++ b/tests/earley_test.py @@ -22,4 +22,25 @@ def test_undefiened_grammar(): parser = pe.Parser(expr) with pytest.raises(pe.ParseError): - result = parser.parse_earley("42+3-5") \ No newline at end of file + result = parser.parse_earley("42+3-5") + + +def test_epsilon_rule_empty_grammar(): + expr = pe.NonTerminal('expr') + expr |= () + + parser = pe.Parser(expr) + + assert parser.parse_earley("") == None + + +def test_epsilon_rule(): + expr = pe.NonTerminal('expr') + expr |= ('a', expr, lambda _: None) + expr |= () + + parser = pe.Parser(expr) + + assert parser.parse_earley("aaa") == None + + From 90bd7cd1527f2dd9ac6c715ed5f7ff538bfa7c97 Mon Sep 17 00:00:00 2001 From: Alexandr Starovoytov Date: Tue, 13 May 2025 13:41:29 +0300 Subject: [PATCH 2/3] Add test --- tests/earley_test.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/earley_test.py b/tests/earley_test.py index 439e006..42118e8 100644 --- a/tests/earley_test.py +++ b/tests/earley_test.py @@ -44,3 +44,15 @@ def test_epsilon_rule(): assert parser.parse_earley("aaa") == None +def test_epsilon_rule_attrs(): + NAr = pe.NonTerminal("NAr") + ARRAY = pe.Terminal("array", "array", lambda _: None, priority=10) + + NAr |= ARRAY, NAr, lambda x: x + 1 + NAr |= lambda: 0 + + p = pe.Parser(NAr) + p.add_skipped_domain("\\s") + + assert p.parse_earley("array array") == 1 + assert p.parse_earley(" ") == 0 From c53c7c06d874c38018286f33891dfa3920e0c9d5 Mon Sep 17 00:00:00 2001 From: Alexandr Starovoytov Date: Mon, 19 May 2025 13:22:10 +0300 Subject: [PATCH 3/3] Fix --- parser_edsl.py | 54 ++++++++++++++++++++++++++++++++++++++++---- tests/earley_test.py | 4 ++-- 2 files changed, 51 insertions(+), 7 deletions(-) diff --git a/parser_edsl.py b/parser_edsl.py index 4ae9448..394103c 100644 --- a/parser_edsl.py +++ b/parser_edsl.py @@ -1155,15 +1155,55 @@ def predict(self, state, pos, coords, states): next_sym = state.next_symbol() if isinstance(next_sym, NonTerminal): for prod, fold, _ in next_sym.enum_rules(): - new_state = EarleyState((next_sym, tuple(prod), fold), + if not prod: + attrs = state.attrs + if not attrs: + attrs = [] + res_coord = Fragment(coords[0].start, coords[-1].following) + attrs = attrs+[fold.callee([], coords, res_coord)] + new_state = EarleyState(state.rule, + state.dot + 1, + state.start, + pos, + attrs, + coords) + + if new_state.is_complete(): + _, _, fold1 = new_state.rule + coords = new_state.coords + res_coord = Fragment(coords[0].start, coords[-1].following) + res_attr = fold1.callee(attrs, coords, res_coord) + new_state = dataclasses.replace(new_state, attrs=[res_attr]) + + if new_state not in self.chart[pos] and new_state not in states: + states.append(new_state) + self.chart[pos].add(new_state) + + new_state = EarleyState((next_sym, tuple(prod), fold), 0, pos, pos, attrs=[], coords=coords) - if new_state not in self.chart[pos] and new_state not in states: - states.append(new_state) - self.predict(new_state, pos, coords, states) + if new_state.is_complete(): + _, _, fold = new_state.rule + coords = new_state.coords + res_coord = Fragment(coords[0].start, coords[-1].following) + res_attr = fold.callee([], coords, res_coord) + new_state = dataclasses.replace(new_state, attrs=[res_attr]) + + if new_state not in self.chart[pos] and new_state not in states: + states.append(new_state) + else: + new_state = EarleyState((next_sym, tuple(prod), fold), + 0, + pos, + pos, + attrs=[], + coords=coords) + if new_state not in self.chart[pos] and new_state not in states: + states.append(new_state) + self.predict(new_state, pos, coords, states) def scan(self, state, token, pos): next_sym = state.next_symbol() @@ -1195,6 +1235,9 @@ def scan(self, state, token, pos): self.chart[pos + 1].add(new_state) def complete(self, state: EarleyState, pos, states: list[EarleyState]): + _, rhs, _ = state.rule + if not rhs: + return for prev_state in self.chart[state.start]: next_sym = prev_state.next_symbol() if next_sym == state.rule[0]: @@ -1289,12 +1332,13 @@ def parse(self, tokens): if (state.rule[0] == self.grammar.start and state.is_complete() and state.start == 0)] + if len(final_states) > 1: raise ParseError(pos=Position(), expected="", unexpected="", _text=f"Неопределенная грамматика: найдено {len(final_states)} путей разбора") - if final_states and final_states[0].attrs: + if final_states: return final_states[0].attrs[0] def print_chart(self): diff --git a/tests/earley_test.py b/tests/earley_test.py index 42118e8..83c80a9 100644 --- a/tests/earley_test.py +++ b/tests/earley_test.py @@ -37,7 +37,7 @@ def test_epsilon_rule_empty_grammar(): def test_epsilon_rule(): expr = pe.NonTerminal('expr') expr |= ('a', expr, lambda _: None) - expr |= () + expr |= (lambda: None) parser = pe.Parser(expr) @@ -54,5 +54,5 @@ def test_epsilon_rule_attrs(): p = pe.Parser(NAr) p.add_skipped_domain("\\s") - assert p.parse_earley("array array") == 1 + assert p.parse_earley("array array") == 2 assert p.parse_earley(" ") == 0