From f967892413e538ea109da2666e8daee3b39363de Mon Sep 17 00:00:00 2001 From: bygu4 Date: Sat, 19 Oct 2024 15:39:27 +0300 Subject: [PATCH 01/30] add cfg annotations --- pyformlang/cfg/cfg.py | 184 ++++++++++++++------------- pyformlang/cfg/cfg_object.py | 2 +- pyformlang/cfg/cyk_table.py | 2 +- pyformlang/cfg/epsilon.py | 2 +- pyformlang/cfg/pda_object_creator.py | 43 ++++--- pyformlang/cfg/production.py | 20 ++- pyformlang/cfg/set_queue.py | 10 +- pyformlang/cfg/terminal.py | 12 +- pyformlang/cfg/utils.py | 6 +- pyformlang/cfg/utils_cfg.py | 9 +- pyformlang/cfg/variable.py | 14 +- 11 files changed, 169 insertions(+), 135 deletions(-) diff --git a/pyformlang/cfg/cfg.py b/pyformlang/cfg/cfg.py index 50af3fc..5b54470 100644 --- a/pyformlang/cfg/cfg.py +++ b/pyformlang/cfg/cfg.py @@ -1,19 +1,19 @@ """ A context free grammar """ -import string + +from string import ascii_uppercase from copy import deepcopy -from typing import AbstractSet, Iterable, Tuple, Dict, Any, Union +from typing import Dict, List, Iterable, Set, \ + AbstractSet, Tuple, Optional, Any, Union import networkx as nx -# pylint: disable=cyclic-import -from pyformlang import pda -from pyformlang.finite_automaton import DeterministicFiniteAutomaton -# pylint: disable=cyclic-import -from pyformlang.pda import cfg_variable_converter as cvc +from pyformlang.finite_automaton import DeterministicFiniteAutomaton, State as FAState +from pyformlang.pda import State as PDAState, Epsilon as PDAEpsilon, PDA +from pyformlang.pda.cfg_variable_converter import CFGVariableConverter +from pyformlang.regular_expression import Regex from .cfg_object import CFGObject -# pylint: disable=cyclic-import -from .cyk_table import CYKTable, DerivationDoesNotExist -from .epsilon import Epsilon +from .cyk_table import CYKTable, CYKNode, DerivationDoesNotExist +from .epsilon import Epsilon as CFGEpsilon from .pda_object_creator import PDAObjectCreator from .production import Production from .terminal import Terminal @@ -30,7 +30,7 @@ class NotParsableException(Exception): """When the grammar cannot be parsed (parser not powerful enough)""" -def is_special_text(text): +def is_special_text(text: str) -> bool: """ Check if the input is given an explicit type """ return len(text) > 5 and \ (text[0:5] == '"VAR:' or text[0:5] == '"TER:') and \ @@ -55,43 +55,41 @@ class CFG: # pylint: disable=too-many-instance-attributes def __init__(self, - variables: AbstractSet[Union[Variable, str]] = None, - terminals: AbstractSet[Union[Terminal, str]] = None, - start_symbol: Union[Variable, str] = None, - productions: Iterable[Production] = None): + variables: AbstractSet[Any] = None, + terminals: AbstractSet[Any] = None, + start_symbol: Any = None, + productions: Iterable[Production] = None) -> None: if variables is not None: variables = {to_variable(x) for x in variables} - self._variables = variables or set() - self._variables = set(self._variables) + self._variables: Set[Variable] = variables or set() if terminals is not None: terminals = {to_terminal(x) for x in terminals} - self._terminals = terminals or set() - self._terminals = set(self._terminals) + self._terminals: Set[Terminal] = terminals or set() if start_symbol is not None: start_symbol = to_variable(start_symbol) - self._start_symbol = start_symbol - if start_symbol is not None: self._variables.add(start_symbol) - self._productions = productions or set() - self._productions = self._productions + self._start_symbol: Optional[Variable] = start_symbol + if productions is not None: + productions = set(productions) + self._productions: Set[Production] = productions or set() for production in self._productions: self.__initialize_production_in_cfg(production) - self._normal_form = None - self._generating_symbols = None - self._nullable_symbols = None - self._impacts = None - self._remaining_lists = None - self._added_impacts = None - - def __initialize_production_in_cfg(self, production): + self._normal_form: Optional[CFG] = None + self._generating_symbols: Set[CFGObject] = set() + self._nullable_symbols: Set[CFGObject] = set() + self._impacts: Dict[CFGObject, List[Tuple[CFGObject, int]]] = {} + self._remaining_lists: Dict[CFGObject, List[int]] = {} + self._added_impacts: Set[CFGObject] = set() + + def __initialize_production_in_cfg(self, production: Production) -> None: self._variables.add(production.head) for cfg_object in production.body: if isinstance(cfg_object, Terminal): self._terminals.add(cfg_object) - else: + elif isinstance(cfg_object, Variable): self._variables.add(cfg_object) - def get_generating_symbols(self) -> AbstractSet[CFGObject]: + def get_generating_symbols(self) -> Set[CFGObject]: """ Gives the objects which are generating in the CFG Returns @@ -99,14 +97,15 @@ def get_generating_symbols(self) -> AbstractSet[CFGObject]: generating_symbols : set of :class:`~pyformlang.cfg.CFGObject` The generating symbols of the CFG """ - if self._generating_symbols is None: + if not self._generating_symbols: self._generating_symbols = self._get_generating_or_nullable(False) return self._generating_symbols - def _get_generating_or_nullable(self, nullable=False): + def _get_generating_or_nullable(self, nullable: bool = False) \ + -> Set[CFGObject]: """ Merge of nullable and generating """ - to_process = [Epsilon()] - g_symbols = {Epsilon()} + to_process: List[CFGObject] = [CFGEpsilon()] + g_symbols: Set[CFGObject] = {CFGEpsilon()} self._set_impacts_and_remaining_lists() @@ -135,11 +134,11 @@ def _get_generating_or_nullable(self, nullable=False): # Fix modifications for symbol_impact, index_impact in processed_with_modification: self._remaining_lists[symbol_impact][index_impact] += 1 - g_symbols.remove(Epsilon()) + g_symbols.remove(CFGEpsilon()) return g_symbols - def _set_impacts_and_remaining_lists(self): - if self._impacts is not None: + def _set_impacts_and_remaining_lists(self) -> None: + if self._impacts: return self._added_impacts = set() self._remaining_lists = {} @@ -157,7 +156,7 @@ def _set_impacts_and_remaining_lists(self): self._impacts.setdefault(symbol, []).append( (head, index_impact)) - def generate_epsilon(self): + def generate_epsilon(self) -> bool: """ Whether the grammar generates epsilon or not Returns @@ -165,8 +164,8 @@ def generate_epsilon(self): generate_epsilon : bool Whether epsilon is generated or not by the CFG """ - generate_epsilon = {Epsilon()} - to_process = [Epsilon()] + generate_epsilon: Set[CFGObject] = {CFGEpsilon()} + to_process: List[CFGObject] = [CFGEpsilon()] self._set_impacts_and_remaining_lists() @@ -193,7 +192,7 @@ def generate_epsilon(self): to_process.append(symbol_impact) return False - def get_reachable_symbols(self) -> AbstractSet[CFGObject]: + def get_reachable_symbols(self) -> Set[CFGObject]: """ Gives the objects which are reachable in the CFG Returns @@ -207,7 +206,7 @@ def get_reachable_symbols(self) -> AbstractSet[CFGObject]: for production in self._productions: temp = reachable_transition_d.setdefault(production.head, []) for symbol in production.body: - if not isinstance(symbol, Epsilon): + if not isinstance(symbol, CFGEpsilon): temp.append(symbol) to_process = [self._start_symbol] while to_process: @@ -240,7 +239,7 @@ def remove_useless_symbols(self) -> "CFG": new_ter = new_ter.intersection(reachables) return CFG(new_var, new_ter, self._start_symbol, productions) - def get_nullable_symbols(self) -> AbstractSet[CFGObject]: + def get_nullable_symbols(self) -> Set[CFGObject]: """ Gives the objects which are nullable in the CFG Returns @@ -248,7 +247,7 @@ def get_nullable_symbols(self) -> AbstractSet[CFGObject]: nullable_symbols : set of :class:`~pyformlang.cfg.CFGObject` The nullable symbols of the CFG """ - if self._nullable_symbols is None: + if not self._nullable_symbols: self._nullable_symbols = self._get_generating_or_nullable(True) return self._nullable_symbols @@ -270,7 +269,7 @@ def remove_epsilon(self) -> "CFG": self._start_symbol, new_productions) - def get_unit_pairs(self) -> AbstractSet[Tuple[Variable, Variable]]: + def get_unit_pairs(self) -> Set[Tuple[Variable, Variable]]: """ Finds all the unit pairs Returns @@ -318,7 +317,7 @@ def eliminate_unit_productions(self) -> "CFG": self._start_symbol, productions) - def _get_productions_with_only_single_terminals(self): + def _get_productions_with_only_single_terminals(self) -> List[Production]: """ Remove the terminals involved in a body of length more than 1 """ term_to_var = {} new_productions = [] @@ -345,7 +344,8 @@ def _get_productions_with_only_single_terminals(self): Production(term_to_var[terminal], [terminal])) return new_productions - def _get_next_free_variable(self, idx, prefix): + def _get_next_free_variable(self, idx: int, prefix: str) \ + -> Tuple[int, Variable]: idx += 1 temp = Variable(prefix + str(idx)) while temp in self._variables: @@ -353,7 +353,8 @@ def _get_next_free_variable(self, idx, prefix): temp = Variable(prefix + str(idx)) return idx, temp - def _decompose_productions(self, productions): + def _decompose_productions(self, productions: Iterable[Production]) \ + -> List[Production]: """ Decompose productions """ idx = 0 new_productions = [] @@ -431,7 +432,7 @@ def to_normal_form(self) -> "CFG": return cfg @property - def variables(self) -> AbstractSet[Variable]: + def variables(self) -> Set[Variable]: """ Gives the variables Returns @@ -442,7 +443,7 @@ def variables(self) -> AbstractSet[Variable]: return self._variables @property - def terminals(self) -> AbstractSet[Terminal]: + def terminals(self) -> Set[Terminal]: """ Gives the terminals Returns @@ -453,7 +454,7 @@ def terminals(self) -> AbstractSet[Terminal]: return self._terminals @property - def productions(self) -> AbstractSet[Production]: + def productions(self) -> Set[Production]: """ Gives the productions Returns @@ -464,7 +465,7 @@ def productions(self) -> AbstractSet[Production]: return self._productions @property - def start_symbol(self) -> Variable: + def start_symbol(self) -> Optional[Variable]: """ Gives the start symbol Returns @@ -561,7 +562,7 @@ def union(self, other: "CFG") -> "CFG": return cfg_temp.substitute({temp_0: self, temp_1: other}) - def __or__(self, other): + def __or__(self, other: "CFG") -> "CFG": """ Makes the union of two CFGs Parameters @@ -603,7 +604,7 @@ def concatenate(self, other: "CFG") -> "CFG": return cfg_temp.substitute({temp_0: self, temp_1: other}) - def __add__(self, other): + def __add__(self, other: "CFG") -> "CFG": """ Makes the concatenation of two CFGs Parameters @@ -678,7 +679,7 @@ def reverse(self) -> "CFG": self.start_symbol, productions) - def __invert__(self): + def __invert__(self) -> "CFG": """ Reverse the current CFG Returns @@ -702,13 +703,13 @@ def is_empty(self) -> bool: """ return self._start_symbol not in self.get_generating_symbols() - def __bool__(self): + def __bool__(self) -> bool: return not self.is_empty() - def __contains__(self, word: Iterable[Union[Terminal, str]]) -> bool: + def __contains__(self, word: Iterable[Any]) -> bool: return self.contains(word) - def contains(self, word: Iterable[Union[Terminal, str]]) -> bool: + def contains(self, word: Iterable[Any]) -> bool: """ Gives the membership of a word to the grammar Parameters @@ -722,13 +723,13 @@ def contains(self, word: Iterable[Union[Terminal, str]]) -> bool: Whether word if in the CFG or not """ # Remove epsilons - word = [to_terminal(x) for x in word if x != Epsilon()] + word = [to_terminal(x) for x in word if x != CFGEpsilon()] if not word: return self.generate_epsilon() cyk_table = CYKTable(self, word) return cyk_table.generate_word() - def get_cnf_parse_tree(self, word): + def get_cnf_parse_tree(self, word: Iterable[Any]) -> CYKNode: """ Get a parse tree of the CNF of this grammar @@ -743,13 +744,13 @@ def get_cnf_parse_tree(self, word): The parse tree """ - word = [to_terminal(x) for x in word if x != Epsilon()] + word = [to_terminal(x) for x in word if x != CFGEpsilon()] if not word and not self.generate_epsilon(): raise DerivationDoesNotExist cyk_table = CYKTable(self, word) return cyk_table.get_parse_tree() - def to_pda(self) -> "pda.PDA": + def to_pda(self) -> "PDA": """ Converts the CFG to a PDA that generates on empty stack an \ equivalent language @@ -758,21 +759,23 @@ def to_pda(self) -> "pda.PDA": new_pda : :class:`~pyformlang.pda.PDA` The equivalent PDA when accepting on empty stack """ - state = pda.State("q") + state = PDAState("q") pda_object_creator = PDAObjectCreator(self._terminals, self._variables) input_symbols = {pda_object_creator.get_symbol_from(x) for x in self._terminals} stack_alphabet = {pda_object_creator.get_stack_symbol_from(x) for x in self._terminals.union(self._variables)} - start_stack_symbol = pda_object_creator.get_stack_symbol_from( - self._start_symbol) - new_pda = pda.PDA(states={state}, + start_stack_symbol = None + if self._start_symbol: + start_stack_symbol = pda_object_creator.get_stack_symbol_from( + self._start_symbol) + new_pda = PDA(states={state}, input_symbols=input_symbols, stack_alphabet=stack_alphabet, start_state=state, start_stack_symbol=start_stack_symbol) for production in self._productions: - new_pda.add_transition(state, pda.Epsilon(), + new_pda.add_transition(state, PDAEpsilon(), pda_object_creator.get_stack_symbol_from( production.head), state, @@ -815,7 +818,7 @@ def intersection(self, other: DeterministicFiniteAutomaton) -> "CFG": cfg = self.to_normal_form() states = list(other.states) cv_converter = \ - cvc.CFGVariableConverter(states, cfg.variables) + CFGVariableConverter(states, cfg.variables) new_productions = [] for production in cfg.productions: if len(production.body) == 2: @@ -839,12 +842,13 @@ def intersection(self, other: DeterministicFiniteAutomaton) -> "CFG": @staticmethod def _intersection_starting_rules(cfg: "CFG", other: DeterministicFiniteAutomaton, - cv_converter): + cv_converter: CFGVariableConverter) \ + -> List[Production]: start = Variable("Start") productions_temp = [] start_other = other.start_state for final_state in other.final_states: - new_body = [ + new_body: List[CFGObject] = [ cv_converter.to_cfg_combined_variable( start_other, cfg.start_symbol, @@ -854,14 +858,12 @@ def _intersection_starting_rules(cfg: "CFG", return productions_temp @staticmethod - def _intersection_when_terminal(other: DeterministicFiniteAutomaton, - production, + def _intersection_when_terminal(other_fst, production, cv_converter, states): productions_temp = [] for state_p in states: - next_state = other.get_next_state( - state_p, production.body[0].value) - if next_state: + next_states = other_fst(state_p, production.body[0].value) + if next_states: new_head = \ cv_converter.to_cfg_combined_variable( state_p, production.head, next_state) @@ -872,8 +874,10 @@ def _intersection_when_terminal(other: DeterministicFiniteAutomaton, return productions_temp @staticmethod - def _intersection_when_two_non_terminals(production, states, - cv_converter): + def _intersection_when_two_non_terminals(production: Production, + states: Iterable[FAState], + cv_converter: CFGVariableConverter) \ + -> List[Production]: productions_temp = [] for state_p in states: for state_r in states: @@ -890,7 +894,11 @@ def _intersection_when_two_non_terminals(production, states, return productions_temp @staticmethod - def _get_all_bodies(production, state_p, state_r, states, cv_converter): + def _get_all_bodies(production: Production, + state_p, + state_r, + states, + cv_converter: CFGVariableConverter): return [ [cv_converter.to_cfg_combined_variable(state_p, production.body[0], @@ -900,7 +908,7 @@ def _get_all_bodies(production, state_p, state_r, states, cv_converter): state_r)] for state_q in states] - def __and__(self, other: DeterministicFiniteAutomaton) -> "CFG": + def __and__(self, other): """ Gives the intersection of the current CFG with an other object Parameters @@ -918,7 +926,7 @@ def __and__(self, other: DeterministicFiniteAutomaton) -> "CFG": """ return self.intersection(other) - def get_words(self, max_length: int = -1): + def get_words(self, max_length: int = -1) -> Iterable[List[CFGObject]]: """ Get the words generated by the CFG Parameters @@ -1005,7 +1013,7 @@ def is_finite(self) -> bool: return True return False - def to_text(self): + def to_text(self) -> str: """ Turns the grammar into its string representation. This might lose some\ type information and the start_symbol. @@ -1021,7 +1029,9 @@ def to_text(self): return "\n".join(res) + "\n" @classmethod - def from_text(cls, text, start_symbol=Variable("S")): + def from_text(cls, + text: str, + start_symbol: Optional[Variable] = Variable("S")) -> "CFG": """ Read a context free grammar from a text. The text contains one rule per line. @@ -1077,7 +1087,7 @@ def _read_line(cls, line, productions, terminals, variables): body_component = body_component[5:-1] else: type_component = "" - if body_component[0] in string.ascii_uppercase or \ + if body_component[0] in ascii_uppercase or \ type_component == "VAR": body_var = Variable(body_component) variables.add(body_var) @@ -1089,7 +1099,7 @@ def _read_line(cls, line, productions, terminals, variables): body.append(body_ter) productions.add(Production(head, body)) - def is_normal_form(self): + def is_normal_form(self) -> bool: """ Tells is the current grammar is in Chomsky Normal Form or not diff --git a/pyformlang/cfg/cfg_object.py b/pyformlang/cfg/cfg_object.py index ed454b8..3d0d17c 100644 --- a/pyformlang/cfg/cfg_object.py +++ b/pyformlang/cfg/cfg_object.py @@ -14,7 +14,7 @@ class CFGObject: # pylint: disable=too-few-public-methods __slots__ = ["_value", "_hash"] - def __init__(self, value: Any): + def __init__(self, value: Any) -> None: self._value = value self._hash = None diff --git a/pyformlang/cfg/cyk_table.py b/pyformlang/cfg/cyk_table.py index 1fe68ad..db03717 100644 --- a/pyformlang/cfg/cyk_table.py +++ b/pyformlang/cfg/cyk_table.py @@ -90,7 +90,7 @@ def _generates_all_terminals(self): generate_all_terminals = False return generate_all_terminals - def get_parse_tree(self): + def get_parse_tree(self) -> "CYKNode": """ Give the parse tree associated with this CYK Table diff --git a/pyformlang/cfg/epsilon.py b/pyformlang/cfg/epsilon.py index cfab093..1f6047b 100644 --- a/pyformlang/cfg/epsilon.py +++ b/pyformlang/cfg/epsilon.py @@ -7,7 +7,7 @@ class Epsilon(Terminal): """ An epsilon terminal """ # pylint: disable=too-few-public-methods - def __init__(self): + def __init__(self) -> None: super().__init__("epsilon") def to_text(self) -> str: diff --git a/pyformlang/cfg/pda_object_creator.py b/pyformlang/cfg/pda_object_creator.py index 5553290..bd5fe66 100644 --- a/pyformlang/cfg/pda_object_creator.py +++ b/pyformlang/cfg/pda_object_creator.py @@ -1,41 +1,48 @@ """Creation of objects for PDA""" -from pyformlang import cfg -from pyformlang import pda +from typing import Dict, Iterable, Optional + +from pyformlang.cfg import Epsilon as CFGEpsilon, Terminal, Variable +from pyformlang.cfg.cfg_object import CFGObject +from pyformlang.pda import Epsilon as PDAEpsilon, Symbol, StackSymbol class PDAObjectCreator: """Creates Objects for a PDA""" - def __init__(self, terminals, variables): - self._inverse_symbol = {} - self._inverse_stack_symbol = {} + def __init__(self, + terminals: Iterable[Terminal], + variables: Iterable[Variable]) -> None: + self._inverse_symbol: Dict[CFGObject, Optional[Symbol]] = {} + self._inverse_stack_symbol: Dict[CFGObject, Optional[StackSymbol]] = {} for terminal in terminals: self._inverse_symbol[terminal] = None self._inverse_stack_symbol[terminal] = None for variable in variables: self._inverse_stack_symbol[variable] = None - def get_symbol_from(self, symbol): + def get_symbol_from(self, symbol: CFGObject) -> Symbol: """Get a symbol""" - if isinstance(symbol, cfg.Epsilon): - return pda.Epsilon() - if self._inverse_symbol[symbol] is None: + if isinstance(symbol, CFGEpsilon): + return PDAEpsilon() + inverse_symbol = self._inverse_symbol[symbol] + if inverse_symbol is None: value = str(symbol.value) - temp = pda.Symbol(value) + temp = Symbol(value) self._inverse_symbol[symbol] = temp return temp - return self._inverse_symbol[symbol] + return inverse_symbol - def get_stack_symbol_from(self, stack_symbol): + def get_stack_symbol_from(self, stack_symbol: CFGObject) -> StackSymbol: """Get a stack symbol""" - if isinstance(stack_symbol, cfg.Epsilon): - return pda.Epsilon() - if self._inverse_stack_symbol[stack_symbol] is None: + if isinstance(stack_symbol, CFGEpsilon): + return PDAEpsilon() + inverse_stack_symbol = self._inverse_stack_symbol[stack_symbol] + if inverse_stack_symbol is None: value = str(stack_symbol.value) - if isinstance(stack_symbol, cfg.Terminal): + if isinstance(stack_symbol, Terminal): value = "#TERM#" + value - temp = pda.StackSymbol(value) + temp = StackSymbol(value) self._inverse_stack_symbol[stack_symbol] = temp return temp - return self._inverse_stack_symbol[stack_symbol] + return inverse_stack_symbol diff --git a/pyformlang/cfg/production.py b/pyformlang/cfg/production.py index 77270d6..40c2968 100644 --- a/pyformlang/cfg/production.py +++ b/pyformlang/cfg/production.py @@ -1,5 +1,6 @@ """ A production or rule of a CFG """ -from typing import List + +from typing import List, Any from .terminal import Terminal from .variable import Variable @@ -20,7 +21,10 @@ class Production: __slots__ = ["_body", "_head", "_hash"] - def __init__(self, head: Variable, body: List[CFGObject], filtering=True): + def __init__(self, + head: Variable, + body: List[CFGObject], + filtering: bool = True) -> None: if filtering: self._body = [x for x in body if not isinstance(x, Epsilon)] else: @@ -38,18 +42,20 @@ def body(self) -> List[CFGObject]: """Get the body objects""" return self._body - def __repr__(self): + def __repr__(self) -> str: return str(self.head) + " -> " + " ".join([str(x) for x in self.body]) - def __hash__(self): + def __hash__(self) -> int: if self._hash is None: self._hash = sum(map(hash, self._body)) + hash(self._head) return self._hash - def __eq__(self, other): - return self.head == other.head and self.body == other.body + def __eq__(self, other: Any) -> bool: + if isinstance(other, Production): + return self.head == other.head and self.body == other.body + return False - def is_normal_form(self): + def is_normal_form(self) -> bool: """ Tells is the production is in Chomsky Normal Form diff --git a/pyformlang/cfg/set_queue.py b/pyformlang/cfg/set_queue.py index 8f01de2..b46f37b 100644 --- a/pyformlang/cfg/set_queue.py +++ b/pyformlang/cfg/set_queue.py @@ -1,24 +1,26 @@ """ A queue with non duplicate elements""" +from typing import Any + class SetQueue: """ A queue with non duplicate elements""" - def __init__(self): + def __init__(self) -> None: self._to_process = [] self._processing = set() - def append(self, value): + def append(self, value: Any) -> None: """ Append an element """ if value not in self._processing: self._to_process.append(value) self._processing.add(value) - def pop(self): + def pop(self) -> Any: """ Pop an element """ popped = self._to_process.pop() self._processing.remove(popped) return popped - def __bool__(self): + def __bool__(self) -> bool: return bool(self._to_process) diff --git a/pyformlang/cfg/terminal.py b/pyformlang/cfg/terminal.py index 230f646..135b230 100644 --- a/pyformlang/cfg/terminal.py +++ b/pyformlang/cfg/terminal.py @@ -1,18 +1,22 @@ """ A terminal in a CFG """ +from typing import Any + from .cfg_object import CFGObject class Terminal(CFGObject): # pylint: disable=too-few-public-methods """ A terminal in a CFG """ - def __eq__(self, other): - return isinstance(other, Terminal) and self.value == other.value + def __eq__(self, other: Any) -> bool: + if isinstance(other, CFGObject): + return self.value == other.value + return self.value == other - def __repr__(self): + def __repr__(self) -> str: return "Terminal(" + str(self.value) + ")" - def __hash__(self): + def __hash__(self) -> int: if self._hash is None: self._hash = hash(self.value) return self._hash diff --git a/pyformlang/cfg/utils.py b/pyformlang/cfg/utils.py index 86da22b..0fd3196 100644 --- a/pyformlang/cfg/utils.py +++ b/pyformlang/cfg/utils.py @@ -1,17 +1,19 @@ """ Useful functions """ +from typing import Any + from .variable import Variable from .terminal import Terminal -def to_variable(given): +def to_variable(given: Any) -> Variable: """ Transformation into a variable """ if isinstance(given, Variable): return given return Variable(given) -def to_terminal(given): +def to_terminal(given: Any) -> Terminal: """ Transformation into a terminal """ if isinstance(given, Terminal): return given diff --git a/pyformlang/cfg/utils_cfg.py b/pyformlang/cfg/utils_cfg.py index 4c3ff19..567df64 100644 --- a/pyformlang/cfg/utils_cfg.py +++ b/pyformlang/cfg/utils_cfg.py @@ -1,11 +1,11 @@ """ Internal Usage only""" - -from typing import List, AbstractSet +from typing import Dict, List, Iterable, AbstractSet from .production import Production from .epsilon import Epsilon from .cfg_object import CFGObject +from .variable import Variable def remove_nullable_production_sub(body: List[CFGObject], @@ -36,9 +36,10 @@ def remove_nullable_production(production: Production, return res -def get_productions_d(productions): +def get_productions_d(productions: Iterable[Production]) \ + -> Dict[Variable, List[Production]]: """ Get productions as a dictionary """ - productions_d = {} + productions_d: Dict[Variable, List[Production]] = {} for production in productions: production_head = productions_d.setdefault(production.head, []) production_head.append(production) diff --git a/pyformlang/cfg/variable.py b/pyformlang/cfg/variable.py index 808ebbd..71c97a6 100644 --- a/pyformlang/cfg/variable.py +++ b/pyformlang/cfg/variable.py @@ -1,6 +1,8 @@ """ A variable in a CFG """ import string +from typing import Any + from .cfg_object import CFGObject @@ -13,28 +15,28 @@ class Variable(CFGObject): # pylint: disable=too-few-public-methods The value of the variable """ - def __init__(self, value): + def __init__(self, value: Any) -> None: super().__init__(value) self._hash = None self.index_cfg_converter = None - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: if isinstance(other, CFGObject): return self._value == other.value return self._value == other - def __str__(self): + def __str__(self) -> str: return str(self.value) - def __repr__(self): + def __repr__(self) -> str: return "Variable(" + str(self.value) + ")" - def __hash__(self): + def __hash__(self) -> int: if self._hash is None: self._hash = self._compute_new_hash() return self._hash - def _compute_new_hash(self): + def _compute_new_hash(self) -> int: return hash(self._value) def to_text(self) -> str: From 4f73b85396ee822fdcb05156050430808e9b5a94 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Sun, 1 Dec 2024 16:40:34 +0300 Subject: [PATCH 02/30] add cfg annotations --- pyformlang/cfg/cfg.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pyformlang/cfg/cfg.py b/pyformlang/cfg/cfg.py index 5b54470..1f498b6 100644 --- a/pyformlang/cfg/cfg.py +++ b/pyformlang/cfg/cfg.py @@ -3,14 +3,15 @@ from string import ascii_uppercase from copy import deepcopy from typing import Dict, List, Iterable, Set, \ - AbstractSet, Tuple, Optional, Any, Union + AbstractSet, Tuple, Optional, Any -import networkx as nx +from networkx import DiGraph, find_cycle +from networkx.exception import NetworkXNoCycle from pyformlang.finite_automaton import DeterministicFiniteAutomaton, State as FAState -from pyformlang.pda import State as PDAState, Epsilon as PDAEpsilon, PDA +from pyformlang.pda import PDA, State as PDAState, Epsilon as PDAEpsilon from pyformlang.pda.cfg_variable_converter import CFGVariableConverter -from pyformlang.regular_expression import Regex + from .cfg_object import CFGObject from .cyk_table import CYKTable, CYKNode, DerivationDoesNotExist from .epsilon import Epsilon as CFGEpsilon @@ -1001,15 +1002,15 @@ def is_finite(self) -> bool: Whether the grammar is finite or not """ normal = self.to_normal_form() - di_graph = nx.DiGraph() + di_graph = DiGraph() for production in normal.productions: body = production.body if len(body) == 2: di_graph.add_edge(production.head, body[0]) di_graph.add_edge(production.head, body[1]) try: - nx.find_cycle(di_graph, orientation="original") - except nx.exception.NetworkXNoCycle: + find_cycle(di_graph, orientation="original") + except NetworkXNoCycle: return True return False From 3f1c19cb2c13e746b1b76e874f6ce9d8d0cf7ba9 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Sat, 19 Oct 2024 18:41:44 +0300 Subject: [PATCH 03/30] add final cfg annotations --- pyformlang/cfg/cfg.py | 48 ++++++++------ pyformlang/cfg/cyk_table.py | 40 +++++++----- pyformlang/cfg/llone_parser.py | 54 ++++++++++------ pyformlang/cfg/parse_tree.py | 20 +++--- pyformlang/cfg/pda_object_creator.py | 3 +- pyformlang/cfg/recursive_decent_parser.py | 26 +++++--- pyformlang/cfg/variable.py | 4 +- pyformlang/finite_automaton/state.py | 4 +- pyformlang/pda/cfg_variable_converter.py | 78 +++++++++++++---------- pyformlang/pda/stack_symbol.py | 4 +- 10 files changed, 169 insertions(+), 112 deletions(-) diff --git a/pyformlang/cfg/cfg.py b/pyformlang/cfg/cfg.py index 1f498b6..6f45163 100644 --- a/pyformlang/cfg/cfg.py +++ b/pyformlang/cfg/cfg.py @@ -7,6 +7,8 @@ from networkx import DiGraph, find_cycle from networkx.exception import NetworkXNoCycle +from networkx import DiGraph, find_cycle +from networkx.exception import NetworkXNoCycle from pyformlang.finite_automaton import DeterministicFiniteAutomaton, State as FAState from pyformlang.pda import PDA, State as PDAState, Epsilon as PDAEpsilon @@ -817,7 +819,7 @@ def intersection(self, other: DeterministicFiniteAutomaton) -> "CFG": return CFG() generate_empty = self.contains([]) and other.accepts([]) cfg = self.to_normal_form() - states = list(other.states) + states = set(other.states) cv_converter = \ CFGVariableConverter(states, cfg.variables) new_productions = [] @@ -852,7 +854,7 @@ def _intersection_starting_rules(cfg: "CFG", new_body: List[CFGObject] = [ cv_converter.to_cfg_combined_variable( start_other, - cfg.start_symbol, + cfg.start_symbol, # type: ignore final_state)] productions_temp.append( Production(start, new_body, filtering=False)) @@ -867,7 +869,7 @@ def _intersection_when_terminal(other_fst, production, if next_states: new_head = \ cv_converter.to_cfg_combined_variable( - state_p, production.head, next_state) + state_p, production.head, next_states[0]) productions_temp.append( Production(new_head, [production.body[0]], @@ -875,10 +877,11 @@ def _intersection_when_terminal(other_fst, production, return productions_temp @staticmethod - def _intersection_when_two_non_terminals(production: Production, - states: Iterable[FAState], - cv_converter: CFGVariableConverter) \ - -> List[Production]: + def _intersection_when_two_non_terminals( + production: Production, + states: Iterable[FAState], + cv_converter: CFGVariableConverter) \ + -> List[Production]: productions_temp = [] for state_p in states: for state_r in states: @@ -889,24 +892,27 @@ def _intersection_when_two_non_terminals(production: Production, cv_converter.to_cfg_combined_variable( state_p, production.head, state_r) productions_temp += [Production(new_head, - body, + body, # type: ignore filtering=False) for body in bodies] return productions_temp @staticmethod def _get_all_bodies(production: Production, - state_p, - state_r, - states, - cv_converter: CFGVariableConverter): + state_p: FAState, + state_r: FAState, + states: Iterable[FAState], + cv_converter: CFGVariableConverter) \ + -> List[List[Variable]]: return [ - [cv_converter.to_cfg_combined_variable(state_p, - production.body[0], - state_q), - cv_converter.to_cfg_combined_variable(state_q, - production.body[1], - state_r)] + [cv_converter.to_cfg_combined_variable( + state_p, + production.body[0], # type: ignore + state_q), + cv_converter.to_cfg_combined_variable( + state_q, + production.body[1], # type: ignore + state_r)] for state_q in states] def __and__(self, other): @@ -1073,7 +1079,11 @@ def from_text(cls, productions=productions, start_symbol=start_symbol) @classmethod - def _read_line(cls, line, productions, terminals, variables): + def _read_line(cls, + line: str, + productions: Set[Production], + terminals: Set[Terminal], + variables: Set[Variable]) -> None: head_s, body_s = line.split("->") head_text = head_s.strip() if is_special_text(head_text): diff --git a/pyformlang/cfg/cyk_table.py b/pyformlang/cfg/cyk_table.py index db03717..c373a7b 100644 --- a/pyformlang/cfg/cyk_table.py +++ b/pyformlang/cfg/cyk_table.py @@ -2,6 +2,10 @@ Representation of a CYK table """ +from typing import Dict, List, Set, Iterable, Tuple, Any + +from pyformlang.cfg import CFG, Terminal +from pyformlang.cfg.cfg_object import CFGObject from pyformlang.cfg.parse_tree import ParseTree @@ -16,18 +20,18 @@ class CYKTable: The word from which we construct the CYK table """ - def __init__(self, cfg, word): - self._cnf = cfg.to_normal_form() - self._word = word - self._productions_d = {} + def __init__(self, cfg: CFG, word: List[Terminal]) -> None: + self._cnf: CFG = cfg.to_normal_form() + self._word: List[Terminal] = word + self._productions_d: Dict[Tuple, List[CFGObject]] = {} + self._cyk_table: Dict[Tuple[int, int], Set[CYKNode]] = {} self._set_productions_by_body() - self._cyk_table = {} if not self._generates_all_terminals(): self._cyk_table[(0, len(self._word))] = set() else: self._set_cyk_table() - def _set_productions_by_body(self): + def _set_productions_by_body(self) -> None: # Organize productions for production in self._cnf.productions: temp = tuple(production.body) @@ -36,23 +40,24 @@ def _set_productions_by_body(self): else: self._productions_d[temp] = [production.head] - def _set_cyk_table(self): + def _set_cyk_table(self) -> None: self._initialize_cyk_table() self._propagate_in_cyk_table() - def _get_windows(self): + def _get_windows(self) -> Iterable[Tuple[int, int]]: # The windows must in order by length for window_size in range(2, len(self._word) + 1): for start_window in range(len(self._word) - window_size + 1): yield start_window, start_window + window_size - def _get_all_window_pairs(self, start_window, end_window): + def _get_all_window_pairs(self, start_window: int, end_window: int) \ + -> Iterable[Tuple["CYKNode", "CYKNode"]]: for mid_window in range(start_window + 1, end_window): for var_b in self._cyk_table[(start_window, mid_window)]: for var_c in self._cyk_table[(mid_window, end_window)]: yield var_b, var_c - def _propagate_in_cyk_table(self): + def _propagate_in_cyk_table(self) -> None: for start_window, end_window in self._get_windows(): for var_b, var_c in self._get_all_window_pairs(start_window, end_window): @@ -61,7 +66,7 @@ def _propagate_in_cyk_table(self): self._cyk_table[(start_window, end_window)].add( CYKNode(var_a, var_b, var_c)) - def _initialize_cyk_table(self): + def _initialize_cyk_table(self) -> None: for i, terminal in enumerate(self._word): self._cyk_table[(i, i + 1)] = \ {CYKNode(x, CYKNode(terminal)) @@ -73,7 +78,7 @@ def _initialize_cyk_table(self): self._cyk_table[ (start_window, start_window + window_size)] = set() - def generate_word(self): + def generate_word(self) -> bool: """ Checks is the word is generated Returns @@ -83,7 +88,7 @@ def generate_word(self): """ return self._cnf.start_symbol in self._cyk_table[(0, len(self._word))] - def _generates_all_terminals(self): + def _generates_all_terminals(self) -> bool: generate_all_terminals = True for terminal in self._word: if (terminal,) not in self._productions_d: @@ -112,7 +117,10 @@ def get_parse_tree(self) -> "CYKNode": class CYKNode(ParseTree): """A node in the CYK table""" - def __init__(self, value, left_son=None, right_son=None): + def __init__(self, + value: Any, + left_son: "CYKNode" = None, + right_son: "CYKNode" = None): super().__init__(value) self.value = value self.left_son = left_son @@ -122,12 +130,12 @@ def __init__(self, value, left_son=None, right_son=None): if right_son is not None: self.sons.append(right_son) - def __eq__(self, other): + def __eq__(self, other: Any): if isinstance(other, CYKNode): return self.value == other.value return self.value == other - def __hash__(self): + def __hash__(self) -> int: return hash(self.value) diff --git a/pyformlang/cfg/llone_parser.py b/pyformlang/cfg/llone_parser.py index 1a29f63..9aa0dfd 100644 --- a/pyformlang/cfg/llone_parser.py +++ b/pyformlang/cfg/llone_parser.py @@ -1,6 +1,9 @@ """ LL(1) Parser """ +from typing import Dict, List, Set, Iterable, Tuple, Any +from pyformlang.cfg import CFG, Production +from pyformlang.cfg.cfg_object import CFGObject from pyformlang.cfg.epsilon import Epsilon from pyformlang.cfg.cfg import NotParsableException from pyformlang.cfg.parse_tree import ParseTree @@ -19,10 +22,10 @@ class LLOneParser: A context-free Grammar """ - def __init__(self, cfg): + def __init__(self, cfg: CFG) -> None: self._cfg = cfg - def get_first_set(self): + def get_first_set(self) -> Dict[CFGObject, Set[CFGObject]]: """ Used in LL(1) """ # Algorithm from: # https://www.geeksforgeeks.org/first-set-in-syntax-analysis/ @@ -46,7 +49,9 @@ def get_first_set(self): return first_set @staticmethod - def _get_first_set_production(production, first_set): + def _get_first_set_production(production: Production, + first_set: Dict[CFGObject, Set[CFGObject]]) \ + -> Set[CFGObject]: first_not_containing_epsilon = 0 first_set_temp = set() for body_component in production.body: @@ -62,9 +67,11 @@ def _get_first_set_production(production, first_set): first_set_temp.remove(Epsilon()) return first_set_temp - def _initialize_first_set(self, triggers): + def _initialize_first_set(self, + triggers: Dict[CFGObject, List[CFGObject]]) \ + -> Tuple[Dict, SetQueue]: to_process = SetQueue() - first_set = {} + first_set: Dict[CFGObject, Set[CFGObject]] = {} # Initialisation for terminal in self._cfg.terminals: first_set[terminal] = {terminal} @@ -78,8 +85,8 @@ def _initialize_first_set(self, triggers): to_process.append(triggered) return first_set, to_process - def _get_triggers(self): - triggers = {} + def _get_triggers(self) -> Dict[CFGObject, List[CFGObject]]: + triggers: Dict[CFGObject, List[CFGObject]] = {} for production in self._cfg.productions: for body_component in production.body: if body_component not in triggers: @@ -87,7 +94,7 @@ def _get_triggers(self): triggers[body_component].append(production.head) return triggers - def get_follow_set(self): + def get_follow_set(self) -> Dict[CFGObject, Set[CFGObject]]: """ Get follow set """ first_set = self.get_first_set() triggers = self._get_triggers_follow_set(first_set) @@ -103,7 +110,9 @@ def get_follow_set(self): to_process.append(triggered) return follow_set - def _initialize_follow_set(self, first_set): + def _initialize_follow_set(self, + first_set: Dict[CFGObject, Set[CFGObject]]) \ + -> Tuple[Dict, SetQueue]: to_process = SetQueue() follow_set = {} follow_set[self._cfg.start_symbol] = {"$"} @@ -123,7 +132,9 @@ def _initialize_follow_set(self, first_set): to_process.append(component) return follow_set, to_process - def _get_triggers_follow_set(self, first_set): + def _get_triggers_follow_set(self, + first_set: Dict[CFGObject, Set[CFGObject]]) \ + -> Dict[CFGObject, List[CFGObject]]: triggers = {} for production in self._cfg.productions: if production.head not in triggers: @@ -138,7 +149,8 @@ def _get_triggers_follow_set(self, first_set): triggers[production.head].add(component) return triggers - def get_llone_parsing_table(self): + def get_llone_parsing_table(self) \ + -> Dict[CFGObject, Dict[CFGObject, List[Production]]]: """ Get the LL(1) parsing table From: https://www.slideshare.net/MahbuburRahman273/ll1-parser-in-compilers @@ -153,7 +165,8 @@ def get_llone_parsing_table(self): nullable_productions.append(production) else: non_nullable_productions.append(production) - llone_parsing_table = {} + llone_parsing_table: Dict[CFGObject, + Dict[CFGObject, List[Production]]] = {} for production in nullable_productions: if production.head not in llone_parsing_table: llone_parsing_table[production.head] = {} @@ -175,7 +188,7 @@ def get_llone_parsing_table(self): ) return llone_parsing_table - def is_llone_parsable(self): + def is_llone_parsable(self) -> bool: """ Checks whether the grammar can be parse with the LL(1) parser. @@ -190,7 +203,7 @@ def is_llone_parsable(self): return False return True - def get_llone_parse_tree(self, word): + def get_llone_parse_tree(self, word: Iterable[Any]) -> ParseTree: """ Get LL(1) parse Tree @@ -211,20 +224,21 @@ def get_llone_parse_tree(self, word): """ word = [to_terminal(x) for x in word if x != Epsilon()] - word.append("$") + word.append("$") # type: ignore word = word[::-1] parsing_table = self.get_llone_parsing_table() parse_tree = ParseTree(self._cfg.start_symbol) stack = ["$", parse_tree] while stack: current = stack.pop() - if current == "$" and word[-1] == "$": - return parse_tree - if current.value == word[-1]: + if isinstance(current, str): + if current == "$" and word[-1] == "$": + return parse_tree + elif current.value == word[-1]: word.pop() else: - rule_applied = list(parsing_table.get(current.value, {}) - .get(word[-1], [])) + rule_applied = parsing_table.get(current.value, {}) \ + .get(word[-1], []) if len(rule_applied) == 1: for component in rule_applied[0].body[::-1]: new_node = ParseTree(component) diff --git a/pyformlang/cfg/parse_tree.py b/pyformlang/cfg/parse_tree.py index e343302..3228e32 100644 --- a/pyformlang/cfg/parse_tree.py +++ b/pyformlang/cfg/parse_tree.py @@ -1,6 +1,8 @@ """ A parse Tree """ -import networkx as nx +from typing import List, Any + +from networkx import DiGraph from networkx.drawing.nx_pydot import write_dot from pyformlang.cfg.variable import Variable @@ -9,14 +11,14 @@ class ParseTree: """ A parse tree """ - def __init__(self, value): + def __init__(self, value: Any) -> None: self.value = value - self.sons = [] + self.sons: List[ParseTree] = [] - def __repr__(self): + def __repr__(self) -> str: return "ParseTree(" + str(self.value) + ", " + str(self.sons) + ")" - def get_leftmost_derivation(self): + def get_leftmost_derivation(self) -> List[List[Any]]: """ Get the leftmost derivation @@ -46,7 +48,7 @@ def get_leftmost_derivation(self): start.append(son.value) return res - def get_rightmost_derivation(self): + def get_rightmost_derivation(self) -> List[List[Any]]: """ Get the leftmost derivation @@ -73,7 +75,7 @@ def get_rightmost_derivation(self): end = derivation + end return res - def to_networkx(self): + def to_networkx(self) -> DiGraph: """ Transforms the tree into a Networkx Directed Graph @@ -83,7 +85,7 @@ def to_networkx(self): The tree in Networkx format. """ - tree = nx.DiGraph() + tree = DiGraph() tree.add_node("ROOT", label=self.value.value) to_process = [("ROOT", son) for son in self.sons[::-1]] counter = 0 @@ -99,7 +101,7 @@ def to_networkx(self): to_process += [(new_node, son) for son in current_node.sons[::-1]] return tree - def write_as_dot(self, filename): + def write_as_dot(self, filename: str) -> None: """ Write the parse tree in dot format into a file diff --git a/pyformlang/cfg/pda_object_creator.py b/pyformlang/cfg/pda_object_creator.py index bd5fe66..97093c8 100644 --- a/pyformlang/cfg/pda_object_creator.py +++ b/pyformlang/cfg/pda_object_creator.py @@ -33,7 +33,8 @@ def get_symbol_from(self, symbol: CFGObject) -> Symbol: return temp return inverse_symbol - def get_stack_symbol_from(self, stack_symbol: CFGObject) -> StackSymbol: + def get_stack_symbol_from(self, stack_symbol: CFGObject) \ + -> StackSymbol: """Get a stack symbol""" if isinstance(stack_symbol, CFGEpsilon): return PDAEpsilon() diff --git a/pyformlang/cfg/recursive_decent_parser.py b/pyformlang/cfg/recursive_decent_parser.py index 8f10b2f..befa8a9 100644 --- a/pyformlang/cfg/recursive_decent_parser.py +++ b/pyformlang/cfg/recursive_decent_parser.py @@ -2,13 +2,16 @@ A recursive decent parser. """ -from pyformlang.cfg import Variable, Epsilon +from typing import List, Iterable, Tuple, Optional, Any + +from pyformlang.cfg import CFG, Terminal, Variable, Epsilon from pyformlang.cfg.cfg import NotParsableException from pyformlang.cfg.parse_tree import ParseTree from pyformlang.cfg.utils import to_terminal -def _get_index_to_extend(current_expansion, left): +def _get_index_to_extend(current_expansion: List[Any], left: bool) \ + -> Tuple[int, Optional[List[Any]]]: order = enumerate(current_expansion) if not left: order = reversed(list(order)) @@ -29,10 +32,11 @@ class RecursiveDecentParser: """ - def __init__(self, cfg): + def __init__(self, cfg: CFG) -> None: self._cfg = cfg - def get_parse_tree(self, word, left=True): + def get_parse_tree(self, word: Iterable[Any], left: bool = True) \ + -> ParseTree: """ Get a parse tree for a given word @@ -62,8 +66,11 @@ def get_parse_tree(self, word, left=True): return parse_tree raise NotParsableException - def _match(self, word, current_expansion, idx_word=0, - idx_current_expansion=0): + def _match(self, + word: List[Terminal], + current_expansion: List[Any], + idx_word: int = 0, + idx_current_expansion: int = 0) -> bool: if idx_word == len(word) and \ idx_current_expansion == len(current_expansion): return True @@ -82,7 +89,10 @@ def _match(self, word, current_expansion, idx_word=0, idx_current_expansion + 1) return False - def _get_parse_tree_sub(self, word, current_expansion, left=True): + def _get_parse_tree_sub(self, + word: List[Terminal], + current_expansion: List[Any], + left: bool = True) -> bool: if not self._match(word, current_expansion): return False extend_idx, to_expand = _get_index_to_extend(current_expansion, left) @@ -100,7 +110,7 @@ def _get_parse_tree_sub(self, word, current_expansion, left=True): return True return False - def is_parsable(self, word, left=True): + def is_parsable(self, word: Iterable[Any], left: bool = True) -> bool: """ Whether a word is parsable or not diff --git a/pyformlang/cfg/variable.py b/pyformlang/cfg/variable.py index 71c97a6..b687dbc 100644 --- a/pyformlang/cfg/variable.py +++ b/pyformlang/cfg/variable.py @@ -1,7 +1,7 @@ """ A variable in a CFG """ import string -from typing import Any +from typing import Optional, Any from .cfg_object import CFGObject @@ -18,7 +18,7 @@ class Variable(CFGObject): # pylint: disable=too-few-public-methods def __init__(self, value: Any) -> None: super().__init__(value) self._hash = None - self.index_cfg_converter = None + self.index_cfg_converter: Optional[int] = None def __eq__(self, other: Any) -> bool: if isinstance(other, CFGObject): diff --git a/pyformlang/finite_automaton/state.py b/pyformlang/finite_automaton/state.py index 53ed6d9..356bc8d 100644 --- a/pyformlang/finite_automaton/state.py +++ b/pyformlang/finite_automaton/state.py @@ -2,7 +2,7 @@ Representation of a state in a finite state automaton """ -from typing import Hashable, Any +from typing import Hashable, Optional, Any from .finite_automaton_object import FiniteAutomatonObject @@ -25,7 +25,7 @@ class State(FiniteAutomatonObject): # pylint: disable=too-few-public-methods def __init__(self, value: Hashable) -> None: super().__init__(value) self.index = None - self.index_cfg_converter = None + self.index_cfg_converter: Optional[int] = None def __hash__(self) -> int: return super().__hash__() diff --git a/pyformlang/pda/cfg_variable_converter.py b/pyformlang/pda/cfg_variable_converter.py index e49d213..0712385 100644 --- a/pyformlang/pda/cfg_variable_converter.py +++ b/pyformlang/pda/cfg_variable_converter.py @@ -1,14 +1,18 @@ """A CFG Variable Converter""" -from pyformlang import cfg +from typing import Dict, List, AbstractSet, Tuple, Optional, Any +from pyformlang.finite_automaton import State +from pyformlang.cfg import Variable class CFGVariableConverter: """A CFG Variable Converter""" - def __init__(self, states, stack_symbols): + def __init__(self, + states: AbstractSet[State], + stack_symbols: AbstractSet[Variable]) -> None: self._counter = 0 - self._inverse_states_d = {} + self._inverse_states_d: Dict[State, int] = {} self._counter_state = 0 for self._counter_state, state in enumerate(states): self._inverse_states_d[state] = self._counter_state @@ -20,37 +24,33 @@ def __init__(self, states, stack_symbols): self._inverse_stack_symbol_d[symbol] = self._counter_symbol symbol.index_cfg_converter = self._counter_symbol self._counter_symbol += 1 - self._conversions = [[[(False, None) for _ in range(len(states))] - for _ in range(len(stack_symbols))] for _ in - range(len(states))] + self._conversions: List[List[List[Tuple[bool, Optional[Variable]]]]] \ + = [[[(False, None) for _ in range(len(states))] + for _ in range(len(stack_symbols))] for _ in + range(len(states))] - def _get_state_index(self, state): + def _get_state_index(self, state: State) -> int: """Get the state index""" if state.index_cfg_converter is None: - self._set_index_state(state) + if state not in self._inverse_states_d: + self._inverse_states_d[state] = self._counter_state + self._counter_state += 1 + state.index_cfg_converter = self._inverse_states_d[state] return state.index_cfg_converter - def _set_index_state(self, state): - """Set the state index""" - if state not in self._inverse_states_d: - self._inverse_states_d[state] = self._counter_state - self._counter_state += 1 - state.index_cfg_converter = self._inverse_states_d[state] - - def _get_symbol_index(self, symbol): + def _get_symbol_index(self, symbol: Variable) -> int: """Get the symbol index""" if symbol.index_cfg_converter is None: - self._set_index_symbol(symbol) + if symbol not in self._inverse_stack_symbol_d: + self._inverse_stack_symbol_d[symbol] = self._counter_symbol + self._counter_symbol += 1 + symbol.index_cfg_converter = self._inverse_stack_symbol_d[symbol] return symbol.index_cfg_converter - def _set_index_symbol(self, symbol): - """ Set the symbol index """ - if symbol not in self._inverse_stack_symbol_d: - self._inverse_stack_symbol_d[symbol] = self._counter_symbol - self._counter_symbol += 1 - symbol.index_cfg_converter = self._inverse_stack_symbol_d[symbol] - - def to_cfg_combined_variable(self, state0, stack_symbol, state1): + def to_cfg_combined_variable(self, + state0: State, + stack_symbol: Variable, + state1: State) -> Variable: """ Conversion used in the to_pda method """ i_stack_symbol, i_state0, i_state1 = self._get_indexes( stack_symbol, state0, state1) @@ -61,27 +61,33 @@ def to_cfg_combined_variable(self, state0, stack_symbol, state1): return prev[1] def _create_new_variable(self, - i_stack_symbol, - i_state0, - i_state1, - prev, - value=None): + i_stack_symbol: int, + i_state0: int, + i_state1: int, + prev: Tuple, + value: Any = None) -> Tuple[bool, Variable]: # pylint: disable=too-many-arguments if value is None: value = self._counter - temp = (prev[0], cfg.Variable(value)) + temp = (prev[0], Variable(value)) self._counter += 1 self._conversions[i_state0][i_stack_symbol][i_state1] = temp return temp - def set_valid(self, state0, stack_symbol, state1): + def set_valid(self, + state0: State, + stack_symbol: Variable, + state1: State) -> None: """Set valid""" i_stack_symbol, i_state0, i_state1 = self._get_indexes( stack_symbol, state0, state1) prev = self._conversions[i_state0][i_stack_symbol][i_state1] self._conversions[i_state0][i_stack_symbol][i_state1] = (True, prev[1]) - def is_valid_and_get(self, state0, stack_symbol, state1): + def is_valid_and_get(self, + state0: State, + stack_symbol: Variable, + state1: State) -> Optional[Variable]: """Check if valid and get""" i_state0 = self._get_state_index(state0) i_stack_symbol = self._get_symbol_index(stack_symbol) @@ -96,7 +102,11 @@ def is_valid_and_get(self, state0, stack_symbol, state1): current)[1] return current[1] - def _get_indexes(self, stack_symbol, state0, state1): + def _get_indexes(self, + stack_symbol: Variable, + state0: State, + state1: State) \ + -> Tuple[int, int, int]: i_state0 = self._get_state_index(state0) i_stack_symbol = self._get_symbol_index(stack_symbol) i_state1 = self._get_state_index(state1) diff --git a/pyformlang/pda/stack_symbol.py b/pyformlang/pda/stack_symbol.py index 6d8dd7e..b8dff27 100644 --- a/pyformlang/pda/stack_symbol.py +++ b/pyformlang/pda/stack_symbol.py @@ -1,5 +1,7 @@ """ A StackSymbol in a pushdown automaton """ +from typing import Optional + class StackSymbol: """ A StackSymbol in a pushdown automaton @@ -14,7 +16,7 @@ class StackSymbol: def __init__(self, value): self._value = value self._hash = None - self.index_cfg_converter = None + self.index_cfg_converter: Optional[int] = None @property def value(self): From a462635d3dd4a537c76209bad3d09fb6216dd0fa Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 21 Oct 2024 00:32:33 +0300 Subject: [PATCH 04/30] add pda module annotations, rework pda objects --- pyformlang/pda/epsilon.py | 6 +- pyformlang/pda/pda.py | 113 ++++++++++++++------------ pyformlang/pda/pda_object.py | 27 ++++++ pyformlang/pda/stack_symbol.py | 28 +++---- pyformlang/pda/state.py | 27 +++--- pyformlang/pda/symbol.py | 19 +++-- pyformlang/pda/transition_function.py | 42 ++++++---- pyformlang/pda/utils.py | 23 ++++-- 8 files changed, 166 insertions(+), 119 deletions(-) create mode 100644 pyformlang/pda/pda_object.py diff --git a/pyformlang/pda/epsilon.py b/pyformlang/pda/epsilon.py index 3e1f014..00956a5 100644 --- a/pyformlang/pda/epsilon.py +++ b/pyformlang/pda/epsilon.py @@ -1,11 +1,11 @@ """ An epsilon symbol """ -from .symbol import Symbol +from .stack_symbol import StackSymbol -class Epsilon(Symbol): +class Epsilon(StackSymbol): """ An epsilon symbol """ # pylint: disable=too-few-public-methods - def __init__(self): + def __init__(self) -> None: super().__init__("epsilon") diff --git a/pyformlang/pda/pda.py b/pyformlang/pda/pda.py index 1deb75d..14c5d5b 100644 --- a/pyformlang/pda/pda.py +++ b/pyformlang/pda/pda.py @@ -1,19 +1,25 @@ """ We represent here a push-down automaton """ -import json -from itertools import product -from typing import AbstractSet, List, Iterable, Any -import networkx as nx -import numpy as np +from typing import List, Set, AbstractSet, Iterable, Tuple, Optional, Any +from json import dumps, loads +from itertools import product +from numpy import empty +from networkx import MultiDiGraph from networkx.drawing.nx_pydot import write_dot from pyformlang import cfg from pyformlang import finite_automaton from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.pda.cfg_variable_converter import CFGVariableConverter -from .epsilon import Epsilon -from .stack_symbol import StackSymbol +from pyformlang.finite_automaton import FiniteAutomaton +from pyformlang.finite_automaton import Symbol as FASymbol, Epsilon as FAEpsilon +from pyformlang.regular_expression import Regex +from pyformlang.cfg import CFG, Variable, Terminal, Production + from .state import State +from .symbol import Symbol as PDASymbol +from .stack_symbol import StackSymbol +from .epsilon import Epsilon as PDAEpsilon from .transition_function import TransitionFunction from .utils import PDAObjectCreator from ..finite_automaton import FiniteAutomaton @@ -82,26 +88,22 @@ def __init__(self, if final_states is not None: final_states = {self._pda_obj_creator.to_state(x) for x in final_states} - self._states = states or set() - self._states = set(self._states) - self._input_symbols = input_symbols or set() - self._input_symbols = set(self._input_symbols) - self._stack_alphabet = stack_alphabet or set() - self._stack_alphabet = set(self._stack_alphabet) + self._states: Set[State] = states or set() + self._input_symbols: Set[PDASymbol] = input_symbols or set() + self._stack_alphabet: Set[StackSymbol] = stack_alphabet or set() self._transition_function = transition_function or TransitionFunction() - self._start_state = start_state + self._start_state: Optional[State] = start_state if start_state is not None: self._states.add(start_state) - self._start_stack_symbol = start_stack_symbol + self._start_stack_symbol: Optional[StackSymbol] = start_stack_symbol if start_stack_symbol is not None: self._stack_alphabet.add(start_stack_symbol) - self._final_states = final_states or set() - self._final_states = set(self._final_states) + self._final_states: Set[State] = final_states or set() for state in self._final_states: self._states.add(state) - self._cfg_variable_converter = None + self._cfg_variable_converter: Optional[CFGVariableConverter] = None - def set_start_state(self, start_state: Any): + def set_start_state(self, start_state: Any) -> None: """ Sets the start state to the automaton Parameters @@ -113,7 +115,7 @@ def set_start_state(self, start_state: Any): self._states.add(start_state) self._start_state = start_state - def set_start_stack_symbol(self, start_stack_symbol: Any): + def set_start_stack_symbol(self, start_stack_symbol: Any) -> None: """ Sets the start stack symbol to the automaton Parameters @@ -126,7 +128,7 @@ def set_start_stack_symbol(self, start_stack_symbol: Any): self._stack_alphabet.add(start_stack_symbol) self._start_stack_symbol = start_stack_symbol - def add_final_state(self, state: Any): + def add_final_state(self, state: Any) -> None: """ Adds a final state to the automaton Parameters @@ -138,12 +140,12 @@ def add_final_state(self, state: Any): self._final_states.add(state) @property - def start_state(self): + def start_state(self) -> Optional[State]: """ Get start state """ return self._start_state @property - def states(self): + def states(self) -> Set[State]: """ Get the states fo the PDA Returns @@ -154,7 +156,7 @@ def states(self): return self._states @property - def final_states(self): + def final_states(self) -> Set[State]: """ The final states of the PDA Returns @@ -166,7 +168,7 @@ def final_states(self): return self._final_states @property - def input_symbols(self): + def input_symbols(self) -> Set[PDASymbol]: """ The input symbols of the PDA @@ -178,7 +180,7 @@ def input_symbols(self): return self._input_symbols @property - def stack_symbols(self): + def stack_symbols(self) -> Set[StackSymbol]: """ The stack symbols of the PDA @@ -199,7 +201,10 @@ def get_number_transitions(self) -> int: """ return self._transition_function.get_number_transitions() - def add_transitions(self, transitions): + def add_transitions(self, + transitions: + Iterable[Tuple[Any, Any, Any, + Any, List[Any]]]) -> None: """ Adds several transitions @@ -241,11 +246,11 @@ def add_transition(self, stack_to = [self._pda_obj_creator.to_stack_symbol(x) for x in stack_to] self._states.add(s_from) self._states.add(s_to) - if input_symbol != Epsilon(): + if input_symbol != PDAEpsilon(): self._input_symbols.add(input_symbol) self._stack_alphabet.add(stack_from) for stack_symbol in stack_to: - if stack_symbol != Epsilon(): + if stack_symbol != PDAEpsilon(): self._stack_alphabet.add(stack_symbol) self._transition_function.add_transition(s_from, input_symbol, @@ -274,11 +279,11 @@ def to_final_state(self) -> "PDA": new_stack_alphabet = self._stack_alphabet.copy() new_stack_alphabet.add(new_stack_symbol) new_tf = self._transition_function.copy() - new_tf.add_transition(new_start, Epsilon(), new_stack_symbol, + new_tf.add_transition(new_start, PDAEpsilon(), new_stack_symbol, self._start_state, [self._start_stack_symbol, new_stack_symbol]) for state in self._states: - new_tf.add_transition(state, Epsilon(), new_stack_symbol, + new_tf.add_transition(state, PDAEpsilon(), new_stack_symbol, new_end, []) return PDA(new_states, self._input_symbols.copy(), @@ -309,15 +314,15 @@ def to_empty_stack(self) -> "PDA": new_stack_alphabet = self._stack_alphabet.copy() new_stack_alphabet.add(new_stack_symbol) new_tf = self._transition_function.copy() - new_tf.add_transition(new_start, Epsilon(), new_stack_symbol, + new_tf.add_transition(new_start, PDAEpsilon(), new_stack_symbol, self._start_state, [self._start_stack_symbol, new_stack_symbol]) for state in self._final_states: for stack_symbol in new_stack_alphabet: - new_tf.add_transition(state, Epsilon(), stack_symbol, + new_tf.add_transition(state, PDAEpsilon(), stack_symbol, new_end, []) for stack_symbol in new_stack_alphabet: - new_tf.add_transition(new_end, Epsilon(), stack_symbol, + new_tf.add_transition(new_end, PDAEpsilon(), stack_symbol, new_end, []) return PDA(new_states, self._input_symbols.copy(), @@ -326,7 +331,7 @@ def to_empty_stack(self) -> "PDA": new_start, new_stack_symbol) - def to_cfg(self) -> "cfg.CFG": + def to_cfg(self) -> CFG: """ Turns the language L generated by this PDA when accepting \ on empty \ stack into a CFG that accepts the same language L @@ -338,7 +343,7 @@ def to_cfg(self) -> "cfg.CFG": """ self._cfg_variable_converter = \ CFGVariableConverter(self._states, self._stack_alphabet) - start = cfg.Variable("#StartCFG#") + start = Variable("#StartCFG#") productions = self._initialize_production_from_start_in_to_cfg(start) states = self._states for transition in self._transition_function: @@ -352,7 +357,7 @@ def to_cfg(self) -> "cfg.CFG": self._process_transition_and_state_to_cfg(productions, state, transition) - return cfg.CFG(start_symbol=start, productions=productions) + return CFG(start_symbol=start, productions=productions) def _process_transition_and_state_to_cfg(self, productions, @@ -370,10 +375,10 @@ def _process_transition_and_state_to_cfg_safe(self, productions, state, head = self._get_head_from_state_and_transition(state, transition) bodies = self._get_all_bodies_from_state_and_transition(state, transition) - if transition[INPUT][INPUT_SYMBOL] != Epsilon(): + if transition[INPUT][INPUT_SYMBOL] != PDAEpsilon(): _prepend_input_symbol_to_the_bodies(bodies, transition) for body in bodies: - productions.append(cfg.Production(head, body, filtering=False)) + productions.append(Production(head, body, filtering=False)) def _get_all_bodies_from_state_and_transition(self, state, transition): return self._generate_all_rules(transition[OUTPUT][STATE], @@ -382,7 +387,7 @@ def _get_all_bodies_from_state_and_transition(self, state, transition): def _generate_all_rules(self, s_from: State, s_to: State, ss_by: List[StackSymbol]) \ - -> Iterable[Iterable["cfg.Variable"]]: + -> Iterable[Iterable[Variable]]: """ Generates the rules in the CFG conversion """ if not ss_by: return [[]] @@ -431,7 +436,7 @@ def _initialize_production_from_start_in_to_cfg(self, start): productions = [] for state in self._states: productions.append( - cfg.Production( + Production( start, [self._cfg_variable_converter.to_cfg_combined_variable( self._start_state, @@ -477,7 +482,7 @@ def intersection(self, other: DeterministicFiniteAutomaton) -> "PDA": pda = PDA(start_state=start, start_stack_symbol=self._start_stack_symbol) symbols = self._input_symbols.copy() - symbols.add(Epsilon()) + symbols.add(PDAEpsilon()) to_process = [(self._start_state, start_state_other)] processed = {(self._start_state, start_state_other)} while to_process: @@ -550,7 +555,7 @@ def to_dict(self): """ return self._transition_function.to_dict() - def to_networkx(self) -> nx.MultiDiGraph: + def to_networkx(self) -> MultiDiGraph: """ Transform the current pda into a networkx graph @@ -560,7 +565,7 @@ def to_networkx(self) -> nx.MultiDiGraph: A networkx MultiDiGraph representing the pda """ - graph = nx.MultiDiGraph() + graph = MultiDiGraph() for state in self._states: graph.add_node(state.value, is_start=state == self._start_state, @@ -571,7 +576,7 @@ def to_networkx(self) -> nx.MultiDiGraph: self.__add_start_state_to_graph(graph, state) if self._start_stack_symbol is not None: graph.add_node("INITIAL_STACK_HIDDEN", - label=json.dumps(self._start_stack_symbol.value), + label=dumps(self._start_stack_symbol.value), shape=None, height=.0, width=.0) @@ -581,9 +586,9 @@ def to_networkx(self) -> nx.MultiDiGraph: graph.add_edge( s_from.value, s_to.value, - label=(json.dumps(in_symbol.value) + " -> " + - json.dumps(stack_from.value) + " / " + - json.dumps([x.value for x in stack_to]))) + label=(dumps(in_symbol.value) + " -> " + + dumps(stack_from.value) + " / " + + dumps([x.value for x in stack_to]))) return graph @classmethod @@ -616,10 +621,10 @@ def from_networkx(cls, graph): if "label" in transition: in_symbol, stack_info = transition["label"].split( " -> ") - in_symbol = json.loads(in_symbol) + in_symbol = loads(in_symbol) stack_from, stack_to = stack_info.split(" / ") - stack_from = json.loads(stack_from) - stack_to = json.loads(stack_to) + stack_from = loads(stack_from) + stack_to = loads(stack_to) pda.add_transition(s_from, in_symbol, stack_from, @@ -632,7 +637,7 @@ def from_networkx(cls, graph): pda.add_final_state(node) if "INITIAL_STACK_HIDDEN" in graph.nodes: pda.set_start_stack_symbol( - json.loads(graph.nodes["INITIAL_STACK_HIDDEN"]["label"])) + loads(graph.nodes["INITIAL_STACK_HIDDEN"]["label"])) return pda def write_as_dot(self, filename): @@ -660,7 +665,7 @@ def __add_start_state_to_graph(graph: nx.MultiDiGraph, state: State) -> None: def _prepend_input_symbol_to_the_bodies(bodies, transition): - to_prepend = cfg.Terminal(transition[INPUT][INPUT_SYMBOL].value) + to_prepend = Terminal(transition[INPUT][INPUT_SYMBOL].value) for body in bodies: body.insert(0, to_prepend) @@ -675,7 +680,7 @@ def __init__(self, states_pda, states_dfa): self._inverse_state_dfa = {} for i, state in enumerate(states_dfa): self._inverse_state_dfa[state] = i - self._conversions = np.empty((len(states_pda), len(states_dfa)), + self._conversions = empty((len(states_pda), len(states_dfa)), dtype=object) def to_pda_combined_state(self, state_pda, state_other): diff --git a/pyformlang/pda/pda_object.py b/pyformlang/pda/pda_object.py new file mode 100644 index 0000000..9db9077 --- /dev/null +++ b/pyformlang/pda/pda_object.py @@ -0,0 +1,27 @@ +""" Basic PDA object representation """ + +from typing import Any + + +class PDAObject: + """ Basic PDA object representation """ + + def __init__(self, value: Any) -> None: + self._value = value + self._hash = None + + def __hash__(self) -> int: + if self._hash is None: + self._hash = hash(self._value) + return self._hash + + @property + def value(self) -> Any: + """ Returns the value of the object """ + return self._value + + def __eq__(self, other: Any) -> bool: + raise NotImplementedError + + def __repr__(self) -> str: + raise NotImplementedError diff --git a/pyformlang/pda/stack_symbol.py b/pyformlang/pda/stack_symbol.py index b8dff27..8c5a4a5 100644 --- a/pyformlang/pda/stack_symbol.py +++ b/pyformlang/pda/stack_symbol.py @@ -1,9 +1,10 @@ """ A StackSymbol in a pushdown automaton """ -from typing import Optional +from typing import Optional, Any +from .symbol import Symbol -class StackSymbol: +class StackSymbol(Symbol): """ A StackSymbol in a pushdown automaton Parameters @@ -13,13 +14,15 @@ class StackSymbol: """ - def __init__(self, value): - self._value = value - self._hash = None + def __init__(self, value: Any) -> None: + super().__init__(value) self.index_cfg_converter: Optional[int] = None + def __hash__(self) -> int: + return super().__hash__() + @property - def value(self): + def value(self) -> Any: """ Returns the value of the stack symbol Returns @@ -29,13 +32,10 @@ def value(self): """ return self._value - def __hash__(self): - if self._hash is None: - self._hash = hash(self._value) - return self._hash - - def __eq__(self, other): - return self._value == other.value + def __eq__(self, other: Any) -> bool: + if isinstance(other, StackSymbol): + return self._value == other.value + return False - def __repr__(self): + def __repr__(self) -> str: return "StackSymbol(" + str(self._value) + ")" diff --git a/pyformlang/pda/state.py b/pyformlang/pda/state.py index d69abfd..316f1e8 100644 --- a/pyformlang/pda/state.py +++ b/pyformlang/pda/state.py @@ -1,7 +1,11 @@ """ A State in a pushdown automaton """ +from typing import Optional, Any -class State: +from .pda_object import PDAObject + + +class State(PDAObject): """ A State in a pushdown automaton Parameters @@ -11,19 +15,16 @@ class State: """ - def __init__(self, value): - self._value = value - self._hash = None - self.index_cfg_converter = None + def __init__(self, value: Any) -> None: + super().__init__(value) + self.index_cfg_converter: Optional[int] = None - def __hash__(self): - if self._hash is None: - self._hash = hash(self._value) - return self._hash + def __hash__(self) -> int: + return super().__hash__() @property - def value(self): - """ Returns the value of the state + def value(self) -> Any: + """ Returns the value of the symbol Returns ---------- @@ -32,10 +33,10 @@ def value(self): """ return self._value - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: if isinstance(other, State): return self._value == other.value return False - def __repr__(self): + def __repr__(self) -> str: return "State(" + str(self._value) + ")" diff --git a/pyformlang/pda/symbol.py b/pyformlang/pda/symbol.py index 94c7cf8..cb38df5 100644 --- a/pyformlang/pda/symbol.py +++ b/pyformlang/pda/symbol.py @@ -1,7 +1,11 @@ """ A Symbol in a pushdown automaton """ +from typing import Any -class Symbol: +from .pda_object import PDAObject + + +class Symbol(PDAObject): """ A Symbol in a pushdown automaton Parameters @@ -11,14 +15,11 @@ class Symbol: """ - def __init__(self, value): - self._value = value - - def __hash__(self): - return hash(str(self._value)) + def __hash__(self) -> int: + return super().__hash__() @property - def value(self): + def value(self) -> Any: """ Returns the value of the symbol Returns @@ -28,10 +29,10 @@ def value(self): """ return self._value - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: if isinstance(other, Symbol): return self._value == other.value return False - def __repr__(self): + def __repr__(self) -> str: return "Symbol(" + str(self._value) + ")" diff --git a/pyformlang/pda/transition_function.py b/pyformlang/pda/transition_function.py index a458cd1..70699db 100644 --- a/pyformlang/pda/transition_function.py +++ b/pyformlang/pda/transition_function.py @@ -1,6 +1,6 @@ """ A transition function in a pushdown automaton """ -from typing import List +from typing import Dict, List, Set, Iterator, Tuple, Optional from .stack_symbol import StackSymbol from .state import State @@ -10,13 +10,17 @@ class TransitionFunction: """ A transition function in a pushdown automaton """ - def __init__(self): - self._transitions = {} - self._iter_key = None - self._current_key = None - self._iter_inside = None + def __init__(self) -> None: + self._transitions: Dict[Tuple[State, Symbol, StackSymbol], + Set[Tuple[State, List[StackSymbol]]]] = {} + self._iter_key: Optional[Iterator[ + Tuple[State, Symbol, StackSymbol]]] = None + self._current_key: Optional[ + Tuple[State, Symbol, StackSymbol]] = None + self._iter_inside: Optional[Iterator[ + Tuple[State, List[StackSymbol]]]] = None - def get_number_transitions(self): + def get_number_transitions(self) -> int: """ Gets the number of transitions Returns @@ -32,7 +36,7 @@ def add_transition(self, input_symbol: Symbol, stack_from: StackSymbol, s_to: State, - stack_to: List[StackSymbol]): + stack_to: List[StackSymbol]) -> None: """ Add a transition to the function Parameters @@ -49,7 +53,7 @@ def add_transition(self, The string of stack symbol which replace the stack_from """ temp_in = (s_from, input_symbol, stack_from) - temp_out = (s_to, tuple(stack_to)) + temp_out = (s_to, stack_to.copy()) if temp_in in self._transitions: self._transitions[temp_in].add(temp_out) else: @@ -70,31 +74,35 @@ def copy(self) -> "TransitionFunction": temp_out[0], temp_out[1]) return new_tf - def __iter__(self): + def __iter__(self) -> Iterator[Tuple[Tuple[State, Symbol, StackSymbol], + Tuple[State, List[StackSymbol]]]]: self._iter_key = iter(self._transitions.keys()) self._current_key = None self._iter_inside = None return self - def __next__(self): + def __next__(self) -> Tuple[Tuple[State, Symbol, StackSymbol], + Tuple[State, List[StackSymbol]]]: if self._iter_inside is None: - next_key = next(self._iter_key) + next_key = next(self._iter_key) # type: ignore self._current_key = next_key self._iter_inside = iter(self._transitions[next_key]) try: next_value = next(self._iter_inside) - return self._current_key, next_value + return self._current_key, next_value # type: ignore except StopIteration: - next_key = next(self._iter_key) + next_key = next(self._iter_key) # type: ignore self._current_key = next_key self._iter_inside = iter(self._transitions[next_key]) return next(self) def __call__(self, s_from: State, input_symbol: Symbol, - stack_from: StackSymbol): - return self._transitions.get((s_from, input_symbol, stack_from), {}) + stack_from: StackSymbol) \ + -> Set[Tuple[State, List[StackSymbol]]]: + return self._transitions.get((s_from, input_symbol, stack_from), set()) - def to_dict(self): + def to_dict(self) -> Dict[Tuple[State, Symbol, StackSymbol], + Set[Tuple[State, List[StackSymbol]]]]: """Get the dictionary representation of the transitions""" return self._transitions diff --git a/pyformlang/pda/utils.py b/pyformlang/pda/utils.py index 3fcbb35..447e418 100644 --- a/pyformlang/pda/utils.py +++ b/pyformlang/pda/utils.py @@ -1,5 +1,7 @@ """ Useful functions for a PDA """ +from typing import Type, Dict, Any + from .state import State from .symbol import Symbol from .stack_symbol import StackSymbol @@ -11,18 +13,18 @@ class PDAObjectCreator: A Object in a PDA """ - def __init__(self): - self._state_creator = {} - self._symbol_creator = {} - self._stack_symbol_creator = {} + def __init__(self) -> None: + self._state_creator: Dict[Any, State] = {} + self._symbol_creator: Dict[Any, Symbol] = {} + self._stack_symbol_creator: Dict[Any, StackSymbol] = {} - def to_state(self, given): + def to_state(self, given: Any) -> State: """ Convert to a state """ if isinstance(given, State): return _get_object_from_known(given, self._state_creator) return _get_object_from_raw(given, self._state_creator, State) - def to_symbol(self, given): + def to_symbol(self, given: Any) -> Symbol: """ Convert to a symbol """ if isinstance(given, Symbol): return _get_object_from_known(given, self._symbol_creator) @@ -30,7 +32,7 @@ def to_symbol(self, given): return Epsilon() return _get_object_from_raw(given, self._symbol_creator, Symbol) - def to_stack_symbol(self, given): + def to_stack_symbol(self, given: Any) -> StackSymbol: """ Convert to a stack symbol """ if isinstance(given, StackSymbol): return _get_object_from_known(given, @@ -42,14 +44,17 @@ def to_stack_symbol(self, given): StackSymbol) -def _get_object_from_known(given, obj_converter): +def _get_object_from_known(given: Any, + obj_converter: Dict[Any, Any]) -> Any: if given.value in obj_converter: return obj_converter[given.value] obj_converter[given.value] = given return given -def _get_object_from_raw(given, obj_converter, to_type): +def _get_object_from_raw(given: Any, + obj_converter: Dict[Any, Any], + to_type: Type) -> Any: if given in obj_converter: return obj_converter[given] temp = to_type(given) From dda86126481fff40eb0ce1f149c8dabd3fccd92c Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 21 Oct 2024 20:30:42 +0300 Subject: [PATCH 05/30] finish pda annotation --- pyformlang/cfg/cfg.py | 10 +- pyformlang/pda/cfg_variable_converter.py | 37 +++-- pyformlang/pda/pda.py | 178 +++++++++++++++-------- 3 files changed, 141 insertions(+), 84 deletions(-) diff --git a/pyformlang/cfg/cfg.py b/pyformlang/cfg/cfg.py index 6f45163..731fdab 100644 --- a/pyformlang/cfg/cfg.py +++ b/pyformlang/cfg/cfg.py @@ -854,7 +854,7 @@ def _intersection_starting_rules(cfg: "CFG", new_body: List[CFGObject] = [ cv_converter.to_cfg_combined_variable( start_other, - cfg.start_symbol, # type: ignore + cfg.start_symbol, final_state)] productions_temp.append( Production(start, new_body, filtering=False)) @@ -892,7 +892,7 @@ def _intersection_when_two_non_terminals( cv_converter.to_cfg_combined_variable( state_p, production.head, state_r) productions_temp += [Production(new_head, - body, # type: ignore + body, filtering=False) for body in bodies] return productions_temp @@ -903,15 +903,15 @@ def _get_all_bodies(production: Production, state_r: FAState, states: Iterable[FAState], cv_converter: CFGVariableConverter) \ - -> List[List[Variable]]: + -> List[List[CFGObject]]: return [ [cv_converter.to_cfg_combined_variable( state_p, - production.body[0], # type: ignore + production.body[0], state_q), cv_converter.to_cfg_combined_variable( state_q, - production.body[1], # type: ignore + production.body[1], state_r)] for state_q in states] diff --git a/pyformlang/pda/cfg_variable_converter.py b/pyformlang/pda/cfg_variable_converter.py index 0712385..f2dd2f3 100644 --- a/pyformlang/pda/cfg_variable_converter.py +++ b/pyformlang/pda/cfg_variable_converter.py @@ -2,23 +2,22 @@ from typing import Dict, List, AbstractSet, Tuple, Optional, Any -from pyformlang.finite_automaton import State from pyformlang.cfg import Variable class CFGVariableConverter: """A CFG Variable Converter""" def __init__(self, - states: AbstractSet[State], - stack_symbols: AbstractSet[Variable]) -> None: + states: AbstractSet[Any], + stack_symbols: AbstractSet[Any]) -> None: self._counter = 0 - self._inverse_states_d: Dict[State, int] = {} + self._inverse_states_d: Dict[Any, int] = {} self._counter_state = 0 for self._counter_state, state in enumerate(states): self._inverse_states_d[state] = self._counter_state state.index_cfg_converter = self._counter_state self._counter_state += 1 - self._inverse_stack_symbol_d = {} + self._inverse_stack_symbol_d: Dict[Any, int] = {} self._counter_symbol = 0 for self._counter_symbol, symbol in enumerate(stack_symbols): self._inverse_stack_symbol_d[symbol] = self._counter_symbol @@ -29,7 +28,7 @@ def __init__(self, for _ in range(len(stack_symbols))] for _ in range(len(states))] - def _get_state_index(self, state: State) -> int: + def _get_state_index(self, state: Any) -> int: """Get the state index""" if state.index_cfg_converter is None: if state not in self._inverse_states_d: @@ -38,7 +37,7 @@ def _get_state_index(self, state: State) -> int: state.index_cfg_converter = self._inverse_states_d[state] return state.index_cfg_converter - def _get_symbol_index(self, symbol: Variable) -> int: + def _get_symbol_index(self, symbol: Any) -> int: """Get the symbol index""" if symbol.index_cfg_converter is None: if symbol not in self._inverse_stack_symbol_d: @@ -48,9 +47,9 @@ def _get_symbol_index(self, symbol: Variable) -> int: return symbol.index_cfg_converter def to_cfg_combined_variable(self, - state0: State, - stack_symbol: Variable, - state1: State) -> Variable: + state0: Any, + stack_symbol: Any, + state1: Any) -> Variable: """ Conversion used in the to_pda method """ i_stack_symbol, i_state0, i_state1 = self._get_indexes( stack_symbol, state0, state1) @@ -75,9 +74,9 @@ def _create_new_variable(self, return temp def set_valid(self, - state0: State, - stack_symbol: Variable, - state1: State) -> None: + state0: Any, + stack_symbol: Any, + state1: Any) -> None: """Set valid""" i_stack_symbol, i_state0, i_state1 = self._get_indexes( stack_symbol, state0, state1) @@ -85,9 +84,9 @@ def set_valid(self, self._conversions[i_state0][i_stack_symbol][i_state1] = (True, prev[1]) def is_valid_and_get(self, - state0: State, - stack_symbol: Variable, - state1: State) -> Optional[Variable]: + state0: Any, + stack_symbol: Any, + state1: Any) -> Optional[Variable]: """Check if valid and get""" i_state0 = self._get_state_index(state0) i_stack_symbol = self._get_symbol_index(stack_symbol) @@ -103,9 +102,9 @@ def is_valid_and_get(self, return current[1] def _get_indexes(self, - stack_symbol: Variable, - state0: State, - state1: State) \ + stack_symbol: Any, + state0: Any, + state1: Any) \ -> Tuple[int, int, int]: i_state0 = self._get_state_index(state0) i_stack_symbol = self._get_symbol_index(stack_symbol) diff --git a/pyformlang/pda/pda.py b/pyformlang/pda/pda.py index 14c5d5b..1f25130 100644 --- a/pyformlang/pda/pda.py +++ b/pyformlang/pda/pda.py @@ -1,6 +1,7 @@ """ We represent here a push-down automaton """ -from typing import List, Set, AbstractSet, Iterable, Tuple, Optional, Any +from typing import Dict, List, Set, AbstractSet, \ + Iterable, Tuple, Type, Optional, Any from json import dumps, loads from itertools import product from numpy import empty @@ -12,17 +13,18 @@ from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.pda.cfg_variable_converter import CFGVariableConverter from pyformlang.finite_automaton import FiniteAutomaton -from pyformlang.finite_automaton import Symbol as FASymbol, Epsilon as FAEpsilon +from pyformlang.finite_automaton import State as FAState, \ + Symbol as FASymbol, Epsilon as FAEpsilon from pyformlang.regular_expression import Regex from pyformlang.cfg import CFG, Variable, Terminal, Production +from pyformlang.cfg.cfg_object import CFGObject -from .state import State +from .state import State as PDAState from .symbol import Symbol as PDASymbol from .stack_symbol import StackSymbol from .epsilon import Epsilon as PDAEpsilon from .transition_function import TransitionFunction from .utils import PDAObjectCreator -from ..finite_automaton import FiniteAutomaton INPUT_SYMBOL = 1 @@ -88,20 +90,19 @@ def __init__(self, if final_states is not None: final_states = {self._pda_obj_creator.to_state(x) for x in final_states} - self._states: Set[State] = states or set() + self._states: Set[PDAState] = states or set() self._input_symbols: Set[PDASymbol] = input_symbols or set() self._stack_alphabet: Set[StackSymbol] = stack_alphabet or set() self._transition_function = transition_function or TransitionFunction() - self._start_state: Optional[State] = start_state + self._start_state: Optional[PDAState] = start_state if start_state is not None: self._states.add(start_state) self._start_stack_symbol: Optional[StackSymbol] = start_stack_symbol if start_stack_symbol is not None: self._stack_alphabet.add(start_stack_symbol) - self._final_states: Set[State] = final_states or set() + self._final_states: Set[PDAState] = final_states or set() for state in self._final_states: self._states.add(state) - self._cfg_variable_converter: Optional[CFGVariableConverter] = None def set_start_state(self, start_state: Any) -> None: """ Sets the start state to the automaton @@ -140,12 +141,12 @@ def add_final_state(self, state: Any) -> None: self._final_states.add(state) @property - def start_state(self) -> Optional[State]: + def start_state(self) -> Optional[PDAState]: """ Get start state """ return self._start_state @property - def states(self) -> Set[State]: + def states(self) -> Set[PDAState]: """ Get the states fo the PDA Returns @@ -156,7 +157,7 @@ def states(self) -> Set[State]: return self._states @property - def final_states(self) -> Set[State]: + def final_states(self) -> Set[PDAState]: """ The final states of the PDA Returns @@ -268,8 +269,13 @@ def to_final_state(self) -> "PDA": The new PDA which accepts by final state the language that \ was accepted by empty stack """ - new_start = get_next_free("#STARTTOFINAL#", State, self._states) - new_end = get_next_free("#ENDTOFINAL#", State, self._states) + if self._start_state is None: + raise RuntimeError("start state should not be None") + if self._start_stack_symbol is None: + raise RuntimeError("start stack symbol should not be None") + + new_start = get_next_free("#STARTTOFINAL#", PDAState, self._states) + new_end = get_next_free("#ENDTOFINAL#", PDAState, self._states) new_stack_symbol = get_next_free("#BOTTOMTOFINAL#", StackSymbol, self._stack_alphabet) @@ -303,8 +309,13 @@ def to_empty_stack(self) -> "PDA": The new PDA which accepts by empty stack the language that was \ accepted by final state """ - new_start = get_next_free("#STARTEMPTYS#", State, self._states) - new_end = get_next_free("#ENDEMPTYS#", State, self._states) + if self._start_state is None: + raise RuntimeError("start state should not be None") + if self._start_stack_symbol is None: + raise RuntimeError("start stack symbol should not be None") + + new_start = get_next_free("#STARTEMPTYS#", PDAState, self._states) + new_end = get_next_free("#ENDEMPTYS#", PDAState, self._states) new_stack_symbol = get_next_free("#BOTTOMEMPTYS#", StackSymbol, self._stack_alphabet) @@ -341,14 +352,15 @@ def to_cfg(self) -> CFG: new_cfg : :class:`~pyformlang.cfg.CFG` The equivalent CFG """ - self._cfg_variable_converter = \ + variable_converter = \ CFGVariableConverter(self._states, self._stack_alphabet) start = Variable("#StartCFG#") - productions = self._initialize_production_from_start_in_to_cfg(start) + productions = self._initialize_production_from_start_in_to_cfg( + start, variable_converter) states = self._states for transition in self._transition_function: for state in states: - self._cfg_variable_converter.set_valid( + variable_converter.set_valid( transition[INPUT][STATE], transition[INPUT][STACK_FROM], state) @@ -356,45 +368,67 @@ def to_cfg(self) -> CFG: for state in states: self._process_transition_and_state_to_cfg(productions, state, - transition) + transition, + variable_converter) return CFG(start_symbol=start, productions=productions) - def _process_transition_and_state_to_cfg(self, - productions, - state, - transition): + def _process_transition_and_state_to_cfg( + self, + productions: List[Production], + state: PDAState, + transition: Tuple[Tuple, Tuple], + variable_converter: CFGVariableConverter) \ + -> None: current_state_has_empty_new_stack = \ len(transition[OUTPUT][NEW_STACK]) == 0 and \ state != transition[OUTPUT][STATE] if not current_state_has_empty_new_stack: - self._process_transition_and_state_to_cfg_safe(productions, state, - transition) - - def _process_transition_and_state_to_cfg_safe(self, productions, state, - transition): - head = self._get_head_from_state_and_transition(state, transition) - bodies = self._get_all_bodies_from_state_and_transition(state, - transition) + self._process_transition_and_state_to_cfg_safe(productions, + state, + transition, + variable_converter) + + def _process_transition_and_state_to_cfg_safe( + self, + productions: List[Production], + state: PDAState, + transition: Tuple[Tuple, Tuple], + variable_converter: CFGVariableConverter) \ + -> None: + head = self._get_head_from_state_and_transition( + state, transition, variable_converter) + bodies = self._get_all_bodies_from_state_and_transition( + state, transition, variable_converter) if transition[INPUT][INPUT_SYMBOL] != PDAEpsilon(): _prepend_input_symbol_to_the_bodies(bodies, transition) for body in bodies: productions.append(Production(head, body, filtering=False)) - def _get_all_bodies_from_state_and_transition(self, state, transition): + def _get_all_bodies_from_state_and_transition( + self, + state: PDAState, + transition: Tuple[Tuple, Tuple], + variable_converter: CFGVariableConverter) \ + -> List[List[CFGObject]]: return self._generate_all_rules(transition[OUTPUT][STATE], state, - transition[OUTPUT][NEW_STACK]) - - def _generate_all_rules(self, s_from: State, s_to: State, - ss_by: List[StackSymbol]) \ - -> Iterable[Iterable[Variable]]: + transition[OUTPUT][NEW_STACK], + variable_converter) + + def _generate_all_rules(self, + s_from: PDAState, + s_to: PDAState, + ss_by: List[StackSymbol], + variable_converter: CFGVariableConverter) \ + -> List[List[CFGObject]]: """ Generates the rules in the CFG conversion """ if not ss_by: return [[]] if len(ss_by) == 1: - return self._generate_length_one_rules(s_from, s_to, ss_by) + return self._generate_length_one_rules( + s_from, s_to, ss_by, variable_converter) res = [] - is_valid_and_get = self._cfg_variable_converter.is_valid_and_get + is_valid_and_get = variable_converter.is_valid_and_get append_to_res = res.append length_ss_by_minus_one = len(ss_by) - 1 for states in product(self._states, repeat=length_ss_by_minus_one): @@ -419,26 +453,40 @@ def _generate_all_rules(self, s_from: State, s_to: State, append_to_res(temp) return res - def _generate_length_one_rules(self, s_from, s_to, ss_by): - state = self._cfg_variable_converter.is_valid_and_get(s_from, ss_by[0], + def _generate_length_one_rules(self, + s_from: PDAState, + s_to: PDAState, + ss_by: List[StackSymbol], + variable_converter: CFGVariableConverter) \ + -> List[List[CFGObject]]: + state = variable_converter.is_valid_and_get(s_from, ss_by[0], s_to) if state is not None: return [[state]] return [] - def _get_head_from_state_and_transition(self, state, transition): - return self._cfg_variable_converter.to_cfg_combined_variable( + def _get_head_from_state_and_transition( + self, + state: PDAState, + transition: Tuple[Tuple, Tuple], + variable_converter: CFGVariableConverter) \ + -> Variable: + return variable_converter.to_cfg_combined_variable( transition[INPUT][STATE], transition[INPUT][STACK_FROM], state) - def _initialize_production_from_start_in_to_cfg(self, start): + def _initialize_production_from_start_in_to_cfg( + self, + start: Variable, + variable_converter: CFGVariableConverter) \ + -> List[Production]: productions = [] for state in self._states: productions.append( Production( start, - [self._cfg_variable_converter.to_cfg_combined_variable( + [variable_converter.to_cfg_combined_variable( self._start_state, self._start_stack_symbol, state)])) @@ -545,7 +593,8 @@ def __and__(self, other: DeterministicFiniteAutomaton) -> "PDA": """ return self.intersection(other) - def to_dict(self): + def to_dict(self) -> Dict[Tuple[PDAState, PDASymbol, StackSymbol], + Set[Tuple[PDAState, List[StackSymbol]]]]: """ Get the transitions of the PDA as a dictionary Returns @@ -592,7 +641,7 @@ def to_networkx(self) -> MultiDiGraph: return graph @classmethod - def from_networkx(cls, graph): + def from_networkx(cls, graph: MultiDiGraph) -> "PDA": """ Import a networkx graph into a PDA. \ The imported graph requires to have the good format, i.e. to come \ @@ -640,7 +689,7 @@ def from_networkx(cls, graph): loads(graph.nodes["INITIAL_STACK_HIDDEN"]["label"])) return pda - def write_as_dot(self, filename): + def write_as_dot(self, filename: str) -> None: """ Write the PDA in dot format into a file @@ -652,19 +701,22 @@ def write_as_dot(self, filename): """ write_dot(self.to_networkx(), filename) - @staticmethod - def __add_start_state_to_graph(graph: nx.MultiDiGraph, state: State) -> None: + def __add_start_state_to_graph(self, + graph: MultiDiGraph, + state: PDAState) -> None: """ Adds a starting node to a given graph """ graph.add_node("starting_" + str(state.value), - label="", - shape=None, - height=.0, - width=.0) + label="", + shape=None, + height=.0, + width=.0) graph.add_edge("starting_" + str(state.value), - state.value) + state.value) -def _prepend_input_symbol_to_the_bodies(bodies, transition): +def _prepend_input_symbol_to_the_bodies(bodies: List[List[CFGObject]], + transition: Tuple[Tuple, Tuple]) \ + -> None: to_prepend = Terminal(transition[INPUT][INPUT_SYMBOL].value) for body in bodies: body.insert(0, to_prepend) @@ -673,7 +725,9 @@ def _prepend_input_symbol_to_the_bodies(bodies, transition): class _PDAStateConverter: # pylint: disable=too-few-public-methods - def __init__(self, states_pda, states_dfa): + def __init__(self, + states_pda: Set[PDAState], + states_dfa: Set[FAState]) -> None: self._inverse_state_pda = {} for i, state in enumerate(states_pda): self._inverse_state_pda[state] = i @@ -683,17 +737,21 @@ def __init__(self, states_pda, states_dfa): self._conversions = empty((len(states_pda), len(states_dfa)), dtype=object) - def to_pda_combined_state(self, state_pda, state_other): + def to_pda_combined_state(self, + state_pda: PDAState, + state_other: FAState) -> PDAState: """ To PDA state in the intersection function """ i_state_pda = self._inverse_state_pda[state_pda] i_state_other = self._inverse_state_dfa[state_other] if self._conversions[i_state_pda, i_state_other] is None: - self._conversions[i_state_pda, i_state_other] = State( + self._conversions[i_state_pda, i_state_other] = PDAState( (state_pda, state_other)) return self._conversions[i_state_pda, i_state_other] -def get_next_free(prefix, type_generating, to_check): +def get_next_free(prefix: str, + type_generating: Type, + to_check: Iterable[Any]) -> Any: """ Get free next state or symbol """ idx = 0 new_var = type_generating(prefix) From b7947e29696c4f475bc64ab909c2df7d79203087 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Sun, 1 Dec 2024 22:16:45 +0300 Subject: [PATCH 06/30] use Hashable as input type, correct some annotations, correct pda iteration, add missing pda methods --- pyformlang/cfg/__init__.py | 6 +- pyformlang/cfg/cfg.py | 181 ++++++++--------- pyformlang/cfg/cfg_object.py | 21 +- pyformlang/cfg/cyk_table.py | 21 +- pyformlang/cfg/epsilon.py | 8 + pyformlang/cfg/llone_parser.py | 21 +- pyformlang/cfg/parse_tree.py | 11 +- pyformlang/cfg/pda_object_creator.py | 7 +- pyformlang/cfg/production.py | 14 +- pyformlang/cfg/recursive_decent_parser.py | 18 +- pyformlang/cfg/terminal.py | 12 -- pyformlang/cfg/utils.py | 6 +- pyformlang/cfg/utils_cfg.py | 4 +- pyformlang/cfg/variable.py | 21 +- pyformlang/pda/cfg_variable_converter.py | 1 + pyformlang/pda/epsilon.py | 8 + pyformlang/pda/pda.py | 231 ++++++++++++---------- pyformlang/pda/pda_object.py | 20 +- pyformlang/pda/stack_symbol.py | 25 +-- pyformlang/pda/state.py | 25 +-- pyformlang/pda/symbol.py | 21 +- pyformlang/pda/transition_function.py | 60 +++--- pyformlang/pda/utils.py | 17 +- 23 files changed, 376 insertions(+), 383 deletions(-) diff --git a/pyformlang/cfg/__init__.py b/pyformlang/cfg/__init__.py index 0fd4888..826c411 100644 --- a/pyformlang/cfg/__init__.py +++ b/pyformlang/cfg/__init__.py @@ -20,14 +20,16 @@ """ +from .cfg_object import CFGObject from .variable import Variable from .terminal import Terminal +from .epsilon import Epsilon from .production import Production from .cfg import CFG -from .epsilon import Epsilon from .llone_parser import LLOneParser -__all__ = ["Variable", +__all__ = ["CFGObject", + "Variable", "Terminal", "Production", "CFG", diff --git a/pyformlang/cfg/cfg.py b/pyformlang/cfg/cfg.py index 731fdab..19d1d3d 100644 --- a/pyformlang/cfg/cfg.py +++ b/pyformlang/cfg/cfg.py @@ -2,27 +2,25 @@ from string import ascii_uppercase from copy import deepcopy -from typing import Dict, List, Iterable, Set, \ - AbstractSet, Tuple, Optional, Any +from typing import Dict, List, Set, AbstractSet, \ + Iterable, Tuple, Optional, Hashable from networkx import DiGraph, find_cycle from networkx.exception import NetworkXNoCycle -from networkx import DiGraph, find_cycle -from networkx.exception import NetworkXNoCycle from pyformlang.finite_automaton import DeterministicFiniteAutomaton, State as FAState from pyformlang.pda import PDA, State as PDAState, Epsilon as PDAEpsilon from pyformlang.pda.cfg_variable_converter import CFGVariableConverter +from .cyk_table import CYKTable, ParseTree, DerivationDoesNotExist +from .pda_object_creator import PDAObjectCreator from .cfg_object import CFGObject -from .cyk_table import CYKTable, CYKNode, DerivationDoesNotExist +from .variable import Variable +from .terminal import Terminal from .epsilon import Epsilon as CFGEpsilon -from .pda_object_creator import PDAObjectCreator from .production import Production -from .terminal import Terminal from .utils import to_variable, to_terminal from .utils_cfg import remove_nullable_production, get_productions_d -from .variable import Variable EPSILON_SYMBOLS = ["epsilon", "$", "ε", "ϵ", "Є"] @@ -58,9 +56,9 @@ class CFG: # pylint: disable=too-many-instance-attributes def __init__(self, - variables: AbstractSet[Any] = None, - terminals: AbstractSet[Any] = None, - start_symbol: Any = None, + variables: AbstractSet[Hashable] = None, + terminals: AbstractSet[Hashable] = None, + start_symbol: Hashable = None, productions: Iterable[Production] = None) -> None: if variables is not None: variables = {to_variable(x) for x in variables} @@ -84,6 +82,50 @@ def __init__(self, self._remaining_lists: Dict[CFGObject, List[int]] = {} self._added_impacts: Set[CFGObject] = set() + @property + def variables(self) -> Set[Variable]: + """ Gives the variables + + Returns + ---------- + variables : set of :class:`~pyformlang.cfg.Variable` + The variables of the CFG + """ + return self._variables + + @property + def terminals(self) -> Set[Terminal]: + """ Gives the terminals + + Returns + ---------- + terminals : set of :class:`~pyformlang.cfg.Terminal` + The terminals of the CFG + """ + return self._terminals + + @property + def productions(self) -> Set[Production]: + """ Gives the productions + + Returns + ---------- + productions : set of :class:`~pyformlang.cfg.Production` + The productions of the CFG + """ + return self._productions + + @property + def start_symbol(self) -> Optional[Variable]: + """ Gives the start symbol + + Returns + ---------- + start_variable : :class:`~pyformlang.cfg.Variable` + The start symbol of the CFG + """ + return self._start_symbol + def __initialize_production_in_cfg(self, production: Production) -> None: self._variables.add(production.head) for cfg_object in production.body: @@ -203,8 +245,10 @@ def get_reachable_symbols(self) -> Set[CFGObject]: reachable_symbols : set of :class:`~pyformlang.cfg.CFGObject` The reachable symbols of the CFG """ - r_symbols = set() - r_symbols.add(self._start_symbol) + if not self.start_symbol: + return set() + r_symbols: Set[CFGObject] = set() + r_symbols.add(self.start_symbol) reachable_transition_d = {} for production in self._productions: temp = reachable_transition_d.setdefault(production.head, []) @@ -434,50 +478,6 @@ def to_normal_form(self) -> "CFG": self._normal_form = cfg return cfg - @property - def variables(self) -> Set[Variable]: - """ Gives the variables - - Returns - ---------- - variables : set of :class:`~pyformlang.cfg.Variable` - The variables of the CFG - """ - return self._variables - - @property - def terminals(self) -> Set[Terminal]: - """ Gives the terminals - - Returns - ---------- - terminals : set of :class:`~pyformlang.cfg.Terminal` - The terminals of the CFG - """ - return self._terminals - - @property - def productions(self) -> Set[Production]: - """ Gives the productions - - Returns - ---------- - productions : set of :class:`~pyformlang.cfg.Production` - The productions of the CFG - """ - return self._productions - - @property - def start_symbol(self) -> Optional[Variable]: - """ Gives the start symbol - - Returns - ---------- - start_variable : :class:`~pyformlang.cfg.Variable` - The start symbol of the CFG - """ - return self._start_symbol - def substitute(self, substitution: Dict[Terminal, "CFG"]) -> "CFG": """ Substitutes CFG to terminals in the current CFG @@ -496,7 +496,7 @@ def substitute(self, substitution: Dict[Terminal, "CFG"]) -> "CFG": new_variables_d = {} new_vars = set() for variable in self._variables: - temp = Variable(variable.value + SUBS_SUFFIX + str(idx)) + temp = Variable(str(variable) + SUBS_SUFFIX + str(idx)) new_variables_d[variable] = temp new_vars.add(temp) idx += 1 @@ -506,18 +506,18 @@ def substitute(self, substitution: Dict[Terminal, "CFG"]) -> "CFG": for ter, cfg in substitution.items(): new_variables_d_local = {} for variable in cfg.variables: - temp = Variable(variable.value + SUBS_SUFFIX + str(idx)) + temp = Variable(str(variable) + SUBS_SUFFIX + str(idx)) new_variables_d_local[variable] = temp new_vars.add(temp) idx += 1 # Add rules of the new cfg for production in cfg.productions: body = [] - for cfgobj in production.body: - if cfgobj in new_variables_d_local: - body.append(new_variables_d_local[cfgobj]) + for cfg_obj in production.body: + if cfg_obj in new_variables_d_local: + body.append(new_variables_d_local[cfg_obj]) else: - body.append(cfgobj) + body.append(cfg_obj) productions.append( Production(new_variables_d_local[production.head], body)) @@ -525,13 +525,13 @@ def substitute(self, substitution: Dict[Terminal, "CFG"]) -> "CFG": terminals = terminals.union(cfg.terminals) for production in self._productions: body = [] - for cfgobj in production.body: - if cfgobj in new_variables_d: - body.append(new_variables_d[cfgobj]) - elif cfgobj in final_replacement: - body.append(final_replacement[cfgobj]) + for cfg_obj in production.body: + if cfg_obj in new_variables_d: + body.append(new_variables_d[cfg_obj]) + elif cfg_obj in final_replacement: + body.append(final_replacement[cfg_obj]) else: - body.append(cfgobj) + body.append(cfg_obj) productions.append(Production(new_variables_d[production.head], body)) return CFG(new_vars, None, new_variables_d[self._start_symbol], @@ -709,10 +709,7 @@ def is_empty(self) -> bool: def __bool__(self) -> bool: return not self.is_empty() - def __contains__(self, word: Iterable[Any]) -> bool: - return self.contains(word) - - def contains(self, word: Iterable[Any]) -> bool: + def contains(self, word: Iterable[Hashable]) -> bool: """ Gives the membership of a word to the grammar Parameters @@ -732,7 +729,10 @@ def contains(self, word: Iterable[Any]) -> bool: cyk_table = CYKTable(self, word) return cyk_table.generate_word() - def get_cnf_parse_tree(self, word: Iterable[Any]) -> CYKNode: + def __contains__(self, word: Iterable[Hashable]) -> bool: + return self.contains(word) + + def get_cnf_parse_tree(self, word: Iterable[Hashable]) -> ParseTree: """ Get a parse tree of the CNF of this grammar @@ -820,8 +820,7 @@ def intersection(self, other: DeterministicFiniteAutomaton) -> "CFG": generate_empty = self.contains([]) and other.accepts([]) cfg = self.to_normal_form() states = set(other.states) - cv_converter = \ - CFGVariableConverter(states, cfg.variables) + cv_converter = CFGVariableConverter(states, cfg.variables) new_productions = [] for production in cfg.productions: if len(production.body) == 2: @@ -831,8 +830,8 @@ def intersection(self, other: DeterministicFiniteAutomaton) -> "CFG": new_productions += self._intersection_when_terminal( other, production, - cv_converter, - states) + states, + cv_converter) new_productions += self._intersection_starting_rules(cfg, other, cv_converter) @@ -861,15 +860,18 @@ def _intersection_starting_rules(cfg: "CFG", return productions_temp @staticmethod - def _intersection_when_terminal(other_fst, production, - cv_converter, states): + def _intersection_when_terminal( + other: DeterministicFiniteAutomaton, + production: Production, + states: Iterable[FAState], + cv_converter: CFGVariableConverter) \ + -> List[Production]: productions_temp = [] for state_p in states: - next_states = other_fst(state_p, production.body[0].value) - if next_states: - new_head = \ - cv_converter.to_cfg_combined_variable( - state_p, production.head, next_states[0]) + next_state = other.get_next_state(state_p, production.body[0].value) + if next_state: + new_head = cv_converter.to_cfg_combined_variable( + state_p, production.head, next_state) productions_temp.append( Production(new_head, [production.body[0]], @@ -888,9 +890,8 @@ def _intersection_when_two_non_terminals( bodies = CFG._get_all_bodies(production, state_p, state_r, states, cv_converter) - new_head = \ - cv_converter.to_cfg_combined_variable( - state_p, production.head, state_r) + new_head = cv_converter.to_cfg_combined_variable( + state_p, production.head, state_r) productions_temp += [Production(new_head, body, filtering=False) @@ -915,7 +916,7 @@ def _get_all_bodies(production: Production, state_r)] for state_q in states] - def __and__(self, other): + def __and__(self, other: DeterministicFiniteAutomaton) -> "CFG": """ Gives the intersection of the current CFG with an other object Parameters @@ -1038,7 +1039,7 @@ def to_text(self) -> str: @classmethod def from_text(cls, text: str, - start_symbol: Optional[Variable] = Variable("S")) -> "CFG": + start_symbol: Optional[Hashable] = Variable("S")) -> "CFG": """ Read a context free grammar from a text. The text contains one rule per line. diff --git a/pyformlang/cfg/cfg_object.py b/pyformlang/cfg/cfg_object.py index 3d0d17c..99fcc90 100644 --- a/pyformlang/cfg/cfg_object.py +++ b/pyformlang/cfg/cfg_object.py @@ -1,6 +1,7 @@ """ An object in a CFG (Variable and Terminal)""" -from typing import Any +from typing import Hashable, Any +from abc import abstractmethod class CFGObject: # pylint: disable=too-few-public-methods @@ -14,15 +15,29 @@ class CFGObject: # pylint: disable=too-few-public-methods __slots__ = ["_value", "_hash"] - def __init__(self, value: Any) -> None: + def __init__(self, value: Hashable) -> None: self._value = value self._hash = None @property - def value(self) -> Any: + def value(self) -> Hashable: """Gets the value of the object""" return self._value + def __eq__(self, other: Any) -> bool: + if isinstance(other, CFGObject): + return self.value == other.value + return self.value == other + + def __hash__(self) -> int: + if self._hash is None: + self._hash = hash(self._value) + return self._hash + + def __str__(self) -> str: + return str(self._value) + + @abstractmethod def to_text(self) -> str: """ Turns the object into a text format """ raise NotImplementedError diff --git a/pyformlang/cfg/cyk_table.py b/pyformlang/cfg/cyk_table.py index c373a7b..dabbd85 100644 --- a/pyformlang/cfg/cyk_table.py +++ b/pyformlang/cfg/cyk_table.py @@ -4,9 +4,10 @@ from typing import Dict, List, Set, Iterable, Tuple, Any -from pyformlang.cfg import CFG, Terminal -from pyformlang.cfg.cfg_object import CFGObject -from pyformlang.cfg.parse_tree import ParseTree +from .cfg import CFG, Terminal +from .cfg_object import CFGObject +from .epsilon import Epsilon +from .parse_tree import ParseTree class CYKTable: @@ -95,7 +96,7 @@ def _generates_all_terminals(self) -> bool: generate_all_terminals = False return generate_all_terminals - def get_parse_tree(self) -> "CYKNode": + def get_parse_tree(self) -> ParseTree: """ Give the parse tree associated with this CYK Table @@ -103,10 +104,10 @@ def get_parse_tree(self) -> "CYKNode": ------- parse_tree : :class:`~pyformlang.cfg.ParseTree` """ - if self._word and not self.generate_word(): - raise DerivationDoesNotExist if not self._word: - return CYKNode(self._cnf.start_symbol) + return CYKNode(self._cnf.start_symbol or Epsilon()) + if not self.generate_word(): + raise DerivationDoesNotExist root = [ x for x in self._cyk_table[(0, len(self._word))] @@ -118,9 +119,9 @@ class CYKNode(ParseTree): """A node in the CYK table""" def __init__(self, - value: Any, + value: CFGObject, left_son: "CYKNode" = None, - right_son: "CYKNode" = None): + right_son: "CYKNode" = None) -> None: super().__init__(value) self.value = value self.left_son = left_son @@ -130,7 +131,7 @@ def __init__(self, if right_son is not None: self.sons.append(right_son) - def __eq__(self, other: Any): + def __eq__(self, other: Any) -> bool: if isinstance(other, CYKNode): return self.value == other.value return self.value == other diff --git a/pyformlang/cfg/epsilon.py b/pyformlang/cfg/epsilon.py index 1f6047b..b70e7fc 100644 --- a/pyformlang/cfg/epsilon.py +++ b/pyformlang/cfg/epsilon.py @@ -1,5 +1,7 @@ """ An epsilon terminal """ +from typing import Any + from .terminal import Terminal @@ -10,5 +12,11 @@ class Epsilon(Terminal): def __init__(self) -> None: super().__init__("epsilon") + def __eq__(self, other: Any) -> bool: + return isinstance(other, Epsilon) + + def __hash__(self) -> int: + return super().__hash__() + def to_text(self) -> str: return "epsilon" diff --git a/pyformlang/cfg/llone_parser.py b/pyformlang/cfg/llone_parser.py index 9aa0dfd..8d258b8 100644 --- a/pyformlang/cfg/llone_parser.py +++ b/pyformlang/cfg/llone_parser.py @@ -1,15 +1,14 @@ """ LL(1) Parser """ -from typing import Dict, List, Set, Iterable, Tuple, Any +from typing import Dict, List, Set, Iterable, Tuple, Hashable -from pyformlang.cfg import CFG, Production -from pyformlang.cfg.cfg_object import CFGObject -from pyformlang.cfg.epsilon import Epsilon -from pyformlang.cfg.cfg import NotParsableException -from pyformlang.cfg.parse_tree import ParseTree -from pyformlang.cfg.set_queue import SetQueue -from pyformlang.cfg.utils import to_terminal -from pyformlang.cfg.utils_cfg import get_productions_d +from .cfg import CFG, Production, NotParsableException +from .cfg_object import CFGObject +from .epsilon import Epsilon +from .parse_tree import ParseTree +from .set_queue import SetQueue +from .utils import to_terminal +from .utils_cfg import get_productions_d class LLOneParser: @@ -203,7 +202,7 @@ def is_llone_parsable(self) -> bool: return False return True - def get_llone_parse_tree(self, word: Iterable[Any]) -> ParseTree: + def get_llone_parse_tree(self, word: Iterable[Hashable]) -> ParseTree: """ Get LL(1) parse Tree @@ -227,7 +226,7 @@ def get_llone_parse_tree(self, word: Iterable[Any]) -> ParseTree: word.append("$") # type: ignore word = word[::-1] parsing_table = self.get_llone_parsing_table() - parse_tree = ParseTree(self._cfg.start_symbol) + parse_tree = ParseTree(self._cfg.start_symbol or Epsilon()) stack = ["$", parse_tree] while stack: current = stack.pop() diff --git a/pyformlang/cfg/parse_tree.py b/pyformlang/cfg/parse_tree.py index 3228e32..cf74554 100644 --- a/pyformlang/cfg/parse_tree.py +++ b/pyformlang/cfg/parse_tree.py @@ -1,24 +1,25 @@ """ A parse Tree """ -from typing import List, Any +from typing import List from networkx import DiGraph from networkx.drawing.nx_pydot import write_dot -from pyformlang.cfg.variable import Variable +from .cfg_object import CFGObject +from .variable import Variable class ParseTree: """ A parse tree """ - def __init__(self, value: Any) -> None: + def __init__(self, value: CFGObject) -> None: self.value = value self.sons: List[ParseTree] = [] def __repr__(self) -> str: return "ParseTree(" + str(self.value) + ", " + str(self.sons) + ")" - def get_leftmost_derivation(self) -> List[List[Any]]: + def get_leftmost_derivation(self) -> List[List[CFGObject]]: """ Get the leftmost derivation @@ -48,7 +49,7 @@ def get_leftmost_derivation(self) -> List[List[Any]]: start.append(son.value) return res - def get_rightmost_derivation(self) -> List[List[Any]]: + def get_rightmost_derivation(self) -> List[List[CFGObject]]: """ Get the leftmost derivation diff --git a/pyformlang/cfg/pda_object_creator.py b/pyformlang/cfg/pda_object_creator.py index 97093c8..0d32ce0 100644 --- a/pyformlang/cfg/pda_object_creator.py +++ b/pyformlang/cfg/pda_object_creator.py @@ -2,10 +2,13 @@ from typing import Dict, Iterable, Optional -from pyformlang.cfg import Epsilon as CFGEpsilon, Terminal, Variable -from pyformlang.cfg.cfg_object import CFGObject from pyformlang.pda import Epsilon as PDAEpsilon, Symbol, StackSymbol +from .cfg_object import CFGObject +from .variable import Variable +from .terminal import Terminal +from .epsilon import Epsilon as CFGEpsilon + class PDAObjectCreator: """Creates Objects for a PDA""" diff --git a/pyformlang/cfg/production.py b/pyformlang/cfg/production.py index 40c2968..0e79922 100644 --- a/pyformlang/cfg/production.py +++ b/pyformlang/cfg/production.py @@ -2,9 +2,9 @@ from typing import List, Any -from .terminal import Terminal -from .variable import Variable from .cfg_object import CFGObject +from .variable import Variable +from .terminal import Terminal from .epsilon import Epsilon @@ -45,16 +45,16 @@ def body(self) -> List[CFGObject]: def __repr__(self) -> str: return str(self.head) + " -> " + " ".join([str(x) for x in self.body]) - def __hash__(self) -> int: - if self._hash is None: - self._hash = sum(map(hash, self._body)) + hash(self._head) - return self._hash - def __eq__(self, other: Any) -> bool: if isinstance(other, Production): return self.head == other.head and self.body == other.body return False + def __hash__(self) -> int: + if self._hash is None: + self._hash = sum(map(hash, self._body)) + hash(self._head) + return self._hash + def is_normal_form(self) -> bool: """ Tells is the production is in Chomsky Normal Form diff --git a/pyformlang/cfg/recursive_decent_parser.py b/pyformlang/cfg/recursive_decent_parser.py index befa8a9..1974b61 100644 --- a/pyformlang/cfg/recursive_decent_parser.py +++ b/pyformlang/cfg/recursive_decent_parser.py @@ -2,12 +2,14 @@ A recursive decent parser. """ -from typing import List, Iterable, Tuple, Optional, Any +from typing import List, Iterable, Tuple, Optional, Hashable, Any -from pyformlang.cfg import CFG, Terminal, Variable, Epsilon -from pyformlang.cfg.cfg import NotParsableException -from pyformlang.cfg.parse_tree import ParseTree -from pyformlang.cfg.utils import to_terminal +from .cfg import CFG, NotParsableException +from .variable import Variable +from .terminal import Terminal +from .epsilon import Epsilon +from .parse_tree import ParseTree +from .utils import to_terminal def _get_index_to_extend(current_expansion: List[Any], left: bool) \ @@ -35,7 +37,7 @@ class RecursiveDecentParser: def __init__(self, cfg: CFG) -> None: self._cfg = cfg - def get_parse_tree(self, word: Iterable[Any], left: bool = True) \ + def get_parse_tree(self, word: Iterable[Hashable], left: bool = True) \ -> ParseTree: """ Get a parse tree for a given word @@ -60,7 +62,7 @@ def get_parse_tree(self, word: Iterable[Any], left: bool = True) \ """ word = [to_terminal(x) for x in word if x != Epsilon()] - parse_tree = ParseTree(self._cfg.start_symbol) + parse_tree = ParseTree(self._cfg.start_symbol or Epsilon()) starting_expansion = [(self._cfg.start_symbol, parse_tree)] if self._get_parse_tree_sub(word, starting_expansion, left): return parse_tree @@ -110,7 +112,7 @@ def _get_parse_tree_sub(self, return True return False - def is_parsable(self, word: Iterable[Any], left: bool = True) -> bool: + def is_parsable(self, word: Iterable[Hashable], left: bool = True) -> bool: """ Whether a word is parsable or not diff --git a/pyformlang/cfg/terminal.py b/pyformlang/cfg/terminal.py index 135b230..7bc36b7 100644 --- a/pyformlang/cfg/terminal.py +++ b/pyformlang/cfg/terminal.py @@ -1,26 +1,14 @@ """ A terminal in a CFG """ -from typing import Any - from .cfg_object import CFGObject class Terminal(CFGObject): # pylint: disable=too-few-public-methods """ A terminal in a CFG """ - def __eq__(self, other: Any) -> bool: - if isinstance(other, CFGObject): - return self.value == other.value - return self.value == other - def __repr__(self) -> str: return "Terminal(" + str(self.value) + ")" - def __hash__(self) -> int: - if self._hash is None: - self._hash = hash(self.value) - return self._hash - def to_text(self) -> str: text = str(self._value) if text and text[0].isupper(): diff --git a/pyformlang/cfg/utils.py b/pyformlang/cfg/utils.py index 0fd3196..c49da5c 100644 --- a/pyformlang/cfg/utils.py +++ b/pyformlang/cfg/utils.py @@ -1,19 +1,19 @@ """ Useful functions """ -from typing import Any +from typing import Hashable from .variable import Variable from .terminal import Terminal -def to_variable(given: Any) -> Variable: +def to_variable(given: Hashable) -> Variable: """ Transformation into a variable """ if isinstance(given, Variable): return given return Variable(given) -def to_terminal(given: Any) -> Terminal: +def to_terminal(given: Hashable) -> Terminal: """ Transformation into a terminal """ if isinstance(given, Terminal): return given diff --git a/pyformlang/cfg/utils_cfg.py b/pyformlang/cfg/utils_cfg.py index 567df64..c0902d2 100644 --- a/pyformlang/cfg/utils_cfg.py +++ b/pyformlang/cfg/utils_cfg.py @@ -2,10 +2,10 @@ from typing import Dict, List, Iterable, AbstractSet -from .production import Production -from .epsilon import Epsilon from .cfg_object import CFGObject from .variable import Variable +from .epsilon import Epsilon +from .production import Production def remove_nullable_production_sub(body: List[CFGObject], diff --git a/pyformlang/cfg/variable.py b/pyformlang/cfg/variable.py index b687dbc..7c60de5 100644 --- a/pyformlang/cfg/variable.py +++ b/pyformlang/cfg/variable.py @@ -1,7 +1,7 @@ """ A variable in a CFG """ import string -from typing import Optional, Any +from typing import Optional, Hashable from .cfg_object import CFGObject @@ -15,30 +15,13 @@ class Variable(CFGObject): # pylint: disable=too-few-public-methods The value of the variable """ - def __init__(self, value: Any) -> None: + def __init__(self, value: Hashable) -> None: super().__init__(value) - self._hash = None self.index_cfg_converter: Optional[int] = None - def __eq__(self, other: Any) -> bool: - if isinstance(other, CFGObject): - return self._value == other.value - return self._value == other - - def __str__(self) -> str: - return str(self.value) - def __repr__(self) -> str: return "Variable(" + str(self.value) + ")" - def __hash__(self) -> int: - if self._hash is None: - self._hash = self._compute_new_hash() - return self._hash - - def _compute_new_hash(self) -> int: - return hash(self._value) - def to_text(self) -> str: text = str(self._value) if text and text[0] not in string.ascii_uppercase: diff --git a/pyformlang/pda/cfg_variable_converter.py b/pyformlang/pda/cfg_variable_converter.py index f2dd2f3..22bf1a7 100644 --- a/pyformlang/pda/cfg_variable_converter.py +++ b/pyformlang/pda/cfg_variable_converter.py @@ -4,6 +4,7 @@ from pyformlang.cfg import Variable + class CFGVariableConverter: """A CFG Variable Converter""" diff --git a/pyformlang/pda/epsilon.py b/pyformlang/pda/epsilon.py index 00956a5..b4a6ca7 100644 --- a/pyformlang/pda/epsilon.py +++ b/pyformlang/pda/epsilon.py @@ -1,5 +1,7 @@ """ An epsilon symbol """ +from typing import Any + from .stack_symbol import StackSymbol @@ -9,3 +11,9 @@ class Epsilon(StackSymbol): def __init__(self) -> None: super().__init__("epsilon") + + def __eq__(self, other: Any) -> bool: + return isinstance(other, Epsilon) + + def __hash__(self) -> int: + return super().__hash__() diff --git a/pyformlang/pda/pda.py b/pyformlang/pda/pda.py index 1f25130..28c9462 100644 --- a/pyformlang/pda/pda.py +++ b/pyformlang/pda/pda.py @@ -1,21 +1,17 @@ """ We represent here a push-down automaton """ from typing import Dict, List, Set, AbstractSet, \ - Iterable, Tuple, Type, Optional, Any + Iterator, Iterable, Tuple, Type, Optional, Hashable, Any from json import dumps, loads from itertools import product from numpy import empty from networkx import MultiDiGraph from networkx.drawing.nx_pydot import write_dot -from pyformlang import cfg -from pyformlang import finite_automaton from pyformlang.finite_automaton import DeterministicFiniteAutomaton -from pyformlang.pda.cfg_variable_converter import CFGVariableConverter -from pyformlang.finite_automaton import FiniteAutomaton -from pyformlang.finite_automaton import State as FAState, \ - Symbol as FASymbol, Epsilon as FAEpsilon -from pyformlang.regular_expression import Regex +from pyformlang.finite_automaton import State as FAState +from pyformlang.finite_automaton import Symbol as FASymbol +from pyformlang.finite_automaton import Epsilon as FAEpsilon from pyformlang.cfg import CFG, Variable, Terminal, Production from pyformlang.cfg.cfg_object import CFGObject @@ -24,6 +20,8 @@ from .stack_symbol import StackSymbol from .epsilon import Epsilon as PDAEpsilon from .transition_function import TransitionFunction +from .transition_function import TransitionKey, TransitionValues, Transition +from .cfg_variable_converter import CFGVariableConverter from .utils import PDAObjectCreator INPUT_SYMBOL = 1 @@ -38,8 +36,11 @@ OUTPUT = 1 +InputTransition = Tuple[Hashable, Hashable, Hashable, + Hashable, Iterable[Hashable]] -class PDA: + +class PDA(Iterable[Transition]): """ Representation of a pushdown automaton Parameters @@ -65,13 +66,13 @@ class PDA: # pylint: disable=too-many-instance-attributes def __init__(self, - states: AbstractSet[Any] = None, - input_symbols: AbstractSet[Any] = None, - stack_alphabet: AbstractSet[Any] = None, + states: AbstractSet[Hashable] = None, + input_symbols: AbstractSet[Hashable] = None, + stack_alphabet: AbstractSet[Hashable] = None, transition_function: TransitionFunction = None, - start_state: Any = None, - start_stack_symbol: Any = None, - final_states: AbstractSet[Any] = None): + start_state: Hashable = None, + start_stack_symbol: Hashable = None, + final_states: AbstractSet[Hashable] = None): # pylint: disable=too-many-arguments self._pda_obj_creator = PDAObjectCreator() if states is not None: @@ -104,47 +105,6 @@ def __init__(self, for state in self._final_states: self._states.add(state) - def set_start_state(self, start_state: Any) -> None: - """ Sets the start state to the automaton - - Parameters - ---------- - start_state : :class:`~pyformlang.pda.State` - The start state - """ - start_state = self._pda_obj_creator.to_state(start_state) - self._states.add(start_state) - self._start_state = start_state - - def set_start_stack_symbol(self, start_stack_symbol: Any) -> None: - """ Sets the start stack symbol to the automaton - - Parameters - ---------- - start_stack_symbol : :class:`~pyformlang.pda.StackSymbol` - The start stack symbol - """ - start_stack_symbol = self._pda_obj_creator.to_stack_symbol( - start_stack_symbol) - self._stack_alphabet.add(start_stack_symbol) - self._start_stack_symbol = start_stack_symbol - - def add_final_state(self, state: Any) -> None: - """ Adds a final state to the automaton - - Parameters - ---------- - state : :class:`~pyformlang.pda.State` - The state to add - """ - state = self._pda_obj_creator.to_state(state) - self._final_states.add(state) - - @property - def start_state(self) -> Optional[PDAState]: - """ Get start state """ - return self._start_state - @property def states(self) -> Set[PDAState]: """ @@ -156,18 +116,6 @@ def states(self) -> Set[PDAState]: """ return self._states - @property - def final_states(self) -> Set[PDAState]: - """ - The final states of the PDA - Returns - ------- - final_states : iterable of :class:`~pyformlang.pda.State` - The final states of the PDA - - """ - return self._final_states - @property def input_symbols(self) -> Set[PDASymbol]: """ @@ -192,38 +140,80 @@ def stack_symbols(self) -> Set[StackSymbol]: """ return self._stack_alphabet - def get_number_transitions(self) -> int: - """ Gets the number of transitions in the PDA + @property + def start_state(self) -> Optional[PDAState]: + """ Get start state """ + return self._start_state + + @property + def start_stack_symbol(self) -> Optional[StackSymbol]: + """ Get start stack symbol """ + return self._start_stack_symbol + @property + def final_states(self) -> Set[PDAState]: + """ + The final states of the PDA Returns + ------- + final_states : iterable of :class:`~pyformlang.pda.State` + The final states of the PDA + + """ + return self._final_states + + def set_start_state(self, start_state: Hashable) -> None: + """ Sets the start state to the automaton + + Parameters ---------- - n_transitions : int - The number of transitions + start_state : :class:`~pyformlang.pda.State` + The start state """ - return self._transition_function.get_number_transitions() + start_state = self._pda_obj_creator.to_state(start_state) + self._states.add(start_state) + self._start_state = start_state - def add_transitions(self, - transitions: - Iterable[Tuple[Any, Any, Any, - Any, List[Any]]]) -> None: + def set_start_stack_symbol(self, start_stack_symbol: Hashable) -> None: + """ Sets the start stack symbol to the automaton + + Parameters + ---------- + start_stack_symbol : :class:`~pyformlang.pda.StackSymbol` + The start stack symbol """ - Adds several transitions + start_stack_symbol = self._pda_obj_creator.to_stack_symbol( + start_stack_symbol) + self._stack_alphabet.add(start_stack_symbol) + self._start_stack_symbol = start_stack_symbol + + def add_final_state(self, state: Hashable) -> None: + """ Adds a final state to the automaton Parameters ---------- - transitions : - Transitions as they would be given to add_transition + state : :class:`~pyformlang.pda.State` + The state to add """ - for s_from, input_symbol, stack_from, s_to, stack_to in transitions: - self.add_transition(s_from, input_symbol, stack_from, - s_to, stack_to) + state = self._pda_obj_creator.to_state(state) + self._final_states.add(state) + + def get_number_transitions(self) -> int: + """ Gets the number of transitions in the PDA + + Returns + ---------- + n_transitions : int + The number of transitions + """ + return self._transition_function.get_number_transitions() def add_transition(self, - s_from: Any, - input_symbol: Any, - stack_from: Any, - s_to: Any, - stack_to: Iterable[Any]): + s_from: Hashable, + input_symbol: Hashable, + stack_from: Hashable, + s_to: Hashable, + stack_to: Iterable[Hashable]) -> None: """ Add a transition to the PDA Parameters @@ -259,6 +249,43 @@ def add_transition(self, s_to, stack_to) + def add_transitions(self, transitions: Iterable[InputTransition]) -> None: + """ + Adds several transitions + + Parameters + ---------- + transitions : + Transitions as they would be given to add_transition + """ + for s_from, input_symbol, stack_from, s_to, stack_to in transitions: + self.add_transition(s_from, input_symbol, stack_from, + s_to, stack_to) + + def __call__(self, + s_from: Hashable, + input_symbol: Hashable, + stack_from: Hashable) -> TransitionValues: + """ Calls transition function with given arguments """ + s_from = self._pda_obj_creator.to_state(s_from) + input_symbol = self._pda_obj_creator.to_symbol(input_symbol) + stack_from = self._pda_obj_creator.to_stack_symbol(stack_from) + return self._transition_function(s_from, input_symbol, stack_from) + + def __contains__(self, transition: InputTransition) -> bool: + """ Whether the given transition is present in the PDA """ + s_from, input_symbol, stack_from, s_to, stack_to = transition + s_from = self._pda_obj_creator.to_state(s_from) + input_symbol = self._pda_obj_creator.to_symbol(input_symbol) + stack_from = self._pda_obj_creator.to_stack_symbol(stack_from) + s_to = self._pda_obj_creator.to_state(s_to) + stack_to = [self._pda_obj_creator.to_stack_symbol(x) for x in stack_to] + return (s_to, stack_to) in self(s_from, input_symbol, stack_from) + + def __iter__(self) -> Iterator[Transition]: + """ Gets an iterator of transitions of the PDA """ + yield from self._transition_function + def to_final_state(self) -> "PDA": """ Turns the current PDA that accepts a language L by empty stack \ to another PDA that accepts the same language L by final state @@ -520,19 +547,18 @@ def intersection(self, other: DeterministicFiniteAutomaton) -> "PDA": When intersecting with something else than a regex or a finite automaton """ - start_state_other = other.start_state - if not start_state_other: + if not self.start_state or not other.start_state: return PDA() pda_state_converter = _PDAStateConverter(self._states, other.states) final_states_other = other.final_states - start = pda_state_converter.to_pda_combined_state(self._start_state, - start_state_other) + start = pda_state_converter.to_pda_combined_state(self.start_state, + other.start_state) pda = PDA(start_state=start, start_stack_symbol=self._start_stack_symbol) symbols = self._input_symbols.copy() symbols.add(PDAEpsilon()) - to_process = [(self._start_state, start_state_other)] - processed = {(self._start_state, start_state_other)} + to_process = [(self.start_state, other.start_state)] + processed = {(self.start_state, other.start_state)} while to_process: state_in, state_dfa = to_process.pop() if (state_in in self._final_states and state_dfa in @@ -541,11 +567,11 @@ def intersection(self, other: DeterministicFiniteAutomaton) -> "PDA": pda_state_converter.to_pda_combined_state(state_in, state_dfa)) for symbol in symbols: - if symbol == Epsilon(): - symbol_dfa = finite_automaton.Epsilon() + if symbol == PDAEpsilon(): + symbol_dfa = FAEpsilon() next_state_dfa = state_dfa else: - symbol_dfa = finite_automaton.Symbol(symbol.value) + symbol_dfa = FASymbol(symbol.value) next_state_dfa = other.get_next_state(state_dfa, symbol_dfa) if not next_state_dfa: continue @@ -593,8 +619,7 @@ def __and__(self, other: DeterministicFiniteAutomaton) -> "PDA": """ return self.intersection(other) - def to_dict(self) -> Dict[Tuple[PDAState, PDASymbol, StackSymbol], - Set[Tuple[PDAState, List[StackSymbol]]]]: + def to_dict(self) -> Dict[TransitionKey, TransitionValues]: """ Get the transitions of the PDA as a dictionary Returns @@ -735,7 +760,7 @@ def __init__(self, for i, state in enumerate(states_dfa): self._inverse_state_dfa[state] = i self._conversions = empty((len(states_pda), len(states_dfa)), - dtype=object) + dtype=PDAState) def to_pda_combined_state(self, state_pda: PDAState, @@ -744,9 +769,9 @@ def to_pda_combined_state(self, i_state_pda = self._inverse_state_pda[state_pda] i_state_other = self._inverse_state_dfa[state_other] if self._conversions[i_state_pda, i_state_other] is None: - self._conversions[i_state_pda, i_state_other] = PDAState( - (state_pda, state_other)) - return self._conversions[i_state_pda, i_state_other] + self._conversions[i_state_pda, i_state_other] = \ + [PDAState((state_pda, state_other))] + return self._conversions[i_state_pda, i_state_other][0] def get_next_free(prefix: str, diff --git a/pyformlang/pda/pda_object.py b/pyformlang/pda/pda_object.py index 9db9077..1947693 100644 --- a/pyformlang/pda/pda_object.py +++ b/pyformlang/pda/pda_object.py @@ -1,27 +1,31 @@ """ Basic PDA object representation """ -from typing import Any +from abc import abstractmethod + +from typing import Hashable, Any class PDAObject: """ Basic PDA object representation """ - def __init__(self, value: Any) -> None: + def __init__(self, value: Hashable) -> None: self._value = value self._hash = None - def __hash__(self) -> int: - if self._hash is None: - self._hash = hash(self._value) - return self._hash - @property - def value(self) -> Any: + def value(self) -> Hashable: """ Returns the value of the object """ return self._value + @abstractmethod def __eq__(self, other: Any) -> bool: raise NotImplementedError + def __hash__(self) -> int: + if self._hash is None: + self._hash = hash(self._value) + return self._hash + + @abstractmethod def __repr__(self) -> str: raise NotImplementedError diff --git a/pyformlang/pda/stack_symbol.py b/pyformlang/pda/stack_symbol.py index 8c5a4a5..a932c14 100644 --- a/pyformlang/pda/stack_symbol.py +++ b/pyformlang/pda/stack_symbol.py @@ -1,6 +1,6 @@ """ A StackSymbol in a pushdown automaton """ -from typing import Optional, Any +from typing import Optional, Hashable, Any from .symbol import Symbol @@ -14,28 +14,17 @@ class StackSymbol(Symbol): """ - def __init__(self, value: Any) -> None: + def __init__(self, value: Hashable) -> None: super().__init__(value) self.index_cfg_converter: Optional[int] = None + def __eq__(self, other: Any) -> bool: + if not isinstance(other, StackSymbol): + return False + return self._value == other.value + def __hash__(self) -> int: return super().__hash__() - @property - def value(self) -> Any: - """ Returns the value of the stack symbol - - Returns - ---------- - value: The value - any - """ - return self._value - - def __eq__(self, other: Any) -> bool: - if isinstance(other, StackSymbol): - return self._value == other.value - return False - def __repr__(self) -> str: return "StackSymbol(" + str(self._value) + ")" diff --git a/pyformlang/pda/state.py b/pyformlang/pda/state.py index 316f1e8..52f1bb5 100644 --- a/pyformlang/pda/state.py +++ b/pyformlang/pda/state.py @@ -1,6 +1,6 @@ """ A State in a pushdown automaton """ -from typing import Optional, Any +from typing import Optional, Hashable, Any from .pda_object import PDAObject @@ -15,28 +15,17 @@ class State(PDAObject): """ - def __init__(self, value: Any) -> None: + def __init__(self, value: Hashable) -> None: super().__init__(value) self.index_cfg_converter: Optional[int] = None + def __eq__(self, other: Any) -> bool: + if not isinstance(other, State): + return False + return self._value == other.value + def __hash__(self) -> int: return super().__hash__() - @property - def value(self) -> Any: - """ Returns the value of the symbol - - Returns - ---------- - value: The value - any - """ - return self._value - - def __eq__(self, other: Any) -> bool: - if isinstance(other, State): - return self._value == other.value - return False - def __repr__(self) -> str: return "State(" + str(self._value) + ")" diff --git a/pyformlang/pda/symbol.py b/pyformlang/pda/symbol.py index cb38df5..4616932 100644 --- a/pyformlang/pda/symbol.py +++ b/pyformlang/pda/symbol.py @@ -15,24 +15,13 @@ class Symbol(PDAObject): """ + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Symbol): + return False + return self._value == other.value + def __hash__(self) -> int: return super().__hash__() - @property - def value(self) -> Any: - """ Returns the value of the symbol - - Returns - ---------- - value: The value - any - """ - return self._value - - def __eq__(self, other: Any) -> bool: - if isinstance(other, Symbol): - return self._value == other.value - return False - def __repr__(self) -> str: return "Symbol(" + str(self._value) + ")" diff --git a/pyformlang/pda/transition_function.py b/pyformlang/pda/transition_function.py index 70699db..44aa806 100644 --- a/pyformlang/pda/transition_function.py +++ b/pyformlang/pda/transition_function.py @@ -1,24 +1,26 @@ """ A transition function in a pushdown automaton """ -from typing import Dict, List, Set, Iterator, Tuple, Optional +from copy import deepcopy +from typing import Dict, List, Set, Iterator, Iterable, Tuple, Optional -from .stack_symbol import StackSymbol from .state import State from .symbol import Symbol +from .stack_symbol import StackSymbol +TransitionKey = Tuple[State, Symbol, StackSymbol] +TransitionValue = Tuple[State, List[StackSymbol]] +TransitionValues = Set[TransitionValue] +Transition = Tuple[TransitionKey, TransitionValue] -class TransitionFunction: + +class TransitionFunction(Iterable[Transition]): """ A transition function in a pushdown automaton """ def __init__(self) -> None: - self._transitions: Dict[Tuple[State, Symbol, StackSymbol], - Set[Tuple[State, List[StackSymbol]]]] = {} - self._iter_key: Optional[Iterator[ - Tuple[State, Symbol, StackSymbol]]] = None - self._current_key: Optional[ - Tuple[State, Symbol, StackSymbol]] = None - self._iter_inside: Optional[Iterator[ - Tuple[State, List[StackSymbol]]]] = None + self._transitions: Dict[TransitionKey, TransitionValues] = {} + self._current_key: Optional[TransitionKey] = None + self._iter_key: Optional[Iterator[TransitionKey]] = None + self._iter_inside: Optional[Iterator[TransitionValue]] = None def get_number_transitions(self) -> int: """ Gets the number of transitions @@ -74,35 +76,17 @@ def copy(self) -> "TransitionFunction": temp_out[0], temp_out[1]) return new_tf - def __iter__(self) -> Iterator[Tuple[Tuple[State, Symbol, StackSymbol], - Tuple[State, List[StackSymbol]]]]: - self._iter_key = iter(self._transitions.keys()) - self._current_key = None - self._iter_inside = None - return self - - def __next__(self) -> Tuple[Tuple[State, Symbol, StackSymbol], - Tuple[State, List[StackSymbol]]]: - if self._iter_inside is None: - next_key = next(self._iter_key) # type: ignore - self._current_key = next_key - self._iter_inside = iter(self._transitions[next_key]) - try: - next_value = next(self._iter_inside) - return self._current_key, next_value # type: ignore - except StopIteration: - next_key = next(self._iter_key) # type: ignore - self._current_key = next_key - self._iter_inside = iter(self._transitions[next_key]) - return next(self) + def __iter__(self) -> Iterator[Transition]: + for key, values in self._transitions.items(): + for value in values: + yield key, value - def __call__(self, s_from: State, + def __call__(self, + s_from: State, input_symbol: Symbol, - stack_from: StackSymbol) \ - -> Set[Tuple[State, List[StackSymbol]]]: + stack_from: StackSymbol) -> TransitionValues: return self._transitions.get((s_from, input_symbol, stack_from), set()) - def to_dict(self) -> Dict[Tuple[State, Symbol, StackSymbol], - Set[Tuple[State, List[StackSymbol]]]]: + def to_dict(self) -> Dict[TransitionKey, TransitionValues]: """Get the dictionary representation of the transitions""" - return self._transitions + return deepcopy(self._transitions) diff --git a/pyformlang/pda/utils.py b/pyformlang/pda/utils.py index 447e418..48d72e6 100644 --- a/pyformlang/pda/utils.py +++ b/pyformlang/pda/utils.py @@ -1,7 +1,8 @@ """ Useful functions for a PDA """ -from typing import Type, Dict, Any +from typing import Type, Dict, Hashable, Any +from .pda_object import PDAObject from .state import State from .symbol import Symbol from .stack_symbol import StackSymbol @@ -14,17 +15,17 @@ class PDAObjectCreator: """ def __init__(self) -> None: - self._state_creator: Dict[Any, State] = {} - self._symbol_creator: Dict[Any, Symbol] = {} - self._stack_symbol_creator: Dict[Any, StackSymbol] = {} + self._state_creator: Dict[Hashable, State] = {} + self._symbol_creator: Dict[Hashable, Symbol] = {} + self._stack_symbol_creator: Dict[Hashable, StackSymbol] = {} - def to_state(self, given: Any) -> State: + def to_state(self, given: Hashable) -> State: """ Convert to a state """ if isinstance(given, State): return _get_object_from_known(given, self._state_creator) return _get_object_from_raw(given, self._state_creator, State) - def to_symbol(self, given: Any) -> Symbol: + def to_symbol(self, given: Hashable) -> Symbol: """ Convert to a symbol """ if isinstance(given, Symbol): return _get_object_from_known(given, self._symbol_creator) @@ -32,7 +33,7 @@ def to_symbol(self, given: Any) -> Symbol: return Epsilon() return _get_object_from_raw(given, self._symbol_creator, Symbol) - def to_stack_symbol(self, given: Any) -> StackSymbol: + def to_stack_symbol(self, given: Hashable) -> StackSymbol: """ Convert to a stack symbol """ if isinstance(given, StackSymbol): return _get_object_from_known(given, @@ -44,7 +45,7 @@ def to_stack_symbol(self, given: Any) -> StackSymbol: StackSymbol) -def _get_object_from_known(given: Any, +def _get_object_from_known(given: PDAObject, obj_converter: Dict[Any, Any]) -> Any: if given.value in obj_converter: return obj_converter[given.value] From f3e403441908aff9645932f026fa1f9b9ca2d6de Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 2 Dec 2024 14:28:39 +0300 Subject: [PATCH 07/30] refactor cfg to pda conversion, add grammar abstract class to manage import cycles, correct types in get_words --- pyformlang/cfg/cfg.py | 148 ++++------------ .../{pda => cfg}/cfg_variable_converter.py | 2 +- pyformlang/cfg/cyk_table.py | 16 +- pyformlang/cfg/grammar.py | 72 ++++++++ pyformlang/cfg/pda_object_creator.py | 52 ------ pyformlang/cfg/production.py | 22 ++- pyformlang/pda/pda.py | 164 +++++++++++------- pyformlang/pda/utils.py | 128 ++++++++------ 8 files changed, 305 insertions(+), 299 deletions(-) rename pyformlang/{pda => cfg}/cfg_variable_converter.py (99%) create mode 100644 pyformlang/cfg/grammar.py delete mode 100644 pyformlang/cfg/pda_object_creator.py diff --git a/pyformlang/cfg/cfg.py b/pyformlang/cfg/cfg.py index 19d1d3d..06ed3cc 100644 --- a/pyformlang/cfg/cfg.py +++ b/pyformlang/cfg/cfg.py @@ -8,17 +8,17 @@ from networkx import DiGraph, find_cycle from networkx.exception import NetworkXNoCycle -from pyformlang.finite_automaton import DeterministicFiniteAutomaton, State as FAState -from pyformlang.pda import PDA, State as PDAState, Epsilon as PDAEpsilon -from pyformlang.pda.cfg_variable_converter import CFGVariableConverter +from pyformlang.finite_automaton import DeterministicFiniteAutomaton, State -from .cyk_table import CYKTable, ParseTree, DerivationDoesNotExist -from .pda_object_creator import PDAObjectCreator +from .grammar import Grammar from .cfg_object import CFGObject from .variable import Variable from .terminal import Terminal -from .epsilon import Epsilon as CFGEpsilon +from .epsilon import Epsilon from .production import Production +from .parse_tree import ParseTree +from .cyk_table import CYKTable, DerivationDoesNotExist +from .cfg_variable_converter import CFGVariableConverter from .utils import to_variable, to_terminal from .utils_cfg import remove_nullable_production, get_productions_d @@ -38,7 +38,7 @@ def is_special_text(text: str) -> bool: text[-1] == '"' -class CFG: +class CFG(Grammar): """ A class representing a context free grammar Parameters @@ -60,19 +60,20 @@ def __init__(self, terminals: AbstractSet[Hashable] = None, start_symbol: Hashable = None, productions: Iterable[Production] = None) -> None: + super().__init__() if variables is not None: variables = {to_variable(x) for x in variables} - self._variables: Set[Variable] = variables or set() + self._variables = variables or set() if terminals is not None: terminals = {to_terminal(x) for x in terminals} - self._terminals: Set[Terminal] = terminals or set() + self._terminals = terminals or set() if start_symbol is not None: start_symbol = to_variable(start_symbol) self._variables.add(start_symbol) - self._start_symbol: Optional[Variable] = start_symbol + self._start_symbol = start_symbol if productions is not None: productions = set(productions) - self._productions: Set[Production] = productions or set() + self._productions = productions or set() for production in self._productions: self.__initialize_production_in_cfg(production) self._normal_form: Optional[CFG] = None @@ -82,50 +83,6 @@ def __init__(self, self._remaining_lists: Dict[CFGObject, List[int]] = {} self._added_impacts: Set[CFGObject] = set() - @property - def variables(self) -> Set[Variable]: - """ Gives the variables - - Returns - ---------- - variables : set of :class:`~pyformlang.cfg.Variable` - The variables of the CFG - """ - return self._variables - - @property - def terminals(self) -> Set[Terminal]: - """ Gives the terminals - - Returns - ---------- - terminals : set of :class:`~pyformlang.cfg.Terminal` - The terminals of the CFG - """ - return self._terminals - - @property - def productions(self) -> Set[Production]: - """ Gives the productions - - Returns - ---------- - productions : set of :class:`~pyformlang.cfg.Production` - The productions of the CFG - """ - return self._productions - - @property - def start_symbol(self) -> Optional[Variable]: - """ Gives the start symbol - - Returns - ---------- - start_variable : :class:`~pyformlang.cfg.Variable` - The start symbol of the CFG - """ - return self._start_symbol - def __initialize_production_in_cfg(self, production: Production) -> None: self._variables.add(production.head) for cfg_object in production.body: @@ -149,8 +106,8 @@ def get_generating_symbols(self) -> Set[CFGObject]: def _get_generating_or_nullable(self, nullable: bool = False) \ -> Set[CFGObject]: """ Merge of nullable and generating """ - to_process: List[CFGObject] = [CFGEpsilon()] - g_symbols: Set[CFGObject] = {CFGEpsilon()} + to_process: List[CFGObject] = [Epsilon()] + g_symbols: Set[CFGObject] = {Epsilon()} self._set_impacts_and_remaining_lists() @@ -179,7 +136,7 @@ def _get_generating_or_nullable(self, nullable: bool = False) \ # Fix modifications for symbol_impact, index_impact in processed_with_modification: self._remaining_lists[symbol_impact][index_impact] += 1 - g_symbols.remove(CFGEpsilon()) + g_symbols.remove(Epsilon()) return g_symbols def _set_impacts_and_remaining_lists(self) -> None: @@ -209,8 +166,8 @@ def generate_epsilon(self) -> bool: generate_epsilon : bool Whether epsilon is generated or not by the CFG """ - generate_epsilon: Set[CFGObject] = {CFGEpsilon()} - to_process: List[CFGObject] = [CFGEpsilon()] + generate_epsilon: Set[CFGObject] = {Epsilon()} + to_process: List[CFGObject] = [Epsilon()] self._set_impacts_and_remaining_lists() @@ -253,7 +210,7 @@ def get_reachable_symbols(self) -> Set[CFGObject]: for production in self._productions: temp = reachable_transition_d.setdefault(production.head, []) for symbol in production.body: - if not isinstance(symbol, CFGEpsilon): + if not isinstance(symbol, Epsilon): temp.append(symbol) to_process = [self._start_symbol] while to_process: @@ -723,7 +680,7 @@ def contains(self, word: Iterable[Hashable]) -> bool: Whether word if in the CFG or not """ # Remove epsilons - word = [to_terminal(x) for x in word if x != CFGEpsilon()] + word = [to_terminal(x) for x in word if x != Epsilon()] if not word: return self.generate_epsilon() cyk_table = CYKTable(self, word) @@ -747,52 +704,12 @@ def get_cnf_parse_tree(self, word: Iterable[Hashable]) -> ParseTree: The parse tree """ - word = [to_terminal(x) for x in word if x != CFGEpsilon()] + word = [to_terminal(x) for x in word if x != Epsilon()] if not word and not self.generate_epsilon(): raise DerivationDoesNotExist cyk_table = CYKTable(self, word) return cyk_table.get_parse_tree() - def to_pda(self) -> "PDA": - """ Converts the CFG to a PDA that generates on empty stack an \ - equivalent language - - Returns - ---------- - new_pda : :class:`~pyformlang.pda.PDA` - The equivalent PDA when accepting on empty stack - """ - state = PDAState("q") - pda_object_creator = PDAObjectCreator(self._terminals, self._variables) - input_symbols = {pda_object_creator.get_symbol_from(x) - for x in self._terminals} - stack_alphabet = {pda_object_creator.get_stack_symbol_from(x) - for x in self._terminals.union(self._variables)} - start_stack_symbol = None - if self._start_symbol: - start_stack_symbol = pda_object_creator.get_stack_symbol_from( - self._start_symbol) - new_pda = PDA(states={state}, - input_symbols=input_symbols, - stack_alphabet=stack_alphabet, - start_state=state, - start_stack_symbol=start_stack_symbol) - for production in self._productions: - new_pda.add_transition(state, PDAEpsilon(), - pda_object_creator.get_stack_symbol_from( - production.head), - state, - [pda_object_creator.get_stack_symbol_from(x) - for x in production.body]) - for terminal in self._terminals: - new_pda.add_transition(state, - pda_object_creator.get_symbol_from( - terminal), - pda_object_creator.get_stack_symbol_from( - terminal), - state, []) - return new_pda - def intersection(self, other: DeterministicFiniteAutomaton) -> "CFG": """ Gives the intersection of the current CFG with an other object @@ -863,7 +780,7 @@ def _intersection_starting_rules(cfg: "CFG", def _intersection_when_terminal( other: DeterministicFiniteAutomaton, production: Production, - states: Iterable[FAState], + states: Iterable[State], cv_converter: CFGVariableConverter) \ -> List[Production]: productions_temp = [] @@ -881,7 +798,7 @@ def _intersection_when_terminal( @staticmethod def _intersection_when_two_non_terminals( production: Production, - states: Iterable[FAState], + states: Iterable[State], cv_converter: CFGVariableConverter) \ -> List[Production]: productions_temp = [] @@ -900,9 +817,9 @@ def _intersection_when_two_non_terminals( @staticmethod def _get_all_bodies(production: Production, - state_p: FAState, - state_r: FAState, - states: Iterable[FAState], + state_p: State, + state_r: State, + states: Iterable[State], cv_converter: CFGVariableConverter) \ -> List[List[CFGObject]]: return [ @@ -934,7 +851,7 @@ def __and__(self, other: DeterministicFiniteAutomaton) -> "CFG": """ return self.intersection(other) - def get_words(self, max_length: int = -1) -> Iterable[List[CFGObject]]: + def get_words(self, max_length: int = -1) -> Iterable[List[Terminal]]: """ Get the words generated by the CFG Parameters @@ -949,7 +866,7 @@ def get_words(self, max_length: int = -1) -> Iterable[List[CFGObject]]: return cfg = self.to_normal_form() productions = cfg.productions - gen_d = {} + gen_d: Dict[CFGObject, List[List[List[Terminal]]]] = {} # Look for Epsilon Transitions for production in productions: if production.head not in gen_d: @@ -960,14 +877,15 @@ def get_words(self, max_length: int = -1) -> Iterable[List[CFGObject]]: gen_d[obj] = [[]] # To a single terminal for production in productions: - body = production.body - if len(body) == 1: + if len(production.body) == 1 \ + and len(production.body_terminals) == 1: + terminals = list(production.body_terminals) if len(gen_d[production.head]) == 1: gen_d[production.head].append([]) - if body not in gen_d[production.head][-1]: - gen_d[production.head][-1].append(list(body)) + if terminals not in gen_d[production.head][-1]: + gen_d[production.head][-1].append(terminals) if production.head == cfg.start_symbol: - yield list(body) + yield terminals # Complete what is missing current_length = 2 total_no_modification = 0 diff --git a/pyformlang/pda/cfg_variable_converter.py b/pyformlang/cfg/cfg_variable_converter.py similarity index 99% rename from pyformlang/pda/cfg_variable_converter.py rename to pyformlang/cfg/cfg_variable_converter.py index 22bf1a7..3f62ac8 100644 --- a/pyformlang/pda/cfg_variable_converter.py +++ b/pyformlang/cfg/cfg_variable_converter.py @@ -2,7 +2,7 @@ from typing import Dict, List, AbstractSet, Tuple, Optional, Any -from pyformlang.cfg import Variable +from .cfg import Variable class CFGVariableConverter: diff --git a/pyformlang/cfg/cyk_table.py b/pyformlang/cfg/cyk_table.py index dabbd85..3e6612c 100644 --- a/pyformlang/cfg/cyk_table.py +++ b/pyformlang/cfg/cyk_table.py @@ -4,8 +4,9 @@ from typing import Dict, List, Set, Iterable, Tuple, Any -from .cfg import CFG, Terminal +from .grammar import Grammar from .cfg_object import CFGObject +from .terminal import Terminal from .epsilon import Epsilon from .parse_tree import ParseTree @@ -21,8 +22,8 @@ class CYKTable: The word from which we construct the CYK table """ - def __init__(self, cfg: CFG, word: List[Terminal]) -> None: - self._cnf: CFG = cfg.to_normal_form() + def __init__(self, grammar: Grammar, word: List[Terminal]) -> None: + self._normal_form: Grammar = grammar.to_normal_form() self._word: List[Terminal] = word self._productions_d: Dict[Tuple, List[CFGObject]] = {} self._cyk_table: Dict[Tuple[int, int], Set[CYKNode]] = {} @@ -34,7 +35,7 @@ def __init__(self, cfg: CFG, word: List[Terminal]) -> None: def _set_productions_by_body(self) -> None: # Organize productions - for production in self._cnf.productions: + for production in self._normal_form.productions: temp = tuple(production.body) if temp in self._productions_d: self._productions_d[temp].append(production.head) @@ -87,7 +88,8 @@ def generate_word(self) -> bool: is_generated : bool """ - return self._cnf.start_symbol in self._cyk_table[(0, len(self._word))] + return self._normal_form.start_symbol \ + in self._cyk_table[(0, len(self._word))] def _generates_all_terminals(self) -> bool: generate_all_terminals = True @@ -105,13 +107,13 @@ def get_parse_tree(self) -> ParseTree: parse_tree : :class:`~pyformlang.cfg.ParseTree` """ if not self._word: - return CYKNode(self._cnf.start_symbol or Epsilon()) + return CYKNode(self._normal_form.start_symbol or Epsilon()) if not self.generate_word(): raise DerivationDoesNotExist root = [ x for x in self._cyk_table[(0, len(self._word))] - if x == self._cnf.start_symbol][0] + if x == self._normal_form.start_symbol][0] return root diff --git a/pyformlang/cfg/grammar.py b/pyformlang/cfg/grammar.py new file mode 100644 index 0000000..c4835ba --- /dev/null +++ b/pyformlang/cfg/grammar.py @@ -0,0 +1,72 @@ +""" Basic grammar representation """ + +from typing import Set, Optional +from abc import abstractmethod + +from .variable import Variable +from .terminal import Terminal +from .production import Production + + +class Grammar: + """ Basic grammar representation """ + + def __init__(self) -> None: + self._variables: Set[Variable] + self._terminals: Set[Terminal] + self._start_symbol: Optional[Variable] + self._productions: Set[Production] + + @property + def variables(self) -> Set[Variable]: + """ Gives the variables + + Returns + ---------- + variables : set of :class:`~pyformlang.cfg.Variable` + The variables of the CFG + """ + return self._variables + + @property + def terminals(self) -> Set[Terminal]: + """ Gives the terminals + + Returns + ---------- + terminals : set of :class:`~pyformlang.cfg.Terminal` + The terminals of the CFG + """ + return self._terminals + + @property + def productions(self) -> Set[Production]: + """ Gives the productions + + Returns + ---------- + productions : set of :class:`~pyformlang.cfg.Production` + The productions of the CFG + """ + return self._productions + + @property + def start_symbol(self) -> Optional[Variable]: + """ Gives the start symbol + + Returns + ---------- + start_variable : :class:`~pyformlang.cfg.Variable` + The start symbol of the CFG + """ + return self._start_symbol + + @abstractmethod + def to_normal_form(self) -> "Grammar": + """ Gets some normal form of the grammar""" + raise NotImplementedError + + @abstractmethod + def is_normal_form(self) -> bool: + """ Whether the grammar is in normal form """ + raise NotImplementedError diff --git a/pyformlang/cfg/pda_object_creator.py b/pyformlang/cfg/pda_object_creator.py deleted file mode 100644 index 0d32ce0..0000000 --- a/pyformlang/cfg/pda_object_creator.py +++ /dev/null @@ -1,52 +0,0 @@ -"""Creation of objects for PDA""" - -from typing import Dict, Iterable, Optional - -from pyformlang.pda import Epsilon as PDAEpsilon, Symbol, StackSymbol - -from .cfg_object import CFGObject -from .variable import Variable -from .terminal import Terminal -from .epsilon import Epsilon as CFGEpsilon - - -class PDAObjectCreator: - """Creates Objects for a PDA""" - - def __init__(self, - terminals: Iterable[Terminal], - variables: Iterable[Variable]) -> None: - self._inverse_symbol: Dict[CFGObject, Optional[Symbol]] = {} - self._inverse_stack_symbol: Dict[CFGObject, Optional[StackSymbol]] = {} - for terminal in terminals: - self._inverse_symbol[terminal] = None - self._inverse_stack_symbol[terminal] = None - for variable in variables: - self._inverse_stack_symbol[variable] = None - - def get_symbol_from(self, symbol: CFGObject) -> Symbol: - """Get a symbol""" - if isinstance(symbol, CFGEpsilon): - return PDAEpsilon() - inverse_symbol = self._inverse_symbol[symbol] - if inverse_symbol is None: - value = str(symbol.value) - temp = Symbol(value) - self._inverse_symbol[symbol] = temp - return temp - return inverse_symbol - - def get_stack_symbol_from(self, stack_symbol: CFGObject) \ - -> StackSymbol: - """Get a stack symbol""" - if isinstance(stack_symbol, CFGEpsilon): - return PDAEpsilon() - inverse_stack_symbol = self._inverse_stack_symbol[stack_symbol] - if inverse_stack_symbol is None: - value = str(stack_symbol.value) - if isinstance(stack_symbol, Terminal): - value = "#TERM#" + value - temp = StackSymbol(value) - self._inverse_stack_symbol[stack_symbol] = temp - return temp - return inverse_stack_symbol diff --git a/pyformlang/cfg/production.py b/pyformlang/cfg/production.py index 0e79922..bb2becf 100644 --- a/pyformlang/cfg/production.py +++ b/pyformlang/cfg/production.py @@ -1,6 +1,6 @@ """ A production or rule of a CFG """ -from typing import List, Any +from typing import List, Set, Any from .cfg_object import CFGObject from .variable import Variable @@ -34,21 +34,31 @@ def __init__(self, @property def head(self) -> Variable: - """Get the head variable""" + """Gets the head variable""" return self._head @property def body(self) -> List[CFGObject]: - """Get the body objects""" + """Gets the body objects""" return self._body + @property + def body_variables(self) -> Set[Variable]: + """Gets variables of body of the production""" + return {object for object in self.body if isinstance(object, Variable)} + + @property + def body_terminals(self) -> Set[Terminal]: + """Gets terminals of body of the production""" + return {object for object in self.body if isinstance(object, Terminal)} + def __repr__(self) -> str: return str(self.head) + " -> " + " ".join([str(x) for x in self.body]) def __eq__(self, other: Any) -> bool: - if isinstance(other, Production): - return self.head == other.head and self.body == other.body - return False + if not isinstance(other, Production): + return False + return self.head == other.head and self.body == other.body def __hash__(self) -> int: if self._hash is None: diff --git a/pyformlang/pda/pda.py b/pyformlang/pda/pda.py index 28c9462..ae17d9a 100644 --- a/pyformlang/pda/pda.py +++ b/pyformlang/pda/pda.py @@ -14,6 +14,7 @@ from pyformlang.finite_automaton import Epsilon as FAEpsilon from pyformlang.cfg import CFG, Variable, Terminal, Production from pyformlang.cfg.cfg_object import CFGObject +from pyformlang.cfg.cfg_variable_converter import CFGVariableConverter from .state import State as PDAState from .symbol import Symbol as PDASymbol @@ -21,8 +22,7 @@ from .epsilon import Epsilon as PDAEpsilon from .transition_function import TransitionFunction from .transition_function import TransitionKey, TransitionValues, Transition -from .cfg_variable_converter import CFGVariableConverter -from .utils import PDAObjectCreator +from .utils import PDAObjectConverter, to_state, to_symbol, to_stack_symbol INPUT_SYMBOL = 1 @@ -74,23 +74,18 @@ def __init__(self, start_stack_symbol: Hashable = None, final_states: AbstractSet[Hashable] = None): # pylint: disable=too-many-arguments - self._pda_obj_creator = PDAObjectCreator() if states is not None: - states = {self._pda_obj_creator.to_state(x) for x in states} + states = {to_state(x) for x in states} if input_symbols is not None: - input_symbols = {self._pda_obj_creator.to_symbol(x) - for x in input_symbols} + input_symbols = {to_symbol(x) for x in input_symbols} if stack_alphabet is not None: - stack_alphabet = {self._pda_obj_creator.to_stack_symbol(x) - for x in stack_alphabet} + stack_alphabet = {to_stack_symbol(x) for x in stack_alphabet} if start_state is not None: - start_state = self._pda_obj_creator.to_state(start_state) + start_state = to_state(start_state) if start_stack_symbol is not None: - start_stack_symbol = \ - self._pda_obj_creator.to_stack_symbol(start_stack_symbol) + start_stack_symbol = to_stack_symbol(start_stack_symbol) if final_states is not None: - final_states = {self._pda_obj_creator.to_state(x) - for x in final_states} + final_states = {to_state(x) for x in final_states} self._states: Set[PDAState] = states or set() self._input_symbols: Set[PDASymbol] = input_symbols or set() self._stack_alphabet: Set[StackSymbol] = stack_alphabet or set() @@ -170,7 +165,7 @@ def set_start_state(self, start_state: Hashable) -> None: start_state : :class:`~pyformlang.pda.State` The start state """ - start_state = self._pda_obj_creator.to_state(start_state) + start_state = to_state(start_state) self._states.add(start_state) self._start_state = start_state @@ -182,8 +177,7 @@ def set_start_stack_symbol(self, start_stack_symbol: Hashable) -> None: start_stack_symbol : :class:`~pyformlang.pda.StackSymbol` The start stack symbol """ - start_stack_symbol = self._pda_obj_creator.to_stack_symbol( - start_stack_symbol) + start_stack_symbol = to_stack_symbol(start_stack_symbol) self._stack_alphabet.add(start_stack_symbol) self._start_stack_symbol = start_stack_symbol @@ -195,7 +189,7 @@ def add_final_state(self, state: Hashable) -> None: state : :class:`~pyformlang.pda.State` The state to add """ - state = self._pda_obj_creator.to_state(state) + state = to_state(state) self._final_states.add(state) def get_number_transitions(self) -> int: @@ -230,11 +224,11 @@ def add_transition(self, The string of stack symbol which replace the stack_from """ # pylint: disable=too-many-arguments - s_from = self._pda_obj_creator.to_state(s_from) - input_symbol = self._pda_obj_creator.to_symbol(input_symbol) - stack_from = self._pda_obj_creator.to_stack_symbol(stack_from) - s_to = self._pda_obj_creator.to_state(s_to) - stack_to = [self._pda_obj_creator.to_stack_symbol(x) for x in stack_to] + s_from = to_state(s_from) + input_symbol = to_symbol(input_symbol) + stack_from = to_stack_symbol(stack_from) + s_to = to_state(s_to) + stack_to = [to_stack_symbol(x) for x in stack_to] self._states.add(s_from) self._states.add(s_to) if input_symbol != PDAEpsilon(): @@ -267,19 +261,19 @@ def __call__(self, input_symbol: Hashable, stack_from: Hashable) -> TransitionValues: """ Calls transition function with given arguments """ - s_from = self._pda_obj_creator.to_state(s_from) - input_symbol = self._pda_obj_creator.to_symbol(input_symbol) - stack_from = self._pda_obj_creator.to_stack_symbol(stack_from) + s_from = to_state(s_from) + input_symbol = to_symbol(input_symbol) + stack_from = to_stack_symbol(stack_from) return self._transition_function(s_from, input_symbol, stack_from) def __contains__(self, transition: InputTransition) -> bool: """ Whether the given transition is present in the PDA """ s_from, input_symbol, stack_from, s_to, stack_to = transition - s_from = self._pda_obj_creator.to_state(s_from) - input_symbol = self._pda_obj_creator.to_symbol(input_symbol) - stack_from = self._pda_obj_creator.to_stack_symbol(stack_from) - s_to = self._pda_obj_creator.to_state(s_to) - stack_to = [self._pda_obj_creator.to_stack_symbol(x) for x in stack_to] + s_from = to_state(s_from) + input_symbol = to_symbol(input_symbol) + stack_from = to_stack_symbol(stack_from) + s_to = to_state(s_to) + stack_to = [to_stack_symbol(x) for x in stack_to] return (s_to, stack_to) in self(s_from, input_symbol, stack_from) def __iter__(self) -> Iterator[Transition]: @@ -301,11 +295,15 @@ def to_final_state(self) -> "PDA": if self._start_stack_symbol is None: raise RuntimeError("start stack symbol should not be None") - new_start = get_next_free("#STARTTOFINAL#", PDAState, self._states) - new_end = get_next_free("#ENDTOFINAL#", PDAState, self._states) - new_stack_symbol = get_next_free("#BOTTOMTOFINAL#", - StackSymbol, - self._stack_alphabet) + new_start = self.__get_next_free("#STARTTOFINAL#", + PDAState, + self._states) + new_end = self.__get_next_free("#ENDTOFINAL#", + PDAState, + self._states) + new_stack_symbol = self.__get_next_free("#BOTTOMTOFINAL#", + StackSymbol, + self._stack_alphabet) new_states = self._states.copy() new_states.add(new_start) new_states.add(new_end) @@ -341,11 +339,15 @@ def to_empty_stack(self) -> "PDA": if self._start_stack_symbol is None: raise RuntimeError("start stack symbol should not be None") - new_start = get_next_free("#STARTEMPTYS#", PDAState, self._states) - new_end = get_next_free("#ENDEMPTYS#", PDAState, self._states) - new_stack_symbol = get_next_free("#BOTTOMEMPTYS#", - StackSymbol, - self._stack_alphabet) + new_start = self.__get_next_free("#STARTEMPTYS#", + PDAState, + self._states) + new_end = self.__get_next_free("#ENDEMPTYS#", + PDAState, + self._states) + new_stack_symbol = self.__get_next_free("#BOTTOMEMPTYS#", + StackSymbol, + self._stack_alphabet) new_states = self._states.copy() new_states.add(new_start) new_states.add(new_end) @@ -427,7 +429,7 @@ def _process_transition_and_state_to_cfg_safe( bodies = self._get_all_bodies_from_state_and_transition( state, transition, variable_converter) if transition[INPUT][INPUT_SYMBOL] != PDAEpsilon(): - _prepend_input_symbol_to_the_bodies(bodies, transition) + self.__prepend_input_symbol_to_the_bodies(bodies, transition) for body in bodies: productions.append(Production(head, body, filtering=False)) @@ -519,6 +521,46 @@ def _initialize_production_from_start_in_to_cfg( state)])) return productions + def from_cfg(self, cfg: CFG) -> "PDA": + """ Converts the CFG to a PDA that generates on empty stack an \ + equivalent language + + Returns + ---------- + new_pda : :class:`~pyformlang.pda.PDA` + The equivalent PDA when accepting on empty stack + """ + state = PDAState("q") + pda_object_converter = PDAObjectConverter(cfg.terminals, cfg.variables) + input_symbols = {pda_object_converter.get_symbol_from(x) + for x in cfg.terminals} + stack_alphabet = {pda_object_converter.get_stack_symbol_from(x) + for x in cfg.terminals.union(cfg.variables)} + start_stack_symbol = None + if cfg.start_symbol: + start_stack_symbol = pda_object_converter.get_stack_symbol_from( + cfg.start_symbol) + new_pda = PDA(states={state}, + input_symbols=input_symbols, + stack_alphabet=stack_alphabet, + start_state=state, + start_stack_symbol=start_stack_symbol) + for production in cfg.productions: + new_pda.add_transition(state, PDAEpsilon(), + pda_object_converter.get_stack_symbol_from( + production.head), + state, + [pda_object_converter.get_stack_symbol_from( + x) for x in production.body]) + for terminal in cfg.terminals: + new_pda.add_transition(state, + pda_object_converter.get_symbol_from( + terminal), + pda_object_converter.get_stack_symbol_from( + terminal), + state, []) + return new_pda + def intersection(self, other: DeterministicFiniteAutomaton) -> "PDA": """ Gets the intersection of the language L generated by the \ current PDA when accepting by final state with something else @@ -726,8 +768,8 @@ def write_as_dot(self, filename: str) -> None: """ write_dot(self.to_networkx(), filename) - def __add_start_state_to_graph(self, - graph: MultiDiGraph, + @staticmethod + def __add_start_state_to_graph(graph: MultiDiGraph, state: PDAState) -> None: """ Adds a starting node to a given graph """ graph.add_node("starting_" + str(state.value), @@ -738,13 +780,25 @@ def __add_start_state_to_graph(self, graph.add_edge("starting_" + str(state.value), state.value) + @staticmethod + def __prepend_input_symbol_to_the_bodies(bodies: List[List[CFGObject]], + transition: Tuple[Tuple, Tuple]) \ + -> None: + to_prepend = Terminal(transition[INPUT][INPUT_SYMBOL].value) + for body in bodies: + body.insert(0, to_prepend) -def _prepend_input_symbol_to_the_bodies(bodies: List[List[CFGObject]], - transition: Tuple[Tuple, Tuple]) \ - -> None: - to_prepend = Terminal(transition[INPUT][INPUT_SYMBOL].value) - for body in bodies: - body.insert(0, to_prepend) + @staticmethod + def __get_next_free(prefix: str, + type_generating: Type, + to_check: Iterable[Any]) -> Any: + """ Get free next state or symbol """ + idx = 0 + new_var = type_generating(prefix) + while new_var in to_check: + new_var = type_generating(prefix + str(idx)) + idx += 1 + return new_var class _PDAStateConverter: @@ -772,15 +826,3 @@ def to_pda_combined_state(self, self._conversions[i_state_pda, i_state_other] = \ [PDAState((state_pda, state_other))] return self._conversions[i_state_pda, i_state_other][0] - - -def get_next_free(prefix: str, - type_generating: Type, - to_check: Iterable[Any]) -> Any: - """ Get free next state or symbol """ - idx = 0 - new_var = type_generating(prefix) - while new_var in to_check: - new_var = type_generating(prefix + str(idx)) - idx += 1 - return new_var diff --git a/pyformlang/pda/utils.py b/pyformlang/pda/utils.py index 48d72e6..3f86d04 100644 --- a/pyformlang/pda/utils.py +++ b/pyformlang/pda/utils.py @@ -1,63 +1,77 @@ """ Useful functions for a PDA """ -from typing import Type, Dict, Hashable, Any +from typing import Dict, Iterable, Optional, Hashable + +from pyformlang.cfg import CFGObject, Variable, Terminal, Epsilon as CFGEpsilon -from .pda_object import PDAObject from .state import State from .symbol import Symbol from .stack_symbol import StackSymbol -from .epsilon import Epsilon - - -class PDAObjectCreator: - """ - A Object in a PDA - """ - - def __init__(self) -> None: - self._state_creator: Dict[Hashable, State] = {} - self._symbol_creator: Dict[Hashable, Symbol] = {} - self._stack_symbol_creator: Dict[Hashable, StackSymbol] = {} - - def to_state(self, given: Hashable) -> State: - """ Convert to a state """ - if isinstance(given, State): - return _get_object_from_known(given, self._state_creator) - return _get_object_from_raw(given, self._state_creator, State) - - def to_symbol(self, given: Hashable) -> Symbol: - """ Convert to a symbol """ - if isinstance(given, Symbol): - return _get_object_from_known(given, self._symbol_creator) - if given == "epsilon": - return Epsilon() - return _get_object_from_raw(given, self._symbol_creator, Symbol) - - def to_stack_symbol(self, given: Hashable) -> StackSymbol: - """ Convert to a stack symbol """ - if isinstance(given, StackSymbol): - return _get_object_from_known(given, - self._stack_symbol_creator) - if isinstance(given, Epsilon): - return given - return _get_object_from_raw(given, - self._stack_symbol_creator, - StackSymbol) - - -def _get_object_from_known(given: PDAObject, - obj_converter: Dict[Any, Any]) -> Any: - if given.value in obj_converter: - return obj_converter[given.value] - obj_converter[given.value] = given - return given - - -def _get_object_from_raw(given: Any, - obj_converter: Dict[Any, Any], - to_type: Type) -> Any: - if given in obj_converter: - return obj_converter[given] - temp = to_type(given) - obj_converter[given] = temp - return temp +from .epsilon import Epsilon as PDAEpsilon + + +class PDAObjectConverter: + """Creates Objects for a PDA""" + + def __init__(self, + terminals: Iterable[Terminal], + variables: Iterable[Variable]) -> None: + self._inverse_symbol: Dict[CFGObject, Optional[Symbol]] = {} + self._inverse_stack_symbol: Dict[CFGObject, Optional[StackSymbol]] = {} + for terminal in terminals: + self._inverse_symbol[terminal] = None + self._inverse_stack_symbol[terminal] = None + for variable in variables: + self._inverse_stack_symbol[variable] = None + + def get_symbol_from(self, symbol: CFGObject) -> Symbol: + """Get a symbol""" + if isinstance(symbol, CFGEpsilon): + return PDAEpsilon() + inverse_symbol = self._inverse_symbol[symbol] + if inverse_symbol is None: + value = str(symbol.value) + temp = Symbol(value) + self._inverse_symbol[symbol] = temp + return temp + return inverse_symbol + + def get_stack_symbol_from(self, stack_symbol: CFGObject) \ + -> StackSymbol: + """Get a stack symbol""" + if isinstance(stack_symbol, CFGEpsilon): + return PDAEpsilon() + inverse_stack_symbol = self._inverse_stack_symbol[stack_symbol] + if inverse_stack_symbol is None: + value = str(stack_symbol.value) + if isinstance(stack_symbol, Terminal): + value = "#TERM#" + value + temp = StackSymbol(value) + self._inverse_stack_symbol[stack_symbol] = temp + return temp + return inverse_stack_symbol + + +def to_state(given: Hashable) -> State: + """ Convert to a state """ + if isinstance(given, State): + return given + return State(given) + + +def to_symbol(given: Hashable) -> Symbol: + """ Convert to a symbol """ + if isinstance(given, Symbol): + return given + if given == "epsilon": + return PDAEpsilon() + return Symbol(given) + + +def to_stack_symbol(given: Hashable) -> StackSymbol: + """ Convert to a stack symbol """ + if isinstance(given, StackSymbol): + return given + if given == "epsilon": + return PDAEpsilon() + return StackSymbol(given) From 89fd2a4d8a5e3ccd80de04df88ac6553ebd4f00c Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 2 Dec 2024 14:41:53 +0300 Subject: [PATCH 08/30] correct start state checks in pda --- pyformlang/pda/pda.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/pyformlang/pda/pda.py b/pyformlang/pda/pda.py index ae17d9a..94eeb8d 100644 --- a/pyformlang/pda/pda.py +++ b/pyformlang/pda/pda.py @@ -290,11 +290,8 @@ def to_final_state(self) -> "PDA": The new PDA which accepts by final state the language that \ was accepted by empty stack """ - if self._start_state is None: - raise RuntimeError("start state should not be None") - if self._start_stack_symbol is None: - raise RuntimeError("start stack symbol should not be None") - + if self.start_state is None or self.start_stack_symbol is None: + return PDA() new_start = self.__get_next_free("#STARTTOFINAL#", PDAState, self._states) @@ -311,8 +308,8 @@ def to_final_state(self) -> "PDA": new_stack_alphabet.add(new_stack_symbol) new_tf = self._transition_function.copy() new_tf.add_transition(new_start, PDAEpsilon(), new_stack_symbol, - self._start_state, [self._start_stack_symbol, - new_stack_symbol]) + self.start_state, [self.start_stack_symbol, + new_stack_symbol]) for state in self._states: new_tf.add_transition(state, PDAEpsilon(), new_stack_symbol, new_end, []) @@ -334,11 +331,8 @@ def to_empty_stack(self) -> "PDA": The new PDA which accepts by empty stack the language that was \ accepted by final state """ - if self._start_state is None: - raise RuntimeError("start state should not be None") - if self._start_stack_symbol is None: - raise RuntimeError("start stack symbol should not be None") - + if self.start_state is None or self.start_stack_symbol is None: + return PDA() new_start = self.__get_next_free("#STARTEMPTYS#", PDAState, self._states) @@ -355,8 +349,8 @@ def to_empty_stack(self) -> "PDA": new_stack_alphabet.add(new_stack_symbol) new_tf = self._transition_function.copy() new_tf.add_transition(new_start, PDAEpsilon(), new_stack_symbol, - self._start_state, [self._start_stack_symbol, - new_stack_symbol]) + self.start_state, [self.start_stack_symbol, + new_stack_symbol]) for state in self._final_states: for stack_symbol in new_stack_alphabet: new_tf.add_transition(state, PDAEpsilon(), stack_symbol, From ff69840c0c3af09d5c73e5bfc21f6f2f627c4e45 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 2 Dec 2024 15:38:06 +0300 Subject: [PATCH 09/30] create object module for better structure --- pyformlang/__init__.py | 13 ++++- pyformlang/cfg/__init__.py | 2 +- pyformlang/cfg/cfg.py | 4 +- pyformlang/cfg/grammar.py | 2 +- pyformlang/cfg/llone_parser.py | 2 +- pyformlang/cfg/utils.py | 52 +++++++++++++----- pyformlang/cfg/utils_cfg.py | 46 ---------------- pyformlang/finite_automaton/utils.py | 30 +---------- pyformlang/objects/__init__.py | 13 +++++ pyformlang/objects/cfg_objects/__init__.py | 14 +++++ .../cfg_objects}/cfg_object.py | 0 .../{cfg => objects/cfg_objects}/epsilon.py | 0 .../cfg_objects}/production.py | 0 .../{cfg => objects/cfg_objects}/terminal.py | 0 pyformlang/objects/cfg_objects/utils.py | 23 ++++++++ .../{cfg => objects/cfg_objects}/variable.py | 4 +- .../finite_automaton_objects/__init__.py | 10 ++++ .../finite_automaton_objects}/epsilon.py | 7 +-- .../finite_automaton_object.py | 16 +++--- .../finite_automaton_objects}/state.py | 7 +-- .../finite_automaton_objects}/symbol.py | 7 +-- .../objects/finite_automaton_objects/utils.py | 35 ++++++++++++ pyformlang/objects/pda_objects/__init__.py | 12 +++++ .../{pda => objects/pda_objects}/epsilon.py | 0 .../pda_objects}/pda_object.py | 0 .../pda_objects}/stack_symbol.py | 0 .../{pda => objects/pda_objects}/state.py | 0 .../{pda => objects/pda_objects}/symbol.py | 0 pyformlang/objects/pda_objects/utils.py | 33 ++++++++++++ pyformlang/objects/regex_objects/__init__.py | 13 +++++ .../regex_objects}/regex_objects.py | 36 +------------ pyformlang/objects/regex_objects/utils.py | 35 ++++++++++++ pyformlang/pda/pda.py | 31 +---------- pyformlang/pda/utils.py | 53 ++++++++++--------- pyformlang/regular_expression/__init__.py | 2 +- pyformlang/regular_expression/python_regex.py | 2 +- pyformlang/regular_expression/regex.py | 2 +- pyformlang/regular_expression/regex_reader.py | 2 +- 38 files changed, 304 insertions(+), 204 deletions(-) delete mode 100644 pyformlang/cfg/utils_cfg.py create mode 100644 pyformlang/objects/__init__.py create mode 100644 pyformlang/objects/cfg_objects/__init__.py rename pyformlang/{cfg => objects/cfg_objects}/cfg_object.py (100%) rename pyformlang/{cfg => objects/cfg_objects}/epsilon.py (100%) rename pyformlang/{cfg => objects/cfg_objects}/production.py (100%) rename pyformlang/{cfg => objects/cfg_objects}/terminal.py (100%) create mode 100644 pyformlang/objects/cfg_objects/utils.py rename pyformlang/{cfg => objects/cfg_objects}/variable.py (87%) create mode 100644 pyformlang/objects/finite_automaton_objects/__init__.py rename pyformlang/{finite_automaton => objects/finite_automaton_objects}/epsilon.py (99%) rename pyformlang/{finite_automaton => objects/finite_automaton_objects}/finite_automaton_object.py (100%) rename pyformlang/{finite_automaton => objects/finite_automaton_objects}/state.py (99%) rename pyformlang/{finite_automaton => objects/finite_automaton_objects}/symbol.py (99%) create mode 100644 pyformlang/objects/finite_automaton_objects/utils.py create mode 100644 pyformlang/objects/pda_objects/__init__.py rename pyformlang/{pda => objects/pda_objects}/epsilon.py (100%) rename pyformlang/{pda => objects/pda_objects}/pda_object.py (100%) rename pyformlang/{pda => objects/pda_objects}/stack_symbol.py (100%) rename pyformlang/{pda => objects/pda_objects}/state.py (100%) rename pyformlang/{pda => objects/pda_objects}/symbol.py (100%) create mode 100644 pyformlang/objects/pda_objects/utils.py create mode 100644 pyformlang/objects/regex_objects/__init__.py rename pyformlang/{regular_expression => objects/regex_objects}/regex_objects.py (85%) create mode 100644 pyformlang/objects/regex_objects/utils.py diff --git a/pyformlang/__init__.py b/pyformlang/__init__.py index dc80d0a..9361f6e 100644 --- a/pyformlang/__init__.py +++ b/pyformlang/__init__.py @@ -26,10 +26,21 @@ """ +import finite_automaton +import regular_expression +import cfg +import fst +import indexed_grammar +import pda +import rsa +import fcfg + + __all__ = ["finite_automaton", "regular_expression", "cfg", "fst", "indexed_grammar", "pda", - "rsa"] + "rsa", + "fcfg"] diff --git a/pyformlang/cfg/__init__.py b/pyformlang/cfg/__init__.py index 826c411..35f134e 100644 --- a/pyformlang/cfg/__init__.py +++ b/pyformlang/cfg/__init__.py @@ -24,7 +24,7 @@ from .variable import Variable from .terminal import Terminal from .epsilon import Epsilon -from .production import Production +from ..objects.cfg_objects.production import Production from .cfg import CFG from .llone_parser import LLOneParser diff --git a/pyformlang/cfg/cfg.py b/pyformlang/cfg/cfg.py index 06ed3cc..4abc7d5 100644 --- a/pyformlang/cfg/cfg.py +++ b/pyformlang/cfg/cfg.py @@ -15,12 +15,12 @@ from .variable import Variable from .terminal import Terminal from .epsilon import Epsilon -from .production import Production +from ..objects.cfg_objects.production import Production from .parse_tree import ParseTree from .cyk_table import CYKTable, DerivationDoesNotExist from .cfg_variable_converter import CFGVariableConverter from .utils import to_variable, to_terminal -from .utils_cfg import remove_nullable_production, get_productions_d +from .utils import remove_nullable_production, get_productions_d EPSILON_SYMBOLS = ["epsilon", "$", "ε", "ϵ", "Є"] diff --git a/pyformlang/cfg/grammar.py b/pyformlang/cfg/grammar.py index c4835ba..9b29cc9 100644 --- a/pyformlang/cfg/grammar.py +++ b/pyformlang/cfg/grammar.py @@ -5,7 +5,7 @@ from .variable import Variable from .terminal import Terminal -from .production import Production +from ..objects.cfg_objects.production import Production class Grammar: diff --git a/pyformlang/cfg/llone_parser.py b/pyformlang/cfg/llone_parser.py index 8d258b8..fe8d862 100644 --- a/pyformlang/cfg/llone_parser.py +++ b/pyformlang/cfg/llone_parser.py @@ -8,7 +8,7 @@ from .parse_tree import ParseTree from .set_queue import SetQueue from .utils import to_terminal -from .utils_cfg import get_productions_d +from .utils import get_productions_d class LLOneParser: diff --git a/pyformlang/cfg/utils.py b/pyformlang/cfg/utils.py index c49da5c..be911c6 100644 --- a/pyformlang/cfg/utils.py +++ b/pyformlang/cfg/utils.py @@ -1,20 +1,46 @@ -""" Useful functions """ +""" Internal Usage only """ -from typing import Hashable +from typing import Dict, List, Iterable, AbstractSet +from .cfg_object import CFGObject from .variable import Variable -from .terminal import Terminal +from .epsilon import Epsilon +from ..objects.cfg_objects.production import Production -def to_variable(given: Hashable) -> Variable: - """ Transformation into a variable """ - if isinstance(given, Variable): - return given - return Variable(given) +def remove_nullable_production_sub(body: List[CFGObject], + nullables: AbstractSet[CFGObject]) \ + -> List[List[CFGObject]]: + """ Recursive sub function to remove nullable objects """ + if not body: + return [[]] + all_next = remove_nullable_production_sub(body[1:], nullables) + res = [] + for body_temp in all_next: + if body[0] in nullables: + res.append(body_temp) + if body[0] != Epsilon(): + res.append([body[0]] + body_temp.copy()) + return res -def to_terminal(given: Hashable) -> Terminal: - """ Transformation into a terminal """ - if isinstance(given, Terminal): - return given - return Terminal(given) +def remove_nullable_production(production: Production, + nullables: AbstractSet[CFGObject]) \ + -> List[Production]: + """ Get all combinations of productions rules after removing nullable """ + next_prod_l = remove_nullable_production_sub(production.body, + nullables) + res = [Production(production.head, prod_l) + for prod_l in next_prod_l + if prod_l] + return res + + +def get_productions_d(productions: Iterable[Production]) \ + -> Dict[Variable, List[Production]]: + """ Get productions as a dictionary """ + productions_d: Dict[Variable, List[Production]] = {} + for production in productions: + production_head = productions_d.setdefault(production.head, []) + production_head.append(production) + return productions_d diff --git a/pyformlang/cfg/utils_cfg.py b/pyformlang/cfg/utils_cfg.py deleted file mode 100644 index c0902d2..0000000 --- a/pyformlang/cfg/utils_cfg.py +++ /dev/null @@ -1,46 +0,0 @@ -""" Internal Usage only""" - -from typing import Dict, List, Iterable, AbstractSet - -from .cfg_object import CFGObject -from .variable import Variable -from .epsilon import Epsilon -from .production import Production - - -def remove_nullable_production_sub(body: List[CFGObject], - nullables: AbstractSet[CFGObject]) \ - -> List[List[CFGObject]]: - """ Recursive sub function to remove nullable objects """ - if not body: - return [[]] - all_next = remove_nullable_production_sub(body[1:], nullables) - res = [] - for body_temp in all_next: - if body[0] in nullables: - res.append(body_temp) - if body[0] != Epsilon(): - res.append([body[0]] + body_temp.copy()) - return res - - -def remove_nullable_production(production: Production, - nullables: AbstractSet[CFGObject]) \ - -> List[Production]: - """ Get all combinations of productions rules after removing nullable """ - next_prod_l = remove_nullable_production_sub(production.body, - nullables) - res = [Production(production.head, prod_l) - for prod_l in next_prod_l - if prod_l] - return res - - -def get_productions_d(productions: Iterable[Production]) \ - -> Dict[Variable, List[Production]]: - """ Get productions as a dictionary """ - productions_d: Dict[Variable, List[Production]] = {} - for production in productions: - production_head = productions_d.setdefault(production.head, []) - production_head.append(production) - return productions_d diff --git a/pyformlang/finite_automaton/utils.py b/pyformlang/finite_automaton/utils.py index ed7e4a2..b5489eb 100644 --- a/pyformlang/finite_automaton/utils.py +++ b/pyformlang/finite_automaton/utils.py @@ -1,6 +1,6 @@ """ Utility for finite automata """ -from typing import Dict, List, AbstractSet, Iterable, Optional, Hashable +from typing import Dict, List, AbstractSet, Iterable, Optional from numpy import empty from .state import State @@ -8,34 +8,6 @@ from .epsilon import Epsilon -def to_state(given: Hashable) -> State: - """ Transforms the input into a state - - Parameters - ---------- - given : any - What we want to transform - """ - if isinstance(given, State): - return given - return State(given) - - -def to_symbol(given: Hashable) -> Symbol: - """ Transforms the input into a symbol - - Parameters - ---------- - given : any - What we want to transform - """ - if isinstance(given, Symbol): - return given - if given in ("epsilon", "ɛ"): - return Epsilon() - return Symbol(given) - - def to_single_state(l_states: Iterable[State]) -> State: """ Merge a list of states diff --git a/pyformlang/objects/__init__.py b/pyformlang/objects/__init__.py new file mode 100644 index 0000000..bb83787 --- /dev/null +++ b/pyformlang/objects/__init__.py @@ -0,0 +1,13 @@ +""" Collection of object representations """ + +import finite_automaton_objects +import cfg_objects +import regex_objects +import pda_objects + + +__all__ = ["finite_automaton_objects", + "cfg_objects", + "regex_objects", + "pda_objects"] + diff --git a/pyformlang/objects/cfg_objects/__init__.py b/pyformlang/objects/cfg_objects/__init__.py new file mode 100644 index 0000000..a999974 --- /dev/null +++ b/pyformlang/objects/cfg_objects/__init__.py @@ -0,0 +1,14 @@ +""" CFG object representations """ + +from .cfg_object import CFGObject +from .variable import Variable +from .terminal import Terminal +from .epsilon import Epsilon +from .production import Production + + +__all__ = ["CFGObject", + "Variable", + "Terminal", + "Epsilon", + "Production"] diff --git a/pyformlang/cfg/cfg_object.py b/pyformlang/objects/cfg_objects/cfg_object.py similarity index 100% rename from pyformlang/cfg/cfg_object.py rename to pyformlang/objects/cfg_objects/cfg_object.py diff --git a/pyformlang/cfg/epsilon.py b/pyformlang/objects/cfg_objects/epsilon.py similarity index 100% rename from pyformlang/cfg/epsilon.py rename to pyformlang/objects/cfg_objects/epsilon.py diff --git a/pyformlang/cfg/production.py b/pyformlang/objects/cfg_objects/production.py similarity index 100% rename from pyformlang/cfg/production.py rename to pyformlang/objects/cfg_objects/production.py diff --git a/pyformlang/cfg/terminal.py b/pyformlang/objects/cfg_objects/terminal.py similarity index 100% rename from pyformlang/cfg/terminal.py rename to pyformlang/objects/cfg_objects/terminal.py diff --git a/pyformlang/objects/cfg_objects/utils.py b/pyformlang/objects/cfg_objects/utils.py new file mode 100644 index 0000000..4f65edc --- /dev/null +++ b/pyformlang/objects/cfg_objects/utils.py @@ -0,0 +1,23 @@ +""" Utility for cfg object creation """ + +from typing import Hashable + +from .variable import Variable +from .terminal import Terminal +from .epsilon import Epsilon + + +def to_variable(given: Hashable) -> Variable: + """ Transformation into a variable """ + if isinstance(given, Variable): + return given + return Variable(given) + + +def to_terminal(given: Hashable) -> Terminal: + """ Transformation into a terminal """ + if isinstance(given, Terminal): + return given + if given == "epsilon": + return Epsilon() + return Terminal(given) diff --git a/pyformlang/cfg/variable.py b/pyformlang/objects/cfg_objects/variable.py similarity index 87% rename from pyformlang/cfg/variable.py rename to pyformlang/objects/cfg_objects/variable.py index 7c60de5..0617504 100644 --- a/pyformlang/cfg/variable.py +++ b/pyformlang/objects/cfg_objects/variable.py @@ -1,7 +1,7 @@ """ A variable in a CFG """ -import string from typing import Optional, Hashable +from string import ascii_uppercase from .cfg_object import CFGObject @@ -24,6 +24,6 @@ def __repr__(self) -> str: def to_text(self) -> str: text = str(self._value) - if text and text[0] not in string.ascii_uppercase: + if text and text[0] not in ascii_uppercase: return '"VAR:' + text + '"' return text diff --git a/pyformlang/objects/finite_automaton_objects/__init__.py b/pyformlang/objects/finite_automaton_objects/__init__.py new file mode 100644 index 0000000..02fefae --- /dev/null +++ b/pyformlang/objects/finite_automaton_objects/__init__.py @@ -0,0 +1,10 @@ +""" Finite automaton object representations """ + +from .state import State +from .symbol import Symbol +from .epsilon import Epsilon + + +__all__ = ["State", + "Symbol", + "Epsilon"] diff --git a/pyformlang/finite_automaton/epsilon.py b/pyformlang/objects/finite_automaton_objects/epsilon.py similarity index 99% rename from pyformlang/finite_automaton/epsilon.py rename to pyformlang/objects/finite_automaton_objects/epsilon.py index 431a98c..fa0dcc5 100644 --- a/pyformlang/finite_automaton/epsilon.py +++ b/pyformlang/objects/finite_automaton_objects/epsilon.py @@ -3,6 +3,7 @@ """ from typing import Any + from .symbol import Symbol @@ -19,8 +20,8 @@ class Epsilon(Symbol): # pylint: disable=too-few-public-methods def __init__(self) -> None: super().__init__("epsilon") - def __hash__(self) -> int: - return hash("EPSILON TRANSITION") - def __eq__(self, other: Any) -> bool: return isinstance(other, Epsilon) + + def __hash__(self) -> int: + return hash("EPSILON TRANSITION") diff --git a/pyformlang/finite_automaton/finite_automaton_object.py b/pyformlang/objects/finite_automaton_objects/finite_automaton_object.py similarity index 100% rename from pyformlang/finite_automaton/finite_automaton_object.py rename to pyformlang/objects/finite_automaton_objects/finite_automaton_object.py index ed80609..96a453c 100644 --- a/pyformlang/finite_automaton/finite_automaton_object.py +++ b/pyformlang/objects/finite_automaton_objects/finite_automaton_object.py @@ -18,14 +18,6 @@ def __init__(self, value: Hashable) -> None: self._value = value self._hash = None - def __hash__(self) -> int: - if self._hash is None: - self._hash = hash(self._value) - return self._hash - - def __repr__(self) -> str: - return str(self._value) - @property def value(self) -> Hashable: """ Gets the value of the object @@ -36,3 +28,11 @@ def value(self) -> Hashable: The value of the object """ return self._value + + def __hash__(self) -> int: + if self._hash is None: + self._hash = hash(self._value) + return self._hash + + def __repr__(self) -> str: + return str(self._value) diff --git a/pyformlang/finite_automaton/state.py b/pyformlang/objects/finite_automaton_objects/state.py similarity index 99% rename from pyformlang/finite_automaton/state.py rename to pyformlang/objects/finite_automaton_objects/state.py index 356bc8d..b8c2ec5 100644 --- a/pyformlang/finite_automaton/state.py +++ b/pyformlang/objects/finite_automaton_objects/state.py @@ -3,6 +3,7 @@ """ from typing import Hashable, Optional, Any + from .finite_automaton_object import FiniteAutomatonObject @@ -27,10 +28,10 @@ def __init__(self, value: Hashable) -> None: self.index = None self.index_cfg_converter: Optional[int] = None - def __hash__(self) -> int: - return super().__hash__() - def __eq__(self, other: Any) -> bool: if isinstance(other, State): return self._value == other._value return self._value == other + + def __hash__(self) -> int: + return super().__hash__() diff --git a/pyformlang/finite_automaton/symbol.py b/pyformlang/objects/finite_automaton_objects/symbol.py similarity index 99% rename from pyformlang/finite_automaton/symbol.py rename to pyformlang/objects/finite_automaton_objects/symbol.py index 8599108..7f1b238 100644 --- a/pyformlang/finite_automaton/symbol.py +++ b/pyformlang/objects/finite_automaton_objects/symbol.py @@ -3,6 +3,7 @@ """ from typing import Any + from .finite_automaton_object import FiniteAutomatonObject @@ -21,10 +22,10 @@ class Symbol(FiniteAutomatonObject): # pylint: disable=too-few-public-methods A """ - def __hash__(self) -> int: - return super().__hash__() - def __eq__(self, other: Any) -> bool: if isinstance(other, Symbol): return self._value == other.value return self._value == other + + def __hash__(self) -> int: + return super().__hash__() diff --git a/pyformlang/objects/finite_automaton_objects/utils.py b/pyformlang/objects/finite_automaton_objects/utils.py new file mode 100644 index 0000000..1e342cf --- /dev/null +++ b/pyformlang/objects/finite_automaton_objects/utils.py @@ -0,0 +1,35 @@ +""" Utility for finite automaton object creation """ + +from typing import Hashable + +from .state import State +from .symbol import Symbol +from .epsilon import Epsilon + + +def to_state(given: Hashable) -> State: + """ Transforms the input into a state + + Parameters + ---------- + given : any + What we want to transform + """ + if isinstance(given, State): + return given + return State(given) + + +def to_symbol(given: Hashable) -> Symbol: + """ Transforms the input into a symbol + + Parameters + ---------- + given : any + What we want to transform + """ + if isinstance(given, Symbol): + return given + if given in ("epsilon", "ɛ"): + return Epsilon() + return Symbol(given) diff --git a/pyformlang/objects/pda_objects/__init__.py b/pyformlang/objects/pda_objects/__init__.py new file mode 100644 index 0000000..eb6ec6f --- /dev/null +++ b/pyformlang/objects/pda_objects/__init__.py @@ -0,0 +1,12 @@ +""" PDA object representations """ + +from .state import State +from .symbol import Symbol +from .stack_symbol import StackSymbol +from .epsilon import Epsilon + + +__all__ = ["State", + "Symbol", + "StackSymbol", + "Epsilon"] diff --git a/pyformlang/pda/epsilon.py b/pyformlang/objects/pda_objects/epsilon.py similarity index 100% rename from pyformlang/pda/epsilon.py rename to pyformlang/objects/pda_objects/epsilon.py diff --git a/pyformlang/pda/pda_object.py b/pyformlang/objects/pda_objects/pda_object.py similarity index 100% rename from pyformlang/pda/pda_object.py rename to pyformlang/objects/pda_objects/pda_object.py diff --git a/pyformlang/pda/stack_symbol.py b/pyformlang/objects/pda_objects/stack_symbol.py similarity index 100% rename from pyformlang/pda/stack_symbol.py rename to pyformlang/objects/pda_objects/stack_symbol.py diff --git a/pyformlang/pda/state.py b/pyformlang/objects/pda_objects/state.py similarity index 100% rename from pyformlang/pda/state.py rename to pyformlang/objects/pda_objects/state.py diff --git a/pyformlang/pda/symbol.py b/pyformlang/objects/pda_objects/symbol.py similarity index 100% rename from pyformlang/pda/symbol.py rename to pyformlang/objects/pda_objects/symbol.py diff --git a/pyformlang/objects/pda_objects/utils.py b/pyformlang/objects/pda_objects/utils.py new file mode 100644 index 0000000..68f0fa0 --- /dev/null +++ b/pyformlang/objects/pda_objects/utils.py @@ -0,0 +1,33 @@ +""" Utility for pda object creation """ + +from typing import Hashable + +from .state import State +from .symbol import Symbol +from .stack_symbol import StackSymbol +from .epsilon import Epsilon + + +def to_state(given: Hashable) -> State: + """ Convert to a state """ + if isinstance(given, State): + return given + return State(given) + + +def to_symbol(given: Hashable) -> Symbol: + """ Convert to a symbol """ + if isinstance(given, Symbol): + return given + if given == "epsilon": + return Epsilon() + return Symbol(given) + + +def to_stack_symbol(given: Hashable) -> StackSymbol: + """ Convert to a stack symbol """ + if isinstance(given, StackSymbol): + return given + if given == "epsilon": + return Epsilon() + return StackSymbol(given) diff --git a/pyformlang/objects/regex_objects/__init__.py b/pyformlang/objects/regex_objects/__init__.py new file mode 100644 index 0000000..f7d9944 --- /dev/null +++ b/pyformlang/objects/regex_objects/__init__.py @@ -0,0 +1,13 @@ +""" Regex object representations """ + +from .regex_objects import * + + +__all__ = ["Node", + "Operator", + "Symbol", + "Concatenation", + "Union", + "KleeneStar", + "Epsilon", + "Empty"] diff --git a/pyformlang/regular_expression/regex_objects.py b/pyformlang/objects/regex_objects/regex_objects.py similarity index 85% rename from pyformlang/regular_expression/regex_objects.py rename to pyformlang/objects/regex_objects/regex_objects.py index 65fc5b1..e14dd4e 100644 --- a/pyformlang/regular_expression/regex_objects.py +++ b/pyformlang/objects/regex_objects/regex_objects.py @@ -5,8 +5,8 @@ from typing import List, Iterable from abc import abstractmethod -from pyformlang.cfg import Production -from pyformlang.cfg.utils import to_variable, to_terminal +from cfg_objects.production import Production +from cfg_objects.utils import to_variable, to_terminal class Node: # pylint: disable=too-few-public-methods @@ -58,38 +58,6 @@ def get_cfg_rules(self, current_symbol: str, sons: Iterable[str]) \ raise NotImplementedError -CONCATENATION_SYMBOLS = ["."] -UNION_SYMBOLS = ["|", "+"] -KLEENE_STAR_SYMBOLS = ["*"] -EPSILON_SYMBOLS = ["epsilon", "$"] -PARENTHESIS = ["(", ")"] - -SPECIAL_SYMBOLS = CONCATENATION_SYMBOLS + \ - UNION_SYMBOLS + \ - KLEENE_STAR_SYMBOLS + \ - EPSILON_SYMBOLS + \ - PARENTHESIS - - -def to_node(value: str) -> Node: - """ Transforms a given value into a node """ - if not value: - res = Empty() - elif value in CONCATENATION_SYMBOLS: - res = Concatenation() - elif value in UNION_SYMBOLS: - res = Union() - elif value in KLEENE_STAR_SYMBOLS: - res = KleeneStar() - elif value in EPSILON_SYMBOLS: - res = Epsilon() - elif value[0] == "\\": - res = Symbol(value[1:]) - else: - res = Symbol(value) - return res - - class Operator(Node): # pylint: disable=too-few-public-methods """ Represents an operator diff --git a/pyformlang/objects/regex_objects/utils.py b/pyformlang/objects/regex_objects/utils.py new file mode 100644 index 0000000..5c061f9 --- /dev/null +++ b/pyformlang/objects/regex_objects/utils.py @@ -0,0 +1,35 @@ +""" Utility for regex object creation """ + +from .regex_objects import Symbol, Node, \ + Empty, Concatenation, Union, KleeneStar, Epsilon + +CONCATENATION_SYMBOLS = ["."] +UNION_SYMBOLS = ["|", "+"] +KLEENE_STAR_SYMBOLS = ["*"] +EPSILON_SYMBOLS = ["epsilon", "$"] +PARENTHESIS = ["(", ")"] + +SPECIAL_SYMBOLS = CONCATENATION_SYMBOLS + \ + UNION_SYMBOLS + \ + KLEENE_STAR_SYMBOLS + \ + EPSILON_SYMBOLS + \ + PARENTHESIS + + +def to_node(value: str) -> Node: + """ Transforms a given value into a node """ + if not value: + res = Empty() + elif value in CONCATENATION_SYMBOLS: + res = Concatenation() + elif value in UNION_SYMBOLS: + res = Union() + elif value in KLEENE_STAR_SYMBOLS: + res = KleeneStar() + elif value in EPSILON_SYMBOLS: + res = Epsilon() + elif value[0] == "\\": + res = Symbol(value[1:]) + else: + res = Symbol(value) + return res diff --git a/pyformlang/pda/pda.py b/pyformlang/pda/pda.py index 94eeb8d..bf4cb61 100644 --- a/pyformlang/pda/pda.py +++ b/pyformlang/pda/pda.py @@ -22,7 +22,7 @@ from .epsilon import Epsilon as PDAEpsilon from .transition_function import TransitionFunction from .transition_function import TransitionKey, TransitionValues, Transition -from .utils import PDAObjectConverter, to_state, to_symbol, to_stack_symbol +from .utils import PDASymbolConverter, to_state, to_symbol, to_stack_symbol INPUT_SYMBOL = 1 @@ -525,7 +525,7 @@ def from_cfg(self, cfg: CFG) -> "PDA": The equivalent PDA when accepting on empty stack """ state = PDAState("q") - pda_object_converter = PDAObjectConverter(cfg.terminals, cfg.variables) + pda_object_converter = PDASymbolConverter(cfg.terminals, cfg.variables) input_symbols = {pda_object_converter.get_symbol_from(x) for x in cfg.terminals} stack_alphabet = {pda_object_converter.get_stack_symbol_from(x) @@ -793,30 +793,3 @@ def __get_next_free(prefix: str, new_var = type_generating(prefix + str(idx)) idx += 1 return new_var - - -class _PDAStateConverter: - # pylint: disable=too-few-public-methods - - def __init__(self, - states_pda: Set[PDAState], - states_dfa: Set[FAState]) -> None: - self._inverse_state_pda = {} - for i, state in enumerate(states_pda): - self._inverse_state_pda[state] = i - self._inverse_state_dfa = {} - for i, state in enumerate(states_dfa): - self._inverse_state_dfa[state] = i - self._conversions = empty((len(states_pda), len(states_dfa)), - dtype=PDAState) - - def to_pda_combined_state(self, - state_pda: PDAState, - state_other: FAState) -> PDAState: - """ To PDA state in the intersection function """ - i_state_pda = self._inverse_state_pda[state_pda] - i_state_other = self._inverse_state_dfa[state_other] - if self._conversions[i_state_pda, i_state_other] is None: - self._conversions[i_state_pda, i_state_other] = \ - [PDAState((state_pda, state_other))] - return self._conversions[i_state_pda, i_state_other][0] diff --git a/pyformlang/pda/utils.py b/pyformlang/pda/utils.py index 3f86d04..0a2204c 100644 --- a/pyformlang/pda/utils.py +++ b/pyformlang/pda/utils.py @@ -1,16 +1,18 @@ """ Useful functions for a PDA """ -from typing import Dict, Iterable, Optional, Hashable +from typing import Dict, Set, Iterable, Optional, Hashable +from numpy import empty from pyformlang.cfg import CFGObject, Variable, Terminal, Epsilon as CFGEpsilon +from pyformlang.finite_automaton import State as FAState -from .state import State +from .state import State as PDAState from .symbol import Symbol from .stack_symbol import StackSymbol from .epsilon import Epsilon as PDAEpsilon -class PDAObjectConverter: +class PDASymbolConverter: """Creates Objects for a PDA""" def __init__(self, @@ -52,26 +54,29 @@ def get_stack_symbol_from(self, stack_symbol: CFGObject) \ return inverse_stack_symbol -def to_state(given: Hashable) -> State: - """ Convert to a state """ - if isinstance(given, State): - return given - return State(given) - - -def to_symbol(given: Hashable) -> Symbol: - """ Convert to a symbol """ - if isinstance(given, Symbol): - return given - if given == "epsilon": - return PDAEpsilon() - return Symbol(given) +class PDAStateConverter: + """Combines PDA and FA states""" + # pylint: disable=too-few-public-methods + def __init__(self, + states_pda: Set[PDAState], + states_dfa: Set[FAState]) -> None: + self._inverse_state_pda = {} + for i, state in enumerate(states_pda): + self._inverse_state_pda[state] = i + self._inverse_state_dfa = {} + for i, state in enumerate(states_dfa): + self._inverse_state_dfa[state] = i + self._conversions = empty((len(states_pda), len(states_dfa)), + dtype=PDAState) -def to_stack_symbol(given: Hashable) -> StackSymbol: - """ Convert to a stack symbol """ - if isinstance(given, StackSymbol): - return given - if given == "epsilon": - return PDAEpsilon() - return StackSymbol(given) + def to_pda_combined_state(self, + state_pda: PDAState, + state_other: FAState) -> PDAState: + """ To PDA state in the intersection function """ + i_state_pda = self._inverse_state_pda[state_pda] + i_state_other = self._inverse_state_dfa[state_other] + if self._conversions[i_state_pda, i_state_other] is None: + self._conversions[i_state_pda, i_state_other] = \ + [PDAState((state_pda, state_other))] + return self._conversions[i_state_pda, i_state_other][0] diff --git a/pyformlang/regular_expression/__init__.py b/pyformlang/regular_expression/__init__.py index 72435d1..b3fd098 100644 --- a/pyformlang/regular_expression/__init__.py +++ b/pyformlang/regular_expression/__init__.py @@ -21,7 +21,7 @@ from .regex import Regex -from .regex_objects import MisformedRegexError +from ..objects.regex_objects.regex_objects import MisformedRegexError from .python_regex import PythonRegex __all__ = ["Regex", "PythonRegex", "MisformedRegexError"] diff --git a/pyformlang/regular_expression/python_regex.py b/pyformlang/regular_expression/python_regex.py index 3f6f48f..5607668 100644 --- a/pyformlang/regular_expression/python_regex.py +++ b/pyformlang/regular_expression/python_regex.py @@ -7,7 +7,7 @@ from string import printable from unicodedata import lookup -from .regex_objects import MisformedRegexError +from ..objects.regex_objects.regex_objects import MisformedRegexError from .regex_reader import WRONG_PARENTHESIS_MESSAGE from .regex import Regex diff --git a/pyformlang/regular_expression/regex.py b/pyformlang/regular_expression/regex.py index 80274b0..17edba9 100644 --- a/pyformlang/regular_expression/regex.py +++ b/pyformlang/regular_expression/regex.py @@ -11,7 +11,7 @@ from pyformlang.cfg.utils import to_variable from .regex_reader import RegexReader -from .regex_objects import Epsilon as RegexEpsilon, \ +from ..objects.regex_objects.regex_objects import Epsilon as RegexEpsilon, \ Empty, Concatenation, Union, KleeneStar diff --git a/pyformlang/regular_expression/regex_reader.py b/pyformlang/regular_expression/regex_reader.py index 11d86fa..f9a5a23 100644 --- a/pyformlang/regular_expression/regex_reader.py +++ b/pyformlang/regular_expression/regex_reader.py @@ -5,7 +5,7 @@ from typing import List, Optional from re import sub -from .regex_objects import to_node, Node, Operator, Symbol, Empty, \ +from ..objects.regex_objects.regex_objects import to_node, Node, Operator, Symbol, Empty, \ Concatenation, Union, KleeneStar, MisformedRegexError, SPECIAL_SYMBOLS MISFORMED_MESSAGE = "The regex is misformed here." From 4418f093c740a7e248253452c9c5096a0cb7c153 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 2 Dec 2024 17:10:00 +0300 Subject: [PATCH 10/30] correct object imports --- pyformlang/__init__.py | 16 ++--- pyformlang/cfg/__init__.py | 10 ++- pyformlang/cfg/cfg.py | 9 +-- pyformlang/cfg/cfg_variable_converter.py | 2 +- pyformlang/cfg/cyk_table.py | 4 +- pyformlang/cfg/grammar.py | 4 +- pyformlang/cfg/llone_parser.py | 5 +- pyformlang/cfg/parse_tree.py | 3 +- pyformlang/cfg/recursive_decent_parser.py | 6 +- pyformlang/cfg/utils.py | 5 +- pyformlang/fcfg/__init__.py | 13 ++-- pyformlang/fcfg/fcfg.py | 6 +- pyformlang/fcfg/feature_production.py | 3 +- pyformlang/fcfg/state.py | 3 +- pyformlang/finite_automaton/__init__.py | 6 +- .../deterministic_finite_automaton.py | 5 +- .../deterministic_transition_function.py | 4 +- .../finite_automaton/doubly_linked_list.py | 1 + pyformlang/finite_automaton/epsilon_nfa.py | 6 +- .../finite_automaton/finite_automaton.py | 6 +- .../hopcroft_processing_list.py | 2 +- .../nondeterministic_finite_automaton.py | 4 +- .../nondeterministic_transition_function.py | 3 +- pyformlang/finite_automaton/partition.py | 2 +- .../finite_automaton/transition_function.py | 3 +- pyformlang/finite_automaton/utils.py | 4 +- pyformlang/fst/__init__.py | 1 + pyformlang/indexed_grammar/__init__.py | 1 + pyformlang/objects/__init__.py | 9 ++- .../objects/pda_objects/stack_symbol.py | 1 + .../objects/regex_objects/regex_objects.py | 4 +- pyformlang/pda/__init__.py | 6 +- pyformlang/pda/pda.py | 57 ++++++++-------- pyformlang/pda/transition_function.py | 4 +- pyformlang/pda/utils.py | 65 +++++++++---------- pyformlang/regular_expression/__init__.py | 4 +- pyformlang/regular_expression/python_regex.py | 2 +- pyformlang/regular_expression/regex.py | 8 +-- pyformlang/regular_expression/regex_reader.py | 6 +- pyformlang/rsa/__init__.py | 2 +- pyformlang/rsa/box.py | 3 +- pyformlang/rsa/recursive_automaton.py | 2 +- 42 files changed, 141 insertions(+), 169 deletions(-) diff --git a/pyformlang/__init__.py b/pyformlang/__init__.py index 9361f6e..cd1b1cc 100644 --- a/pyformlang/__init__.py +++ b/pyformlang/__init__.py @@ -26,14 +26,14 @@ """ -import finite_automaton -import regular_expression -import cfg -import fst -import indexed_grammar -import pda -import rsa -import fcfg +from . import finite_automaton +from . import regular_expression +from . import cfg +from . import fst +from . import indexed_grammar +from . import pda +from . import rsa +from . import fcfg __all__ = ["finite_automaton", diff --git a/pyformlang/cfg/__init__.py b/pyformlang/cfg/__init__.py index 35f134e..79efc12 100644 --- a/pyformlang/cfg/__init__.py +++ b/pyformlang/cfg/__init__.py @@ -20,18 +20,16 @@ """ -from .cfg_object import CFGObject -from .variable import Variable -from .terminal import Terminal -from .epsilon import Epsilon -from ..objects.cfg_objects.production import Production -from .cfg import CFG +from .cfg import CFG, CFGObject, Variable, Terminal, Epsilon, Production +from .parse_tree import ParseTree from .llone_parser import LLOneParser + __all__ = ["CFGObject", "Variable", "Terminal", "Production", "CFG", "Epsilon", + "ParseTree", "LLOneParser"] diff --git a/pyformlang/cfg/cfg.py b/pyformlang/cfg/cfg.py index 4abc7d5..9b17903 100644 --- a/pyformlang/cfg/cfg.py +++ b/pyformlang/cfg/cfg.py @@ -11,16 +11,13 @@ from pyformlang.finite_automaton import DeterministicFiniteAutomaton, State from .grammar import Grammar -from .cfg_object import CFGObject -from .variable import Variable -from .terminal import Terminal -from .epsilon import Epsilon -from ..objects.cfg_objects.production import Production from .parse_tree import ParseTree from .cyk_table import CYKTable, DerivationDoesNotExist from .cfg_variable_converter import CFGVariableConverter -from .utils import to_variable, to_terminal from .utils import remove_nullable_production, get_productions_d +from ..objects.cfg_objects import CFGObject, \ + Variable, Terminal, Epsilon, Production +from ..objects.cfg_objects.utils import to_variable, to_terminal EPSILON_SYMBOLS = ["epsilon", "$", "ε", "ϵ", "Є"] diff --git a/pyformlang/cfg/cfg_variable_converter.py b/pyformlang/cfg/cfg_variable_converter.py index 3f62ac8..14f01c8 100644 --- a/pyformlang/cfg/cfg_variable_converter.py +++ b/pyformlang/cfg/cfg_variable_converter.py @@ -2,7 +2,7 @@ from typing import Dict, List, AbstractSet, Tuple, Optional, Any -from .cfg import Variable +from ..objects.cfg_objects import Variable class CFGVariableConverter: diff --git a/pyformlang/cfg/cyk_table.py b/pyformlang/cfg/cyk_table.py index 3e6612c..13bed8d 100644 --- a/pyformlang/cfg/cyk_table.py +++ b/pyformlang/cfg/cyk_table.py @@ -5,10 +5,8 @@ from typing import Dict, List, Set, Iterable, Tuple, Any from .grammar import Grammar -from .cfg_object import CFGObject -from .terminal import Terminal -from .epsilon import Epsilon from .parse_tree import ParseTree +from ..objects.cfg_objects import CFGObject, Terminal, Epsilon class CYKTable: diff --git a/pyformlang/cfg/grammar.py b/pyformlang/cfg/grammar.py index 9b29cc9..ac2aa3b 100644 --- a/pyformlang/cfg/grammar.py +++ b/pyformlang/cfg/grammar.py @@ -3,9 +3,7 @@ from typing import Set, Optional from abc import abstractmethod -from .variable import Variable -from .terminal import Terminal -from ..objects.cfg_objects.production import Production +from ..objects.cfg_objects import Variable, Terminal, Production class Grammar: diff --git a/pyformlang/cfg/llone_parser.py b/pyformlang/cfg/llone_parser.py index fe8d862..05c82f6 100644 --- a/pyformlang/cfg/llone_parser.py +++ b/pyformlang/cfg/llone_parser.py @@ -3,12 +3,11 @@ from typing import Dict, List, Set, Iterable, Tuple, Hashable from .cfg import CFG, Production, NotParsableException -from .cfg_object import CFGObject -from .epsilon import Epsilon from .parse_tree import ParseTree from .set_queue import SetQueue -from .utils import to_terminal from .utils import get_productions_d +from ..objects.cfg_objects import CFGObject, Epsilon +from ..objects.cfg_objects.utils import to_terminal class LLOneParser: diff --git a/pyformlang/cfg/parse_tree.py b/pyformlang/cfg/parse_tree.py index cf74554..490964b 100644 --- a/pyformlang/cfg/parse_tree.py +++ b/pyformlang/cfg/parse_tree.py @@ -5,8 +5,7 @@ from networkx import DiGraph from networkx.drawing.nx_pydot import write_dot -from .cfg_object import CFGObject -from .variable import Variable +from ..objects.cfg_objects import CFGObject, Variable class ParseTree: diff --git a/pyformlang/cfg/recursive_decent_parser.py b/pyformlang/cfg/recursive_decent_parser.py index 1974b61..07c434f 100644 --- a/pyformlang/cfg/recursive_decent_parser.py +++ b/pyformlang/cfg/recursive_decent_parser.py @@ -5,11 +5,9 @@ from typing import List, Iterable, Tuple, Optional, Hashable, Any from .cfg import CFG, NotParsableException -from .variable import Variable -from .terminal import Terminal -from .epsilon import Epsilon from .parse_tree import ParseTree -from .utils import to_terminal +from ..objects.cfg_objects import Variable, Terminal, Epsilon +from ..objects.cfg_objects.utils import to_terminal def _get_index_to_extend(current_expansion: List[Any], left: bool) \ diff --git a/pyformlang/cfg/utils.py b/pyformlang/cfg/utils.py index be911c6..b3dc56b 100644 --- a/pyformlang/cfg/utils.py +++ b/pyformlang/cfg/utils.py @@ -2,10 +2,7 @@ from typing import Dict, List, Iterable, AbstractSet -from .cfg_object import CFGObject -from .variable import Variable -from .epsilon import Epsilon -from ..objects.cfg_objects.production import Production +from ..objects.cfg_objects import CFGObject, Variable, Epsilon, Production def remove_nullable_production_sub(body: List[CFGObject], diff --git a/pyformlang/fcfg/__init__.py b/pyformlang/fcfg/__init__.py index ab4d04c..fb844e4 100644 --- a/pyformlang/fcfg/__init__.py +++ b/pyformlang/fcfg/__init__.py @@ -15,14 +15,17 @@ """ +from .fcfg import FCFG +from .feature_production import FeatureProduction +from .feature_structure import FeatureStructure, \ + ContentAlreadyExistsException, \ + FeatureStructuresNotCompatibleException, \ + PathDoesNotExistsException + + __all__ = ["FCFG", "FeatureStructure", "FeatureProduction", "ContentAlreadyExistsException", "FeatureStructuresNotCompatibleException", "PathDoesNotExistsException"] - -from pyformlang.fcfg.fcfg import FCFG -from pyformlang.fcfg.feature_production import FeatureProduction -from pyformlang.fcfg.feature_structure import FeatureStructure, ContentAlreadyExistsException, \ - FeatureStructuresNotCompatibleException, PathDoesNotExistsException diff --git a/pyformlang/fcfg/fcfg.py b/pyformlang/fcfg/fcfg.py index c02da46..e9fc296 100644 --- a/pyformlang/fcfg/fcfg.py +++ b/pyformlang/fcfg/fcfg.py @@ -2,10 +2,10 @@ import string from typing import Iterable, AbstractSet, Union -from pyformlang.cfg import CFG, Terminal, Epsilon, Variable +from objects.cfg_objects.utils import to_terminal + +from pyformlang.cfg import CFG, Terminal, Epsilon, Variable, ParseTree from pyformlang.cfg.cfg import is_special_text, EPSILON_SYMBOLS, NotParsableException -from pyformlang.cfg.parse_tree import ParseTree -from pyformlang.cfg.utils import to_terminal from pyformlang.fcfg.feature_production import FeatureProduction from pyformlang.fcfg.feature_structure import FeatureStructure, FeatureStructuresNotCompatibleException from pyformlang.fcfg.state import State, StateProcessed diff --git a/pyformlang/fcfg/feature_production.py b/pyformlang/fcfg/feature_production.py index 587aba2..59b187e 100644 --- a/pyformlang/fcfg/feature_production.py +++ b/pyformlang/fcfg/feature_production.py @@ -1,8 +1,7 @@ """Production rules with features""" from typing import List -from pyformlang.cfg import Production, Variable -from pyformlang.cfg.cfg_object import CFGObject +from pyformlang.cfg import CFGObject, Variable, Production from pyformlang.fcfg.feature_structure import FeatureStructure diff --git a/pyformlang/fcfg/state.py b/pyformlang/fcfg/state.py index fd5e2bd..a16f9b4 100644 --- a/pyformlang/fcfg/state.py +++ b/pyformlang/fcfg/state.py @@ -1,8 +1,7 @@ """Internal usage states""" from typing import Tuple -from pyformlang.cfg import Variable -from pyformlang.cfg.parse_tree import ParseTree +from pyformlang.cfg import Variable, ParseTree from pyformlang.fcfg.feature_production import FeatureProduction from pyformlang.fcfg.feature_structure import FeatureStructure diff --git a/pyformlang/finite_automaton/__init__.py b/pyformlang/finite_automaton/__init__.py index e6b7db8..6fec7db 100644 --- a/pyformlang/finite_automaton/__init__.py +++ b/pyformlang/finite_automaton/__init__.py @@ -34,13 +34,10 @@ """ -from .finite_automaton import FiniteAutomaton +from .finite_automaton import FiniteAutomaton, State, Symbol, Epsilon from .deterministic_finite_automaton import DeterministicFiniteAutomaton from .nondeterministic_finite_automaton import NondeterministicFiniteAutomaton from .epsilon_nfa import EpsilonNFA -from .state import State -from .symbol import Symbol -from .epsilon import Epsilon from .deterministic_transition_function import \ (DeterministicTransitionFunction, DuplicateTransitionError, @@ -48,6 +45,7 @@ from .nondeterministic_transition_function import \ NondeterministicTransitionFunction + __all__ = ["FiniteAutomaton", "DeterministicFiniteAutomaton", "NondeterministicFiniteAutomaton", diff --git a/pyformlang/finite_automaton/deterministic_finite_automaton.py b/pyformlang/finite_automaton/deterministic_finite_automaton.py index d695388..3856f72 100644 --- a/pyformlang/finite_automaton/deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/deterministic_finite_automaton.py @@ -4,13 +4,14 @@ from typing import Iterable, AbstractSet, Optional, Hashable, Any -from .state import State from .deterministic_transition_function import DeterministicTransitionFunction from .epsilon_nfa import EpsilonNFA from .nondeterministic_finite_automaton import NondeterministicFiniteAutomaton from .hopcroft_processing_list import HopcroftProcessingList from .partition import Partition -from .utils import to_state, to_symbol, to_single_state, PreviousTransitions +from .utils import to_single_state, PreviousTransitions +from ..objects.finite_automaton_objects import State +from ..objects.finite_automaton_objects.utils import to_state, to_symbol class DeterministicFiniteAutomaton(NondeterministicFiniteAutomaton): diff --git a/pyformlang/finite_automaton/deterministic_transition_function.py b/pyformlang/finite_automaton/deterministic_transition_function.py index 39d0a8f..7b2f9a7 100644 --- a/pyformlang/finite_automaton/deterministic_transition_function.py +++ b/pyformlang/finite_automaton/deterministic_transition_function.py @@ -4,12 +4,10 @@ from typing import Optional -from .state import State -from .symbol import Symbol -from .epsilon import Epsilon from .nondeterministic_transition_function import \ NondeterministicTransitionFunction from .nondeterministic_finite_automaton import InvalidEpsilonTransition +from ..objects.finite_automaton_objects import State, Symbol, Epsilon class DeterministicTransitionFunction(NondeterministicTransitionFunction): diff --git a/pyformlang/finite_automaton/doubly_linked_list.py b/pyformlang/finite_automaton/doubly_linked_list.py index ce09237..d1345e9 100644 --- a/pyformlang/finite_automaton/doubly_linked_list.py +++ b/pyformlang/finite_automaton/doubly_linked_list.py @@ -1,6 +1,7 @@ """A doubly linked list""" from typing import Iterable, Optional, Any + from .doubly_linked_node import DoublyLinkedNode diff --git a/pyformlang/finite_automaton/epsilon_nfa.py b/pyformlang/finite_automaton/epsilon_nfa.py index 626019d..e80a125 100644 --- a/pyformlang/finite_automaton/epsilon_nfa.py +++ b/pyformlang/finite_automaton/epsilon_nfa.py @@ -5,13 +5,11 @@ from typing import Iterable, Set, AbstractSet, Hashable from networkx import MultiDiGraph -from .state import State -from .symbol import Symbol -from .epsilon import Epsilon from .nondeterministic_transition_function import \ NondeterministicTransitionFunction from .finite_automaton import FiniteAutomaton -from .utils import to_state, to_symbol +from ..objects.finite_automaton_objects import State, Symbol, Epsilon +from ..objects.finite_automaton_objects.utils import to_state, to_symbol class EpsilonNFA(FiniteAutomaton): diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index efc7392..654a039 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -9,11 +9,9 @@ from pyformlang.fst import FST -from .state import State -from .symbol import Symbol -from .epsilon import Epsilon from .transition_function import TransitionFunction -from .utils import to_state, to_symbol +from ..objects.finite_automaton_objects import State, Symbol, Epsilon +from ..objects.finite_automaton_objects.utils import to_state, to_symbol AutomatonT = TypeVar("AutomatonT", bound="FiniteAutomaton") diff --git a/pyformlang/finite_automaton/hopcroft_processing_list.py b/pyformlang/finite_automaton/hopcroft_processing_list.py index 2bc02f4..3997754 100644 --- a/pyformlang/finite_automaton/hopcroft_processing_list.py +++ b/pyformlang/finite_automaton/hopcroft_processing_list.py @@ -5,7 +5,7 @@ from typing import Dict, List, Set, Tuple from numpy import zeros -from .symbol import Symbol +from ..objects.finite_automaton_objects import Symbol class HopcroftProcessingList: diff --git a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py index 5d32776..7f56f9f 100644 --- a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py @@ -4,9 +4,9 @@ from typing import Iterable, Hashable -from .epsilon import Epsilon from .epsilon_nfa import EpsilonNFA -from .utils import to_symbol +from ..objects.finite_automaton_objects import Epsilon +from ..objects.finite_automaton_objects.utils import to_symbol class NondeterministicFiniteAutomaton(EpsilonNFA): diff --git a/pyformlang/finite_automaton/nondeterministic_transition_function.py b/pyformlang/finite_automaton/nondeterministic_transition_function.py index 030a605..4815cbe 100644 --- a/pyformlang/finite_automaton/nondeterministic_transition_function.py +++ b/pyformlang/finite_automaton/nondeterministic_transition_function.py @@ -5,9 +5,8 @@ from typing import Dict, Set, Iterable, Tuple from copy import deepcopy -from .state import State -from .symbol import Symbol from .transition_function import TransitionFunction +from ..objects.finite_automaton_objects import State, Symbol class NondeterministicTransitionFunction(TransitionFunction): diff --git a/pyformlang/finite_automaton/partition.py b/pyformlang/finite_automaton/partition.py index e292900..77be2d6 100644 --- a/pyformlang/finite_automaton/partition.py +++ b/pyformlang/finite_automaton/partition.py @@ -6,7 +6,7 @@ from .doubly_linked_list import DoublyLinkedList from .doubly_linked_node import DoublyLinkedNode -from .state import State +from ..objects.finite_automaton_objects import State class Partition: diff --git a/pyformlang/finite_automaton/transition_function.py b/pyformlang/finite_automaton/transition_function.py index 4682e89..2a283aa 100644 --- a/pyformlang/finite_automaton/transition_function.py +++ b/pyformlang/finite_automaton/transition_function.py @@ -5,8 +5,7 @@ from typing import Dict, Set, Tuple, Iterable, Iterator from abc import abstractmethod -from .state import State -from .symbol import Symbol +from ..objects.finite_automaton_objects import State, Symbol class TransitionFunction(Iterable[Tuple[State, Symbol, State]]): diff --git a/pyformlang/finite_automaton/utils.py b/pyformlang/finite_automaton/utils.py index b5489eb..d488604 100644 --- a/pyformlang/finite_automaton/utils.py +++ b/pyformlang/finite_automaton/utils.py @@ -3,9 +3,7 @@ from typing import Dict, List, AbstractSet, Iterable, Optional from numpy import empty -from .state import State -from .symbol import Symbol -from .epsilon import Epsilon +from ..objects.finite_automaton_objects import State, Symbol def to_single_state(l_states: Iterable[State]) -> State: diff --git a/pyformlang/fst/__init__.py b/pyformlang/fst/__init__.py index ebf1832..afd33d1 100644 --- a/pyformlang/fst/__init__.py +++ b/pyformlang/fst/__init__.py @@ -14,4 +14,5 @@ from .fst import FST + __all__ = ["FST"] diff --git a/pyformlang/indexed_grammar/__init__.py b/pyformlang/indexed_grammar/__init__.py index 06f5ea5..14da624 100644 --- a/pyformlang/indexed_grammar/__init__.py +++ b/pyformlang/indexed_grammar/__init__.py @@ -29,6 +29,7 @@ from .duplication_rule import DuplicationRule from .indexed_grammar import IndexedGrammar + __all__ = ["Rules", "ConsumptionRule", "EndRule", diff --git a/pyformlang/objects/__init__.py b/pyformlang/objects/__init__.py index bb83787..abc7ae7 100644 --- a/pyformlang/objects/__init__.py +++ b/pyformlang/objects/__init__.py @@ -1,13 +1,12 @@ """ Collection of object representations """ -import finite_automaton_objects -import cfg_objects -import regex_objects -import pda_objects +from . import finite_automaton_objects +from . import cfg_objects +from . import regex_objects +from . import pda_objects __all__ = ["finite_automaton_objects", "cfg_objects", "regex_objects", "pda_objects"] - diff --git a/pyformlang/objects/pda_objects/stack_symbol.py b/pyformlang/objects/pda_objects/stack_symbol.py index a932c14..0ff6346 100644 --- a/pyformlang/objects/pda_objects/stack_symbol.py +++ b/pyformlang/objects/pda_objects/stack_symbol.py @@ -4,6 +4,7 @@ from .symbol import Symbol + class StackSymbol(Symbol): """ A StackSymbol in a pushdown automaton diff --git a/pyformlang/objects/regex_objects/regex_objects.py b/pyformlang/objects/regex_objects/regex_objects.py index e14dd4e..e5e78d8 100644 --- a/pyformlang/objects/regex_objects/regex_objects.py +++ b/pyformlang/objects/regex_objects/regex_objects.py @@ -5,8 +5,8 @@ from typing import List, Iterable from abc import abstractmethod -from cfg_objects.production import Production -from cfg_objects.utils import to_variable, to_terminal +from ..cfg_objects.production import Production +from ..cfg_objects.utils import to_variable, to_terminal class Node: # pylint: disable=too-few-public-methods diff --git a/pyformlang/pda/__init__.py b/pyformlang/pda/__init__.py index c77cfd0..2eee7d1 100644 --- a/pyformlang/pda/__init__.py +++ b/pyformlang/pda/__init__.py @@ -21,10 +21,8 @@ """ from .pda import PDA -from .state import State -from .symbol import Symbol -from .stack_symbol import StackSymbol -from .epsilon import Epsilon +from ..objects.pda_objects import State, Symbol, StackSymbol, Epsilon + __all__ = ["PDA", "State", diff --git a/pyformlang/pda/pda.py b/pyformlang/pda/pda.py index bf4cb61..5b55dd4 100644 --- a/pyformlang/pda/pda.py +++ b/pyformlang/pda/pda.py @@ -4,25 +4,22 @@ Iterator, Iterable, Tuple, Type, Optional, Hashable, Any from json import dumps, loads from itertools import product -from numpy import empty from networkx import MultiDiGraph from networkx.drawing.nx_pydot import write_dot from pyformlang.finite_automaton import DeterministicFiniteAutomaton -from pyformlang.finite_automaton import State as FAState from pyformlang.finite_automaton import Symbol as FASymbol from pyformlang.finite_automaton import Epsilon as FAEpsilon -from pyformlang.cfg import CFG, Variable, Terminal, Production -from pyformlang.cfg.cfg_object import CFGObject +from pyformlang.cfg import CFG, CFGObject, Variable, Terminal, Production from pyformlang.cfg.cfg_variable_converter import CFGVariableConverter -from .state import State as PDAState -from .symbol import Symbol as PDASymbol -from .stack_symbol import StackSymbol -from .epsilon import Epsilon as PDAEpsilon from .transition_function import TransitionFunction from .transition_function import TransitionKey, TransitionValues, Transition -from .utils import PDASymbolConverter, to_state, to_symbol, to_stack_symbol +from .utils import PDAStateConverter, PDASymbolConverter +from ..objects.pda_objects import State, StackSymbol +from ..objects.pda_objects import Symbol as PDASymbol +from ..objects.pda_objects import Epsilon as PDAEpsilon +from ..objects.pda_objects.utils import to_state, to_symbol, to_stack_symbol INPUT_SYMBOL = 1 @@ -86,22 +83,22 @@ def __init__(self, start_stack_symbol = to_stack_symbol(start_stack_symbol) if final_states is not None: final_states = {to_state(x) for x in final_states} - self._states: Set[PDAState] = states or set() + self._states: Set[State] = states or set() self._input_symbols: Set[PDASymbol] = input_symbols or set() self._stack_alphabet: Set[StackSymbol] = stack_alphabet or set() self._transition_function = transition_function or TransitionFunction() - self._start_state: Optional[PDAState] = start_state + self._start_state: Optional[State] = start_state if start_state is not None: self._states.add(start_state) self._start_stack_symbol: Optional[StackSymbol] = start_stack_symbol if start_stack_symbol is not None: self._stack_alphabet.add(start_stack_symbol) - self._final_states: Set[PDAState] = final_states or set() + self._final_states: Set[State] = final_states or set() for state in self._final_states: self._states.add(state) @property - def states(self) -> Set[PDAState]: + def states(self) -> Set[State]: """ Get the states fo the PDA Returns @@ -136,7 +133,7 @@ def stack_symbols(self) -> Set[StackSymbol]: return self._stack_alphabet @property - def start_state(self) -> Optional[PDAState]: + def start_state(self) -> Optional[State]: """ Get start state """ return self._start_state @@ -146,7 +143,7 @@ def start_stack_symbol(self) -> Optional[StackSymbol]: return self._start_stack_symbol @property - def final_states(self) -> Set[PDAState]: + def final_states(self) -> Set[State]: """ The final states of the PDA Returns @@ -293,10 +290,10 @@ def to_final_state(self) -> "PDA": if self.start_state is None or self.start_stack_symbol is None: return PDA() new_start = self.__get_next_free("#STARTTOFINAL#", - PDAState, + State, self._states) new_end = self.__get_next_free("#ENDTOFINAL#", - PDAState, + State, self._states) new_stack_symbol = self.__get_next_free("#BOTTOMTOFINAL#", StackSymbol, @@ -334,10 +331,10 @@ def to_empty_stack(self) -> "PDA": if self.start_state is None or self.start_stack_symbol is None: return PDA() new_start = self.__get_next_free("#STARTEMPTYS#", - PDAState, + State, self._states) new_end = self.__get_next_free("#ENDEMPTYS#", - PDAState, + State, self._states) new_stack_symbol = self.__get_next_free("#BOTTOMEMPTYS#", StackSymbol, @@ -398,7 +395,7 @@ def to_cfg(self) -> CFG: def _process_transition_and_state_to_cfg( self, productions: List[Production], - state: PDAState, + state: State, transition: Tuple[Tuple, Tuple], variable_converter: CFGVariableConverter) \ -> None: @@ -414,7 +411,7 @@ def _process_transition_and_state_to_cfg( def _process_transition_and_state_to_cfg_safe( self, productions: List[Production], - state: PDAState, + state: State, transition: Tuple[Tuple, Tuple], variable_converter: CFGVariableConverter) \ -> None: @@ -429,7 +426,7 @@ def _process_transition_and_state_to_cfg_safe( def _get_all_bodies_from_state_and_transition( self, - state: PDAState, + state: State, transition: Tuple[Tuple, Tuple], variable_converter: CFGVariableConverter) \ -> List[List[CFGObject]]: @@ -439,8 +436,8 @@ def _get_all_bodies_from_state_and_transition( variable_converter) def _generate_all_rules(self, - s_from: PDAState, - s_to: PDAState, + s_from: State, + s_to: State, ss_by: List[StackSymbol], variable_converter: CFGVariableConverter) \ -> List[List[CFGObject]]: @@ -477,8 +474,8 @@ def _generate_all_rules(self, return res def _generate_length_one_rules(self, - s_from: PDAState, - s_to: PDAState, + s_from: State, + s_to: State, ss_by: List[StackSymbol], variable_converter: CFGVariableConverter) \ -> List[List[CFGObject]]: @@ -490,7 +487,7 @@ def _generate_length_one_rules(self, def _get_head_from_state_and_transition( self, - state: PDAState, + state: State, transition: Tuple[Tuple, Tuple], variable_converter: CFGVariableConverter) \ -> Variable: @@ -524,7 +521,7 @@ def from_cfg(self, cfg: CFG) -> "PDA": new_pda : :class:`~pyformlang.pda.PDA` The equivalent PDA when accepting on empty stack """ - state = PDAState("q") + state = State("q") pda_object_converter = PDASymbolConverter(cfg.terminals, cfg.variables) input_symbols = {pda_object_converter.get_symbol_from(x) for x in cfg.terminals} @@ -585,7 +582,7 @@ def intersection(self, other: DeterministicFiniteAutomaton) -> "PDA": """ if not self.start_state or not other.start_state: return PDA() - pda_state_converter = _PDAStateConverter(self._states, other.states) + pda_state_converter = PDAStateConverter(self._states, other.states) final_states_other = other.final_states start = pda_state_converter.to_pda_combined_state(self.start_state, other.start_state) @@ -764,7 +761,7 @@ def write_as_dot(self, filename: str) -> None: @staticmethod def __add_start_state_to_graph(graph: MultiDiGraph, - state: PDAState) -> None: + state: State) -> None: """ Adds a starting node to a given graph """ graph.add_node("starting_" + str(state.value), label="", diff --git a/pyformlang/pda/transition_function.py b/pyformlang/pda/transition_function.py index 44aa806..155eba1 100644 --- a/pyformlang/pda/transition_function.py +++ b/pyformlang/pda/transition_function.py @@ -3,9 +3,7 @@ from copy import deepcopy from typing import Dict, List, Set, Iterator, Iterable, Tuple, Optional -from .state import State -from .symbol import Symbol -from .stack_symbol import StackSymbol +from ..objects.pda_objects import State, Symbol, StackSymbol TransitionKey = Tuple[State, Symbol, StackSymbol] TransitionValue = Tuple[State, List[StackSymbol]] diff --git a/pyformlang/pda/utils.py b/pyformlang/pda/utils.py index 0a2204c..e1b0dea 100644 --- a/pyformlang/pda/utils.py +++ b/pyformlang/pda/utils.py @@ -1,15 +1,42 @@ """ Useful functions for a PDA """ -from typing import Dict, Set, Iterable, Optional, Hashable +from typing import Dict, Set, Iterable, Optional from numpy import empty from pyformlang.cfg import CFGObject, Variable, Terminal, Epsilon as CFGEpsilon from pyformlang.finite_automaton import State as FAState -from .state import State as PDAState -from .symbol import Symbol -from .stack_symbol import StackSymbol -from .epsilon import Epsilon as PDAEpsilon +from ..objects.pda_objects import Symbol, StackSymbol +from ..objects.pda_objects import State as PDAState +from ..objects.pda_objects import Epsilon as PDAEpsilon + + +class PDAStateConverter: + """Combines PDA and FA states""" + # pylint: disable=too-few-public-methods + + def __init__(self, + states_pda: Set[PDAState], + states_dfa: Set[FAState]) -> None: + self._inverse_state_pda = {} + for i, state in enumerate(states_pda): + self._inverse_state_pda[state] = i + self._inverse_state_dfa = {} + for i, state in enumerate(states_dfa): + self._inverse_state_dfa[state] = i + self._conversions = empty((len(states_pda), len(states_dfa)), + dtype=PDAState) + + def to_pda_combined_state(self, + state_pda: PDAState, + state_other: FAState) -> PDAState: + """ To PDA state in the intersection function """ + i_state_pda = self._inverse_state_pda[state_pda] + i_state_other = self._inverse_state_dfa[state_other] + if self._conversions[i_state_pda, i_state_other] is None: + self._conversions[i_state_pda, i_state_other] = \ + [PDAState((state_pda, state_other))] + return self._conversions[i_state_pda, i_state_other][0] class PDASymbolConverter: @@ -52,31 +79,3 @@ def get_stack_symbol_from(self, stack_symbol: CFGObject) \ self._inverse_stack_symbol[stack_symbol] = temp return temp return inverse_stack_symbol - - -class PDAStateConverter: - """Combines PDA and FA states""" - # pylint: disable=too-few-public-methods - - def __init__(self, - states_pda: Set[PDAState], - states_dfa: Set[FAState]) -> None: - self._inverse_state_pda = {} - for i, state in enumerate(states_pda): - self._inverse_state_pda[state] = i - self._inverse_state_dfa = {} - for i, state in enumerate(states_dfa): - self._inverse_state_dfa[state] = i - self._conversions = empty((len(states_pda), len(states_dfa)), - dtype=PDAState) - - def to_pda_combined_state(self, - state_pda: PDAState, - state_other: FAState) -> PDAState: - """ To PDA state in the intersection function """ - i_state_pda = self._inverse_state_pda[state_pda] - i_state_other = self._inverse_state_dfa[state_other] - if self._conversions[i_state_pda, i_state_other] is None: - self._conversions[i_state_pda, i_state_other] = \ - [PDAState((state_pda, state_other))] - return self._conversions[i_state_pda, i_state_other][0] diff --git a/pyformlang/regular_expression/__init__.py b/pyformlang/regular_expression/__init__.py index b3fd098..04f3454 100644 --- a/pyformlang/regular_expression/__init__.py +++ b/pyformlang/regular_expression/__init__.py @@ -19,9 +19,9 @@ """ - from .regex import Regex -from ..objects.regex_objects.regex_objects import MisformedRegexError from .python_regex import PythonRegex +from ..objects.regex_objects import MisformedRegexError + __all__ = ["Regex", "PythonRegex", "MisformedRegexError"] diff --git a/pyformlang/regular_expression/python_regex.py b/pyformlang/regular_expression/python_regex.py index 5607668..0d446eb 100644 --- a/pyformlang/regular_expression/python_regex.py +++ b/pyformlang/regular_expression/python_regex.py @@ -7,9 +7,9 @@ from string import printable from unicodedata import lookup -from ..objects.regex_objects.regex_objects import MisformedRegexError from .regex_reader import WRONG_PARENTHESIS_MESSAGE from .regex import Regex +from ..objects.regex_objects import MisformedRegexError PRINTABLES = list(printable) diff --git a/pyformlang/regular_expression/regex.py b/pyformlang/regular_expression/regex.py index 17edba9..9fee921 100644 --- a/pyformlang/regular_expression/regex.py +++ b/pyformlang/regular_expression/regex.py @@ -7,12 +7,12 @@ from pyformlang.finite_automaton import FiniteAutomaton, EpsilonNFA from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.finite_automaton import State, Symbol, Epsilon as FAEpsilon -from pyformlang.cfg.cfg import CFG, Production -from pyformlang.cfg.utils import to_variable +from pyformlang.cfg import CFG, Production from .regex_reader import RegexReader -from ..objects.regex_objects.regex_objects import Epsilon as RegexEpsilon, \ - Empty, Concatenation, Union, KleeneStar +from ..objects.regex_objects import \ + Epsilon as RegexEpsilon, Empty, Concatenation, Union, KleeneStar +from ..objects.cfg_objects.utils import to_variable class Regex(RegexReader): diff --git a/pyformlang/regular_expression/regex_reader.py b/pyformlang/regular_expression/regex_reader.py index f9a5a23..157847b 100644 --- a/pyformlang/regular_expression/regex_reader.py +++ b/pyformlang/regular_expression/regex_reader.py @@ -5,8 +5,10 @@ from typing import List, Optional from re import sub -from ..objects.regex_objects.regex_objects import to_node, Node, Operator, Symbol, Empty, \ - Concatenation, Union, KleeneStar, MisformedRegexError, SPECIAL_SYMBOLS +from ..objects.regex_objects import \ + Node, Operator, Symbol, Empty, Concatenation, Union, KleeneStar, \ + MisformedRegexError +from ..objects.regex_objects.utils import to_node, SPECIAL_SYMBOLS MISFORMED_MESSAGE = "The regex is misformed here." diff --git a/pyformlang/rsa/__init__.py b/pyformlang/rsa/__init__.py index dda8323..66d25d2 100644 --- a/pyformlang/rsa/__init__.py +++ b/pyformlang/rsa/__init__.py @@ -22,8 +22,8 @@ """ - from .recursive_automaton import RecursiveAutomaton from .box import Box + __all__ = ["RecursiveAutomaton", "Box"] diff --git a/pyformlang/rsa/box.py b/pyformlang/rsa/box.py index 4f792b9..d1cdf55 100644 --- a/pyformlang/rsa/box.py +++ b/pyformlang/rsa/box.py @@ -6,7 +6,8 @@ from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.finite_automaton import State, Symbol -from pyformlang.finite_automaton.utils import to_symbol + +from ..objects.finite_automaton_objects.utils import to_symbol class Box: diff --git a/pyformlang/rsa/recursive_automaton.py b/pyformlang/rsa/recursive_automaton.py index d823dc8..703bbfa 100644 --- a/pyformlang/rsa/recursive_automaton.py +++ b/pyformlang/rsa/recursive_automaton.py @@ -5,11 +5,11 @@ from typing import Dict, Set, AbstractSet, Optional, Hashable, Any from pyformlang.finite_automaton import Symbol -from pyformlang.finite_automaton.utils import to_symbol from pyformlang.regular_expression import Regex from pyformlang.cfg import Epsilon from .box import Box +from ..objects.finite_automaton_objects.utils import to_symbol class RecursiveAutomaton: From 1b9dfbc577862ea9e1336db73ee6cbaaf68c222d Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 2 Dec 2024 17:37:22 +0300 Subject: [PATCH 11/30] add CFGConvertible interface --- pyformlang/cfg/cfg_variable_converter.py | 43 +++++++++---------- pyformlang/objects/cfg_objects/__init__.py | 4 +- .../objects/cfg_objects/cfg_convertible.py | 10 +++++ pyformlang/objects/cfg_objects/cfg_object.py | 2 +- pyformlang/objects/cfg_objects/epsilon.py | 1 - pyformlang/objects/cfg_objects/terminal.py | 2 +- pyformlang/objects/cfg_objects/variable.py | 8 +--- .../finite_automaton_objects/__init__.py | 4 +- .../finite_automaton_objects/epsilon.py | 2 +- .../finite_automaton_object.py | 2 +- .../objects/finite_automaton_objects/state.py | 14 +++--- .../finite_automaton_objects/symbol.py | 6 +-- pyformlang/objects/pda_objects/__init__.py | 4 +- pyformlang/objects/pda_objects/epsilon.py | 1 - .../objects/pda_objects/stack_symbol.py | 9 ++-- pyformlang/objects/pda_objects/state.py | 9 ++-- .../objects/regex_objects/regex_objects.py | 16 +++---- 17 files changed, 68 insertions(+), 69 deletions(-) create mode 100644 pyformlang/objects/cfg_objects/cfg_convertible.py diff --git a/pyformlang/cfg/cfg_variable_converter.py b/pyformlang/cfg/cfg_variable_converter.py index 14f01c8..79b236f 100644 --- a/pyformlang/cfg/cfg_variable_converter.py +++ b/pyformlang/cfg/cfg_variable_converter.py @@ -1,24 +1,24 @@ """A CFG Variable Converter""" -from typing import Dict, List, AbstractSet, Tuple, Optional, Any +from typing import Dict, List, AbstractSet, Tuple, Optional, Hashable -from ..objects.cfg_objects import Variable +from ..objects.cfg_objects import Variable, CFGConvertible class CFGVariableConverter: """A CFG Variable Converter""" def __init__(self, - states: AbstractSet[Any], - stack_symbols: AbstractSet[Any]) -> None: + states: AbstractSet[CFGConvertible], + stack_symbols: AbstractSet[CFGConvertible]) -> None: self._counter = 0 - self._inverse_states_d: Dict[Any, int] = {} + self._inverse_states_d: Dict[CFGConvertible, int] = {} self._counter_state = 0 for self._counter_state, state in enumerate(states): self._inverse_states_d[state] = self._counter_state state.index_cfg_converter = self._counter_state self._counter_state += 1 - self._inverse_stack_symbol_d: Dict[Any, int] = {} + self._inverse_stack_symbol_d: Dict[CFGConvertible, int] = {} self._counter_symbol = 0 for self._counter_symbol, symbol in enumerate(stack_symbols): self._inverse_stack_symbol_d[symbol] = self._counter_symbol @@ -29,7 +29,7 @@ def __init__(self, for _ in range(len(stack_symbols))] for _ in range(len(states))] - def _get_state_index(self, state: Any) -> int: + def _get_state_index(self, state: CFGConvertible) -> int: """Get the state index""" if state.index_cfg_converter is None: if state not in self._inverse_states_d: @@ -38,7 +38,7 @@ def _get_state_index(self, state: Any) -> int: state.index_cfg_converter = self._inverse_states_d[state] return state.index_cfg_converter - def _get_symbol_index(self, symbol: Any) -> int: + def _get_symbol_index(self, symbol: CFGConvertible) -> int: """Get the symbol index""" if symbol.index_cfg_converter is None: if symbol not in self._inverse_stack_symbol_d: @@ -48,9 +48,9 @@ def _get_symbol_index(self, symbol: Any) -> int: return symbol.index_cfg_converter def to_cfg_combined_variable(self, - state0: Any, - stack_symbol: Any, - state1: Any) -> Variable: + state0: CFGConvertible, + stack_symbol: CFGConvertible, + state1: CFGConvertible) -> Variable: """ Conversion used in the to_pda method """ i_stack_symbol, i_state0, i_state1 = self._get_indexes( stack_symbol, state0, state1) @@ -65,8 +65,7 @@ def _create_new_variable(self, i_state0: int, i_state1: int, prev: Tuple, - value: Any = None) -> Tuple[bool, Variable]: - # pylint: disable=too-many-arguments + value: Hashable = None) -> Tuple[bool, Variable]: if value is None: value = self._counter temp = (prev[0], Variable(value)) @@ -75,9 +74,9 @@ def _create_new_variable(self, return temp def set_valid(self, - state0: Any, - stack_symbol: Any, - state1: Any) -> None: + state0: CFGConvertible, + stack_symbol: CFGConvertible, + state1: CFGConvertible) -> None: """Set valid""" i_stack_symbol, i_state0, i_state1 = self._get_indexes( stack_symbol, state0, state1) @@ -85,9 +84,9 @@ def set_valid(self, self._conversions[i_state0][i_stack_symbol][i_state1] = (True, prev[1]) def is_valid_and_get(self, - state0: Any, - stack_symbol: Any, - state1: Any) -> Optional[Variable]: + state0: CFGConvertible, + stack_symbol: CFGConvertible, + state1: CFGConvertible) -> Optional[Variable]: """Check if valid and get""" i_state0 = self._get_state_index(state0) i_stack_symbol = self._get_symbol_index(stack_symbol) @@ -103,9 +102,9 @@ def is_valid_and_get(self, return current[1] def _get_indexes(self, - stack_symbol: Any, - state0: Any, - state1: Any) \ + stack_symbol: CFGConvertible, + state0: CFGConvertible, + state1: CFGConvertible) \ -> Tuple[int, int, int]: i_state0 = self._get_state_index(state0) i_stack_symbol = self._get_symbol_index(stack_symbol) diff --git a/pyformlang/objects/cfg_objects/__init__.py b/pyformlang/objects/cfg_objects/__init__.py index a999974..11c3d1b 100644 --- a/pyformlang/objects/cfg_objects/__init__.py +++ b/pyformlang/objects/cfg_objects/__init__.py @@ -5,10 +5,12 @@ from .terminal import Terminal from .epsilon import Epsilon from .production import Production +from .cfg_convertible import CFGConvertible __all__ = ["CFGObject", "Variable", "Terminal", "Epsilon", - "Production"] + "Production", + "CFGConvertible"] diff --git a/pyformlang/objects/cfg_objects/cfg_convertible.py b/pyformlang/objects/cfg_objects/cfg_convertible.py new file mode 100644 index 0000000..d8a5bdf --- /dev/null +++ b/pyformlang/objects/cfg_objects/cfg_convertible.py @@ -0,0 +1,10 @@ +""" Interface representing the ability of conversion to cfg object """ + +from typing import Optional + + +class CFGConvertible: + """ Interface representing the ability of conversion to cfg object """ + + def __init__(self) -> None: + self.index_cfg_converter: Optional[int] = None diff --git a/pyformlang/objects/cfg_objects/cfg_object.py b/pyformlang/objects/cfg_objects/cfg_object.py index 99fcc90..196309a 100644 --- a/pyformlang/objects/cfg_objects/cfg_object.py +++ b/pyformlang/objects/cfg_objects/cfg_object.py @@ -4,7 +4,7 @@ from abc import abstractmethod -class CFGObject: # pylint: disable=too-few-public-methods +class CFGObject: """ An object in a CFG Parameters diff --git a/pyformlang/objects/cfg_objects/epsilon.py b/pyformlang/objects/cfg_objects/epsilon.py index b70e7fc..3680ad1 100644 --- a/pyformlang/objects/cfg_objects/epsilon.py +++ b/pyformlang/objects/cfg_objects/epsilon.py @@ -7,7 +7,6 @@ class Epsilon(Terminal): """ An epsilon terminal """ - # pylint: disable=too-few-public-methods def __init__(self) -> None: super().__init__("epsilon") diff --git a/pyformlang/objects/cfg_objects/terminal.py b/pyformlang/objects/cfg_objects/terminal.py index 7bc36b7..f54f5b1 100644 --- a/pyformlang/objects/cfg_objects/terminal.py +++ b/pyformlang/objects/cfg_objects/terminal.py @@ -3,7 +3,7 @@ from .cfg_object import CFGObject -class Terminal(CFGObject): # pylint: disable=too-few-public-methods +class Terminal(CFGObject): """ A terminal in a CFG """ def __repr__(self) -> str: diff --git a/pyformlang/objects/cfg_objects/variable.py b/pyformlang/objects/cfg_objects/variable.py index 0617504..7b3b5d6 100644 --- a/pyformlang/objects/cfg_objects/variable.py +++ b/pyformlang/objects/cfg_objects/variable.py @@ -1,12 +1,12 @@ """ A variable in a CFG """ -from typing import Optional, Hashable from string import ascii_uppercase from .cfg_object import CFGObject +from .cfg_convertible import CFGConvertible -class Variable(CFGObject): # pylint: disable=too-few-public-methods +class Variable(CFGObject, CFGConvertible): """ An variable in a CFG Parameters @@ -15,10 +15,6 @@ class Variable(CFGObject): # pylint: disable=too-few-public-methods The value of the variable """ - def __init__(self, value: Hashable) -> None: - super().__init__(value) - self.index_cfg_converter: Optional[int] = None - def __repr__(self) -> str: return "Variable(" + str(self.value) + ")" diff --git a/pyformlang/objects/finite_automaton_objects/__init__.py b/pyformlang/objects/finite_automaton_objects/__init__.py index 02fefae..b2d9074 100644 --- a/pyformlang/objects/finite_automaton_objects/__init__.py +++ b/pyformlang/objects/finite_automaton_objects/__init__.py @@ -1,10 +1,12 @@ """ Finite automaton object representations """ +from .finite_automaton_object import FiniteAutomatonObject from .state import State from .symbol import Symbol from .epsilon import Epsilon -__all__ = ["State", +__all__ = ["FiniteAutomatonObject", + "State", "Symbol", "Epsilon"] diff --git a/pyformlang/objects/finite_automaton_objects/epsilon.py b/pyformlang/objects/finite_automaton_objects/epsilon.py index fa0dcc5..88a656e 100644 --- a/pyformlang/objects/finite_automaton_objects/epsilon.py +++ b/pyformlang/objects/finite_automaton_objects/epsilon.py @@ -7,7 +7,7 @@ from .symbol import Symbol -class Epsilon(Symbol): # pylint: disable=too-few-public-methods +class Epsilon(Symbol): """ An epsilon transition Examples diff --git a/pyformlang/objects/finite_automaton_objects/finite_automaton_object.py b/pyformlang/objects/finite_automaton_objects/finite_automaton_object.py index 96a453c..4eaaad7 100644 --- a/pyformlang/objects/finite_automaton_objects/finite_automaton_object.py +++ b/pyformlang/objects/finite_automaton_objects/finite_automaton_object.py @@ -5,7 +5,7 @@ from typing import Hashable -class FiniteAutomatonObject: # pylint: disable=too-few-public-methods +class FiniteAutomatonObject: """ Represents an object in a finite state automaton Parameters diff --git a/pyformlang/objects/finite_automaton_objects/state.py b/pyformlang/objects/finite_automaton_objects/state.py index b8c2ec5..dbb72ea 100644 --- a/pyformlang/objects/finite_automaton_objects/state.py +++ b/pyformlang/objects/finite_automaton_objects/state.py @@ -2,12 +2,13 @@ Representation of a state in a finite state automaton """ -from typing import Hashable, Optional, Any +from typing import Any from .finite_automaton_object import FiniteAutomatonObject +from ..cfg_objects import CFGConvertible -class State(FiniteAutomatonObject): # pylint: disable=too-few-public-methods +class State(FiniteAutomatonObject, CFGConvertible): """ A state in a finite automaton Parameters @@ -23,15 +24,10 @@ class State(FiniteAutomatonObject): # pylint: disable=too-few-public-methods """ - def __init__(self, value: Hashable) -> None: - super().__init__(value) - self.index = None - self.index_cfg_converter: Optional[int] = None - def __eq__(self, other: Any) -> bool: if isinstance(other, State): - return self._value == other._value - return self._value == other + return self.value == other.value + return self.value == other def __hash__(self) -> int: return super().__hash__() diff --git a/pyformlang/objects/finite_automaton_objects/symbol.py b/pyformlang/objects/finite_automaton_objects/symbol.py index 7f1b238..28667e2 100644 --- a/pyformlang/objects/finite_automaton_objects/symbol.py +++ b/pyformlang/objects/finite_automaton_objects/symbol.py @@ -7,7 +7,7 @@ from .finite_automaton_object import FiniteAutomatonObject -class Symbol(FiniteAutomatonObject): # pylint: disable=too-few-public-methods +class Symbol(FiniteAutomatonObject): """ A symbol in a finite automaton Parameters @@ -24,8 +24,8 @@ class Symbol(FiniteAutomatonObject): # pylint: disable=too-few-public-methods def __eq__(self, other: Any) -> bool: if isinstance(other, Symbol): - return self._value == other.value - return self._value == other + return self.value == other.value + return self.value == other def __hash__(self) -> int: return super().__hash__() diff --git a/pyformlang/objects/pda_objects/__init__.py b/pyformlang/objects/pda_objects/__init__.py index eb6ec6f..de26f00 100644 --- a/pyformlang/objects/pda_objects/__init__.py +++ b/pyformlang/objects/pda_objects/__init__.py @@ -1,12 +1,14 @@ """ PDA object representations """ +from .pda_object import PDAObject from .state import State from .symbol import Symbol from .stack_symbol import StackSymbol from .epsilon import Epsilon -__all__ = ["State", +__all__ = ["PDAObject", + "State", "Symbol", "StackSymbol", "Epsilon"] diff --git a/pyformlang/objects/pda_objects/epsilon.py b/pyformlang/objects/pda_objects/epsilon.py index b4a6ca7..d4e57b8 100644 --- a/pyformlang/objects/pda_objects/epsilon.py +++ b/pyformlang/objects/pda_objects/epsilon.py @@ -7,7 +7,6 @@ class Epsilon(StackSymbol): """ An epsilon symbol """ - # pylint: disable=too-few-public-methods def __init__(self) -> None: super().__init__("epsilon") diff --git a/pyformlang/objects/pda_objects/stack_symbol.py b/pyformlang/objects/pda_objects/stack_symbol.py index 0ff6346..a2dba9d 100644 --- a/pyformlang/objects/pda_objects/stack_symbol.py +++ b/pyformlang/objects/pda_objects/stack_symbol.py @@ -1,11 +1,12 @@ """ A StackSymbol in a pushdown automaton """ -from typing import Optional, Hashable, Any +from typing import Any from .symbol import Symbol +from ..cfg_objects import CFGConvertible -class StackSymbol(Symbol): +class StackSymbol(Symbol, CFGConvertible): """ A StackSymbol in a pushdown automaton Parameters @@ -15,10 +16,6 @@ class StackSymbol(Symbol): """ - def __init__(self, value: Hashable) -> None: - super().__init__(value) - self.index_cfg_converter: Optional[int] = None - def __eq__(self, other: Any) -> bool: if not isinstance(other, StackSymbol): return False diff --git a/pyformlang/objects/pda_objects/state.py b/pyformlang/objects/pda_objects/state.py index 52f1bb5..8462d6a 100644 --- a/pyformlang/objects/pda_objects/state.py +++ b/pyformlang/objects/pda_objects/state.py @@ -1,11 +1,12 @@ """ A State in a pushdown automaton """ -from typing import Optional, Hashable, Any +from typing import Any from .pda_object import PDAObject +from ..cfg_objects import CFGConvertible -class State(PDAObject): +class State(PDAObject, CFGConvertible): """ A State in a pushdown automaton Parameters @@ -15,10 +16,6 @@ class State(PDAObject): """ - def __init__(self, value: Hashable) -> None: - super().__init__(value) - self.index_cfg_converter: Optional[int] = None - def __eq__(self, other: Any) -> bool: if not isinstance(other, State): return False diff --git a/pyformlang/objects/regex_objects/regex_objects.py b/pyformlang/objects/regex_objects/regex_objects.py index e5e78d8..9522e05 100644 --- a/pyformlang/objects/regex_objects/regex_objects.py +++ b/pyformlang/objects/regex_objects/regex_objects.py @@ -9,7 +9,7 @@ from ..cfg_objects.utils import to_variable, to_terminal -class Node: # pylint: disable=too-few-public-methods +class Node: """ Represents a node in the tree representation of a regex Parameters @@ -58,7 +58,7 @@ def get_cfg_rules(self, current_symbol: str, sons: Iterable[str]) \ raise NotImplementedError -class Operator(Node): # pylint: disable=too-few-public-methods +class Operator(Node): """ Represents an operator Parameters @@ -81,7 +81,7 @@ def get_cfg_rules(self, current_symbol: str, sons: Iterable[str]) \ raise NotImplementedError -class Symbol(Node): # pylint: disable=too-few-public-methods +class Symbol(Node): """ Represents a symbol Parameters @@ -105,7 +105,7 @@ def __repr__(self) -> str: return "Symbol(" + str(self._value) + ")" -class Concatenation(Operator): # pylint: disable=too-few-public-methods +class Concatenation(Operator): """ Represents a concatenation """ @@ -122,7 +122,7 @@ def __init__(self) -> None: super().__init__("Concatenation") -class Union(Operator): # pylint: disable=too-few-public-methods +class Union(Operator): """ Represents a union """ @@ -139,7 +139,7 @@ def __init__(self) -> None: super().__init__("Union") -class KleeneStar(Operator): # pylint: disable=too-few-public-methods +class KleeneStar(Operator): """ Represents an epsilon symbol """ @@ -163,7 +163,7 @@ def __init__(self) -> None: super().__init__("Kleene Star") -class Epsilon(Symbol): # pylint: disable=too-few-public-methods +class Epsilon(Symbol): """ Represents an epsilon symbol """ @@ -178,7 +178,7 @@ def __init__(self) -> None: super().__init__("Epsilon") -class Empty(Symbol): # pylint: disable=too-few-public-methods +class Empty(Symbol): """ Represents an empty symbol """ From e27a1b1624c5c72f671b88c3e32bb99310793f23 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 2 Dec 2024 18:34:00 +0300 Subject: [PATCH 12/30] handle null start states in pda --- pyformlang/cfg/cfg.py | 46 ++++++++++---------- pyformlang/objects/cfg_objects/cfg_object.py | 5 ++- pyformlang/objects/cfg_objects/variable.py | 3 +- pyformlang/pda/pda.py | 42 ++++++++---------- 4 files changed, 46 insertions(+), 50 deletions(-) diff --git a/pyformlang/cfg/cfg.py b/pyformlang/cfg/cfg.py index 9b17903..3853371 100644 --- a/pyformlang/cfg/cfg.py +++ b/pyformlang/cfg/cfg.py @@ -729,7 +729,7 @@ def intersection(self, other: DeterministicFiniteAutomaton) -> "CFG": When trying to intersect with something else than a regex or a finite automaton """ - if other.is_empty(): + if self.is_empty() or other.is_empty(): return CFG() generate_empty = self.contains([]) and other.accepts([]) cfg = self.to_normal_form() @@ -746,32 +746,31 @@ def intersection(self, other: DeterministicFiniteAutomaton) -> "CFG": production, states, cv_converter) + start = Variable("Start") new_productions += self._intersection_starting_rules(cfg, + start, other, cv_converter) - start = Variable("Start") if generate_empty: new_productions.append(Production(start, [])) res_cfg = CFG(start_symbol=start, productions=new_productions) return res_cfg @staticmethod - def _intersection_starting_rules(cfg: "CFG", - other: DeterministicFiniteAutomaton, - cv_converter: CFGVariableConverter) \ - -> List[Production]: - start = Variable("Start") - productions_temp = [] - start_other = other.start_state - for final_state in other.final_states: - new_body: List[CFGObject] = [ - cv_converter.to_cfg_combined_variable( - start_other, - cfg.start_symbol, - final_state)] - productions_temp.append( - Production(start, new_body, filtering=False)) - return productions_temp + def _intersection_starting_rules( + cfg: "CFG", + start: Variable, + other: DeterministicFiniteAutomaton, + cv_converter: CFGVariableConverter) \ + -> List[Production]: + if not cfg.start_symbol or not other.start_state: + return [] + return [Production(start, + [cv_converter.to_cfg_combined_variable( + other.start_state, + cfg.start_symbol, + final_state)]) + for final_state in other.final_states] @staticmethod def _intersection_when_terminal( @@ -874,15 +873,14 @@ def get_words(self, max_length: int = -1) -> Iterable[List[Terminal]]: gen_d[obj] = [[]] # To a single terminal for production in productions: - if len(production.body) == 1 \ - and len(production.body_terminals) == 1: - terminals = list(production.body_terminals) + body = production.body + if len(body) == 1 and isinstance(body[0], Terminal): if len(gen_d[production.head]) == 1: gen_d[production.head].append([]) - if terminals not in gen_d[production.head][-1]: - gen_d[production.head][-1].append(terminals) + if [body[0]] not in gen_d[production.head][-1]: + gen_d[production.head][-1].append([body[0]]) if production.head == cfg.start_symbol: - yield terminals + yield [body[0]] # Complete what is missing current_length = 2 total_no_modification = 0 diff --git a/pyformlang/objects/cfg_objects/cfg_object.py b/pyformlang/objects/cfg_objects/cfg_object.py index 196309a..e5cce96 100644 --- a/pyformlang/objects/cfg_objects/cfg_object.py +++ b/pyformlang/objects/cfg_objects/cfg_object.py @@ -3,8 +3,10 @@ from typing import Hashable, Any from abc import abstractmethod +from .cfg_convertible import CFGConvertible -class CFGObject: + +class CFGObject(CFGConvertible): """ An object in a CFG Parameters @@ -16,6 +18,7 @@ class CFGObject: __slots__ = ["_value", "_hash"] def __init__(self, value: Hashable) -> None: + super().__init__() self._value = value self._hash = None diff --git a/pyformlang/objects/cfg_objects/variable.py b/pyformlang/objects/cfg_objects/variable.py index 7b3b5d6..6281e16 100644 --- a/pyformlang/objects/cfg_objects/variable.py +++ b/pyformlang/objects/cfg_objects/variable.py @@ -3,10 +3,9 @@ from string import ascii_uppercase from .cfg_object import CFGObject -from .cfg_convertible import CFGConvertible -class Variable(CFGObject, CFGConvertible): +class Variable(CFGObject): """ An variable in a CFG Parameters diff --git a/pyformlang/pda/pda.py b/pyformlang/pda/pda.py index 5b55dd4..8974703 100644 --- a/pyformlang/pda/pda.py +++ b/pyformlang/pda/pda.py @@ -287,8 +287,6 @@ def to_final_state(self) -> "PDA": The new PDA which accepts by final state the language that \ was accepted by empty stack """ - if self.start_state is None or self.start_stack_symbol is None: - return PDA() new_start = self.__get_next_free("#STARTTOFINAL#", State, self._states) @@ -304,9 +302,10 @@ def to_final_state(self) -> "PDA": new_stack_alphabet = self._stack_alphabet.copy() new_stack_alphabet.add(new_stack_symbol) new_tf = self._transition_function.copy() - new_tf.add_transition(new_start, PDAEpsilon(), new_stack_symbol, - self.start_state, [self.start_stack_symbol, - new_stack_symbol]) + if self.start_state and self.start_stack_symbol: + new_tf.add_transition(new_start, PDAEpsilon(), new_stack_symbol, + self.start_state, [self.start_stack_symbol, + new_stack_symbol]) for state in self._states: new_tf.add_transition(state, PDAEpsilon(), new_stack_symbol, new_end, []) @@ -328,8 +327,6 @@ def to_empty_stack(self) -> "PDA": The new PDA which accepts by empty stack the language that was \ accepted by final state """ - if self.start_state is None or self.start_stack_symbol is None: - return PDA() new_start = self.__get_next_free("#STARTEMPTYS#", State, self._states) @@ -345,9 +342,10 @@ def to_empty_stack(self) -> "PDA": new_stack_alphabet = self._stack_alphabet.copy() new_stack_alphabet.add(new_stack_symbol) new_tf = self._transition_function.copy() - new_tf.add_transition(new_start, PDAEpsilon(), new_stack_symbol, - self.start_state, [self.start_stack_symbol, - new_stack_symbol]) + if self.start_state and self.start_stack_symbol: + new_tf.add_transition(new_start, PDAEpsilon(), new_stack_symbol, + self.start_state, [self.start_stack_symbol, + new_stack_symbol]) for state in self._final_states: for stack_symbol in new_stack_alphabet: new_tf.add_transition(state, PDAEpsilon(), stack_symbol, @@ -372,8 +370,8 @@ def to_cfg(self) -> CFG: new_cfg : :class:`~pyformlang.cfg.CFG` The equivalent CFG """ - variable_converter = \ - CFGVariableConverter(self._states, self._stack_alphabet) + variable_converter = CFGVariableConverter(self._states, + self._stack_alphabet) start = Variable("#StartCFG#") productions = self._initialize_production_from_start_in_to_cfg( start, variable_converter) @@ -501,16 +499,14 @@ def _initialize_production_from_start_in_to_cfg( start: Variable, variable_converter: CFGVariableConverter) \ -> List[Production]: - productions = [] - for state in self._states: - productions.append( - Production( - start, - [variable_converter.to_cfg_combined_variable( - self._start_state, - self._start_stack_symbol, - state)])) - return productions + if not self.start_state or not self.start_stack_symbol: + return [] + return [Production(start, + [variable_converter.to_cfg_combined_variable( + self.start_state, + self.start_stack_symbol, + state)]) + for state in self.states] def from_cfg(self, cfg: CFG) -> "PDA": """ Converts the CFG to a PDA that generates on empty stack an \ @@ -580,7 +576,7 @@ def intersection(self, other: DeterministicFiniteAutomaton) -> "PDA": When intersecting with something else than a regex or a finite automaton """ - if not self.start_state or not other.start_state: + if not self.start_state or not other.start_state or other.is_empty(): return PDA() pda_state_converter = PDAStateConverter(self._states, other.states) final_states_other = other.final_states From 0231483e4e590a072a937c927c6d91c34cc8ac25 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 2 Dec 2024 19:00:43 +0300 Subject: [PATCH 13/30] update pda tests, correct cfg parsers annotations --- pyformlang/cfg/cyk_table.py | 7 +++-- pyformlang/cfg/llone_parser.py | 47 ++++++++++++++++---------------- pyformlang/cfg/tests/test_cfg.py | 26 +++++++----------- pyformlang/pda/pda.py | 3 +- 4 files changed, 41 insertions(+), 42 deletions(-) diff --git a/pyformlang/cfg/cyk_table.py b/pyformlang/cfg/cyk_table.py index 13bed8d..bec32a2 100644 --- a/pyformlang/cfg/cyk_table.py +++ b/pyformlang/cfg/cyk_table.py @@ -8,6 +8,9 @@ from .parse_tree import ParseTree from ..objects.cfg_objects import CFGObject, Terminal, Epsilon +ProductionsType = Dict[Tuple[CFGObject, ...], List[CFGObject]] +CYKTableType = Dict[Tuple[int, int], Set["CYKNode"]] + class CYKTable: """ @@ -23,8 +26,8 @@ class CYKTable: def __init__(self, grammar: Grammar, word: List[Terminal]) -> None: self._normal_form: Grammar = grammar.to_normal_form() self._word: List[Terminal] = word - self._productions_d: Dict[Tuple, List[CFGObject]] = {} - self._cyk_table: Dict[Tuple[int, int], Set[CYKNode]] = {} + self._productions_d: ProductionsType = {} + self._cyk_table: CYKTableType = {} self._set_productions_by_body() if not self._generates_all_terminals(): self._cyk_table[(0, len(self._word))] = set() diff --git a/pyformlang/cfg/llone_parser.py b/pyformlang/cfg/llone_parser.py index 05c82f6..ba78ffe 100644 --- a/pyformlang/cfg/llone_parser.py +++ b/pyformlang/cfg/llone_parser.py @@ -9,6 +9,10 @@ from ..objects.cfg_objects import CFGObject, Epsilon from ..objects.cfg_objects.utils import to_terminal +SetType = Dict[CFGObject, Set[CFGObject]] +TriggersType = Dict[CFGObject, List[CFGObject]] +ParsingTableType = Dict[CFGObject, Dict[CFGObject, List[Production]]] + class LLOneParser: """ @@ -23,7 +27,7 @@ class LLOneParser: def __init__(self, cfg: CFG) -> None: self._cfg = cfg - def get_first_set(self) -> Dict[CFGObject, Set[CFGObject]]: + def get_first_set(self) -> SetType: """ Used in LL(1) """ # Algorithm from: # https://www.geeksforgeeks.org/first-set-in-syntax-analysis/ @@ -48,8 +52,7 @@ def get_first_set(self) -> Dict[CFGObject, Set[CFGObject]]: @staticmethod def _get_first_set_production(production: Production, - first_set: Dict[CFGObject, Set[CFGObject]]) \ - -> Set[CFGObject]: + first_set: SetType) -> Set[CFGObject]: first_not_containing_epsilon = 0 first_set_temp = set() for body_component in production.body: @@ -66,11 +69,11 @@ def _get_first_set_production(production: Production, return first_set_temp def _initialize_first_set(self, - triggers: Dict[CFGObject, List[CFGObject]]) \ - -> Tuple[Dict, SetQueue]: + triggers: TriggersType) \ + -> Tuple[SetType, SetQueue]: + first_set: SetType = {} to_process = SetQueue() - first_set: Dict[CFGObject, Set[CFGObject]] = {} - # Initialisation + # Initialization for terminal in self._cfg.terminals: first_set[terminal] = {terminal} for triggered in triggers.get(terminal, []): @@ -83,8 +86,8 @@ def _initialize_first_set(self, to_process.append(triggered) return first_set, to_process - def _get_triggers(self) -> Dict[CFGObject, List[CFGObject]]: - triggers: Dict[CFGObject, List[CFGObject]] = {} + def _get_triggers(self) -> TriggersType: + triggers: TriggersType = {} for production in self._cfg.productions: for body_component in production.body: if body_component not in triggers: @@ -92,7 +95,7 @@ def _get_triggers(self) -> Dict[CFGObject, List[CFGObject]]: triggers[body_component].append(production.head) return triggers - def get_follow_set(self) -> Dict[CFGObject, Set[CFGObject]]: + def get_follow_set(self) -> SetType: """ Get follow set """ first_set = self.get_first_set() triggers = self._get_triggers_follow_set(first_set) @@ -109,8 +112,8 @@ def get_follow_set(self) -> Dict[CFGObject, Set[CFGObject]]: return follow_set def _initialize_follow_set(self, - first_set: Dict[CFGObject, Set[CFGObject]]) \ - -> Tuple[Dict, SetQueue]: + first_set: SetType) \ + -> Tuple[SetType, SetQueue]: to_process = SetQueue() follow_set = {} follow_set[self._cfg.start_symbol] = {"$"} @@ -131,12 +134,12 @@ def _initialize_follow_set(self, return follow_set, to_process def _get_triggers_follow_set(self, - first_set: Dict[CFGObject, Set[CFGObject]]) \ - -> Dict[CFGObject, List[CFGObject]]: - triggers = {} + first_set: SetType) \ + -> SetType: + follow_set: SetType = {} for production in self._cfg.productions: - if production.head not in triggers: - triggers[production.head] = set() + if production.head not in follow_set: + follow_set[production.head] = set() for i, component in enumerate(production.body): all_epsilon = True for component_next in production.body[i + 1:]: @@ -144,11 +147,10 @@ def _get_triggers_follow_set(self, all_epsilon = False break if all_epsilon: - triggers[production.head].add(component) - return triggers + follow_set[production.head].add(component) + return follow_set - def get_llone_parsing_table(self) \ - -> Dict[CFGObject, Dict[CFGObject, List[Production]]]: + def get_llone_parsing_table(self) -> ParsingTableType: """ Get the LL(1) parsing table From: https://www.slideshare.net/MahbuburRahman273/ll1-parser-in-compilers @@ -163,8 +165,7 @@ def get_llone_parsing_table(self) \ nullable_productions.append(production) else: non_nullable_productions.append(production) - llone_parsing_table: Dict[CFGObject, - Dict[CFGObject, List[Production]]] = {} + llone_parsing_table: ParsingTableType = {} for production in nullable_productions: if production.head not in llone_parsing_table: llone_parsing_table[production.head] = {} diff --git a/pyformlang/cfg/tests/test_cfg.py b/pyformlang/cfg/tests/test_cfg.py index 75c480a..361184e 100644 --- a/pyformlang/cfg/tests/test_cfg.py +++ b/pyformlang/cfg/tests/test_cfg.py @@ -1,14 +1,14 @@ """ Tests the CFG """ -from pyformlang import pda +import pytest + +from pyformlang.pda import PDA from pyformlang.cfg import Production, Variable, Terminal, CFG, Epsilon from pyformlang.cfg.cyk_table import DerivationDoesNotExist -from pyformlang.cfg.pda_object_creator import PDAObjectCreator from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.finite_automaton import State from pyformlang.finite_automaton import Symbol from pyformlang.regular_expression import Regex -import pytest class TestCFG: @@ -415,7 +415,7 @@ def test_to_pda(self): ter_par_close, ter_mult, ter_plus}, var_e, productions) - pda_equivalent = cfg.to_pda() + pda_equivalent = PDA.from_cfg(cfg) assert len(pda_equivalent.states) == 1 assert len(pda_equivalent.final_states) == 0 assert len(pda_equivalent.input_symbols) == 8 @@ -431,7 +431,7 @@ def test_conversions(self): productions = {Production(var_s, [ter_a, var_s, ter_b]), Production(var_s, [ter_c])} cfg = CFG(productions=productions, start_symbol=var_s) - cfg = cfg.to_pda().to_final_state().to_empty_stack().to_cfg() + cfg = PDA.from_cfg(cfg).to_final_state().to_empty_stack().to_cfg() assert cfg.contains([ter_c]) assert cfg.contains([ter_a, ter_c, ter_b]) assert cfg.contains([ter_a, ter_a, ter_c, ter_b, ter_b]) @@ -448,9 +448,9 @@ def test_profiling_conversions(): productions = {Production(var_s, [ter_a, var_s, ter_b]), Production(var_s, [ter_c])} cfg = CFG(productions=productions, start_symbol=var_s) - cfg = cfg.to_pda().to_final_state().to_empty_stack().to_cfg() - cfg = cfg.to_pda().to_final_state().to_empty_stack().to_cfg() - cfg.to_pda().to_final_state().to_empty_stack().to_cfg() + cfg = PDA.from_cfg(cfg).to_final_state().to_empty_stack().to_cfg() + cfg = PDA.from_cfg(cfg).to_final_state().to_empty_stack().to_cfg() + PDA.from_cfg(cfg).to_final_state().to_empty_stack().to_cfg() def test_generation_words(self): """ Tests the generation of word """ @@ -604,11 +604,11 @@ def test_intersection_with_epsilon(self): assert not cfg.is_empty() assert cfg.contains([ter_a]) - cfg_temp = cfg.to_pda().to_cfg() + cfg_temp = PDA.from_cfg(cfg).to_cfg() assert not cfg_temp.is_empty() assert cfg_temp.contains([ter_a]) - cfg_temp = cfg.to_pda().to_final_state().to_empty_stack().to_cfg() + cfg_temp = PDA.from_cfg(cfg).to_final_state().to_empty_stack().to_cfg() assert not cfg_temp.is_empty() assert cfg_temp.contains([ter_a]) @@ -675,12 +675,6 @@ def test_profiling_intersection(self): assert cfg_i.contains([ter_a] * size + [ter_b] * size) assert not cfg_i.contains([]) - def test_pda_object_creator(self): - pda_oc = PDAObjectCreator([], []) - assert pda_oc.get_symbol_from(Epsilon()) == pda.Epsilon() - assert pda_oc.get_stack_symbol_from(Epsilon()) == \ - pda.Epsilon() - def test_string_variable(self): var = Variable("A") assert repr(var) == "Variable(A)" diff --git a/pyformlang/pda/pda.py b/pyformlang/pda/pda.py index 8974703..84afffd 100644 --- a/pyformlang/pda/pda.py +++ b/pyformlang/pda/pda.py @@ -508,7 +508,8 @@ def _initialize_production_from_start_in_to_cfg( state)]) for state in self.states] - def from_cfg(self, cfg: CFG) -> "PDA": + @classmethod + def from_cfg(cls, cfg: CFG) -> "PDA": """ Converts the CFG to a PDA that generates on empty stack an \ equivalent language From 570bf0255baa71645fd9e2236a7464a814d055d4 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 2 Dec 2024 20:00:37 +0300 Subject: [PATCH 14/30] update tests, correct transition function of pda, correct CFGConvertible interface --- pyformlang/cfg/tests/test_llone_parser.py | 8 ++++---- pyformlang/fcfg/fcfg.py | 12 ++++++------ pyformlang/objects/cfg_objects/cfg_convertible.py | 5 +++-- .../objects/finite_automaton_objects/state.py | 2 +- pyformlang/objects/pda_objects/stack_symbol.py | 2 +- pyformlang/objects/pda_objects/state.py | 2 +- pyformlang/pda/tests/test_pda.py | 6 ------ pyformlang/pda/transition_function.py | 13 +++++-------- pyformlang/rsa/tests/test_rsa.py | 2 +- 9 files changed, 22 insertions(+), 30 deletions(-) diff --git a/pyformlang/cfg/tests/test_llone_parser.py b/pyformlang/cfg/tests/test_llone_parser.py index a4a843a..e7bb2fa 100644 --- a/pyformlang/cfg/tests/test_llone_parser.py +++ b/pyformlang/cfg/tests/test_llone_parser.py @@ -88,13 +88,13 @@ def test_get_follow_set2(self): cfg = CFG.from_text(text) llone_parser = LLOneParser(cfg) follow_set = llone_parser.get_follow_set() - assert follow_set["S"] == \ + assert follow_set[Variable("S")] == \ {"$"} - assert follow_set["A"] == \ + assert follow_set[Variable("A")] == \ {"$", Terminal("h"), Terminal("g")} - assert follow_set["B"] == \ + assert follow_set[Variable("B")] == \ {"$", Terminal("h"), Terminal("g"), Terminal("a")} - assert follow_set["C"] == \ + assert follow_set[Variable("C")] == \ {"$", Terminal("h"), Terminal("g"), Terminal("b")} def test_get_llone_table(self): diff --git a/pyformlang/fcfg/fcfg.py b/pyformlang/fcfg/fcfg.py index e9fc296..098ffaf 100644 --- a/pyformlang/fcfg/fcfg.py +++ b/pyformlang/fcfg/fcfg.py @@ -1,8 +1,6 @@ """Feature Context-Free Grammar""" import string -from typing import Iterable, AbstractSet, Union - -from objects.cfg_objects.utils import to_terminal +from typing import Iterable, AbstractSet, Hashable from pyformlang.cfg import CFG, Terminal, Epsilon, Variable, ParseTree from pyformlang.cfg.cfg import is_special_text, EPSILON_SYMBOLS, NotParsableException @@ -10,6 +8,8 @@ from pyformlang.fcfg.feature_structure import FeatureStructure, FeatureStructuresNotCompatibleException from pyformlang.fcfg.state import State, StateProcessed +from ..objects.cfg_objects.utils import to_terminal + class FCFG(CFG): """ A class representing a feature context-free grammar @@ -73,7 +73,7 @@ def __predictor(self, state, chart, processed): if processed.add(end_idx, new_state): chart[end_idx].append(new_state) - def contains(self, word: Iterable[Union[Terminal, str]]) -> bool: + def contains(self, word: Iterable[Hashable]) -> bool: """ Gives the membership of a word to the grammar Parameters @@ -88,7 +88,7 @@ def contains(self, word: Iterable[Union[Terminal, str]]) -> bool: """ return self._get_final_state(word) is not None - def get_parse_tree(self, word: Iterable[Union[Terminal, str]]) -> ParseTree: + def get_parse_tree(self, word: Iterable[Hashable]) -> ParseTree: """ Gives the parse tree for a sentence, if possible Parameters @@ -111,7 +111,7 @@ def get_parse_tree(self, word: Iterable[Union[Terminal, str]]) -> ParseTree: raise NotParsableException() return final_state.parse_tree - def _get_final_state(self, word: Iterable[Terminal]): + def _get_final_state(self, word: Iterable[Hashable]): word = [to_terminal(x) for x in word if x != Epsilon()] chart = [[] for _ in range(len(word) + 1)] # Processed[i] contains all production rule that are currently working until i. diff --git a/pyformlang/objects/cfg_objects/cfg_convertible.py b/pyformlang/objects/cfg_objects/cfg_convertible.py index d8a5bdf..70daed2 100644 --- a/pyformlang/objects/cfg_objects/cfg_convertible.py +++ b/pyformlang/objects/cfg_objects/cfg_convertible.py @@ -1,10 +1,11 @@ """ Interface representing the ability of conversion to cfg object """ -from typing import Optional +from typing import Optional, Any class CFGConvertible: """ Interface representing the ability of conversion to cfg object """ - def __init__(self) -> None: + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) self.index_cfg_converter: Optional[int] = None diff --git a/pyformlang/objects/finite_automaton_objects/state.py b/pyformlang/objects/finite_automaton_objects/state.py index dbb72ea..7699a29 100644 --- a/pyformlang/objects/finite_automaton_objects/state.py +++ b/pyformlang/objects/finite_automaton_objects/state.py @@ -8,7 +8,7 @@ from ..cfg_objects import CFGConvertible -class State(FiniteAutomatonObject, CFGConvertible): +class State(CFGConvertible, FiniteAutomatonObject): """ A state in a finite automaton Parameters diff --git a/pyformlang/objects/pda_objects/stack_symbol.py b/pyformlang/objects/pda_objects/stack_symbol.py index a2dba9d..9f7c8a7 100644 --- a/pyformlang/objects/pda_objects/stack_symbol.py +++ b/pyformlang/objects/pda_objects/stack_symbol.py @@ -6,7 +6,7 @@ from ..cfg_objects import CFGConvertible -class StackSymbol(Symbol, CFGConvertible): +class StackSymbol(CFGConvertible, Symbol): """ A StackSymbol in a pushdown automaton Parameters diff --git a/pyformlang/objects/pda_objects/state.py b/pyformlang/objects/pda_objects/state.py index 8462d6a..86562b1 100644 --- a/pyformlang/objects/pda_objects/state.py +++ b/pyformlang/objects/pda_objects/state.py @@ -6,7 +6,7 @@ from ..cfg_objects import CFGConvertible -class State(PDAObject, CFGConvertible): +class State(CFGConvertible, PDAObject): """ A State in a pushdown automaton Parameters diff --git a/pyformlang/pda/tests/test_pda.py b/pyformlang/pda/tests/test_pda.py index 569415e..84e5254 100644 --- a/pyformlang/pda/tests/test_pda.py +++ b/pyformlang/pda/tests/test_pda.py @@ -5,7 +5,6 @@ from pyformlang.cfg import Terminal from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.finite_automaton import State as FAState, Symbol as FASymbol -from pyformlang.pda.utils import PDAObjectCreator from pyformlang.regular_expression import Regex @@ -326,11 +325,6 @@ def test_intersection_regex(self): cfg = pda_es.to_cfg() assert not cfg - def test_pda_object_creator_epsilon(self): - """ Test creation objects """ - poc = PDAObjectCreator() - assert poc.to_stack_symbol(Epsilon()) == Epsilon() - def test_pda_paper(self): """ Code in the paper """ pda = PDA() diff --git a/pyformlang/pda/transition_function.py b/pyformlang/pda/transition_function.py index 155eba1..f1d62e5 100644 --- a/pyformlang/pda/transition_function.py +++ b/pyformlang/pda/transition_function.py @@ -1,12 +1,12 @@ """ A transition function in a pushdown automaton """ from copy import deepcopy -from typing import Dict, List, Set, Iterator, Iterable, Tuple, Optional +from typing import Dict, Set, Sequence, Iterator, Iterable, Tuple from ..objects.pda_objects import State, Symbol, StackSymbol TransitionKey = Tuple[State, Symbol, StackSymbol] -TransitionValue = Tuple[State, List[StackSymbol]] +TransitionValue = Tuple[State, Tuple[StackSymbol, ...]] TransitionValues = Set[TransitionValue] Transition = Tuple[TransitionKey, TransitionValue] @@ -16,9 +16,6 @@ class TransitionFunction(Iterable[Transition]): def __init__(self) -> None: self._transitions: Dict[TransitionKey, TransitionValues] = {} - self._current_key: Optional[TransitionKey] = None - self._iter_key: Optional[Iterator[TransitionKey]] = None - self._iter_inside: Optional[Iterator[TransitionValue]] = None def get_number_transitions(self) -> int: """ Gets the number of transitions @@ -36,7 +33,7 @@ def add_transition(self, input_symbol: Symbol, stack_from: StackSymbol, s_to: State, - stack_to: List[StackSymbol]) -> None: + stack_to: Sequence[StackSymbol]) -> None: """ Add a transition to the function Parameters @@ -53,7 +50,7 @@ def add_transition(self, The string of stack symbol which replace the stack_from """ temp_in = (s_from, input_symbol, stack_from) - temp_out = (s_to, stack_to.copy()) + temp_out = (s_to, tuple(stack_to)) if temp_in in self._transitions: self._transitions[temp_in].add(temp_out) else: @@ -71,7 +68,7 @@ def copy(self) -> "TransitionFunction": for temp_in, transition in self._transitions.items(): for temp_out in transition: new_tf.add_transition(temp_in[0], temp_in[1], temp_in[2], - temp_out[0], temp_out[1]) + *temp_out) return new_tf def __iter__(self) -> Iterator[Transition]: diff --git a/pyformlang/rsa/tests/test_rsa.py b/pyformlang/rsa/tests/test_rsa.py index 1fa3162..bba62d2 100644 --- a/pyformlang/rsa/tests/test_rsa.py +++ b/pyformlang/rsa/tests/test_rsa.py @@ -1,5 +1,5 @@ """ Tests for RSA """ -from pyformlang.finite_automaton.symbol import Symbol +from pyformlang.finite_automaton import Symbol from pyformlang.regular_expression import Regex from pyformlang.rsa.recursive_automaton import RecursiveAutomaton From 7f877f52814880d3879f4b511e8f065ce7f5602e Mon Sep 17 00:00:00 2001 From: bygu4 Date: Mon, 28 Oct 2024 16:52:00 +0300 Subject: [PATCH 15/30] add fcfg annotations --- pyformlang/cfg/__init__.py | 7 +- pyformlang/cfg/cfg.py | 8 +- pyformlang/cfg/cyk_table.py | 16 +-- pyformlang/cfg/llone_parser.py | 44 +++---- pyformlang/cfg/parse_tree.py | 4 + pyformlang/cfg/recursive_decent_parser.py | 25 ++-- .../cfg/tests/test_recursive_decent_parser.py | 3 +- pyformlang/fcfg/fcfg.py | 113 ++++++++++++------ pyformlang/fcfg/feature_production.py | 20 +++- pyformlang/fcfg/feature_structure.py | 102 ++++++++++------ pyformlang/fcfg/state.py | 44 ++++--- pyformlang/fcfg/tests/test_fcfg.py | 13 +- pyformlang/regular_expression/__init__.py | 3 +- 13 files changed, 251 insertions(+), 151 deletions(-) diff --git a/pyformlang/cfg/__init__.py b/pyformlang/cfg/__init__.py index 79efc12..411cc50 100644 --- a/pyformlang/cfg/__init__.py +++ b/pyformlang/cfg/__init__.py @@ -21,8 +21,9 @@ """ from .cfg import CFG, CFGObject, Variable, Terminal, Epsilon, Production -from .parse_tree import ParseTree +from .parse_tree import ParseTree, NotParsableException from .llone_parser import LLOneParser +from .recursive_decent_parser import RecursiveDecentParser __all__ = ["CFGObject", @@ -32,4 +33,6 @@ "CFG", "Epsilon", "ParseTree", - "LLOneParser"] + "LLOneParser", + "RecursiveDecentParser", + "NotParsableException"] diff --git a/pyformlang/cfg/cfg.py b/pyformlang/cfg/cfg.py index 3853371..99ecc45 100644 --- a/pyformlang/cfg/cfg.py +++ b/pyformlang/cfg/cfg.py @@ -12,7 +12,7 @@ from .grammar import Grammar from .parse_tree import ParseTree -from .cyk_table import CYKTable, DerivationDoesNotExist +from .cyk_table import CYKTable from .cfg_variable_converter import CFGVariableConverter from .utils import remove_nullable_production, get_productions_d from ..objects.cfg_objects import CFGObject, \ @@ -24,10 +24,6 @@ SUBS_SUFFIX = "#SUBS#" -class NotParsableException(Exception): - """When the grammar cannot be parsed (parser not powerful enough)""" - - def is_special_text(text: str) -> bool: """ Check if the input is given an explicit type """ return len(text) > 5 and \ @@ -702,8 +698,6 @@ def get_cnf_parse_tree(self, word: Iterable[Hashable]) -> ParseTree: """ word = [to_terminal(x) for x in word if x != Epsilon()] - if not word and not self.generate_epsilon(): - raise DerivationDoesNotExist cyk_table = CYKTable(self, word) return cyk_table.get_parse_tree() diff --git a/pyformlang/cfg/cyk_table.py b/pyformlang/cfg/cyk_table.py index bec32a2..192b2f8 100644 --- a/pyformlang/cfg/cyk_table.py +++ b/pyformlang/cfg/cyk_table.py @@ -5,11 +5,11 @@ from typing import Dict, List, Set, Iterable, Tuple, Any from .grammar import Grammar -from .parse_tree import ParseTree -from ..objects.cfg_objects import CFGObject, Terminal, Epsilon +from .parse_tree import ParseTree, NotParsableException +from ..objects.cfg_objects import CFGObject, Terminal -ProductionsType = Dict[Tuple[CFGObject, ...], List[CFGObject]] -CYKTableType = Dict[Tuple[int, int], Set["CYKNode"]] +ProductionsDict = Dict[Tuple[CFGObject, ...], List[CFGObject]] +Table = Dict[Tuple[int, int], Set["CYKNode"]] class CYKTable: @@ -26,8 +26,8 @@ class CYKTable: def __init__(self, grammar: Grammar, word: List[Terminal]) -> None: self._normal_form: Grammar = grammar.to_normal_form() self._word: List[Terminal] = word - self._productions_d: ProductionsType = {} - self._cyk_table: CYKTableType = {} + self._productions_d: ProductionsDict = {} + self._cyk_table: Table = {} self._set_productions_by_body() if not self._generates_all_terminals(): self._cyk_table[(0, len(self._word))] = set() @@ -107,8 +107,8 @@ def get_parse_tree(self) -> ParseTree: ------- parse_tree : :class:`~pyformlang.cfg.ParseTree` """ - if not self._word: - return CYKNode(self._normal_form.start_symbol or Epsilon()) + if not self._normal_form.start_symbol: + raise NotParsableException if not self.generate_word(): raise DerivationDoesNotExist root = [ diff --git a/pyformlang/cfg/llone_parser.py b/pyformlang/cfg/llone_parser.py index ba78ffe..661cf68 100644 --- a/pyformlang/cfg/llone_parser.py +++ b/pyformlang/cfg/llone_parser.py @@ -2,16 +2,16 @@ from typing import Dict, List, Set, Iterable, Tuple, Hashable -from .cfg import CFG, Production, NotParsableException -from .parse_tree import ParseTree +from .cfg import CFG, Production +from .parse_tree import ParseTree, NotParsableException from .set_queue import SetQueue from .utils import get_productions_d from ..objects.cfg_objects import CFGObject, Epsilon from ..objects.cfg_objects.utils import to_terminal -SetType = Dict[CFGObject, Set[CFGObject]] -TriggersType = Dict[CFGObject, List[CFGObject]] -ParsingTableType = Dict[CFGObject, Dict[CFGObject, List[Production]]] +ParserSet = Dict[CFGObject, Set[CFGObject]] +Triggers = Dict[CFGObject, List[CFGObject]] +ParsingTable = Dict[CFGObject, Dict[CFGObject, List[Production]]] class LLOneParser: @@ -27,7 +27,7 @@ class LLOneParser: def __init__(self, cfg: CFG) -> None: self._cfg = cfg - def get_first_set(self) -> SetType: + def get_first_set(self) -> ParserSet: """ Used in LL(1) """ # Algorithm from: # https://www.geeksforgeeks.org/first-set-in-syntax-analysis/ @@ -52,7 +52,7 @@ def get_first_set(self) -> SetType: @staticmethod def _get_first_set_production(production: Production, - first_set: SetType) -> Set[CFGObject]: + first_set: ParserSet) -> Set[CFGObject]: first_not_containing_epsilon = 0 first_set_temp = set() for body_component in production.body: @@ -69,9 +69,9 @@ def _get_first_set_production(production: Production, return first_set_temp def _initialize_first_set(self, - triggers: TriggersType) \ - -> Tuple[SetType, SetQueue]: - first_set: SetType = {} + triggers: Triggers) \ + -> Tuple[ParserSet, SetQueue]: + first_set: ParserSet = {} to_process = SetQueue() # Initialization for terminal in self._cfg.terminals: @@ -86,8 +86,8 @@ def _initialize_first_set(self, to_process.append(triggered) return first_set, to_process - def _get_triggers(self) -> TriggersType: - triggers: TriggersType = {} + def _get_triggers(self) -> Triggers: + triggers: Triggers = {} for production in self._cfg.productions: for body_component in production.body: if body_component not in triggers: @@ -95,7 +95,7 @@ def _get_triggers(self) -> TriggersType: triggers[body_component].append(production.head) return triggers - def get_follow_set(self) -> SetType: + def get_follow_set(self) -> ParserSet: """ Get follow set """ first_set = self.get_first_set() triggers = self._get_triggers_follow_set(first_set) @@ -112,8 +112,8 @@ def get_follow_set(self) -> SetType: return follow_set def _initialize_follow_set(self, - first_set: SetType) \ - -> Tuple[SetType, SetQueue]: + first_set: ParserSet) \ + -> Tuple[ParserSet, SetQueue]: to_process = SetQueue() follow_set = {} follow_set[self._cfg.start_symbol] = {"$"} @@ -134,9 +134,9 @@ def _initialize_follow_set(self, return follow_set, to_process def _get_triggers_follow_set(self, - first_set: SetType) \ - -> SetType: - follow_set: SetType = {} + first_set: ParserSet) \ + -> ParserSet: + follow_set: ParserSet = {} for production in self._cfg.productions: if production.head not in follow_set: follow_set[production.head] = set() @@ -150,7 +150,7 @@ def _get_triggers_follow_set(self, follow_set[production.head].add(component) return follow_set - def get_llone_parsing_table(self) -> ParsingTableType: + def get_llone_parsing_table(self) -> ParsingTable: """ Get the LL(1) parsing table From: https://www.slideshare.net/MahbuburRahman273/ll1-parser-in-compilers @@ -165,7 +165,7 @@ def get_llone_parsing_table(self) -> ParsingTableType: nullable_productions.append(production) else: non_nullable_productions.append(production) - llone_parsing_table: ParsingTableType = {} + llone_parsing_table: ParsingTable = {} for production in nullable_productions: if production.head not in llone_parsing_table: llone_parsing_table[production.head] = {} @@ -222,11 +222,13 @@ def get_llone_parse_tree(self, word: Iterable[Hashable]) -> ParseTree: When the word cannot be parsed """ + if not self._cfg.start_symbol: + raise NotParsableException word = [to_terminal(x) for x in word if x != Epsilon()] word.append("$") # type: ignore word = word[::-1] parsing_table = self.get_llone_parsing_table() - parse_tree = ParseTree(self._cfg.start_symbol or Epsilon()) + parse_tree = ParseTree(self._cfg.start_symbol) stack = ["$", parse_tree] while stack: current = stack.pop() diff --git a/pyformlang/cfg/parse_tree.py b/pyformlang/cfg/parse_tree.py index 490964b..45f232d 100644 --- a/pyformlang/cfg/parse_tree.py +++ b/pyformlang/cfg/parse_tree.py @@ -112,3 +112,7 @@ def write_as_dot(self, filename: str) -> None: """ write_dot(self.to_networkx(), filename) + + +class NotParsableException(Exception): + """When the grammar cannot be parsed (parser not powerful enough)""" diff --git a/pyformlang/cfg/recursive_decent_parser.py b/pyformlang/cfg/recursive_decent_parser.py index 07c434f..4e3f602 100644 --- a/pyformlang/cfg/recursive_decent_parser.py +++ b/pyformlang/cfg/recursive_decent_parser.py @@ -2,16 +2,19 @@ A recursive decent parser. """ -from typing import List, Iterable, Tuple, Optional, Hashable, Any +from typing import List, Iterable, Tuple, Optional, Hashable -from .cfg import CFG, NotParsableException -from .parse_tree import ParseTree -from ..objects.cfg_objects import Variable, Terminal, Epsilon +from .cfg import CFG +from .parse_tree import ParseTree, NotParsableException +from ..objects.cfg_objects import CFGObject, Variable, Terminal, Epsilon from ..objects.cfg_objects.utils import to_terminal +ExpansionSymbol = Tuple[CFGObject, ParseTree] +Expansion = List[ExpansionSymbol] -def _get_index_to_extend(current_expansion: List[Any], left: bool) \ - -> Tuple[int, Optional[List[Any]]]: + +def _get_index_to_extend(current_expansion: Expansion, left: bool) \ + -> Tuple[int, Optional[ExpansionSymbol]]: order = enumerate(current_expansion) if not left: order = reversed(list(order)) @@ -59,16 +62,18 @@ def get_parse_tree(self, word: Iterable[Hashable], left: bool = True) \ When the word cannot be parsed """ + if not self._cfg.start_symbol: + raise NotParsableException word = [to_terminal(x) for x in word if x != Epsilon()] - parse_tree = ParseTree(self._cfg.start_symbol or Epsilon()) - starting_expansion = [(self._cfg.start_symbol, parse_tree)] + parse_tree = ParseTree(self._cfg.start_symbol) + starting_expansion = [((CFGObject)(self._cfg.start_symbol), parse_tree)] if self._get_parse_tree_sub(word, starting_expansion, left): return parse_tree raise NotParsableException def _match(self, word: List[Terminal], - current_expansion: List[Any], + current_expansion: Expansion, idx_word: int = 0, idx_current_expansion: int = 0) -> bool: if idx_word == len(word) and \ @@ -91,7 +96,7 @@ def _match(self, def _get_parse_tree_sub(self, word: List[Terminal], - current_expansion: List[Any], + current_expansion: Expansion, left: bool = True) -> bool: if not self._match(word, current_expansion): return False diff --git a/pyformlang/cfg/tests/test_recursive_decent_parser.py b/pyformlang/cfg/tests/test_recursive_decent_parser.py index b727a8e..23384b1 100644 --- a/pyformlang/cfg/tests/test_recursive_decent_parser.py +++ b/pyformlang/cfg/tests/test_recursive_decent_parser.py @@ -1,8 +1,7 @@ # pylint: disable=missing-module-docstring # pylint: disable=missing-class-docstring # pylint: disable=missing-function-docstring -from pyformlang.cfg import CFG, Variable, Terminal -from pyformlang.cfg.cfg import NotParsableException +from pyformlang.cfg import CFG, Variable, Terminal, NotParsableException from pyformlang.cfg.recursive_decent_parser import RecursiveDecentParser import pytest diff --git a/pyformlang/fcfg/fcfg.py b/pyformlang/fcfg/fcfg.py index 098ffaf..8993f51 100644 --- a/pyformlang/fcfg/fcfg.py +++ b/pyformlang/fcfg/fcfg.py @@ -1,13 +1,16 @@ """Feature Context-Free Grammar""" -import string -from typing import Iterable, AbstractSet, Hashable -from pyformlang.cfg import CFG, Terminal, Epsilon, Variable, ParseTree -from pyformlang.cfg.cfg import is_special_text, EPSILON_SYMBOLS, NotParsableException -from pyformlang.fcfg.feature_production import FeatureProduction -from pyformlang.fcfg.feature_structure import FeatureStructure, FeatureStructuresNotCompatibleException -from pyformlang.fcfg.state import State, StateProcessed +from typing import List, Set, Tuple, AbstractSet, Iterable, Optional, Hashable +from string import ascii_uppercase +from pyformlang.cfg import CFG, CFGObject, \ + Variable, Terminal, Epsilon, ParseTree, Production, NotParsableException +from pyformlang.cfg.cfg import is_special_text, EPSILON_SYMBOLS + +from .feature_structure import FeatureStructure, \ + FeatureStructuresNotCompatibleException +from .feature_production import FeatureProduction +from .state import State, StateProcessed from ..objects.cfg_objects.utils import to_terminal @@ -58,18 +61,24 @@ def __init__(self, variables: AbstractSet[Variable] = None, terminals: AbstractSet[Terminal] = None, start_symbol: Variable = None, - productions: Iterable[FeatureProduction] = None): + productions: Iterable[FeatureProduction] = None) -> None: super().__init__(variables, terminals, start_symbol, productions) + self._productions: Set[FeatureProduction] - def __predictor(self, state, chart, processed): + def __predictor(self, + state: State, + chart: List[List[State]], + processed: StateProcessed) -> None: # We have an incomplete state and the next token is a variable # We must ask to process the variable with another rule end_idx = state.positions[1] next_var = state.production.body[state.positions[2]] - for production in self.productions: + for production in self._productions: if production.head == next_var: - new_state = State(production, (end_idx, end_idx, 0), - production.features, ParseTree(production.head)) + new_state = State(production, + (end_idx, end_idx, 0), + production.features, + ParseTree(production.head)) if processed.add(end_idx, new_state): chart[end_idx].append(new_state) @@ -86,6 +95,7 @@ def contains(self, word: Iterable[Hashable]) -> bool: contains : bool Whether word if in the FCFG or not """ + word = [to_terminal(x) for x in word if x != Epsilon()] return self._get_final_state(word) is not None def get_parse_tree(self, word: Iterable[Hashable]) -> ParseTree: @@ -106,20 +116,30 @@ def get_parse_tree(self, word: Iterable[Hashable]) -> ParseTree: NotParsableException When the word is not parsable. """ + word = [to_terminal(x) for x in word if x != Epsilon()] final_state = self._get_final_state(word) if final_state is None: - raise NotParsableException() + raise NotParsableException return final_state.parse_tree - def _get_final_state(self, word: Iterable[Hashable]): - word = [to_terminal(x) for x in word if x != Epsilon()] - chart = [[] for _ in range(len(word) + 1)] - # Processed[i] contains all production rule that are currently working until i. + def _get_final_state(self, word: List[Terminal]) -> Optional[State]: + chart: List[List[State]] = [[] for _ in range(len(word) + 1)] + # Processed[i] contains all production rule \ + # that are currently working until i. processed = StateProcessed(len(word) + 1) gamma = Variable("Gamma") - dummy_rule = FeatureProduction(gamma, [self.start_symbol], FeatureStructure(), [FeatureStructure()]) + production_body: List[CFGObject] = [] + if self.start_symbol is not None: + production_body.append(self.start_symbol) + dummy_rule = FeatureProduction(gamma, + production_body, + FeatureStructure(), + [FeatureStructure()]) # State = (rule, [begin, end, dot position, diag) - first_state = State(dummy_rule, (0, 0, 0), dummy_rule.features, ParseTree("BEGIN")) + first_state = State(dummy_rule, + (0, 0, 0), + dummy_rule.features, + ParseTree(Variable("BEGIN"))) chart[0].append(first_state) processed.add(0, first_state) for i in range(len(chart) - 1): @@ -128,7 +148,7 @@ def _get_final_state(self, word: Iterable[Hashable]): if state.is_incomplete() and state.next_is_variable(): self.__predictor(state, chart, processed) elif state.is_incomplete(): - if state.next_is_word(word[i]): + if state.next_is_symbol(word[i]): _scanner(state, chart, processed) else: _completer(state, chart, processed) @@ -137,19 +157,26 @@ def _get_final_state(self, word: Iterable[Hashable]): if not state.is_incomplete(): _completer(state, chart, processed) for state in processed.generator(len(word)): - if state.positions[0] == 0 and not state.is_incomplete() and state.production.head == self.start_symbol: + if state.positions[0] == 0 \ + and not state.is_incomplete() \ + and state.production.head == self.start_symbol: return state return None @classmethod - def _read_line(cls, line, productions, terminals, variables): + def _read_line(cls, + line: str, + productions: Set[Production], + terminals: Set[Terminal], + variables: Set[Variable]) -> None: structure_variables = {} head_s, body_s = line.split("->") head_text = head_s.strip() if is_special_text(head_text): head_text = head_text[5:-1] head_text, head_conditions = _split_text_conditions(head_text) - head_fs = FeatureStructure.from_text(head_conditions, structure_variables) + head_fs = FeatureStructure.from_text( + head_conditions, structure_variables) head = Variable(head_text) variables.add(head) all_body_fs = [] @@ -161,10 +188,12 @@ def _read_line(cls, line, productions, terminals, variables): body_component = body_component[5:-1] else: type_component = "" - if body_component[0] in string.ascii_uppercase or \ + if body_component[0] in ascii_uppercase or \ type_component == "VAR": - body_component, body_conditions = _split_text_conditions(body_component) - body_fs = FeatureStructure.from_text(body_conditions, structure_variables) + body_component, body_conditions = \ + _split_text_conditions(body_component) + body_fs = FeatureStructure.from_text( + body_conditions, structure_variables) all_body_fs.append(body_fs) body_var = Variable(body_component) variables.add(body_var) @@ -179,7 +208,7 @@ def _read_line(cls, line, productions, terminals, variables): productions.add(production) -def _split_text_conditions(head_text): +def _split_text_conditions(head_text: str) -> Tuple[str, str]: if head_text[-1] != "]": return head_text, "" idx = head_text.find("[") @@ -188,36 +217,48 @@ def _split_text_conditions(head_text): return head_text[:idx], head_text[idx+1:-1] -def _scanner(state, chart, processed): +def _scanner(state: State, + chart: List[List[State]], + processed: StateProcessed) -> None: # We have an incomplete state and the next token is the word given as input # We move the end token and the dot token by one. end_idx = state.positions[1] - state.parse_tree.sons.append(ParseTree(state.production.body[state.positions[2]])) - new_state = State(state.production, (state.positions[0], end_idx + 1, state.positions[2] + 1), - state.feature_stucture, state.parse_tree) + state.parse_tree.sons.append( + ParseTree(state.production.body[state.positions[2]])) + new_state = State(state.production, + (state.positions[0], end_idx + 1, state.positions[2] + 1), + state.feature_stucture, + state.parse_tree) if processed.add(end_idx + 1, new_state): chart[end_idx + 1].append(new_state) -def _completer(state, chart, processed): - # We have a complete state. We must check if it helps to move another state forward. +def _completer(state: State, + chart: List[List[State]], + processed: StateProcessed) -> None: + # We have a complete state. + # We must check if it helps to move another state forward. begin_idx = state.positions[0] head = state.production.head for next_state in processed.generator(begin_idx): # next_state[1][1] == begin_idx always true - if next_state.is_incomplete() and next_state.production.body[next_state.positions[2]] == head: + if next_state.is_incomplete() \ + and next_state.production.body[next_state.positions[2]] == head: try: copy_left = state.feature_stucture.copy() copy_left = copy_left.get_feature_by_path(["head"]) copy_right = next_state.feature_stucture.copy() - copy_right_considered = copy_right.get_feature_by_path([str(next_state.positions[2])]) + copy_right_considered = copy_right.get_feature_by_path( + [str(next_state.positions[2])]) copy_right_considered.unify(copy_left) except FeatureStructuresNotCompatibleException: continue parse_tree = next_state.parse_tree parse_tree.sons.append(state.parse_tree) new_state = State(next_state.production, - (next_state.positions[0], state.positions[1], next_state.positions[2] + 1), + (next_state.positions[0], + state.positions[1], + next_state.positions[2] + 1), copy_right, parse_tree) if processed.add(state.positions[1], new_state): chart[state.positions[1]].append(new_state) diff --git a/pyformlang/fcfg/feature_production.py b/pyformlang/fcfg/feature_production.py index 59b187e..19c7b1b 100644 --- a/pyformlang/fcfg/feature_production.py +++ b/pyformlang/fcfg/feature_production.py @@ -1,8 +1,10 @@ """Production rules with features""" -from typing import List + +from typing import List, Iterable from pyformlang.cfg import CFGObject, Variable, Production -from pyformlang.fcfg.feature_structure import FeatureStructure + +from .feature_structure import FeatureStructure class FeatureProduction(Production): @@ -17,10 +19,16 @@ class FeatureProduction(Production): head_feature : :class:`~pyformlang.fcfg.FeatureStructure` The feature structure of the head body_features : Iterable of :class:`~pyformlang.fcfg.FeatureStructure` - The feature structures of the elements of the body. Must be the same size as the body. + The feature structures of the elements of the body. + Must be the same size as the body. """ - def __init__(self, head: Variable, body: List[CFGObject], head_feature, body_features, filtering=True): + def __init__(self, + head: Variable, + body: List[CFGObject], + head_feature: FeatureStructure, + body_features: Iterable[FeatureStructure], + filtering: bool = True) -> None: super().__init__(head, body, filtering) self._features = FeatureStructure() self._features.add_content("head", head_feature) @@ -28,11 +36,11 @@ def __init__(self, head: Variable, body: List[CFGObject], head_feature, body_fea self._features.add_content(str(i), feature_structure) @property - def features(self): + def features(self) -> FeatureStructure: """The merged features of the production rules""" return self._features - def __repr__(self): + def __repr__(self) -> str: res = [self.head.to_text()] cond_head = str(self._features.get_feature_by_path(["head"])) if cond_head: diff --git a/pyformlang/fcfg/feature_structure.py b/pyformlang/fcfg/feature_structure.py index b116de0..7dde5a9 100644 --- a/pyformlang/fcfg/feature_structure.py +++ b/pyformlang/fcfg/feature_structure.py @@ -1,5 +1,6 @@ """Feature Structure""" -from typing import Any, List, Dict + +from typing import Dict, List, Iterable, Tuple, Optional, Any class ContentAlreadyExistsException(Exception): @@ -24,18 +25,21 @@ class FeatureStructure: """ - def __init__(self, value=None): - self._content = {} + def __init__(self, value: Any = None) -> None: + self._content: Dict[str, FeatureStructure] = {} self._value = value - self._pointer = None + self._pointer: Optional[FeatureStructure] = None - def copy(self, already_copied=None): + def copy(self, already_copied: Dict["FeatureStructure", + "FeatureStructure"] = None) \ + -> "FeatureStructure": """Copies the current feature structure Parameters ---------- already_copied : dict - A dictionary containing the parts already copied. For internal usage. + A dictionary containing the parts already copied. + For internal usage. Returns ---------- @@ -56,32 +60,33 @@ def copy(self, already_copied=None): return new_fs @property - def content(self) -> Any: + def content(self) -> Dict[str, "FeatureStructure"]: """Gets the content of the current node""" return self._content @property - def pointer(self) -> Any: + def pointer(self) -> Optional["FeatureStructure"]: """Gets the pointer of the current node""" return self._pointer @pointer.setter - def pointer(self, new_pointer): + def pointer(self, new_pointer: "FeatureStructure") -> None: """Set the value of the pointer""" self._pointer = new_pointer - @property def value(self) -> Any: """Gets the value associated to the current node""" return self._value if self.pointer is None else self.pointer.value @value.setter - def value(self, new_value) -> Any: + def value(self, new_value: Any) -> None: """Gets the value associated to the current node""" self._value = new_value - def add_content(self, content_name: str, feature_structure: "FeatureStructure"): + def add_content(self, + content_name: str, + feature_structure: "FeatureStructure") -> None: """Add content to the current feature structure. Parameters @@ -100,7 +105,10 @@ def add_content(self, content_name: str, feature_structure: "FeatureStructure"): raise ContentAlreadyExistsException() self._content[content_name] = feature_structure - def add_content_path(self, content_name: str, feature_structure: "FeatureStructure", path: List[str]): + def add_content_path(self, + content_name: str, + feature_structure: "FeatureStructure", + path: List[str]) -> None: """Add content to the current feature structure at a specific path Parameters @@ -122,16 +130,20 @@ def add_content_path(self, content_name: str, feature_structure: "FeatureStructu to_modify = self.get_feature_by_path(path) to_modify.add_content(content_name, feature_structure) - def get_dereferenced(self): - """Get the dereferences version of the feature structure. For internal usage.""" - return self._pointer.get_dereferenced() if self._pointer is not None else self + def get_dereferenced(self) -> "FeatureStructure": + """ + Get the dereferences version of the feature structure. + For internal usage. + """ + return self._pointer.get_dereferenced() \ + if self._pointer is not None else self - def get_feature_by_path(self, path: List[str] = None): + def get_feature_by_path(self, path: List[str] = None) -> "FeatureStructure": """ Get a feature at a given path. Parameters ----------- - path : Iterable of str, optional + path : List of str, optional The path to the new feature. Returns @@ -152,7 +164,7 @@ def get_feature_by_path(self, path: List[str] = None): raise PathDoesNotExistsException() return current.content[path[0]].get_feature_by_path(path[1:]) - def unify(self, other: "FeatureStructure"): + def unify(self, other: "FeatureStructure") -> None: """Unify the current structure with another one. Modifies the current structure. @@ -171,7 +183,8 @@ def unify(self, other: "FeatureStructure"): other_dereferenced = other.get_dereferenced() if current_dereferenced == other_dereferenced: return - if len(current_dereferenced.content) == 0 and len(other_dereferenced.content) == 0: + if len(current_dereferenced.content) == 0 \ + and len(other_dereferenced.content) == 0: # We have a simple feature if current_dereferenced.value == other_dereferenced.value: current_dereferenced.pointer = other_dereferenced @@ -186,9 +199,10 @@ def unify(self, other: "FeatureStructure"): for feature in other_dereferenced.content: if feature not in current_dereferenced.content: current_dereferenced.content[feature] = FeatureStructure() - current_dereferenced.content[feature].unify(other_dereferenced.content[feature]) + current_dereferenced.content[feature].unify( + other_dereferenced.content[feature]) - def subsumes(self, other: "FeatureStructure"): + def subsumes(self, other: "FeatureStructure") -> bool: """Check whether the current feature structure subsumes another one. Parameters @@ -208,16 +222,17 @@ def subsumes(self, other: "FeatureStructure"): for feature in current_dereferenced.content: if feature not in other_dereferenced.content: return False - if not current_dereferenced.content[feature].subsumes(other_dereferenced.content[feature]): + if not current_dereferenced.content[feature].subsumes( + other_dereferenced.content[feature]): return False return True - def get_all_paths(self): + def get_all_paths(self) -> List[List[str]]: """ Get the list of all path in the feature structure Returns -------- - paths : Iterable of :class:`~pyformlang.fcfg.FeatureStructure` + paths : List of string lists The paths """ @@ -230,7 +245,7 @@ def get_all_paths(self): res.append([]) return res - def __repr__(self): + def __repr__(self) -> str: res = [] for path in self.get_all_paths(): if path: @@ -242,14 +257,18 @@ def __repr__(self): return " | ".join(res) @classmethod - def from_text(cls, text: str, structure_variables: Dict[str, "FeatureStructure"] = None): + def from_text(cls, + text: str, + structure_variables: Dict[str, "FeatureStructure"] = None) \ + -> "FeatureStructure": """ Construct a feature structure from a text. Parameters ----------- text : str The text to parse - structure_variables : dict of (str, :class:`~pyformlang.fcfg.FeatureStructure`), optional + structure_variables : \ + dict of (str, :class:`~pyformlang.fcfg.FeatureStructure`), optional Existing structure variables. Returns @@ -261,10 +280,14 @@ def from_text(cls, text: str, structure_variables: Dict[str, "FeatureStructure"] if structure_variables is None: structure_variables = {} preprocessed_conditions = _preprocess_conditions(text) - return _create_feature_structure(preprocessed_conditions, structure_variables) + return _create_feature_structure( + preprocessed_conditions, structure_variables) -def _find_closing_bracket(condition, start, opening="[", closing="]"): +def _find_closing_bracket(condition: str, + start: int, + opening: str = "[", + closing: str = "]") -> int: counter = 0 pos = start for current_char in condition[start:]: @@ -282,7 +305,9 @@ class ParsingException(Exception): """When there is a problem during parsing.""" -def _preprocess_conditions(conditions, start=0, end=-1): +def _preprocess_conditions(conditions: str, + start: int = 0, + end: int = -1) -> List[Tuple[str, str, str]]: conditions = conditions.replace("->", "=") conditions = conditions.strip() res = [] @@ -304,7 +329,8 @@ def _preprocess_conditions(conditions, start=0, end=-1): end_bracket = _find_closing_bracket(conditions, pos) if end_bracket == -1: raise ParsingException() - current_value = _preprocess_conditions(conditions, pos + 1, end_bracket) + current_value = _preprocess_conditions( + conditions, pos + 1, end_bracket) pos = end_bracket + 1 elif current == "(": end_bracket = _find_closing_bracket(conditions, pos, "(", ")") @@ -322,7 +348,7 @@ def _preprocess_conditions(conditions, start=0, end=-1): reference = None pos += 1 else: - current_value += current + current_value += current # type: ignore pos += 1 if current_feature.strip(): if isinstance(current_value, str): @@ -331,7 +357,12 @@ def _preprocess_conditions(conditions, start=0, end=-1): return res -def _create_feature_structure(conditions, structure_variables, existing_references=None, feature_structure=None): +def _create_feature_structure( + conditions: Iterable[Tuple[str, str, str]], + structure_variables: Dict[str, FeatureStructure], + existing_references: Dict[str, FeatureStructure] = None, + feature_structure: FeatureStructure = None) \ + -> FeatureStructure: if existing_references is None: existing_references = {} if feature_structure is None: @@ -354,7 +385,8 @@ def _create_feature_structure(conditions, structure_variables, existing_referenc feature_structure.add_content(feature, new_fs) structure_variables[value[1:]] = new_fs elif not isinstance(value, str): - structure = _create_feature_structure(value, structure_variables, existing_references, new_fs) + structure = _create_feature_structure( + value, structure_variables, existing_references, new_fs) feature_structure.add_content(feature, structure) else: feature_structure.add_content(feature, new_fs) diff --git a/pyformlang/fcfg/state.py b/pyformlang/fcfg/state.py index a16f9b4..b7768b2 100644 --- a/pyformlang/fcfg/state.py +++ b/pyformlang/fcfg/state.py @@ -1,9 +1,15 @@ """Internal usage states""" -from typing import Tuple -from pyformlang.cfg import Variable, ParseTree -from pyformlang.fcfg.feature_production import FeatureProduction -from pyformlang.fcfg.feature_structure import FeatureStructure +from typing import Dict, List, Iterable, Tuple + +from pyformlang.cfg import Variable, Terminal, ParseTree + +from .feature_structure import FeatureStructure +from .feature_production import FeatureProduction + +Positions = Tuple[int, int, int] +StateKey = Tuple[FeatureProduction, Positions] +ProcessedStates = List[Dict[StateKey, List["State"]]] class State: @@ -11,39 +17,42 @@ class State: def __init__(self, production: FeatureProduction, - positions: Tuple[int, int, int], + positions: Positions, feature_stucture: FeatureStructure, - parse_tree: ParseTree): + parse_tree: ParseTree) -> None: self.production = production self.positions = positions self.feature_stucture = feature_stucture self.parse_tree = parse_tree - def get_key(self): + def get_key(self) -> StateKey: """Get the key of the state""" return self.production, self.positions - def is_incomplete(self): + def is_incomplete(self) -> bool: """Check if a state is incomplete""" return self.positions[2] < len(self.production.body) - def next_is_variable(self): + def next_is_variable(self) -> bool: """Check if the next symbol to process is a variable""" return isinstance(self.production.body[self.positions[2]], Variable) - def next_is_word(self, word): + def next_is_symbol(self, symbol: Terminal) -> bool: """Check if the next symbol matches a given word""" - return self.production.body[self.positions[2]] == word + return self.production.body[self.positions[2]] == symbol class StateProcessed: """For internal usage""" - def __init__(self, size: int): - self.processed = [{} for _ in range(size)] + def __init__(self, size: int) -> None: + self.processed: ProcessedStates = [{} for _ in range(size)] - def add(self, i: int, element: State): - """Add a state to the processed states. Returns if the insertion was successful or not.""" + def add(self, i: int, element: State) -> bool: + """ + Add a state to the processed states. + Returns if the insertion was successful or not. + """ key = element.get_key() if key not in self.processed[i]: self.processed[i][key] = [] @@ -53,8 +62,7 @@ def add(self, i: int, element: State): self.processed[i][key].append(element) return True - def generator(self, i: int): + def generator(self, i: int) -> Iterable[State]: """Generates a collection of all the states at a given position""" for states in self.processed[i].values(): - for state in states: - yield state + yield from states diff --git a/pyformlang/fcfg/tests/test_fcfg.py b/pyformlang/fcfg/tests/test_fcfg.py index 195e364..fa817e3 100644 --- a/pyformlang/fcfg/tests/test_fcfg.py +++ b/pyformlang/fcfg/tests/test_fcfg.py @@ -1,7 +1,6 @@ """Test a FCFG""" -from pyformlang.cfg import Variable, Terminal -from pyformlang.cfg.cfg import NotParsableException +from pyformlang.cfg import Variable, Terminal, NotParsableException from pyformlang.cfg.parse_tree import ParseTree from pyformlang.fcfg.fcfg import FCFG from pyformlang.fcfg.feature_production import FeatureProduction @@ -182,9 +181,15 @@ def test_state(self): """Test functions on states""" fs1 = FeatureStructure() fs1.add_content("NUMBER", FeatureStructure("sg")) - state0 = State(FeatureProduction(Variable("S"), [], fs1, []), (0, 0, 0), fs1, ParseTree("S")) + state0 = State(FeatureProduction(Variable("S"), [], fs1, []), + (0, 0, 0), + fs1, + ParseTree(Variable("S"))) processed = StateProcessed(1) - state1 = State(FeatureProduction(Variable("S"), [], fs1, []), (0, 0, 0), fs1, ParseTree("S")) + state1 = State(FeatureProduction(Variable("S"), [], fs1, []), + (0, 0, 0), + fs1, + ParseTree(Variable("S"))) assert processed.add(0, state0) assert not processed.add(0, state1) diff --git a/pyformlang/regular_expression/__init__.py b/pyformlang/regular_expression/__init__.py index 04f3454..5b8e6fe 100644 --- a/pyformlang/regular_expression/__init__.py +++ b/pyformlang/regular_expression/__init__.py @@ -20,8 +20,7 @@ """ from .regex import Regex -from .python_regex import PythonRegex -from ..objects.regex_objects import MisformedRegexError +from .python_regex import PythonRegex, MisformedRegexError __all__ = ["Regex", "PythonRegex", "MisformedRegexError"] From 4dc06ce68764400ae123e485471bf84992c78d7a Mon Sep 17 00:00:00 2001 From: bygu4 Date: Tue, 3 Dec 2024 16:52:09 +0300 Subject: [PATCH 16/30] correct parsing with cyk table --- pyformlang/cfg/cyk_table.py | 5 +++++ pyformlang/cfg/grammar.py | 5 +++++ pyformlang/cfg/recursive_decent_parser.py | 2 +- pyformlang/pda/pda.py | 16 ++++++++-------- pyformlang/pda/transition_function.py | 17 ++++++++++------- 5 files changed, 29 insertions(+), 16 deletions(-) diff --git a/pyformlang/cfg/cyk_table.py b/pyformlang/cfg/cyk_table.py index 192b2f8..24abae4 100644 --- a/pyformlang/cfg/cyk_table.py +++ b/pyformlang/cfg/cyk_table.py @@ -25,6 +25,7 @@ class CYKTable: def __init__(self, grammar: Grammar, word: List[Terminal]) -> None: self._normal_form: Grammar = grammar.to_normal_form() + self._generate_epsilon: bool = grammar.generate_epsilon() self._word: List[Terminal] = word self._productions_d: ProductionsDict = {} self._cyk_table: Table = {} @@ -89,6 +90,8 @@ def generate_word(self) -> bool: is_generated : bool """ + if not self._word: + return self._generate_epsilon return self._normal_form.start_symbol \ in self._cyk_table[(0, len(self._word))] @@ -111,6 +114,8 @@ def get_parse_tree(self) -> ParseTree: raise NotParsableException if not self.generate_word(): raise DerivationDoesNotExist + if not self._word: + return ParseTree(self._normal_form.start_symbol) root = [ x for x in self._cyk_table[(0, len(self._word))] diff --git a/pyformlang/cfg/grammar.py b/pyformlang/cfg/grammar.py index ac2aa3b..97e569b 100644 --- a/pyformlang/cfg/grammar.py +++ b/pyformlang/cfg/grammar.py @@ -68,3 +68,8 @@ def to_normal_form(self) -> "Grammar": def is_normal_form(self) -> bool: """ Whether the grammar is in normal form """ raise NotImplementedError + + @abstractmethod + def generate_epsilon(self) -> bool: + """ Whether the grammar generates epsilon or not """ + raise NotImplementedError diff --git a/pyformlang/cfg/recursive_decent_parser.py b/pyformlang/cfg/recursive_decent_parser.py index 4e3f602..ad6e43c 100644 --- a/pyformlang/cfg/recursive_decent_parser.py +++ b/pyformlang/cfg/recursive_decent_parser.py @@ -66,7 +66,7 @@ def get_parse_tree(self, word: Iterable[Hashable], left: bool = True) \ raise NotParsableException word = [to_terminal(x) for x in word if x != Epsilon()] parse_tree = ParseTree(self._cfg.start_symbol) - starting_expansion = [((CFGObject)(self._cfg.start_symbol), parse_tree)] + starting_expansion: Expansion = [(self._cfg.start_symbol, parse_tree)] if self._get_parse_tree_sub(word, starting_expansion, left): return parse_tree raise NotParsableException diff --git a/pyformlang/pda/pda.py b/pyformlang/pda/pda.py index 84afffd..3ce11db 100644 --- a/pyformlang/pda/pda.py +++ b/pyformlang/pda/pda.py @@ -519,14 +519,14 @@ def from_cfg(cls, cfg: CFG) -> "PDA": The equivalent PDA when accepting on empty stack """ state = State("q") - pda_object_converter = PDASymbolConverter(cfg.terminals, cfg.variables) - input_symbols = {pda_object_converter.get_symbol_from(x) + pda_symbol_converter = PDASymbolConverter(cfg.terminals, cfg.variables) + input_symbols = {pda_symbol_converter.get_symbol_from(x) for x in cfg.terminals} - stack_alphabet = {pda_object_converter.get_stack_symbol_from(x) + stack_alphabet = {pda_symbol_converter.get_stack_symbol_from(x) for x in cfg.terminals.union(cfg.variables)} start_stack_symbol = None if cfg.start_symbol: - start_stack_symbol = pda_object_converter.get_stack_symbol_from( + start_stack_symbol = pda_symbol_converter.get_stack_symbol_from( cfg.start_symbol) new_pda = PDA(states={state}, input_symbols=input_symbols, @@ -535,16 +535,16 @@ def from_cfg(cls, cfg: CFG) -> "PDA": start_stack_symbol=start_stack_symbol) for production in cfg.productions: new_pda.add_transition(state, PDAEpsilon(), - pda_object_converter.get_stack_symbol_from( + pda_symbol_converter.get_stack_symbol_from( production.head), state, - [pda_object_converter.get_stack_symbol_from( + [pda_symbol_converter.get_stack_symbol_from( x) for x in production.body]) for terminal in cfg.terminals: new_pda.add_transition(state, - pda_object_converter.get_symbol_from( + pda_symbol_converter.get_symbol_from( terminal), - pda_object_converter.get_stack_symbol_from( + pda_symbol_converter.get_stack_symbol_from( terminal), state, []) return new_pda diff --git a/pyformlang/pda/transition_function.py b/pyformlang/pda/transition_function.py index f1d62e5..c8d3743 100644 --- a/pyformlang/pda/transition_function.py +++ b/pyformlang/pda/transition_function.py @@ -67,21 +67,24 @@ def copy(self) -> "TransitionFunction": new_tf = TransitionFunction() for temp_in, transition in self._transitions.items(): for temp_out in transition: - new_tf.add_transition(temp_in[0], temp_in[1], temp_in[2], - *temp_out) + new_tf.add_transition(*temp_in, *temp_out) return new_tf - def __iter__(self) -> Iterator[Transition]: - for key, values in self._transitions.items(): - for value in values: - yield key, value - def __call__(self, s_from: State, input_symbol: Symbol, stack_from: StackSymbol) -> TransitionValues: return self._transitions.get((s_from, input_symbol, stack_from), set()) + def __contains__(self, transition: Transition) -> bool: + key, value = transition + return value in self(*key) + + def __iter__(self) -> Iterator[Transition]: + for key, values in self._transitions.items(): + for value in values: + yield key, value + def to_dict(self) -> Dict[TransitionKey, TransitionValues]: """Get the dictionary representation of the transitions""" return deepcopy(self._transitions) From 291d9d03477d35773763ee1434afffbe9ae4df88 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Tue, 3 Dec 2024 18:24:57 +0300 Subject: [PATCH 17/30] generalize some cfg methods, make from_text method generic to use in fcfg --- pyformlang/cfg/__init__.py | 11 +- pyformlang/cfg/cfg.py | 70 ------------ pyformlang/cfg/grammar.py | 107 ++++++++++++++++-- .../cfg/tests/test_recursive_decent_parser.py | 5 +- pyformlang/fcfg/__init__.py | 7 +- pyformlang/fcfg/fcfg.py | 3 +- pyformlang/fcfg/tests/test_fcfg.py | 3 +- 7 files changed, 116 insertions(+), 90 deletions(-) diff --git a/pyformlang/cfg/__init__.py b/pyformlang/cfg/__init__.py index 411cc50..d5b89db 100644 --- a/pyformlang/cfg/__init__.py +++ b/pyformlang/cfg/__init__.py @@ -21,18 +21,15 @@ """ from .cfg import CFG, CFGObject, Variable, Terminal, Epsilon, Production -from .parse_tree import ParseTree, NotParsableException -from .llone_parser import LLOneParser -from .recursive_decent_parser import RecursiveDecentParser +from .parse_tree import ParseTree +from .cyk_table import DerivationDoesNotExist __all__ = ["CFGObject", "Variable", "Terminal", + "Epsilon", "Production", "CFG", - "Epsilon", "ParseTree", - "LLOneParser", - "RecursiveDecentParser", - "NotParsableException"] + "DerivationDoesNotExist"] diff --git a/pyformlang/cfg/cfg.py b/pyformlang/cfg/cfg.py index 99ecc45..aa669ab 100644 --- a/pyformlang/cfg/cfg.py +++ b/pyformlang/cfg/cfg.py @@ -928,64 +928,6 @@ def is_finite(self) -> bool: return True return False - def to_text(self) -> str: - """ - Turns the grammar into its string representation. This might lose some\ - type information and the start_symbol. - Returns - ------- - text : str - The grammar as a string. - """ - res = [] - for production in self._productions: - res.append(str(production.head) + " -> " + - " ".join([x.to_text() for x in production.body])) - return "\n".join(res) + "\n" - - @classmethod - def from_text(cls, - text: str, - start_symbol: Optional[Hashable] = Variable("S")) -> "CFG": - """ - Read a context free grammar from a text. - The text contains one rule per line. - The structure of a production is: - head -> body1 | body2 | ... | bodyn - where | separates the bodies. - A variable (or non terminal) begins by a capital letter. - A terminal begins by a non-capital character - Terminals and Variables are separated by spaces. - An epsilon symbol can be represented by epsilon, $, ε, ϵ or Є. - If you want to have a variable name starting with a non-capital \ - letter or a terminal starting with a capital letter, you can \ - explicitly give the type of your symbol with "VAR:yourVariableName" \ - or "TER:yourTerminalName" (with the quotation marks). For example: - S -> "TER:John" "VAR:d" a b - - Parameters - ---------- - text : str - The text of transform - start_symbol : str, optional - The start symbol, S by default - - Returns - ------- - cfg : :class:`~pyformlang.cfg.CFG` - A context free grammar. - """ - variables = set() - productions = set() - terminals = set() - for line in text.splitlines(): - line = line.strip() - if not line: - continue - cls._read_line(line, productions, terminals, variables) - return cls(variables=variables, terminals=terminals, - productions=productions, start_symbol=start_symbol) - @classmethod def _read_line(cls, line: str, @@ -1017,15 +959,3 @@ def _read_line(cls, terminals.add(body_ter) body.append(body_ter) productions.add(Production(head, body)) - - def is_normal_form(self) -> bool: - """ - Tells is the current grammar is in Chomsky Normal Form or not - - Returns - ------- - is_normal_form : bool - If the current grammar is in CNF - """ - return all( - production.is_normal_form() for production in self._productions) diff --git a/pyformlang/cfg/grammar.py b/pyformlang/cfg/grammar.py index 97e569b..fd8eda1 100644 --- a/pyformlang/cfg/grammar.py +++ b/pyformlang/cfg/grammar.py @@ -1,15 +1,22 @@ """ Basic grammar representation """ -from typing import Set, Optional +from typing import Set, AbstractSet, Iterable, Optional, Hashable, TypeVar, Type from abc import abstractmethod from ..objects.cfg_objects import Variable, Terminal, Production +GrammarT = TypeVar("GrammarT", bound="Grammar") + class Grammar: """ Basic grammar representation """ - def __init__(self) -> None: + @abstractmethod + def __init__(self, + variables: AbstractSet[Hashable] = None, + terminals: AbstractSet[Hashable] = None, + start_symbol: Hashable = None, + productions: Iterable[Production] = None) -> None: self._variables: Set[Variable] self._terminals: Set[Terminal] self._start_symbol: Optional[Variable] @@ -60,16 +67,100 @@ def start_symbol(self) -> Optional[Variable]: return self._start_symbol @abstractmethod - def to_normal_form(self) -> "Grammar": - """ Gets some normal form of the grammar""" + def generate_epsilon(self) -> bool: + """ Whether the grammar generates epsilon or not """ raise NotImplementedError @abstractmethod - def is_normal_form(self) -> bool: - """ Whether the grammar is in normal form """ + def to_normal_form(self) -> "Grammar": + """ Gets Chomsky normal form of the grammar """ raise NotImplementedError + def is_normal_form(self) -> bool: + """ + Whether the current grammar is in Chomsky Normal Form + + Returns + ------- + is_normal_form : bool + If the current grammar is in CNF + """ + return all( + production.is_normal_form() for production in self._productions) + + def to_text(self) -> str: + """ + Turns the grammar into its string representation. This might lose some\ + type information and the start_symbol. + Returns + ------- + text : str + The grammar as a string. + """ + res = [] + for production in self._productions: + res.append(str(production.head) + " -> " + + " ".join([x.to_text() for x in production.body])) + return "\n".join(res) + "\n" + + @classmethod + def from_text( + cls: Type[GrammarT], + text: str, + start_symbol: Optional[Hashable] = Variable("S")) \ + -> GrammarT: + """ + Read a context free grammar from a text. + The text contains one rule per line. + The structure of a production is: + head -> body1 | body2 | ... | bodyn + where | separates the bodies. + A variable (or non terminal) begins by a capital letter. + A terminal begins by a non-capital character + Terminals and Variables are separated by spaces. + An epsilon symbol can be represented by epsilon, $, ε, ϵ or Є. + If you want to have a variable name starting with a non-capital \ + letter or a terminal starting with a capital letter, you can \ + explicitly give the type of your symbol with "VAR:yourVariableName" \ + or "TER:yourTerminalName" (with the quotation marks). For example: + S -> "TER:John" "VAR:d" a b + + Parameters + ---------- + text : str + The text of transform + start_symbol : str, optional + The start symbol, S by default + + Returns + ------- + cfg : :class:`~pyformlang.cfg.CFG` + A context free grammar. + """ + variables = set() + productions = set() + terminals = set() + cls._read_text(text, productions, terminals, variables) + return cls(variables=variables, terminals=terminals, + productions=productions, start_symbol=start_symbol) + + @classmethod + def _read_text(cls, + text: str, + productions: Set[Production], + terminals: Set[Terminal], + variables: Set[Variable]) -> None: + for line in text.splitlines(): + line = line.strip() + if not line: + continue + cls._read_line(line, productions, terminals, variables) + + @classmethod @abstractmethod - def generate_epsilon(self) -> bool: - """ Whether the grammar generates epsilon or not """ + def _read_line(cls, + line: str, + productions: Set[Production], + terminals: Set[Terminal], + variables: Set[Variable]) -> None: raise NotImplementedError diff --git a/pyformlang/cfg/tests/test_recursive_decent_parser.py b/pyformlang/cfg/tests/test_recursive_decent_parser.py index 23384b1..18ce9e2 100644 --- a/pyformlang/cfg/tests/test_recursive_decent_parser.py +++ b/pyformlang/cfg/tests/test_recursive_decent_parser.py @@ -1,8 +1,9 @@ # pylint: disable=missing-module-docstring # pylint: disable=missing-class-docstring # pylint: disable=missing-function-docstring -from pyformlang.cfg import CFG, Variable, Terminal, NotParsableException -from pyformlang.cfg.recursive_decent_parser import RecursiveDecentParser +from pyformlang.cfg import CFG, Variable, Terminal +from pyformlang.cfg.recursive_decent_parser import \ + RecursiveDecentParser, NotParsableException import pytest diff --git a/pyformlang/fcfg/__init__.py b/pyformlang/fcfg/__init__.py index fb844e4..66694b5 100644 --- a/pyformlang/fcfg/__init__.py +++ b/pyformlang/fcfg/__init__.py @@ -15,7 +15,7 @@ """ -from .fcfg import FCFG +from .fcfg import FCFG, CFGObject, Variable, Terminal, Epsilon, ParseTree from .feature_production import FeatureProduction from .feature_structure import FeatureStructure, \ ContentAlreadyExistsException, \ @@ -26,6 +26,11 @@ __all__ = ["FCFG", "FeatureStructure", "FeatureProduction", + "CFGObject", + "Variable", + "Terminal", + "Epsilon", + "ParseTree", "ContentAlreadyExistsException", "FeatureStructuresNotCompatibleException", "PathDoesNotExistsException"] diff --git a/pyformlang/fcfg/fcfg.py b/pyformlang/fcfg/fcfg.py index 8993f51..80d8040 100644 --- a/pyformlang/fcfg/fcfg.py +++ b/pyformlang/fcfg/fcfg.py @@ -4,8 +4,9 @@ from string import ascii_uppercase from pyformlang.cfg import CFG, CFGObject, \ - Variable, Terminal, Epsilon, ParseTree, Production, NotParsableException + Variable, Terminal, Epsilon, ParseTree, Production from pyformlang.cfg.cfg import is_special_text, EPSILON_SYMBOLS +from pyformlang.cfg.llone_parser import NotParsableException from .feature_structure import FeatureStructure, \ FeatureStructuresNotCompatibleException diff --git a/pyformlang/fcfg/tests/test_fcfg.py b/pyformlang/fcfg/tests/test_fcfg.py index fa817e3..e3a0eff 100644 --- a/pyformlang/fcfg/tests/test_fcfg.py +++ b/pyformlang/fcfg/tests/test_fcfg.py @@ -1,7 +1,8 @@ """Test a FCFG""" -from pyformlang.cfg import Variable, Terminal, NotParsableException +from pyformlang.cfg import Variable, Terminal from pyformlang.cfg.parse_tree import ParseTree +from pyformlang.cfg.llone_parser import NotParsableException from pyformlang.fcfg.fcfg import FCFG from pyformlang.fcfg.feature_production import FeatureProduction from pyformlang.fcfg.feature_structure import FeatureStructure From 0e600fc3a7dce56fa1dd6020d61edbded98a9690 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Tue, 3 Dec 2024 19:42:28 +0300 Subject: [PATCH 18/30] correct contains method of pda --- pyformlang/pda/__init__.py | 2 ++ pyformlang/pda/pda.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pyformlang/pda/__init__.py b/pyformlang/pda/__init__.py index 2eee7d1..201cffc 100644 --- a/pyformlang/pda/__init__.py +++ b/pyformlang/pda/__init__.py @@ -21,10 +21,12 @@ """ from .pda import PDA +from .transition_function import TransitionFunction from ..objects.pda_objects import State, Symbol, StackSymbol, Epsilon __all__ = ["PDA", + "TransitionFunction", "State", "Symbol", "StackSymbol", diff --git a/pyformlang/pda/pda.py b/pyformlang/pda/pda.py index 3ce11db..7cd3318 100644 --- a/pyformlang/pda/pda.py +++ b/pyformlang/pda/pda.py @@ -270,7 +270,7 @@ def __contains__(self, transition: InputTransition) -> bool: input_symbol = to_symbol(input_symbol) stack_from = to_stack_symbol(stack_from) s_to = to_state(s_to) - stack_to = [to_stack_symbol(x) for x in stack_to] + stack_to = tuple(to_stack_symbol(x) for x in stack_to) return (s_to, stack_to) in self(s_from, input_symbol, stack_from) def __iter__(self) -> Iterator[Transition]: From 1039177113fa310e4e7edca3fefa5da4424972ce Mon Sep 17 00:00:00 2001 From: bygu4 Date: Tue, 3 Dec 2024 20:22:32 +0300 Subject: [PATCH 19/30] add copying of cfg, correct cfg normal from transformation --- pyformlang/cfg/cfg.py | 49 ++++++++++++------------------ pyformlang/cfg/grammar.py | 20 ++++++++++-- pyformlang/cfg/tests/test_cfg.py | 24 ++++++++++----- pyformlang/fcfg/fcfg.py | 4 +++ pyformlang/fcfg/tests/test_fcfg.py | 47 ++++++++++++++++++---------- 5 files changed, 89 insertions(+), 55 deletions(-) diff --git a/pyformlang/cfg/cfg.py b/pyformlang/cfg/cfg.py index aa669ab..c896c8b 100644 --- a/pyformlang/cfg/cfg.py +++ b/pyformlang/cfg/cfg.py @@ -2,8 +2,7 @@ from string import ascii_uppercase from copy import deepcopy -from typing import Dict, List, Set, AbstractSet, \ - Iterable, Tuple, Optional, Hashable +from typing import Dict, List, Set, AbstractSet, Iterable, Tuple, Hashable from networkx import DiGraph, find_cycle from networkx.exception import NetworkXNoCycle @@ -69,9 +68,6 @@ def __init__(self, self._productions = productions or set() for production in self._productions: self.__initialize_production_in_cfg(production) - self._normal_form: Optional[CFG] = None - self._generating_symbols: Set[CFGObject] = set() - self._nullable_symbols: Set[CFGObject] = set() self._impacts: Dict[CFGObject, List[Tuple[CFGObject, int]]] = {} self._remaining_lists: Dict[CFGObject, List[int]] = {} self._added_impacts: Set[CFGObject] = set() @@ -84,6 +80,10 @@ def __initialize_production_in_cfg(self, production: Production) -> None: elif isinstance(cfg_object, Variable): self._variables.add(cfg_object) + def copy(self) -> "CFG": + """ Copies the Context Free Grammar """ + return CFG._copy_from(self) + def get_generating_symbols(self) -> Set[CFGObject]: """ Gives the objects which are generating in the CFG @@ -92,9 +92,17 @@ def get_generating_symbols(self) -> Set[CFGObject]: generating_symbols : set of :class:`~pyformlang.cfg.CFGObject` The generating symbols of the CFG """ - if not self._generating_symbols: - self._generating_symbols = self._get_generating_or_nullable(False) - return self._generating_symbols + return self._get_generating_or_nullable(False) + + def get_nullable_symbols(self) -> Set[CFGObject]: + """ Gives the objects which are nullable in the CFG + + Returns + ---------- + nullable_symbols : set of :class:`~pyformlang.cfg.CFGObject` + The nullable symbols of the CFG + """ + return self._get_generating_or_nullable(True) def _get_generating_or_nullable(self, nullable: bool = False) \ -> Set[CFGObject]: @@ -236,18 +244,6 @@ def remove_useless_symbols(self) -> "CFG": new_ter = new_ter.intersection(reachables) return CFG(new_var, new_ter, self._start_symbol, productions) - def get_nullable_symbols(self) -> Set[CFGObject]: - """ Gives the objects which are nullable in the CFG - - Returns - ---------- - nullable_symbols : set of :class:`~pyformlang.cfg.CFGObject` - The nullable symbols of the CFG - """ - if not self._nullable_symbols: - self._nullable_symbols = self._get_generating_or_nullable(True) - return self._nullable_symbols - def remove_epsilon(self) -> "CFG": """ Removes the epsilon of a cfg @@ -398,8 +394,6 @@ def to_normal_form(self) -> "CFG": contains the same word as before, except the epsilon word. """ - if self._normal_form is not None: - return self._normal_form nullables = self.get_nullable_symbols() unit_pairs = self.get_unit_pairs() generating = self.get_generating_symbols() @@ -410,23 +404,18 @@ def to_normal_form(self) -> "CFG": len(reachables) != len(self._variables) + len(self._terminals)): if len(self._productions) == 0: - self._normal_form = self return self new_cfg = self.remove_useless_symbols() \ .remove_epsilon() \ .remove_useless_symbols() \ .eliminate_unit_productions() \ .remove_useless_symbols() - cfg = new_cfg.to_normal_form() - self._normal_form = cfg - return cfg + return new_cfg.to_normal_form() # Remove terminals from body new_productions = self._get_productions_with_only_single_terminals() new_productions = self._decompose_productions(new_productions) - cfg = CFG(start_symbol=self._start_symbol, - productions=set(new_productions)) - self._normal_form = cfg - return cfg + return CFG(start_symbol=self._start_symbol, + productions=set(new_productions)) def substitute(self, substitution: Dict[Terminal, "CFG"]) -> "CFG": """ Substitutes CFG to terminals in the current CFG diff --git a/pyformlang/cfg/grammar.py b/pyformlang/cfg/grammar.py index fd8eda1..dc32c2d 100644 --- a/pyformlang/cfg/grammar.py +++ b/pyformlang/cfg/grammar.py @@ -66,6 +66,20 @@ def start_symbol(self) -> Optional[Variable]: """ return self._start_symbol + @abstractmethod + def copy(self: GrammarT) -> GrammarT: + """ Copies the grammar """ + + def __copy__(self: GrammarT) -> GrammarT: + return self.copy() + + @classmethod + def _copy_from(cls: Type[GrammarT], other: GrammarT) -> GrammarT: + return cls(variables=other.variables, + terminals=other.terminals, + productions=other.productions, + start_symbol=other.start_symbol) + @abstractmethod def generate_epsilon(self) -> bool: """ Whether the grammar generates epsilon or not """ @@ -141,8 +155,10 @@ def from_text( productions = set() terminals = set() cls._read_text(text, productions, terminals, variables) - return cls(variables=variables, terminals=terminals, - productions=productions, start_symbol=start_symbol) + return cls(variables=variables, + terminals=terminals, + productions=productions, + start_symbol=start_symbol) @classmethod def _read_text(cls, diff --git a/pyformlang/cfg/tests/test_cfg.py b/pyformlang/cfg/tests/test_cfg.py index 361184e..c38d66a 100644 --- a/pyformlang/cfg/tests/test_cfg.py +++ b/pyformlang/cfg/tests/test_cfg.py @@ -225,7 +225,7 @@ def test_cnf(self): assert len(new_cfg.productions) == 41 assert not cfg.is_empty() new_cfg2 = cfg.to_normal_form() - assert new_cfg == new_cfg2 + assert new_cfg.productions == new_cfg2.productions cfg2 = CFG(start_symbol=var_e, productions={Production(var_e, [var_t])}) @@ -837,16 +837,26 @@ def test_to_text_epsilon(self): cfg = CFG.from_text("S -> a S b | a b epsilon") assert cfg.contains(["a", "b"]) + def test_copy(self): + text_example = get_example_text_duplicate() + cfg = CFG.from_text(text_example) + cfg_copy = cfg.copy() + assert cfg.variables == cfg_copy.variables + assert cfg.terminals == cfg_copy.terminals + assert cfg.productions == cfg_copy.productions + assert cfg.start_symbol == cfg_copy.start_symbol + assert cfg is not cfg_copy + def get_example_text_duplicate(): """ Duplicate text """ text = """ - E -> T E’ - E’ -> + T E’ | Є - T -> F T’ - T’ -> * F T’ | Є - F -> ( E ) | id - """ + E -> T E’ + E’ -> + T E’ | Є + T -> F T’ + T’ -> * F T’ | Є + F -> ( E ) | id + """ return text diff --git a/pyformlang/fcfg/fcfg.py b/pyformlang/fcfg/fcfg.py index 80d8040..e924a35 100644 --- a/pyformlang/fcfg/fcfg.py +++ b/pyformlang/fcfg/fcfg.py @@ -83,6 +83,10 @@ def __predictor(self, if processed.add(end_idx, new_state): chart[end_idx].append(new_state) + def copy(self) -> "FCFG": + """ Copies the FCFG """ + return FCFG._copy_from(self) + def contains(self, word: Iterable[Hashable]) -> bool: """ Gives the membership of a word to the grammar diff --git a/pyformlang/fcfg/tests/test_fcfg.py b/pyformlang/fcfg/tests/test_fcfg.py index e3a0eff..ed5c8d9 100644 --- a/pyformlang/fcfg/tests/test_fcfg.py +++ b/pyformlang/fcfg/tests/test_fcfg.py @@ -10,6 +10,25 @@ import pytest +@pytest.fixture +def fcfg_text() -> str: + return """ + S -> NP[AGREEMENT=?a] VP[AGREEMENT=?a] + S -> Aux[AGREEMENT=?a] NP[AGREEMENT=?a] VP + NP[AGREEMENT=?a] -> Det[AGREEMENT=?a] Nominal[AGREEMENT=?a] + Aux[AGREEMENT=[NUMBER=pl, PERSON=3rd]] -> do + Aux[AGREEMENT=[NUMBER=sg, PERSON=3rd]] -> does + Det[AGREEMENT=[NUMBER=sg]] -> this + Det[AGREEMENT=[NUMBER=pl]] -> these + "VAR:VP[AGREEMENT=?a]" -> Verb[AGREEMENT=?a] + Verb[AGREEMENT=[NUMBER=pl]] -> serve + Verb[AGREEMENT=[NUMBER=sg, PERSON=3rd]] -> "TER:serves" + Noun[AGREEMENT=[NUMBER=sg]] -> flight + Noun[AGREEMENT=[NUMBER=pl]] -> flights + Nominal[AGREEMENT=?a] -> Noun[AGREEMENT=?a] + """ + + class TestFCFG: """Test a FCFG""" @@ -194,25 +213,21 @@ def test_state(self): assert processed.add(0, state0) assert not processed.add(0, state1) - def test_from_text(self): + def test_from_text(self, fcfg_text: str): """Test containment from a text description""" - fcfg = FCFG.from_text(""" - S -> NP[AGREEMENT=?a] VP[AGREEMENT=?a] - S -> Aux[AGREEMENT=?a] NP[AGREEMENT=?a] VP - NP[AGREEMENT=?a] -> Det[AGREEMENT=?a] Nominal[AGREEMENT=?a] - Aux[AGREEMENT=[NUMBER=pl, PERSON=3rd]] -> do - Aux[AGREEMENT=[NUMBER=sg, PERSON=3rd]] -> does - Det[AGREEMENT=[NUMBER=sg]] -> this - Det[AGREEMENT=[NUMBER=pl]] -> these - "VAR:VP[AGREEMENT=?a]" -> Verb[AGREEMENT=?a] - Verb[AGREEMENT=[NUMBER=pl]] -> serve - Verb[AGREEMENT=[NUMBER=sg, PERSON=3rd]] -> "TER:serves" - Noun[AGREEMENT=[NUMBER=sg]] -> flight - Noun[AGREEMENT=[NUMBER=pl]] -> flights - Nominal[AGREEMENT=?a] -> Noun[AGREEMENT=?a] - """) + fcfg = FCFG.from_text(fcfg_text) self._sub_tests_contains1(fcfg) parse_tree = fcfg.get_parse_tree(["this", "flight", "serves"]) with pytest.raises(NotParsableException): fcfg.get_parse_tree(["these", "flight", "serves"]) assert "Det" in str(parse_tree) + + def test_copy(self, fcfg_text: str): + """Test copying of FCFG""" + fcfg = FCFG.from_text(fcfg_text) + fcfg_copy = fcfg.copy() + assert fcfg.variables == fcfg_copy.variables + assert fcfg.terminals == fcfg_copy.terminals + assert fcfg.productions == fcfg_copy.productions + assert fcfg.start_symbol == fcfg_copy.start_symbol + assert fcfg is not fcfg_copy From 704a5084edcb27ac7ba3a085138068887a9ae504 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Tue, 3 Dec 2024 20:43:48 +0300 Subject: [PATCH 20/30] add pda copying --- pyformlang/cfg/cfg.py | 8 ++--- pyformlang/cfg/grammar.py | 1 + pyformlang/fcfg/fcfg.py | 8 ++--- pyformlang/pda/pda.py | 13 ++++++++ pyformlang/pda/tests/test_pda.py | 43 +++++++++++++++++++-------- pyformlang/pda/transition_function.py | 3 ++ 6 files changed, 56 insertions(+), 20 deletions(-) diff --git a/pyformlang/cfg/cfg.py b/pyformlang/cfg/cfg.py index c896c8b..020eb17 100644 --- a/pyformlang/cfg/cfg.py +++ b/pyformlang/cfg/cfg.py @@ -80,10 +80,6 @@ def __initialize_production_in_cfg(self, production: Production) -> None: elif isinstance(cfg_object, Variable): self._variables.add(cfg_object) - def copy(self) -> "CFG": - """ Copies the Context Free Grammar """ - return CFG._copy_from(self) - def get_generating_symbols(self) -> Set[CFGObject]: """ Gives the objects which are generating in the CFG @@ -917,6 +913,10 @@ def is_finite(self) -> bool: return True return False + def copy(self) -> "CFG": + """ Copies the Context Free Grammar """ + return CFG._copy_from(self) + @classmethod def _read_line(cls, line: str, diff --git a/pyformlang/cfg/grammar.py b/pyformlang/cfg/grammar.py index dc32c2d..2a28e1c 100644 --- a/pyformlang/cfg/grammar.py +++ b/pyformlang/cfg/grammar.py @@ -69,6 +69,7 @@ def start_symbol(self) -> Optional[Variable]: @abstractmethod def copy(self: GrammarT) -> GrammarT: """ Copies the grammar """ + raise NotImplementedError def __copy__(self: GrammarT) -> GrammarT: return self.copy() diff --git a/pyformlang/fcfg/fcfg.py b/pyformlang/fcfg/fcfg.py index e924a35..5b502a7 100644 --- a/pyformlang/fcfg/fcfg.py +++ b/pyformlang/fcfg/fcfg.py @@ -83,10 +83,6 @@ def __predictor(self, if processed.add(end_idx, new_state): chart[end_idx].append(new_state) - def copy(self) -> "FCFG": - """ Copies the FCFG """ - return FCFG._copy_from(self) - def contains(self, word: Iterable[Hashable]) -> bool: """ Gives the membership of a word to the grammar @@ -168,6 +164,10 @@ def _get_final_state(self, word: List[Terminal]) -> Optional[State]: return state return None + def copy(self) -> "FCFG": + """ Copies the FCFG """ + return FCFG._copy_from(self) + @classmethod def _read_line(cls, line: str, diff --git a/pyformlang/pda/pda.py b/pyformlang/pda/pda.py index 7cd3318..a33c054 100644 --- a/pyformlang/pda/pda.py +++ b/pyformlang/pda/pda.py @@ -756,6 +756,19 @@ def write_as_dot(self, filename: str) -> None: """ write_dot(self.to_networkx(), filename) + def copy(self) -> "PDA": + """ Copies the Push-down Automaton """ + return PDA(self.states, + self.input_symbols, + self.stack_symbols, + self._transition_function.copy(), + self.start_state, + self.start_stack_symbol, + self.final_states) + + def __copy__(self) -> "PDA": + return self.copy() + @staticmethod def __add_start_state_to_graph(graph: MultiDiGraph, state: State) -> None: diff --git a/pyformlang/pda/tests/test_pda.py b/pyformlang/pda/tests/test_pda.py index 84e5254..89c81ba 100644 --- a/pyformlang/pda/tests/test_pda.py +++ b/pyformlang/pda/tests/test_pda.py @@ -1,4 +1,6 @@ """ Tests the PDA """ + +import pytest from os import path from pyformlang.pda import PDA, State, StackSymbol, Symbol, Epsilon @@ -8,6 +10,20 @@ from pyformlang.regular_expression import Regex +@pytest.fixture +def pda_example() -> PDA: + pda = PDA() + pda.add_transitions([ + ("q0", "0", "Z0", "q1", ("Z1", "Z0")), + ("q1", "1", "Z1", "q2", []), + ("q0", "epsilon", "Z1", "q2", []) + ]) + pda.set_start_state("q0") + pda.set_start_stack_symbol("Z0") + pda.add_final_state("q2") + return pda + + class TestPDA: """ Tests the pushdown automata """ @@ -325,19 +341,9 @@ def test_intersection_regex(self): cfg = pda_es.to_cfg() assert not cfg - def test_pda_paper(self): + def test_pda_paper(self, pda_example: PDA): """ Code in the paper """ - pda = PDA() - pda.add_transitions( - [ - ("q0", "0", "Z0", "q1", ("Z1", "Z0")), - ("q1", "1", "Z1", "q2", []), - ("q0", "epsilon", "Z1", "q2", []) - ] - ) - pda.set_start_state("q0") - pda.set_start_stack_symbol("Z0") - pda.add_final_state("q2") + pda = pda_example pda_final_state = pda.to_final_state() assert pda_final_state is not None cfg = pda.to_empty_stack().to_cfg() @@ -352,3 +358,16 @@ def test_pda_paper(self): pda_networkx.write_as_dot("pda.dot") assert cfg.contains(["0", "1"]) assert path.exists("pda.dot") + + def test_copy(self, pda_example: PDA): + """ Tests the copying of PDA """ + pda = pda_example + pda_copy = pda.copy() + assert pda.states == pda_copy.states + assert pda.input_symbols == pda_copy.input_symbols + assert pda.stack_symbols == pda_copy.stack_symbols + assert pda.to_dict() == pda_copy.to_dict() + assert pda.start_state == pda_copy.start_state + assert pda.start_stack_symbol == pda_copy.start_stack_symbol + assert pda.final_states == pda_copy.final_states + assert pda is not pda_copy diff --git a/pyformlang/pda/transition_function.py b/pyformlang/pda/transition_function.py index c8d3743..ff3dcf4 100644 --- a/pyformlang/pda/transition_function.py +++ b/pyformlang/pda/transition_function.py @@ -70,6 +70,9 @@ def copy(self) -> "TransitionFunction": new_tf.add_transition(*temp_in, *temp_out) return new_tf + def __copy__(self) -> "TransitionFunction": + return self.copy() + def __call__(self, s_from: State, input_symbol: Symbol, From fd9ae41c96317d53dba16dec6ff30dc31e32962d Mon Sep 17 00:00:00 2001 From: bygu4 Date: Tue, 3 Dec 2024 21:07:29 +0300 Subject: [PATCH 21/30] use Hashable in feature_structure --- pyformlang/fcfg/feature_structure.py | 66 ++++++++++++++-------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/pyformlang/fcfg/feature_structure.py b/pyformlang/fcfg/feature_structure.py index 7dde5a9..c7e7651 100644 --- a/pyformlang/fcfg/feature_structure.py +++ b/pyformlang/fcfg/feature_structure.py @@ -1,6 +1,6 @@ """Feature Structure""" -from typing import Dict, List, Iterable, Tuple, Optional, Any +from typing import Dict, List, Iterable, Tuple, Optional, Hashable class ContentAlreadyExistsException(Exception): @@ -25,40 +25,11 @@ class FeatureStructure: """ - def __init__(self, value: Any = None) -> None: + def __init__(self, value: Hashable = None) -> None: self._content: Dict[str, FeatureStructure] = {} self._value = value self._pointer: Optional[FeatureStructure] = None - def copy(self, already_copied: Dict["FeatureStructure", - "FeatureStructure"] = None) \ - -> "FeatureStructure": - """Copies the current feature structure - - Parameters - ---------- - already_copied : dict - A dictionary containing the parts already copied. - For internal usage. - - Returns - ---------- - fs : :class:`~pyformlang.fcfg.FeatureStructure` - The copied feature structure - """ - if already_copied is None: - already_copied = {} - if self in already_copied: - return already_copied[self] - new_fs = FeatureStructure(self.value) - if self._pointer is not None: - pointer_copy = self._pointer.copy(already_copied) - new_fs.pointer = pointer_copy - for feature, content in self._content.items(): - new_fs.content[feature] = content.copy(already_copied) - already_copied[self] = new_fs - return new_fs - @property def content(self) -> Dict[str, "FeatureStructure"]: """Gets the content of the current node""" @@ -75,12 +46,12 @@ def pointer(self, new_pointer: "FeatureStructure") -> None: self._pointer = new_pointer @property - def value(self) -> Any: + def value(self) -> Hashable: """Gets the value associated to the current node""" return self._value if self.pointer is None else self.pointer.value @value.setter - def value(self, new_value: Any) -> None: + def value(self, new_value: Hashable) -> None: """Gets the value associated to the current node""" self._value = new_value @@ -256,6 +227,35 @@ def __repr__(self) -> str: res.append(".".join(path) + "=" + str(value)) return " | ".join(res) + def copy(self, already_copied: Dict["FeatureStructure", + "FeatureStructure"] = None) \ + -> "FeatureStructure": + """Copies the current feature structure + + Parameters + ---------- + already_copied : dict + A dictionary containing the parts already copied. + For internal usage. + + Returns + ---------- + fs : :class:`~pyformlang.fcfg.FeatureStructure` + The copied feature structure + """ + if already_copied is None: + already_copied = {} + if self in already_copied: + return already_copied[self] + new_fs = FeatureStructure(self.value) + if self._pointer is not None: + pointer_copy = self._pointer.copy(already_copied) + new_fs.pointer = pointer_copy + for feature, content in self._content.items(): + new_fs.content[feature] = content.copy(already_copied) + already_copied[self] = new_fs + return new_fs + @classmethod def from_text(cls, text: str, From b73cae42efc5f13fbe7f8b8249a593c0c34eb486 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Wed, 4 Dec 2024 00:05:10 +0300 Subject: [PATCH 22/30] correct epsilon checks --- pyformlang/cfg/cfg.py | 2 -- pyformlang/finite_automaton/tests/test_epsilon.py | 1 + pyformlang/objects/cfg_objects/epsilon.py | 3 ++- pyformlang/objects/cfg_objects/utils.py | 4 ++-- pyformlang/objects/finite_automaton_objects/epsilon.py | 6 ++++-- pyformlang/objects/finite_automaton_objects/utils.py | 4 ++-- pyformlang/objects/pda_objects/epsilon.py | 3 ++- pyformlang/objects/pda_objects/utils.py | 6 +++--- 8 files changed, 16 insertions(+), 13 deletions(-) diff --git a/pyformlang/cfg/cfg.py b/pyformlang/cfg/cfg.py index 020eb17..7584713 100644 --- a/pyformlang/cfg/cfg.py +++ b/pyformlang/cfg/cfg.py @@ -659,8 +659,6 @@ def contains(self, word: Iterable[Hashable]) -> bool: """ # Remove epsilons word = [to_terminal(x) for x in word if x != Epsilon()] - if not word: - return self.generate_epsilon() cyk_table = CYKTable(self, word) return cyk_table.generate_word() diff --git a/pyformlang/finite_automaton/tests/test_epsilon.py b/pyformlang/finite_automaton/tests/test_epsilon.py index 955ac6f..19a70a3 100644 --- a/pyformlang/finite_automaton/tests/test_epsilon.py +++ b/pyformlang/finite_automaton/tests/test_epsilon.py @@ -16,3 +16,4 @@ def test_epsilon(self): symb = Symbol(0) assert eps0 == eps1 assert eps0 != symb + assert "epsilon" == Epsilon() diff --git a/pyformlang/objects/cfg_objects/epsilon.py b/pyformlang/objects/cfg_objects/epsilon.py index 3680ad1..5be1d94 100644 --- a/pyformlang/objects/cfg_objects/epsilon.py +++ b/pyformlang/objects/cfg_objects/epsilon.py @@ -3,6 +3,7 @@ from typing import Any from .terminal import Terminal +from ..finite_automaton_objects.epsilon import EPSILON_SYMBOLS class Epsilon(Terminal): @@ -12,7 +13,7 @@ def __init__(self) -> None: super().__init__("epsilon") def __eq__(self, other: Any) -> bool: - return isinstance(other, Epsilon) + return isinstance(other, Epsilon) or other in EPSILON_SYMBOLS def __hash__(self) -> int: return super().__hash__() diff --git a/pyformlang/objects/cfg_objects/utils.py b/pyformlang/objects/cfg_objects/utils.py index 4f65edc..5231a27 100644 --- a/pyformlang/objects/cfg_objects/utils.py +++ b/pyformlang/objects/cfg_objects/utils.py @@ -4,7 +4,7 @@ from .variable import Variable from .terminal import Terminal -from .epsilon import Epsilon +from .epsilon import Epsilon, EPSILON_SYMBOLS def to_variable(given: Hashable) -> Variable: @@ -18,6 +18,6 @@ def to_terminal(given: Hashable) -> Terminal: """ Transformation into a terminal """ if isinstance(given, Terminal): return given - if given == "epsilon": + if given in EPSILON_SYMBOLS: return Epsilon() return Terminal(given) diff --git a/pyformlang/objects/finite_automaton_objects/epsilon.py b/pyformlang/objects/finite_automaton_objects/epsilon.py index 88a656e..0231cf8 100644 --- a/pyformlang/objects/finite_automaton_objects/epsilon.py +++ b/pyformlang/objects/finite_automaton_objects/epsilon.py @@ -6,6 +6,8 @@ from .symbol import Symbol +EPSILON_SYMBOLS = ["epsilon", "ɛ"] + class Epsilon(Symbol): """ An epsilon transition @@ -21,7 +23,7 @@ def __init__(self) -> None: super().__init__("epsilon") def __eq__(self, other: Any) -> bool: - return isinstance(other, Epsilon) + return isinstance(other, Epsilon) or other in EPSILON_SYMBOLS def __hash__(self) -> int: - return hash("EPSILON TRANSITION") + return super().__hash__() diff --git a/pyformlang/objects/finite_automaton_objects/utils.py b/pyformlang/objects/finite_automaton_objects/utils.py index 1e342cf..a1382a8 100644 --- a/pyformlang/objects/finite_automaton_objects/utils.py +++ b/pyformlang/objects/finite_automaton_objects/utils.py @@ -4,7 +4,7 @@ from .state import State from .symbol import Symbol -from .epsilon import Epsilon +from .epsilon import Epsilon, EPSILON_SYMBOLS def to_state(given: Hashable) -> State: @@ -30,6 +30,6 @@ def to_symbol(given: Hashable) -> Symbol: """ if isinstance(given, Symbol): return given - if given in ("epsilon", "ɛ"): + if given in EPSILON_SYMBOLS: return Epsilon() return Symbol(given) diff --git a/pyformlang/objects/pda_objects/epsilon.py b/pyformlang/objects/pda_objects/epsilon.py index d4e57b8..35d287b 100644 --- a/pyformlang/objects/pda_objects/epsilon.py +++ b/pyformlang/objects/pda_objects/epsilon.py @@ -3,6 +3,7 @@ from typing import Any from .stack_symbol import StackSymbol +from ..finite_automaton_objects.epsilon import EPSILON_SYMBOLS class Epsilon(StackSymbol): @@ -12,7 +13,7 @@ def __init__(self) -> None: super().__init__("epsilon") def __eq__(self, other: Any) -> bool: - return isinstance(other, Epsilon) + return isinstance(other, Epsilon) or other in EPSILON_SYMBOLS def __hash__(self) -> int: return super().__hash__() diff --git a/pyformlang/objects/pda_objects/utils.py b/pyformlang/objects/pda_objects/utils.py index 68f0fa0..cd8202a 100644 --- a/pyformlang/objects/pda_objects/utils.py +++ b/pyformlang/objects/pda_objects/utils.py @@ -5,7 +5,7 @@ from .state import State from .symbol import Symbol from .stack_symbol import StackSymbol -from .epsilon import Epsilon +from .epsilon import Epsilon, EPSILON_SYMBOLS def to_state(given: Hashable) -> State: @@ -19,7 +19,7 @@ def to_symbol(given: Hashable) -> Symbol: """ Convert to a symbol """ if isinstance(given, Symbol): return given - if given == "epsilon": + if given in EPSILON_SYMBOLS: return Epsilon() return Symbol(given) @@ -28,6 +28,6 @@ def to_stack_symbol(given: Hashable) -> StackSymbol: """ Convert to a stack symbol """ if isinstance(given, StackSymbol): return given - if given == "epsilon": + if given in EPSILON_SYMBOLS: return Epsilon() return StackSymbol(given) From 73aa2e7be52e177488331fbe378e4f54a6960a11 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Wed, 4 Dec 2024 19:11:20 +0300 Subject: [PATCH 23/30] correct object equality checks, use Tuple as input for transition function of pda --- pyformlang/cfg/__init__.py | 3 +-- pyformlang/cfg/cfg.py | 4 +-- pyformlang/cfg/cyk_table.py | 20 +++++--------- .../cfg/{grammar.py => formal_grammar.py} | 6 ++--- pyformlang/cfg/parse_tree.py | 4 +++ pyformlang/cfg/tests/test_terminal.py | 6 ++++- pyformlang/cfg/tests/test_variable.py | 1 + .../finite_automaton/tests/test_epsilon.py | 1 + .../finite_automaton/tests/test_state.py | 2 +- .../finite_automaton/tests/test_symbol.py | 4 ++- pyformlang/objects/cfg_objects/cfg_object.py | 5 ++-- pyformlang/objects/cfg_objects/terminal.py | 12 +++++++++ pyformlang/objects/cfg_objects/variable.py | 11 ++++++++ .../finite_automaton_object.py | 7 ++++- .../objects/finite_automaton_objects/state.py | 2 ++ .../finite_automaton_objects/symbol.py | 2 ++ .../objects/pda_objects/stack_symbol.py | 7 +++-- pyformlang/objects/pda_objects/state.py | 6 +++-- pyformlang/objects/pda_objects/symbol.py | 6 +++-- pyformlang/objects/pyformlang_object.py | 0 pyformlang/pda/pda.py | 26 +++++++++---------- pyformlang/pda/tests/test_pda.py | 6 ++--- pyformlang/pda/transition_function.py | 6 ++--- 23 files changed, 94 insertions(+), 53 deletions(-) rename pyformlang/cfg/{grammar.py => formal_grammar.py} (97%) create mode 100644 pyformlang/objects/pyformlang_object.py diff --git a/pyformlang/cfg/__init__.py b/pyformlang/cfg/__init__.py index d5b89db..7e3e283 100644 --- a/pyformlang/cfg/__init__.py +++ b/pyformlang/cfg/__init__.py @@ -21,8 +21,7 @@ """ from .cfg import CFG, CFGObject, Variable, Terminal, Epsilon, Production -from .parse_tree import ParseTree -from .cyk_table import DerivationDoesNotExist +from .parse_tree import ParseTree, DerivationDoesNotExist __all__ = ["CFGObject", diff --git a/pyformlang/cfg/cfg.py b/pyformlang/cfg/cfg.py index 7584713..d36126c 100644 --- a/pyformlang/cfg/cfg.py +++ b/pyformlang/cfg/cfg.py @@ -9,7 +9,7 @@ from pyformlang.finite_automaton import DeterministicFiniteAutomaton, State -from .grammar import Grammar +from .formal_grammar import FormalGrammar from .parse_tree import ParseTree from .cyk_table import CYKTable from .cfg_variable_converter import CFGVariableConverter @@ -30,7 +30,7 @@ def is_special_text(text: str) -> bool: text[-1] == '"' -class CFG(Grammar): +class CFG(FormalGrammar): """ A class representing a context free grammar Parameters diff --git a/pyformlang/cfg/cyk_table.py b/pyformlang/cfg/cyk_table.py index 24abae4..eb2b692 100644 --- a/pyformlang/cfg/cyk_table.py +++ b/pyformlang/cfg/cyk_table.py @@ -4,12 +4,12 @@ from typing import Dict, List, Set, Iterable, Tuple, Any -from .grammar import Grammar -from .parse_tree import ParseTree, NotParsableException +from .formal_grammar import FormalGrammar +from .parse_tree import ParseTree, DerivationDoesNotExist from ..objects.cfg_objects import CFGObject, Terminal ProductionsDict = Dict[Tuple[CFGObject, ...], List[CFGObject]] -Table = Dict[Tuple[int, int], Set["CYKNode"]] +ParsingTable = Dict[Tuple[int, int], Set["CYKNode"]] class CYKTable: @@ -23,12 +23,12 @@ class CYKTable: The word from which we construct the CYK table """ - def __init__(self, grammar: Grammar, word: List[Terminal]) -> None: - self._normal_form: Grammar = grammar.to_normal_form() + def __init__(self, grammar: FormalGrammar, word: List[Terminal]) -> None: + self._normal_form: FormalGrammar = grammar.to_normal_form() self._generate_epsilon: bool = grammar.generate_epsilon() self._word: List[Terminal] = word self._productions_d: ProductionsDict = {} - self._cyk_table: Table = {} + self._cyk_table: ParsingTable = {} self._set_productions_by_body() if not self._generates_all_terminals(): self._cyk_table[(0, len(self._word))] = set() @@ -110,9 +110,7 @@ def get_parse_tree(self) -> ParseTree: ------- parse_tree : :class:`~pyformlang.cfg.ParseTree` """ - if not self._normal_form.start_symbol: - raise NotParsableException - if not self.generate_word(): + if not self._normal_form.start_symbol or not self.generate_word(): raise DerivationDoesNotExist if not self._word: return ParseTree(self._normal_form.start_symbol) @@ -146,7 +144,3 @@ def __eq__(self, other: Any) -> bool: def __hash__(self) -> int: return hash(self.value) - - -class DerivationDoesNotExist(Exception): - """Exception raised when the word cannot be derived""" diff --git a/pyformlang/cfg/grammar.py b/pyformlang/cfg/formal_grammar.py similarity index 97% rename from pyformlang/cfg/grammar.py rename to pyformlang/cfg/formal_grammar.py index 2a28e1c..edc7cf3 100644 --- a/pyformlang/cfg/grammar.py +++ b/pyformlang/cfg/formal_grammar.py @@ -5,10 +5,10 @@ from ..objects.cfg_objects import Variable, Terminal, Production -GrammarT = TypeVar("GrammarT", bound="Grammar") +GrammarT = TypeVar("GrammarT", bound="FormalGrammar") -class Grammar: +class FormalGrammar: """ Basic grammar representation """ @abstractmethod @@ -87,7 +87,7 @@ def generate_epsilon(self) -> bool: raise NotImplementedError @abstractmethod - def to_normal_form(self) -> "Grammar": + def to_normal_form(self) -> "FormalGrammar": """ Gets Chomsky normal form of the grammar """ raise NotImplementedError diff --git a/pyformlang/cfg/parse_tree.py b/pyformlang/cfg/parse_tree.py index 45f232d..74c6b29 100644 --- a/pyformlang/cfg/parse_tree.py +++ b/pyformlang/cfg/parse_tree.py @@ -114,5 +114,9 @@ def write_as_dot(self, filename: str) -> None: write_dot(self.to_networkx(), filename) +class DerivationDoesNotExist(Exception): + """Exception raised when the word cannot be derived""" + + class NotParsableException(Exception): """When the grammar cannot be parsed (parser not powerful enough)""" diff --git a/pyformlang/cfg/tests/test_terminal.py b/pyformlang/cfg/tests/test_terminal.py index 358e38f..17b340e 100644 --- a/pyformlang/cfg/tests/test_terminal.py +++ b/pyformlang/cfg/tests/test_terminal.py @@ -1,5 +1,5 @@ """ Tests the terminal """ -from pyformlang.cfg import Terminal, Epsilon +from pyformlang.cfg import Variable, Terminal, Epsilon class TestTerminal: @@ -22,3 +22,7 @@ def test_creation(self): epsilon = Epsilon() assert epsilon.to_text() == "epsilon" assert Terminal("C").to_text() == '"TER:C"' + assert "epsilon" == Epsilon() + assert Epsilon() == "ɛ" + assert Terminal("A") != Variable("A") + assert "A" == Terminal("A") diff --git a/pyformlang/cfg/tests/test_variable.py b/pyformlang/cfg/tests/test_variable.py index 2bdc78b..56c186e 100644 --- a/pyformlang/cfg/tests/test_variable.py +++ b/pyformlang/cfg/tests/test_variable.py @@ -19,3 +19,4 @@ def test_creation(self): assert str(variable0) == str(variable2) assert str(variable0) == str(variable3) assert str(variable0) != str(variable1) + assert "A" == Variable("A") diff --git a/pyformlang/finite_automaton/tests/test_epsilon.py b/pyformlang/finite_automaton/tests/test_epsilon.py index 19a70a3..9ae201f 100644 --- a/pyformlang/finite_automaton/tests/test_epsilon.py +++ b/pyformlang/finite_automaton/tests/test_epsilon.py @@ -17,3 +17,4 @@ def test_epsilon(self): assert eps0 == eps1 assert eps0 != symb assert "epsilon" == Epsilon() + assert Epsilon() == "ɛ" diff --git a/pyformlang/finite_automaton/tests/test_state.py b/pyformlang/finite_automaton/tests/test_state.py index 0d3d150..8046f88 100644 --- a/pyformlang/finite_automaton/tests/test_state.py +++ b/pyformlang/finite_automaton/tests/test_state.py @@ -34,6 +34,7 @@ def test_eq(self): assert state2 != state3 assert state2 == 1 assert state1 != state2 + assert State("ABC") == "ABC" def test_hash(self): """ Tests the hashing of states @@ -44,4 +45,3 @@ def test_hash(self): assert isinstance(state1, int) assert state1 == state3 assert state2 != state3 - assert state1 != state2 diff --git a/pyformlang/finite_automaton/tests/test_symbol.py b/pyformlang/finite_automaton/tests/test_symbol.py index 5d7be9a..fcb114c 100644 --- a/pyformlang/finite_automaton/tests/test_symbol.py +++ b/pyformlang/finite_automaton/tests/test_symbol.py @@ -2,7 +2,7 @@ Tests for the symbols """ -from pyformlang.finite_automaton import Symbol +from pyformlang.finite_automaton import State, Symbol class TestSymbol: @@ -33,6 +33,8 @@ def test_eq(self): assert symbol2 == 1 assert symbol2 != symbol3 assert symbol1 != symbol2 + assert "A" == Symbol("A") + assert State("A") != Symbol("A") def test_hash(self): """ Tests the hashing of symbols diff --git a/pyformlang/objects/cfg_objects/cfg_object.py b/pyformlang/objects/cfg_objects/cfg_object.py index e5cce96..ba013b1 100644 --- a/pyformlang/objects/cfg_objects/cfg_object.py +++ b/pyformlang/objects/cfg_objects/cfg_object.py @@ -27,10 +27,9 @@ def value(self) -> Hashable: """Gets the value of the object""" return self._value + @abstractmethod def __eq__(self, other: Any) -> bool: - if isinstance(other, CFGObject): - return self.value == other.value - return self.value == other + raise NotImplementedError def __hash__(self) -> int: if self._hash is None: diff --git a/pyformlang/objects/cfg_objects/terminal.py b/pyformlang/objects/cfg_objects/terminal.py index f54f5b1..0c4b5b1 100644 --- a/pyformlang/objects/cfg_objects/terminal.py +++ b/pyformlang/objects/cfg_objects/terminal.py @@ -1,11 +1,23 @@ """ A terminal in a CFG """ +from typing import Any + from .cfg_object import CFGObject class Terminal(CFGObject): """ A terminal in a CFG """ + def __eq__(self, other: Any) -> bool: + if isinstance(other, Terminal): + return self.value == other.value + if isinstance(other, CFGObject): + return False + return self.value == other + + def __hash__(self) -> int: + return super().__hash__() + def __repr__(self) -> str: return "Terminal(" + str(self.value) + ")" diff --git a/pyformlang/objects/cfg_objects/variable.py b/pyformlang/objects/cfg_objects/variable.py index 6281e16..7e8d4c1 100644 --- a/pyformlang/objects/cfg_objects/variable.py +++ b/pyformlang/objects/cfg_objects/variable.py @@ -1,5 +1,6 @@ """ A variable in a CFG """ +from typing import Any from string import ascii_uppercase from .cfg_object import CFGObject @@ -14,6 +15,16 @@ class Variable(CFGObject): The value of the variable """ + def __eq__(self, other: Any) -> bool: + if isinstance(other, Variable): + return self.value == other.value + if isinstance(other, CFGObject): + return False + return self.value == other + + def __hash__(self) -> int: + return super().__hash__() + def __repr__(self) -> str: return "Variable(" + str(self.value) + ")" diff --git a/pyformlang/objects/finite_automaton_objects/finite_automaton_object.py b/pyformlang/objects/finite_automaton_objects/finite_automaton_object.py index 4eaaad7..fdc5b99 100644 --- a/pyformlang/objects/finite_automaton_objects/finite_automaton_object.py +++ b/pyformlang/objects/finite_automaton_objects/finite_automaton_object.py @@ -2,7 +2,8 @@ Represents an object of a finite state automaton """ -from typing import Hashable +from typing import Hashable, Any +from abc import abstractmethod class FiniteAutomatonObject: @@ -29,6 +30,10 @@ def value(self) -> Hashable: """ return self._value + @abstractmethod + def __eq__(self, other: Any) -> bool: + raise NotImplementedError + def __hash__(self) -> int: if self._hash is None: self._hash = hash(self._value) diff --git a/pyformlang/objects/finite_automaton_objects/state.py b/pyformlang/objects/finite_automaton_objects/state.py index 7699a29..a6d8c4a 100644 --- a/pyformlang/objects/finite_automaton_objects/state.py +++ b/pyformlang/objects/finite_automaton_objects/state.py @@ -27,6 +27,8 @@ class State(CFGConvertible, FiniteAutomatonObject): def __eq__(self, other: Any) -> bool: if isinstance(other, State): return self.value == other.value + if isinstance(other, FiniteAutomatonObject): + return False return self.value == other def __hash__(self) -> int: diff --git a/pyformlang/objects/finite_automaton_objects/symbol.py b/pyformlang/objects/finite_automaton_objects/symbol.py index 28667e2..48b9854 100644 --- a/pyformlang/objects/finite_automaton_objects/symbol.py +++ b/pyformlang/objects/finite_automaton_objects/symbol.py @@ -25,6 +25,8 @@ class Symbol(FiniteAutomatonObject): def __eq__(self, other: Any) -> bool: if isinstance(other, Symbol): return self.value == other.value + if isinstance(other, FiniteAutomatonObject): + return False return self.value == other def __hash__(self) -> int: diff --git a/pyformlang/objects/pda_objects/stack_symbol.py b/pyformlang/objects/pda_objects/stack_symbol.py index 9f7c8a7..1ba7780 100644 --- a/pyformlang/objects/pda_objects/stack_symbol.py +++ b/pyformlang/objects/pda_objects/stack_symbol.py @@ -2,6 +2,7 @@ from typing import Any +from .pda_object import PDAObject from .symbol import Symbol from ..cfg_objects import CFGConvertible @@ -17,9 +18,11 @@ class StackSymbol(CFGConvertible, Symbol): """ def __eq__(self, other: Any) -> bool: - if not isinstance(other, StackSymbol): + if isinstance(other, StackSymbol): + return self.value == other.value + if isinstance(other, PDAObject): return False - return self._value == other.value + return self.value == other def __hash__(self) -> int: return super().__hash__() diff --git a/pyformlang/objects/pda_objects/state.py b/pyformlang/objects/pda_objects/state.py index 86562b1..908782d 100644 --- a/pyformlang/objects/pda_objects/state.py +++ b/pyformlang/objects/pda_objects/state.py @@ -17,9 +17,11 @@ class State(CFGConvertible, PDAObject): """ def __eq__(self, other: Any) -> bool: - if not isinstance(other, State): + if isinstance(other, State): + return self.value == other.value + if isinstance(other, PDAObject): return False - return self._value == other.value + return self.value == other def __hash__(self) -> int: return super().__hash__() diff --git a/pyformlang/objects/pda_objects/symbol.py b/pyformlang/objects/pda_objects/symbol.py index 4616932..62a28c1 100644 --- a/pyformlang/objects/pda_objects/symbol.py +++ b/pyformlang/objects/pda_objects/symbol.py @@ -16,9 +16,11 @@ class Symbol(PDAObject): """ def __eq__(self, other: Any) -> bool: - if not isinstance(other, Symbol): + if isinstance(other, Symbol): + return self.value == other.value + if isinstance(other, PDAObject): return False - return self._value == other.value + return self.value == other def __hash__(self) -> int: return super().__hash__() diff --git a/pyformlang/objects/pyformlang_object.py b/pyformlang/objects/pyformlang_object.py new file mode 100644 index 0000000..e69de29 diff --git a/pyformlang/pda/pda.py b/pyformlang/pda/pda.py index a33c054..e4d9ea4 100644 --- a/pyformlang/pda/pda.py +++ b/pyformlang/pda/pda.py @@ -225,7 +225,7 @@ def add_transition(self, input_symbol = to_symbol(input_symbol) stack_from = to_stack_symbol(stack_from) s_to = to_state(s_to) - stack_to = [to_stack_symbol(x) for x in stack_to] + stack_to = tuple(to_stack_symbol(x) for x in stack_to) self._states.add(s_from) self._states.add(s_to) if input_symbol != PDAEpsilon(): @@ -304,11 +304,11 @@ def to_final_state(self) -> "PDA": new_tf = self._transition_function.copy() if self.start_state and self.start_stack_symbol: new_tf.add_transition(new_start, PDAEpsilon(), new_stack_symbol, - self.start_state, [self.start_stack_symbol, - new_stack_symbol]) + self.start_state, (self.start_stack_symbol, + new_stack_symbol)) for state in self._states: new_tf.add_transition(state, PDAEpsilon(), new_stack_symbol, - new_end, []) + new_end, tuple()) return PDA(new_states, self._input_symbols.copy(), new_stack_alphabet, @@ -344,15 +344,15 @@ def to_empty_stack(self) -> "PDA": new_tf = self._transition_function.copy() if self.start_state and self.start_stack_symbol: new_tf.add_transition(new_start, PDAEpsilon(), new_stack_symbol, - self.start_state, [self.start_stack_symbol, - new_stack_symbol]) + self.start_state, (self.start_stack_symbol, + new_stack_symbol)) for state in self._final_states: for stack_symbol in new_stack_alphabet: new_tf.add_transition(state, PDAEpsilon(), stack_symbol, - new_end, []) + new_end, tuple()) for stack_symbol in new_stack_alphabet: new_tf.add_transition(new_end, PDAEpsilon(), stack_symbol, - new_end, []) + new_end, tuple()) return PDA(new_states, self._input_symbols.copy(), new_stack_alphabet, @@ -376,13 +376,13 @@ def to_cfg(self) -> CFG: productions = self._initialize_production_from_start_in_to_cfg( start, variable_converter) states = self._states - for transition in self._transition_function: + for transition in self: for state in states: variable_converter.set_valid( transition[INPUT][STATE], transition[INPUT][STACK_FROM], state) - for transition in self._transition_function: + for transition in self: for state in states: self._process_transition_and_state_to_cfg(productions, state, @@ -606,9 +606,7 @@ def intersection(self, other: DeterministicFiniteAutomaton) -> "PDA": if not next_state_dfa: continue for stack_symbol in self._stack_alphabet: - next_states_self = self._transition_function(state_in, - symbol, - stack_symbol) + next_states_self = self(state_in, symbol, stack_symbol) for next_state, next_stack in next_states_self: pda.add_transition( pda_state_converter.to_pda_combined_state( @@ -684,7 +682,7 @@ def to_networkx(self) -> MultiDiGraph: shape=None, height=.0, width=.0) - for key, value in self._transition_function: + for key, value in self: s_from, in_symbol, stack_from = key s_to, stack_to = value graph.add_edge( diff --git a/pyformlang/pda/tests/test_pda.py b/pyformlang/pda/tests/test_pda.py index 89c81ba..9b6ffe1 100644 --- a/pyformlang/pda/tests/test_pda.py +++ b/pyformlang/pda/tests/test_pda.py @@ -46,10 +46,10 @@ def test_creation(self): pda = PDA(final_states={State("A"), State("A"), State("B"), Symbol("B")}) assert pda is not None - assert len(pda.states) == 3 + assert len(pda.states) == 2 assert len(pda.input_symbols) == 0 assert len(pda.stack_symbols) == 0 - assert len(pda.final_states) == 3 + assert len(pda.final_states) == 2 pda = PDA(input_symbols={Symbol("A"), Symbol("B"), Symbol("A"), State("A")}) @@ -67,7 +67,7 @@ def test_creation(self): assert len(pda.final_states) == 0 pda = PDA(stack_alphabet={StackSymbol("A"), StackSymbol("A"), - StackSymbol("B")}) + StackSymbol("B"), Symbol("B")}) assert pda is not None assert len(pda.states) == 0 assert len(pda.input_symbols) == 0 diff --git a/pyformlang/pda/transition_function.py b/pyformlang/pda/transition_function.py index ff3dcf4..c1e34f8 100644 --- a/pyformlang/pda/transition_function.py +++ b/pyformlang/pda/transition_function.py @@ -1,7 +1,7 @@ """ A transition function in a pushdown automaton """ from copy import deepcopy -from typing import Dict, Set, Sequence, Iterator, Iterable, Tuple +from typing import Dict, Set, Iterator, Iterable, Tuple from ..objects.pda_objects import State, Symbol, StackSymbol @@ -33,7 +33,7 @@ def add_transition(self, input_symbol: Symbol, stack_from: StackSymbol, s_to: State, - stack_to: Sequence[StackSymbol]) -> None: + stack_to: Tuple[StackSymbol, ...]) -> None: """ Add a transition to the function Parameters @@ -50,7 +50,7 @@ def add_transition(self, The string of stack symbol which replace the stack_from """ temp_in = (s_from, input_symbol, stack_from) - temp_out = (s_to, tuple(stack_to)) + temp_out = (s_to, stack_to) if temp_in in self._transitions: self._transitions[temp_in].add(temp_out) else: From 7e027f314e1002734ee864f10988eb8fb5849349 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Wed, 4 Dec 2024 20:36:34 +0300 Subject: [PATCH 24/30] rework objects, correct equality checks, add more general representations, add base epsilon class --- pyformlang/cfg/tests/test_terminal.py | 10 +++++ pyformlang/objects/base_epsilon.py | 30 +++++++++++++ pyformlang/objects/cfg_objects/cfg_object.py | 28 +----------- pyformlang/objects/cfg_objects/epsilon.py | 18 +------- pyformlang/objects/cfg_objects/production.py | 6 +-- pyformlang/objects/cfg_objects/terminal.py | 5 ++- pyformlang/objects/cfg_objects/utils.py | 3 +- pyformlang/objects/cfg_objects/variable.py | 5 ++- .../finite_automaton_objects/epsilon.py | 16 +------ .../finite_automaton_object.py | 30 ++----------- .../objects/finite_automaton_objects/state.py | 6 ++- .../finite_automaton_objects/symbol.py | 6 ++- .../objects/finite_automaton_objects/utils.py | 3 +- pyformlang/objects/formal_object.py | 39 ++++++++++++++++ pyformlang/objects/pda_objects/epsilon.py | 15 +------ pyformlang/objects/pda_objects/pda_object.py | 22 +-------- .../objects/pda_objects/stack_symbol.py | 6 +-- pyformlang/objects/pda_objects/state.py | 5 ++- pyformlang/objects/pda_objects/symbol.py | 5 ++- pyformlang/objects/pda_objects/utils.py | 3 +- pyformlang/objects/pyformlang_object.py | 0 pyformlang/pda/tests/test_pda.py | 45 +++++++++++++------ 22 files changed, 158 insertions(+), 148 deletions(-) create mode 100644 pyformlang/objects/base_epsilon.py create mode 100644 pyformlang/objects/formal_object.py delete mode 100644 pyformlang/objects/pyformlang_object.py diff --git a/pyformlang/cfg/tests/test_terminal.py b/pyformlang/cfg/tests/test_terminal.py index 17b340e..ee44461 100644 --- a/pyformlang/cfg/tests/test_terminal.py +++ b/pyformlang/cfg/tests/test_terminal.py @@ -1,5 +1,6 @@ """ Tests the terminal """ from pyformlang.cfg import Variable, Terminal, Epsilon +from pyformlang.finite_automaton import State, Symbol, Epsilon as FAEpsilon class TestTerminal: @@ -22,7 +23,16 @@ def test_creation(self): epsilon = Epsilon() assert epsilon.to_text() == "epsilon" assert Terminal("C").to_text() == '"TER:C"' + assert repr(Epsilon()) == "epsilon" + + def test_eq(self): assert "epsilon" == Epsilon() assert Epsilon() == "ɛ" assert Terminal("A") != Variable("A") + assert Variable("S") == Variable("S") + assert Terminal("A") != Terminal("B") assert "A" == Terminal("A") + assert Variable(1) == 1 + assert Epsilon() == FAEpsilon() + assert Terminal("ABC") != Symbol("ABC") + assert State("S") != Variable("S") diff --git a/pyformlang/objects/base_epsilon.py b/pyformlang/objects/base_epsilon.py new file mode 100644 index 0000000..a245a92 --- /dev/null +++ b/pyformlang/objects/base_epsilon.py @@ -0,0 +1,30 @@ +""" General epsilon representation """ + +from typing import Any + +from .formal_object import FormalObject + +EPSILON_SYMBOLS = ["epsilon", "ɛ"] + + +class BaseEpsilon(FormalObject): + """ An epsilon transition + + Examples + -------- + + >>> epsilon = Epsilon() + + """ + + def __init__(self) -> None: + super().__init__("epsilon") + + def __eq__(self, other: Any) -> bool: + return isinstance(other, BaseEpsilon) or other in EPSILON_SYMBOLS + + def __hash__(self) -> int: + return super().__hash__() + + def __repr__(self) -> str: + return "epsilon" diff --git a/pyformlang/objects/cfg_objects/cfg_object.py b/pyformlang/objects/cfg_objects/cfg_object.py index ba013b1..49466dd 100644 --- a/pyformlang/objects/cfg_objects/cfg_object.py +++ b/pyformlang/objects/cfg_objects/cfg_object.py @@ -1,12 +1,12 @@ """ An object in a CFG (Variable and Terminal)""" -from typing import Hashable, Any from abc import abstractmethod from .cfg_convertible import CFGConvertible +from ..formal_object import FormalObject -class CFGObject(CFGConvertible): +class CFGObject(CFGConvertible, FormalObject): """ An object in a CFG Parameters @@ -15,30 +15,6 @@ class CFGObject(CFGConvertible): The value of the object """ - __slots__ = ["_value", "_hash"] - - def __init__(self, value: Hashable) -> None: - super().__init__() - self._value = value - self._hash = None - - @property - def value(self) -> Hashable: - """Gets the value of the object""" - return self._value - - @abstractmethod - def __eq__(self, other: Any) -> bool: - raise NotImplementedError - - def __hash__(self) -> int: - if self._hash is None: - self._hash = hash(self._value) - return self._hash - - def __str__(self) -> str: - return str(self._value) - @abstractmethod def to_text(self) -> str: """ Turns the object into a text format """ diff --git a/pyformlang/objects/cfg_objects/epsilon.py b/pyformlang/objects/cfg_objects/epsilon.py index 5be1d94..35eba40 100644 --- a/pyformlang/objects/cfg_objects/epsilon.py +++ b/pyformlang/objects/cfg_objects/epsilon.py @@ -1,22 +1,8 @@ """ An epsilon terminal """ -from typing import Any - from .terminal import Terminal -from ..finite_automaton_objects.epsilon import EPSILON_SYMBOLS +from ..base_epsilon import BaseEpsilon -class Epsilon(Terminal): +class Epsilon(BaseEpsilon, Terminal): """ An epsilon terminal """ - - def __init__(self) -> None: - super().__init__("epsilon") - - def __eq__(self, other: Any) -> bool: - return isinstance(other, Epsilon) or other in EPSILON_SYMBOLS - - def __hash__(self) -> int: - return super().__hash__() - - def to_text(self) -> str: - return "epsilon" diff --git a/pyformlang/objects/cfg_objects/production.py b/pyformlang/objects/cfg_objects/production.py index bb2becf..91ac63e 100644 --- a/pyformlang/objects/cfg_objects/production.py +++ b/pyformlang/objects/cfg_objects/production.py @@ -52,9 +52,6 @@ def body_terminals(self) -> Set[Terminal]: """Gets terminals of body of the production""" return {object for object in self.body if isinstance(object, Terminal)} - def __repr__(self) -> str: - return str(self.head) + " -> " + " ".join([str(x) for x in self.body]) - def __eq__(self, other: Any) -> bool: if not isinstance(other, Production): return False @@ -65,6 +62,9 @@ def __hash__(self) -> int: self._hash = sum(map(hash, self._body)) + hash(self._head) return self._hash + def __repr__(self) -> str: + return str(self.head) + " -> " + " ".join([str(x) for x in self.body]) + def is_normal_form(self) -> bool: """ Tells is the production is in Chomsky Normal Form diff --git a/pyformlang/objects/cfg_objects/terminal.py b/pyformlang/objects/cfg_objects/terminal.py index 0c4b5b1..9a8a595 100644 --- a/pyformlang/objects/cfg_objects/terminal.py +++ b/pyformlang/objects/cfg_objects/terminal.py @@ -3,6 +3,7 @@ from typing import Any from .cfg_object import CFGObject +from ..formal_object import FormalObject class Terminal(CFGObject): @@ -11,7 +12,7 @@ class Terminal(CFGObject): def __eq__(self, other: Any) -> bool: if isinstance(other, Terminal): return self.value == other.value - if isinstance(other, CFGObject): + if isinstance(other, FormalObject): return False return self.value == other @@ -19,7 +20,7 @@ def __hash__(self) -> int: return super().__hash__() def __repr__(self) -> str: - return "Terminal(" + str(self.value) + ")" + return f"Terminal({self})" def to_text(self) -> str: text = str(self._value) diff --git a/pyformlang/objects/cfg_objects/utils.py b/pyformlang/objects/cfg_objects/utils.py index 5231a27..1c26336 100644 --- a/pyformlang/objects/cfg_objects/utils.py +++ b/pyformlang/objects/cfg_objects/utils.py @@ -4,7 +4,8 @@ from .variable import Variable from .terminal import Terminal -from .epsilon import Epsilon, EPSILON_SYMBOLS +from .epsilon import Epsilon +from ..base_epsilon import EPSILON_SYMBOLS def to_variable(given: Hashable) -> Variable: diff --git a/pyformlang/objects/cfg_objects/variable.py b/pyformlang/objects/cfg_objects/variable.py index 7e8d4c1..46a031a 100644 --- a/pyformlang/objects/cfg_objects/variable.py +++ b/pyformlang/objects/cfg_objects/variable.py @@ -4,6 +4,7 @@ from string import ascii_uppercase from .cfg_object import CFGObject +from ..formal_object import FormalObject class Variable(CFGObject): @@ -18,7 +19,7 @@ class Variable(CFGObject): def __eq__(self, other: Any) -> bool: if isinstance(other, Variable): return self.value == other.value - if isinstance(other, CFGObject): + if isinstance(other, FormalObject): return False return self.value == other @@ -26,7 +27,7 @@ def __hash__(self) -> int: return super().__hash__() def __repr__(self) -> str: - return "Variable(" + str(self.value) + ")" + return f"Variable({self})" def to_text(self) -> str: text = str(self._value) diff --git a/pyformlang/objects/finite_automaton_objects/epsilon.py b/pyformlang/objects/finite_automaton_objects/epsilon.py index 0231cf8..1c4f887 100644 --- a/pyformlang/objects/finite_automaton_objects/epsilon.py +++ b/pyformlang/objects/finite_automaton_objects/epsilon.py @@ -2,14 +2,11 @@ Represents an epsilon transition """ -from typing import Any - from .symbol import Symbol - -EPSILON_SYMBOLS = ["epsilon", "ɛ"] +from ..base_epsilon import BaseEpsilon -class Epsilon(Symbol): +class Epsilon(BaseEpsilon, Symbol): """ An epsilon transition Examples @@ -18,12 +15,3 @@ class Epsilon(Symbol): >>> epsilon = Epsilon() """ - - def __init__(self) -> None: - super().__init__("epsilon") - - def __eq__(self, other: Any) -> bool: - return isinstance(other, Epsilon) or other in EPSILON_SYMBOLS - - def __hash__(self) -> int: - return super().__hash__() diff --git a/pyformlang/objects/finite_automaton_objects/finite_automaton_object.py b/pyformlang/objects/finite_automaton_objects/finite_automaton_object.py index fdc5b99..4666636 100644 --- a/pyformlang/objects/finite_automaton_objects/finite_automaton_object.py +++ b/pyformlang/objects/finite_automaton_objects/finite_automaton_object.py @@ -2,11 +2,12 @@ Represents an object of a finite state automaton """ -from typing import Hashable, Any from abc import abstractmethod +from ..formal_object import FormalObject -class FiniteAutomatonObject: + +class FiniteAutomatonObject(FormalObject): """ Represents an object in a finite state automaton Parameters @@ -15,29 +16,6 @@ class FiniteAutomatonObject: The value of the object """ - def __init__(self, value: Hashable) -> None: - self._value = value - self._hash = None - - @property - def value(self) -> Hashable: - """ Gets the value of the object - - Returns - --------- - value : any - The value of the object - """ - return self._value - @abstractmethod - def __eq__(self, other: Any) -> bool: - raise NotImplementedError - - def __hash__(self) -> int: - if self._hash is None: - self._hash = hash(self._value) - return self._hash - def __repr__(self) -> str: - return str(self._value) + raise NotImplementedError diff --git a/pyformlang/objects/finite_automaton_objects/state.py b/pyformlang/objects/finite_automaton_objects/state.py index a6d8c4a..6939abb 100644 --- a/pyformlang/objects/finite_automaton_objects/state.py +++ b/pyformlang/objects/finite_automaton_objects/state.py @@ -6,6 +6,7 @@ from .finite_automaton_object import FiniteAutomatonObject from ..cfg_objects import CFGConvertible +from ..formal_object import FormalObject class State(CFGConvertible, FiniteAutomatonObject): @@ -27,9 +28,12 @@ class State(CFGConvertible, FiniteAutomatonObject): def __eq__(self, other: Any) -> bool: if isinstance(other, State): return self.value == other.value - if isinstance(other, FiniteAutomatonObject): + if isinstance(other, FormalObject): return False return self.value == other def __hash__(self) -> int: return super().__hash__() + + def __repr__(self) -> str: + return f"State({self})" diff --git a/pyformlang/objects/finite_automaton_objects/symbol.py b/pyformlang/objects/finite_automaton_objects/symbol.py index 48b9854..5db815a 100644 --- a/pyformlang/objects/finite_automaton_objects/symbol.py +++ b/pyformlang/objects/finite_automaton_objects/symbol.py @@ -5,6 +5,7 @@ from typing import Any from .finite_automaton_object import FiniteAutomatonObject +from ..formal_object import FormalObject class Symbol(FiniteAutomatonObject): @@ -25,9 +26,12 @@ class Symbol(FiniteAutomatonObject): def __eq__(self, other: Any) -> bool: if isinstance(other, Symbol): return self.value == other.value - if isinstance(other, FiniteAutomatonObject): + if isinstance(other, FormalObject): return False return self.value == other def __hash__(self) -> int: return super().__hash__() + + def __repr__(self) -> str: + return f"Symbol({self})" diff --git a/pyformlang/objects/finite_automaton_objects/utils.py b/pyformlang/objects/finite_automaton_objects/utils.py index a1382a8..4ad3655 100644 --- a/pyformlang/objects/finite_automaton_objects/utils.py +++ b/pyformlang/objects/finite_automaton_objects/utils.py @@ -4,7 +4,8 @@ from .state import State from .symbol import Symbol -from .epsilon import Epsilon, EPSILON_SYMBOLS +from .epsilon import Epsilon +from ..base_epsilon import EPSILON_SYMBOLS def to_state(given: Hashable) -> State: diff --git a/pyformlang/objects/formal_object.py b/pyformlang/objects/formal_object.py new file mode 100644 index 0000000..3ec8cc7 --- /dev/null +++ b/pyformlang/objects/formal_object.py @@ -0,0 +1,39 @@ +""" General object representation """ + +from typing import Hashable, Any +from abc import abstractmethod + + +class FormalObject: + """ General object representation """ + + def __init__(self, value: Hashable) -> None: + self._value = value + self._hash = None + + @property + def value(self) -> Hashable: + """ Gets the value of the object + + Returns + --------- + value : any + The value of the object + """ + return self._value + + @abstractmethod + def __eq__(self, other: Any) -> bool: + raise NotImplementedError + + def __hash__(self) -> int: + if self._hash is None: + self._hash = hash(self._value) + return self._hash + + def __str__(self) -> str: + return str(self._value) + + @abstractmethod + def __repr__(self) -> str: + raise NotImplementedError diff --git a/pyformlang/objects/pda_objects/epsilon.py b/pyformlang/objects/pda_objects/epsilon.py index 35d287b..710d922 100644 --- a/pyformlang/objects/pda_objects/epsilon.py +++ b/pyformlang/objects/pda_objects/epsilon.py @@ -1,19 +1,8 @@ """ An epsilon symbol """ -from typing import Any - from .stack_symbol import StackSymbol -from ..finite_automaton_objects.epsilon import EPSILON_SYMBOLS +from ..base_epsilon import BaseEpsilon -class Epsilon(StackSymbol): +class Epsilon(BaseEpsilon, StackSymbol): """ An epsilon symbol """ - - def __init__(self) -> None: - super().__init__("epsilon") - - def __eq__(self, other: Any) -> bool: - return isinstance(other, Epsilon) or other in EPSILON_SYMBOLS - - def __hash__(self) -> int: - return super().__hash__() diff --git a/pyformlang/objects/pda_objects/pda_object.py b/pyformlang/objects/pda_objects/pda_object.py index 1947693..ce70176 100644 --- a/pyformlang/objects/pda_objects/pda_object.py +++ b/pyformlang/objects/pda_objects/pda_object.py @@ -2,30 +2,12 @@ from abc import abstractmethod -from typing import Hashable, Any +from ..formal_object import FormalObject -class PDAObject: +class PDAObject(FormalObject): """ Basic PDA object representation """ - def __init__(self, value: Hashable) -> None: - self._value = value - self._hash = None - - @property - def value(self) -> Hashable: - """ Returns the value of the object """ - return self._value - - @abstractmethod - def __eq__(self, other: Any) -> bool: - raise NotImplementedError - - def __hash__(self) -> int: - if self._hash is None: - self._hash = hash(self._value) - return self._hash - @abstractmethod def __repr__(self) -> str: raise NotImplementedError diff --git a/pyformlang/objects/pda_objects/stack_symbol.py b/pyformlang/objects/pda_objects/stack_symbol.py index 1ba7780..7249ac7 100644 --- a/pyformlang/objects/pda_objects/stack_symbol.py +++ b/pyformlang/objects/pda_objects/stack_symbol.py @@ -2,9 +2,9 @@ from typing import Any -from .pda_object import PDAObject from .symbol import Symbol from ..cfg_objects import CFGConvertible +from ..formal_object import FormalObject class StackSymbol(CFGConvertible, Symbol): @@ -20,7 +20,7 @@ class StackSymbol(CFGConvertible, Symbol): def __eq__(self, other: Any) -> bool: if isinstance(other, StackSymbol): return self.value == other.value - if isinstance(other, PDAObject): + if isinstance(other, FormalObject): return False return self.value == other @@ -28,4 +28,4 @@ def __hash__(self) -> int: return super().__hash__() def __repr__(self) -> str: - return "StackSymbol(" + str(self._value) + ")" + return f"StackSymbol({self})" diff --git a/pyformlang/objects/pda_objects/state.py b/pyformlang/objects/pda_objects/state.py index 908782d..4d6b595 100644 --- a/pyformlang/objects/pda_objects/state.py +++ b/pyformlang/objects/pda_objects/state.py @@ -4,6 +4,7 @@ from .pda_object import PDAObject from ..cfg_objects import CFGConvertible +from ..formal_object import FormalObject class State(CFGConvertible, PDAObject): @@ -19,7 +20,7 @@ class State(CFGConvertible, PDAObject): def __eq__(self, other: Any) -> bool: if isinstance(other, State): return self.value == other.value - if isinstance(other, PDAObject): + if isinstance(other, FormalObject): return False return self.value == other @@ -27,4 +28,4 @@ def __hash__(self) -> int: return super().__hash__() def __repr__(self) -> str: - return "State(" + str(self._value) + ")" + return f"State({self})" diff --git a/pyformlang/objects/pda_objects/symbol.py b/pyformlang/objects/pda_objects/symbol.py index 62a28c1..9b853d8 100644 --- a/pyformlang/objects/pda_objects/symbol.py +++ b/pyformlang/objects/pda_objects/symbol.py @@ -3,6 +3,7 @@ from typing import Any from .pda_object import PDAObject +from ..formal_object import FormalObject class Symbol(PDAObject): @@ -18,7 +19,7 @@ class Symbol(PDAObject): def __eq__(self, other: Any) -> bool: if isinstance(other, Symbol): return self.value == other.value - if isinstance(other, PDAObject): + if isinstance(other, FormalObject): return False return self.value == other @@ -26,4 +27,4 @@ def __hash__(self) -> int: return super().__hash__() def __repr__(self) -> str: - return "Symbol(" + str(self._value) + ")" + return f"Symbol({self})" diff --git a/pyformlang/objects/pda_objects/utils.py b/pyformlang/objects/pda_objects/utils.py index cd8202a..862c871 100644 --- a/pyformlang/objects/pda_objects/utils.py +++ b/pyformlang/objects/pda_objects/utils.py @@ -5,7 +5,8 @@ from .state import State from .symbol import Symbol from .stack_symbol import StackSymbol -from .epsilon import Epsilon, EPSILON_SYMBOLS +from .epsilon import Epsilon +from ..base_epsilon import EPSILON_SYMBOLS def to_state(given: Hashable) -> State: diff --git a/pyformlang/objects/pyformlang_object.py b/pyformlang/objects/pyformlang_object.py deleted file mode 100644 index e69de29..0000000 diff --git a/pyformlang/pda/tests/test_pda.py b/pyformlang/pda/tests/test_pda.py index 9b6ffe1..c9dd394 100644 --- a/pyformlang/pda/tests/test_pda.py +++ b/pyformlang/pda/tests/test_pda.py @@ -12,16 +12,16 @@ @pytest.fixture def pda_example() -> PDA: - pda = PDA() - pda.add_transitions([ - ("q0", "0", "Z0", "q1", ("Z1", "Z0")), - ("q1", "1", "Z1", "q2", []), - ("q0", "epsilon", "Z1", "q2", []) - ]) - pda.set_start_state("q0") - pda.set_start_stack_symbol("Z0") - pda.add_final_state("q2") - return pda + pda = PDA() + pda.add_transitions([ + ("q0", "0", "Z0", "q1", ("Z1", "Z0")), + ("q1", "1", "Z1", "q2", []), + ("q0", "epsilon", "Z1", "q2", []) + ]) + pda.set_start_state("q0") + pda.set_start_stack_symbol("Z0") + pda.add_final_state("q2") + return pda class TestPDA: @@ -55,7 +55,7 @@ def test_creation(self): Symbol("A"), State("A")}) assert pda is not None assert len(pda.states) == 0 - assert len(pda.input_symbols) == 3 + assert len(pda.input_symbols) == 2 assert len(pda.stack_symbols) == 0 assert len(pda.final_states) == 0 @@ -85,11 +85,13 @@ def test_creation(self): def test_represent(self): """ Tests representations """ symb = Symbol("S") - assert str(symb) == "Symbol(S)" + assert repr(symb) == "Symbol(S)" state = State("T") - assert str(state) == "State(T)" + assert repr(state) == "State(T)" stack_symb = StackSymbol("U") - assert str(stack_symb) == "StackSymbol(U)" + assert repr(stack_symb) == "StackSymbol(U)" + assert repr(Epsilon()) == "epsilon" + assert str(Epsilon()) == "epsilon" def test_transition(self): """ Tests the creation of transition """ @@ -371,3 +373,18 @@ def test_copy(self, pda_example: PDA): assert pda.start_stack_symbol == pda_copy.start_stack_symbol assert pda.final_states == pda_copy.final_states assert pda is not pda_copy + + def test_object_eq(self): + """ Tests the equality of pda objects """ + assert StackSymbol("c") == StackSymbol("c") + assert State("a") == "a" + assert "C" == Symbol("C") + assert Epsilon() == Symbol("epsilon") + assert "epsilon" == Epsilon() + assert Epsilon() == "ɛ" + assert State("A") != State("B") + assert State("A") != Symbol("A") + assert Symbol("A") != StackSymbol("A") + assert StackSymbol("ABC") != Symbol("ABC") + assert State("ABC") != FAState("ABC") + assert Symbol("s") != Terminal("s") From 1e73e4b3b6700eb97039ee5e5eebae1b11d0b8d2 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Wed, 4 Dec 2024 21:28:18 +0300 Subject: [PATCH 25/30] add remove_transition method for pda, update tests --- pyformlang/fcfg/__init__.py | 4 +++- pyformlang/pda/pda.py | 18 +++++++++++++++++ pyformlang/pda/tests/test_pda.py | 29 +++++++++++++++++++++++++++ pyformlang/pda/transition_function.py | 11 ++++++++++ 4 files changed, 61 insertions(+), 1 deletion(-) diff --git a/pyformlang/fcfg/__init__.py b/pyformlang/fcfg/__init__.py index 66694b5..3824940 100644 --- a/pyformlang/fcfg/__init__.py +++ b/pyformlang/fcfg/__init__.py @@ -15,7 +15,8 @@ """ -from .fcfg import FCFG, CFGObject, Variable, Terminal, Epsilon, ParseTree +from .fcfg import FCFG, CFGObject, \ + Variable, Terminal, Epsilon, ParseTree, NotParsableException from .feature_production import FeatureProduction from .feature_structure import FeatureStructure, \ ContentAlreadyExistsException, \ @@ -31,6 +32,7 @@ "Terminal", "Epsilon", "ParseTree", + "NotParsableException", "ContentAlreadyExistsException", "FeatureStructuresNotCompatibleException", "PathDoesNotExistsException"] diff --git a/pyformlang/pda/pda.py b/pyformlang/pda/pda.py index e4d9ea4..f170d02 100644 --- a/pyformlang/pda/pda.py +++ b/pyformlang/pda/pda.py @@ -253,6 +253,24 @@ def add_transitions(self, transitions: Iterable[InputTransition]) -> None: self.add_transition(s_from, input_symbol, stack_from, s_to, stack_to) + def remove_transition(self, + s_from: Hashable, + input_symbol: Hashable, + stack_from: Hashable, + s_to: Hashable, + stack_to: Iterable[Hashable]) -> None: + """ Remove the given transition from the PDA """ + s_from = to_state(s_from) + input_symbol = to_symbol(input_symbol) + stack_from = to_stack_symbol(stack_from) + s_to = to_state(s_to) + stack_to = tuple(to_stack_symbol(x) for x in stack_to) + self._transition_function.remove_transition(s_from, + input_symbol, + stack_from, + s_to, + stack_to) + def __call__(self, s_from: Hashable, input_symbol: Hashable, diff --git a/pyformlang/pda/tests/test_pda.py b/pyformlang/pda/tests/test_pda.py index c9dd394..4729f80 100644 --- a/pyformlang/pda/tests/test_pda.py +++ b/pyformlang/pda/tests/test_pda.py @@ -388,3 +388,32 @@ def test_object_eq(self): assert StackSymbol("ABC") != Symbol("ABC") assert State("ABC") != FAState("ABC") assert Symbol("s") != Terminal("s") + + def test_contains(self, pda_example: PDA): + """ Tests the transition containment checks """ + pda = pda_example + assert ("q1", "1", "Z1", "q2", []) in pda + assert ("q0", "epsilon", "Z1", "q2", tuple()) in pda + assert ("a", "b", "c", "d", ["e"]) not in pda + pda.add_transition("q1", "1", "Z1", "q5", ["a"]) + assert ("q1", "1", "Z1", "q5", ["a"]) in pda + + def test_remove_transition(self, pda_example: PDA): + """ Tests the pda transition removal """ + pda = pda_example + assert ("q0", "0", "Z0", "q1", ("Z1", "Z0")) in pda + pda.remove_transition("q0", "0", "Z0", "q1", ("Z1", "Z0")) + assert ("q0", "0", "Z0", "q1", ("Z1", "Z0")) not in pda + pda.remove_transition("q0", "0", "Z0", "q1", ("Z1", "Z0")) + assert ("q0", "0", "Z0", "q1", ("Z1", "Z0")) not in pda + pda.remove_transition("a", "b", "c", "d", ["e"]) + assert pda.get_number_transitions() == 2 + + def test_iteration(self, pda_example: PDA): + """ Tests the iteration of pda transitions """ + pda = pda_example + transitions = list(iter(pda)) + assert (("q0", "0", "Z0"), ("q1", ("Z1", "Z0"))) in transitions + assert (("q1", "1", "Z1"), ("q2", tuple())) in transitions + assert (("q0", "epsilon", "Z1"), ("q2", tuple())) in transitions + assert len(transitions) == 3 diff --git a/pyformlang/pda/transition_function.py b/pyformlang/pda/transition_function.py index c1e34f8..fa52bec 100644 --- a/pyformlang/pda/transition_function.py +++ b/pyformlang/pda/transition_function.py @@ -56,6 +56,17 @@ def add_transition(self, else: self._transitions[temp_in] = {temp_out} + def remove_transition(self, + s_from: State, + input_symbol: Symbol, + stack_from: StackSymbol, + s_to: State, + stack_to: Tuple[StackSymbol, ...]) -> None: + """ Remove the given transition from the function """ + key = (s_from, input_symbol, stack_from) + if key in self._transitions: + self._transitions[key].discard((s_to, stack_to)) + def copy(self) -> "TransitionFunction": """ Copy the current transition function From 9b057ae074d698c864a74b43b3c608217f4c3d64 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Wed, 4 Dec 2024 22:50:55 +0300 Subject: [PATCH 26/30] add start symbol setter for cfg --- pyformlang/cfg/cfg.py | 10 ++-------- pyformlang/cfg/formal_grammar.py | 7 +++++++ pyformlang/cfg/utils.py | 7 +++++++ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/pyformlang/cfg/cfg.py b/pyformlang/cfg/cfg.py index d36126c..fcbba77 100644 --- a/pyformlang/cfg/cfg.py +++ b/pyformlang/cfg/cfg.py @@ -13,7 +13,8 @@ from .parse_tree import ParseTree from .cyk_table import CYKTable from .cfg_variable_converter import CFGVariableConverter -from .utils import remove_nullable_production, get_productions_d +from .utils import remove_nullable_production, get_productions_d, \ + is_special_text from ..objects.cfg_objects import CFGObject, \ Variable, Terminal, Epsilon, Production from ..objects.cfg_objects.utils import to_variable, to_terminal @@ -23,13 +24,6 @@ SUBS_SUFFIX = "#SUBS#" -def is_special_text(text: str) -> bool: - """ Check if the input is given an explicit type """ - return len(text) > 5 and \ - (text[0:5] == '"VAR:' or text[0:5] == '"TER:') and \ - text[-1] == '"' - - class CFG(FormalGrammar): """ A class representing a context free grammar diff --git a/pyformlang/cfg/formal_grammar.py b/pyformlang/cfg/formal_grammar.py index edc7cf3..3115a14 100644 --- a/pyformlang/cfg/formal_grammar.py +++ b/pyformlang/cfg/formal_grammar.py @@ -4,6 +4,7 @@ from abc import abstractmethod from ..objects.cfg_objects import Variable, Terminal, Production +from ..objects.cfg_objects.utils import to_variable GrammarT = TypeVar("GrammarT", bound="FormalGrammar") @@ -66,6 +67,12 @@ def start_symbol(self) -> Optional[Variable]: """ return self._start_symbol + @start_symbol.setter + def start_symbol(self, symbol: Hashable) -> None: + """ Sets the start symbol of the grammar """ + self._start_symbol = to_variable(symbol) \ + if symbol is not None else None + @abstractmethod def copy(self: GrammarT) -> GrammarT: """ Copies the grammar """ diff --git a/pyformlang/cfg/utils.py b/pyformlang/cfg/utils.py index b3dc56b..590ac39 100644 --- a/pyformlang/cfg/utils.py +++ b/pyformlang/cfg/utils.py @@ -5,6 +5,13 @@ from ..objects.cfg_objects import CFGObject, Variable, Epsilon, Production +def is_special_text(text: str) -> bool: + """ Check if the input is given an explicit type """ + return len(text) > 5 and \ + (text[0:5] == '"VAR:' or text[0:5] == '"TER:') and \ + text[-1] == '"' + + def remove_nullable_production_sub(body: List[CFGObject], nullables: AbstractSet[CFGObject]) \ -> List[List[CFGObject]]: From 626711ecc008665a0b5af0939b843479e33acd10 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Thu, 5 Dec 2024 11:39:53 +0300 Subject: [PATCH 27/30] add production and start symbol adding for cfg, correct epsilon equality --- pyformlang/cfg/cfg.py | 23 ++++------- pyformlang/cfg/cfg_variable_converter.py | 38 +++++++++---------- pyformlang/cfg/formal_grammar.py | 20 +++++++--- pyformlang/cfg/tests/test_cfg.py | 25 ++++++++++++ .../finite_automaton/tests/test_epsilon.py | 4 +- pyformlang/objects/base_epsilon.py | 3 +- pyformlang/objects/cfg_objects/__init__.py | 4 +- pyformlang/objects/cfg_objects/cfg_object.py | 5 +-- ...nvertible.py => cfg_object_convertible.py} | 9 ++++- pyformlang/objects/cfg_objects/production.py | 14 ++++--- pyformlang/objects/cfg_objects/utils.py | 5 +-- .../objects/finite_automaton_objects/state.py | 4 +- .../objects/finite_automaton_objects/utils.py | 5 +-- .../objects/pda_objects/stack_symbol.py | 4 +- pyformlang/objects/pda_objects/state.py | 4 +- pyformlang/objects/pda_objects/utils.py | 9 ++--- pyformlang/pda/tests/test_pda.py | 5 ++- 17 files changed, 108 insertions(+), 73 deletions(-) rename pyformlang/objects/cfg_objects/{cfg_convertible.py => cfg_object_convertible.py} (62%) diff --git a/pyformlang/cfg/cfg.py b/pyformlang/cfg/cfg.py index fcbba77..1517440 100644 --- a/pyformlang/cfg/cfg.py +++ b/pyformlang/cfg/cfg.py @@ -57,23 +57,13 @@ def __init__(self, start_symbol = to_variable(start_symbol) self._variables.add(start_symbol) self._start_symbol = start_symbol - if productions is not None: - productions = set(productions) - self._productions = productions or set() - for production in self._productions: - self.__initialize_production_in_cfg(production) + self._productions = set() + for production in productions or set(): + self.add_production(production) self._impacts: Dict[CFGObject, List[Tuple[CFGObject, int]]] = {} self._remaining_lists: Dict[CFGObject, List[int]] = {} self._added_impacts: Set[CFGObject] = set() - def __initialize_production_in_cfg(self, production: Production) -> None: - self._variables.add(production.head) - for cfg_object in production.body: - if isinstance(cfg_object, Terminal): - self._terminals.add(cfg_object) - elif isinstance(cfg_object, Variable): - self._variables.add(cfg_object) - def get_generating_symbols(self) -> Set[CFGObject]: """ Gives the objects which are generating in the CFG @@ -846,12 +836,13 @@ def get_words(self, max_length: int = -1) -> Iterable[List[Terminal]]: for production in productions: body = production.body if len(body) == 1 and isinstance(body[0], Terminal): + word = [body[0]] if len(gen_d[production.head]) == 1: gen_d[production.head].append([]) - if [body[0]] not in gen_d[production.head][-1]: - gen_d[production.head][-1].append([body[0]]) + if word not in gen_d[production.head][-1]: + gen_d[production.head][-1].append(word) if production.head == cfg.start_symbol: - yield [body[0]] + yield word # Complete what is missing current_length = 2 total_no_modification = 0 diff --git a/pyformlang/cfg/cfg_variable_converter.py b/pyformlang/cfg/cfg_variable_converter.py index 79b236f..35d33fc 100644 --- a/pyformlang/cfg/cfg_variable_converter.py +++ b/pyformlang/cfg/cfg_variable_converter.py @@ -2,23 +2,23 @@ from typing import Dict, List, AbstractSet, Tuple, Optional, Hashable -from ..objects.cfg_objects import Variable, CFGConvertible +from ..objects.cfg_objects import Variable, CFGObjectConvertible class CFGVariableConverter: """A CFG Variable Converter""" def __init__(self, - states: AbstractSet[CFGConvertible], - stack_symbols: AbstractSet[CFGConvertible]) -> None: + states: AbstractSet[CFGObjectConvertible], + stack_symbols: AbstractSet[CFGObjectConvertible]) -> None: self._counter = 0 - self._inverse_states_d: Dict[CFGConvertible, int] = {} + self._inverse_states_d: Dict[CFGObjectConvertible, int] = {} self._counter_state = 0 for self._counter_state, state in enumerate(states): self._inverse_states_d[state] = self._counter_state state.index_cfg_converter = self._counter_state self._counter_state += 1 - self._inverse_stack_symbol_d: Dict[CFGConvertible, int] = {} + self._inverse_stack_symbol_d: Dict[CFGObjectConvertible, int] = {} self._counter_symbol = 0 for self._counter_symbol, symbol in enumerate(stack_symbols): self._inverse_stack_symbol_d[symbol] = self._counter_symbol @@ -29,7 +29,7 @@ def __init__(self, for _ in range(len(stack_symbols))] for _ in range(len(states))] - def _get_state_index(self, state: CFGConvertible) -> int: + def _get_state_index(self, state: CFGObjectConvertible) -> int: """Get the state index""" if state.index_cfg_converter is None: if state not in self._inverse_states_d: @@ -38,7 +38,7 @@ def _get_state_index(self, state: CFGConvertible) -> int: state.index_cfg_converter = self._inverse_states_d[state] return state.index_cfg_converter - def _get_symbol_index(self, symbol: CFGConvertible) -> int: + def _get_symbol_index(self, symbol: CFGObjectConvertible) -> int: """Get the symbol index""" if symbol.index_cfg_converter is None: if symbol not in self._inverse_stack_symbol_d: @@ -48,9 +48,9 @@ def _get_symbol_index(self, symbol: CFGConvertible) -> int: return symbol.index_cfg_converter def to_cfg_combined_variable(self, - state0: CFGConvertible, - stack_symbol: CFGConvertible, - state1: CFGConvertible) -> Variable: + state0: CFGObjectConvertible, + stack_symbol: CFGObjectConvertible, + state1: CFGObjectConvertible) -> Variable: """ Conversion used in the to_pda method """ i_stack_symbol, i_state0, i_state1 = self._get_indexes( stack_symbol, state0, state1) @@ -74,9 +74,9 @@ def _create_new_variable(self, return temp def set_valid(self, - state0: CFGConvertible, - stack_symbol: CFGConvertible, - state1: CFGConvertible) -> None: + state0: CFGObjectConvertible, + stack_symbol: CFGObjectConvertible, + state1: CFGObjectConvertible) -> None: """Set valid""" i_stack_symbol, i_state0, i_state1 = self._get_indexes( stack_symbol, state0, state1) @@ -84,9 +84,9 @@ def set_valid(self, self._conversions[i_state0][i_stack_symbol][i_state1] = (True, prev[1]) def is_valid_and_get(self, - state0: CFGConvertible, - stack_symbol: CFGConvertible, - state1: CFGConvertible) -> Optional[Variable]: + state0: CFGObjectConvertible, + stack_symbol: CFGObjectConvertible, + state1: CFGObjectConvertible) -> Optional[Variable]: """Check if valid and get""" i_state0 = self._get_state_index(state0) i_stack_symbol = self._get_symbol_index(stack_symbol) @@ -102,9 +102,9 @@ def is_valid_and_get(self, return current[1] def _get_indexes(self, - stack_symbol: CFGConvertible, - state0: CFGConvertible, - state1: CFGConvertible) \ + stack_symbol: CFGObjectConvertible, + state0: CFGObjectConvertible, + state1: CFGObjectConvertible) \ -> Tuple[int, int, int]: i_state0 = self._get_state_index(state0) i_stack_symbol = self._get_symbol_index(stack_symbol) diff --git a/pyformlang/cfg/formal_grammar.py b/pyformlang/cfg/formal_grammar.py index 3115a14..75300d8 100644 --- a/pyformlang/cfg/formal_grammar.py +++ b/pyformlang/cfg/formal_grammar.py @@ -67,11 +67,21 @@ def start_symbol(self) -> Optional[Variable]: """ return self._start_symbol - @start_symbol.setter - def start_symbol(self, symbol: Hashable) -> None: - """ Sets the start symbol of the grammar """ - self._start_symbol = to_variable(symbol) \ - if symbol is not None else None + def add_production(self, production: Production) -> None: + """ Adds the given production to the grammar """ + self.variables.update(production.variables) + self.terminals.update(production.terminals) + self.productions.add(production) + + def add_start_symbol(self, symbol: Hashable) -> None: + """ Adds the start symbol to the grammar """ + symbol = to_variable(symbol) + self.variables.add(symbol) + self._start_symbol = symbol + + def remove_start_symbol(self) -> None: + """ Removes the start symbol from the grammar """ + self._start_symbol = None @abstractmethod def copy(self: GrammarT) -> GrammarT: diff --git a/pyformlang/cfg/tests/test_cfg.py b/pyformlang/cfg/tests/test_cfg.py index c38d66a..dfb11e6 100644 --- a/pyformlang/cfg/tests/test_cfg.py +++ b/pyformlang/cfg/tests/test_cfg.py @@ -847,6 +847,31 @@ def test_copy(self): assert cfg.start_symbol == cfg_copy.start_symbol assert cfg is not cfg_copy + def test_add_production(self): + text_example = get_example_text_duplicate() + cfg = CFG.from_text(text_example) + assert Epsilon() not in cfg.terminals + production = Production(Variable("K"), + [Epsilon(), Terminal("a"), Variable("B")]) + cfg.add_production(production) + assert production in cfg.productions + assert "K" in cfg.variables + assert "a" in cfg.terminals + assert "B" in cfg.variables + assert Epsilon() not in cfg.terminals + + def test_start_symbol(self): + cfg = CFG() + assert not cfg.variables + assert not cfg.start_symbol + cfg.add_start_symbol("S") + assert cfg.start_symbol == "S" + assert "S" in cfg.variables + cfg.remove_start_symbol() + assert not cfg.start_symbol + cfg.remove_start_symbol() + assert not cfg.start_symbol + def get_example_text_duplicate(): """ Duplicate text """ diff --git a/pyformlang/finite_automaton/tests/test_epsilon.py b/pyformlang/finite_automaton/tests/test_epsilon.py index 9ae201f..7dd5add 100644 --- a/pyformlang/finite_automaton/tests/test_epsilon.py +++ b/pyformlang/finite_automaton/tests/test_epsilon.py @@ -3,7 +3,7 @@ """ from pyformlang.finite_automaton import Epsilon -from pyformlang.finite_automaton import Symbol +from pyformlang.finite_automaton import State, Symbol class TestEpsilon: @@ -18,3 +18,5 @@ def test_epsilon(self): assert eps0 != symb assert "epsilon" == Epsilon() assert Epsilon() == "ɛ" + assert Symbol("ɛ") != Epsilon() + assert Epsilon() != State("epsilon") diff --git a/pyformlang/objects/base_epsilon.py b/pyformlang/objects/base_epsilon.py index a245a92..1d5aa99 100644 --- a/pyformlang/objects/base_epsilon.py +++ b/pyformlang/objects/base_epsilon.py @@ -21,7 +21,8 @@ def __init__(self) -> None: super().__init__("epsilon") def __eq__(self, other: Any) -> bool: - return isinstance(other, BaseEpsilon) or other in EPSILON_SYMBOLS + return isinstance(other, BaseEpsilon) \ + or not isinstance(other, FormalObject) and other in EPSILON_SYMBOLS def __hash__(self) -> int: return super().__hash__() diff --git a/pyformlang/objects/cfg_objects/__init__.py b/pyformlang/objects/cfg_objects/__init__.py index 11c3d1b..726db30 100644 --- a/pyformlang/objects/cfg_objects/__init__.py +++ b/pyformlang/objects/cfg_objects/__init__.py @@ -5,7 +5,7 @@ from .terminal import Terminal from .epsilon import Epsilon from .production import Production -from .cfg_convertible import CFGConvertible +from .cfg_object_convertible import CFGObjectConvertible __all__ = ["CFGObject", @@ -13,4 +13,4 @@ "Terminal", "Epsilon", "Production", - "CFGConvertible"] + "CFGObjectConvertible"] diff --git a/pyformlang/objects/cfg_objects/cfg_object.py b/pyformlang/objects/cfg_objects/cfg_object.py index 49466dd..1e27841 100644 --- a/pyformlang/objects/cfg_objects/cfg_object.py +++ b/pyformlang/objects/cfg_objects/cfg_object.py @@ -2,11 +2,10 @@ from abc import abstractmethod -from .cfg_convertible import CFGConvertible -from ..formal_object import FormalObject +from .cfg_object_convertible import CFGObjectConvertible -class CFGObject(CFGConvertible, FormalObject): +class CFGObject(CFGObjectConvertible): """ An object in a CFG Parameters diff --git a/pyformlang/objects/cfg_objects/cfg_convertible.py b/pyformlang/objects/cfg_objects/cfg_object_convertible.py similarity index 62% rename from pyformlang/objects/cfg_objects/cfg_convertible.py rename to pyformlang/objects/cfg_objects/cfg_object_convertible.py index 70daed2..dbd6b8e 100644 --- a/pyformlang/objects/cfg_objects/cfg_convertible.py +++ b/pyformlang/objects/cfg_objects/cfg_object_convertible.py @@ -1,11 +1,18 @@ """ Interface representing the ability of conversion to cfg object """ from typing import Optional, Any +from abc import abstractmethod +from ..formal_object import FormalObject -class CFGConvertible: + +class CFGObjectConvertible(FormalObject): """ Interface representing the ability of conversion to cfg object """ def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) self.index_cfg_converter: Optional[int] = None + + @abstractmethod + def __repr__(self) -> str: + raise NotImplementedError diff --git a/pyformlang/objects/cfg_objects/production.py b/pyformlang/objects/cfg_objects/production.py index 91ac63e..5e9bd2e 100644 --- a/pyformlang/objects/cfg_objects/production.py +++ b/pyformlang/objects/cfg_objects/production.py @@ -43,14 +43,16 @@ def body(self) -> List[CFGObject]: return self._body @property - def body_variables(self) -> Set[Variable]: - """Gets variables of body of the production""" - return {object for object in self.body if isinstance(object, Variable)} + def variables(self) -> Set[Variable]: + """Gets variables used in the production""" + return {self.head} | {object for object in self.body + if isinstance(object, Variable)} @property - def body_terminals(self) -> Set[Terminal]: - """Gets terminals of body of the production""" - return {object for object in self.body if isinstance(object, Terminal)} + def terminals(self) -> Set[Terminal]: + """Gets terminals used in the production""" + return {object for object in self.body + if isinstance(object, Terminal) and object != Epsilon()} def __eq__(self, other: Any) -> bool: if not isinstance(other, Production): diff --git a/pyformlang/objects/cfg_objects/utils.py b/pyformlang/objects/cfg_objects/utils.py index 1c26336..76dbb96 100644 --- a/pyformlang/objects/cfg_objects/utils.py +++ b/pyformlang/objects/cfg_objects/utils.py @@ -5,7 +5,6 @@ from .variable import Variable from .terminal import Terminal from .epsilon import Epsilon -from ..base_epsilon import EPSILON_SYMBOLS def to_variable(given: Hashable) -> Variable: @@ -17,8 +16,8 @@ def to_variable(given: Hashable) -> Variable: def to_terminal(given: Hashable) -> Terminal: """ Transformation into a terminal """ + if given == Epsilon(): + return Epsilon() if isinstance(given, Terminal): return given - if given in EPSILON_SYMBOLS: - return Epsilon() return Terminal(given) diff --git a/pyformlang/objects/finite_automaton_objects/state.py b/pyformlang/objects/finite_automaton_objects/state.py index 6939abb..ad22c2e 100644 --- a/pyformlang/objects/finite_automaton_objects/state.py +++ b/pyformlang/objects/finite_automaton_objects/state.py @@ -5,11 +5,11 @@ from typing import Any from .finite_automaton_object import FiniteAutomatonObject -from ..cfg_objects import CFGConvertible +from ..cfg_objects import CFGObjectConvertible from ..formal_object import FormalObject -class State(CFGConvertible, FiniteAutomatonObject): +class State(CFGObjectConvertible, FiniteAutomatonObject): """ A state in a finite automaton Parameters diff --git a/pyformlang/objects/finite_automaton_objects/utils.py b/pyformlang/objects/finite_automaton_objects/utils.py index 4ad3655..462596e 100644 --- a/pyformlang/objects/finite_automaton_objects/utils.py +++ b/pyformlang/objects/finite_automaton_objects/utils.py @@ -5,7 +5,6 @@ from .state import State from .symbol import Symbol from .epsilon import Epsilon -from ..base_epsilon import EPSILON_SYMBOLS def to_state(given: Hashable) -> State: @@ -29,8 +28,8 @@ def to_symbol(given: Hashable) -> Symbol: given : any What we want to transform """ + if given == Epsilon(): + return Epsilon() if isinstance(given, Symbol): return given - if given in EPSILON_SYMBOLS: - return Epsilon() return Symbol(given) diff --git a/pyformlang/objects/pda_objects/stack_symbol.py b/pyformlang/objects/pda_objects/stack_symbol.py index 7249ac7..156e49a 100644 --- a/pyformlang/objects/pda_objects/stack_symbol.py +++ b/pyformlang/objects/pda_objects/stack_symbol.py @@ -3,11 +3,11 @@ from typing import Any from .symbol import Symbol -from ..cfg_objects import CFGConvertible +from ..cfg_objects import CFGObjectConvertible from ..formal_object import FormalObject -class StackSymbol(CFGConvertible, Symbol): +class StackSymbol(CFGObjectConvertible, Symbol): """ A StackSymbol in a pushdown automaton Parameters diff --git a/pyformlang/objects/pda_objects/state.py b/pyformlang/objects/pda_objects/state.py index 4d6b595..2a8034b 100644 --- a/pyformlang/objects/pda_objects/state.py +++ b/pyformlang/objects/pda_objects/state.py @@ -3,11 +3,11 @@ from typing import Any from .pda_object import PDAObject -from ..cfg_objects import CFGConvertible +from ..cfg_objects import CFGObjectConvertible from ..formal_object import FormalObject -class State(CFGConvertible, PDAObject): +class State(CFGObjectConvertible, PDAObject): """ A State in a pushdown automaton Parameters diff --git a/pyformlang/objects/pda_objects/utils.py b/pyformlang/objects/pda_objects/utils.py index 862c871..6fc0f37 100644 --- a/pyformlang/objects/pda_objects/utils.py +++ b/pyformlang/objects/pda_objects/utils.py @@ -6,7 +6,6 @@ from .symbol import Symbol from .stack_symbol import StackSymbol from .epsilon import Epsilon -from ..base_epsilon import EPSILON_SYMBOLS def to_state(given: Hashable) -> State: @@ -18,17 +17,17 @@ def to_state(given: Hashable) -> State: def to_symbol(given: Hashable) -> Symbol: """ Convert to a symbol """ + if given == Epsilon(): + return Epsilon() if isinstance(given, Symbol): return given - if given in EPSILON_SYMBOLS: - return Epsilon() return Symbol(given) def to_stack_symbol(given: Hashable) -> StackSymbol: """ Convert to a stack symbol """ + if given == Epsilon(): + return Epsilon() if isinstance(given, StackSymbol): return given - if given in EPSILON_SYMBOLS: - return Epsilon() return StackSymbol(given) diff --git a/pyformlang/pda/tests/test_pda.py b/pyformlang/pda/tests/test_pda.py index 4729f80..e4e0640 100644 --- a/pyformlang/pda/tests/test_pda.py +++ b/pyformlang/pda/tests/test_pda.py @@ -4,7 +4,7 @@ from os import path from pyformlang.pda import PDA, State, StackSymbol, Symbol, Epsilon -from pyformlang.cfg import Terminal +from pyformlang.cfg import Terminal, Epsilon as CFGEpsilon from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.finite_automaton import State as FAState, Symbol as FASymbol from pyformlang.regular_expression import Regex @@ -379,7 +379,8 @@ def test_object_eq(self): assert StackSymbol("c") == StackSymbol("c") assert State("a") == "a" assert "C" == Symbol("C") - assert Epsilon() == Symbol("epsilon") + assert Epsilon() != Symbol("epsilon") + assert Epsilon() == CFGEpsilon() assert "epsilon" == Epsilon() assert Epsilon() == "ɛ" assert State("A") != State("B") From a65396652dccb5a555b1d981e973cef2240c75a7 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Sat, 7 Dec 2024 16:04:50 +0300 Subject: [PATCH 28/30] correct fcfg production properties and adding --- pyformlang/fcfg/fcfg.py | 54 +++++++++++++++++++----------- pyformlang/fcfg/tests/test_fcfg.py | 52 +++++++++++++++++++++++++++- 2 files changed, 85 insertions(+), 21 deletions(-) diff --git a/pyformlang/fcfg/fcfg.py b/pyformlang/fcfg/fcfg.py index 5b502a7..a2c82c3 100644 --- a/pyformlang/fcfg/fcfg.py +++ b/pyformlang/fcfg/fcfg.py @@ -59,29 +59,26 @@ class FCFG(CFG): """ def __init__(self, - variables: AbstractSet[Variable] = None, - terminals: AbstractSet[Terminal] = None, - start_symbol: Variable = None, - productions: Iterable[FeatureProduction] = None) -> None: + variables: AbstractSet[Hashable] = None, + terminals: AbstractSet[Hashable] = None, + start_symbol: Hashable = None, + productions: Iterable[Production] = None) -> None: super().__init__(variables, terminals, start_symbol, productions) self._productions: Set[FeatureProduction] - def __predictor(self, - state: State, - chart: List[List[State]], - processed: StateProcessed) -> None: - # We have an incomplete state and the next token is a variable - # We must ask to process the variable with another rule - end_idx = state.positions[1] - next_var = state.production.body[state.positions[2]] - for production in self._productions: - if production.head == next_var: - new_state = State(production, - (end_idx, end_idx, 0), - production.features, - ParseTree(production.head)) - if processed.add(end_idx, new_state): - chart[end_idx].append(new_state) + @property + def feature_productions(self) -> Set[FeatureProduction]: + """ Gets the feature productions of the grammar """ + return self._productions + + def add_production(self, production: Production) -> None: + """ Adds given production to the grammar """ + if not isinstance(production, FeatureProduction): + production = FeatureProduction(production.head, + production.body, + FeatureStructure(), + [FeatureStructure()]) + super().add_production(production) def contains(self, word: Iterable[Hashable]) -> bool: """ Gives the membership of a word to the grammar @@ -212,6 +209,23 @@ def _read_line(cls, production = FeatureProduction(head, body, head_fs, all_body_fs) productions.add(production) + def __predictor(self, + state: State, + chart: List[List[State]], + processed: StateProcessed) -> None: + # We have an incomplete state and the next token is a variable + # We must ask to process the variable with another rule + end_idx = state.positions[1] + next_var = state.production.body[state.positions[2]] + for production in self._productions: + if production.head == next_var: + new_state = State(production, + (end_idx, end_idx, 0), + production.features, + ParseTree(production.head)) + if processed.add(end_idx, new_state): + chart[end_idx].append(new_state) + def _split_text_conditions(head_text: str) -> Tuple[str, str]: if head_text[-1] != "]": diff --git a/pyformlang/fcfg/tests/test_fcfg.py b/pyformlang/fcfg/tests/test_fcfg.py index ed5c8d9..7163f35 100644 --- a/pyformlang/fcfg/tests/test_fcfg.py +++ b/pyformlang/fcfg/tests/test_fcfg.py @@ -1,6 +1,7 @@ """Test a FCFG""" -from pyformlang.cfg import Variable, Terminal +from pyformlang.cfg import Variable, Terminal, Production +from pyformlang.cfg import DerivationDoesNotExist from pyformlang.cfg.parse_tree import ParseTree from pyformlang.cfg.llone_parser import NotParsableException from pyformlang.fcfg.fcfg import FCFG @@ -32,6 +33,30 @@ def fcfg_text() -> str: class TestFCFG: """Test a FCFG""" + def test_creation(self): + """ Tests creation of FCFG """ + variable0 = Variable(0) + terminal0 = Terminal("a") + prod0 = Production(variable0, [terminal0, Terminal("A"), Variable(1)]) + fcfg = FCFG({variable0}, {terminal0}, variable0, {prod0}) + assert fcfg is not None + assert len(fcfg.variables) == 2 + assert len(fcfg.terminals) == 2 + assert len(fcfg.productions) == 1 + assert len(fcfg.feature_productions) == 1 + assert fcfg.productions == fcfg.feature_productions + assert fcfg.is_empty() + assert all(isinstance(prod, FeatureProduction) + for prod in fcfg.productions) + + fcfg = FCFG() + assert fcfg is not None + assert len(fcfg.variables) == 0 + assert len(fcfg.terminals) == 0 + assert len(fcfg.productions) == 0 + assert len(fcfg.feature_productions) == 0 + assert fcfg.is_empty() + def test_contains(self): """Test containment""" # 1st: S -> NP VP @@ -231,3 +256,28 @@ def test_copy(self, fcfg_text: str): assert fcfg.productions == fcfg_copy.productions assert fcfg.start_symbol == fcfg_copy.start_symbol assert fcfg is not fcfg_copy + + def test_get_leftmost_derivation(self): + ter_a = Terminal("a") + ter_b = Terminal("b") + var_s = Variable("S") + var_a = Variable("A") + var_b = Variable("B") + var_c = Variable("C") + productions = [Production(var_s, [var_c, var_b]), + Production(var_c, [var_a, var_a]), + Production(var_a, [ter_a]), + Production(var_b, [ter_b]) + ] + fcfg = FCFG(productions=productions, start_symbol=var_s) + parse_tree = fcfg.get_cnf_parse_tree([ter_a, ter_a, ter_b]) + derivation = parse_tree.get_leftmost_derivation() + assert derivation == \ + [[var_s], + [var_c, var_b], + [var_a, var_a, var_b], + [ter_a, var_a, var_b], + [ter_a, ter_a, var_b], + [ter_a, ter_a, ter_b]] + with pytest.raises(DerivationDoesNotExist): + fcfg.get_cnf_parse_tree([]) From f4189135dc323243d88214d94240b7522ee21b26 Mon Sep 17 00:00:00 2001 From: bygu4 Date: Sun, 29 Dec 2024 17:13:29 +0300 Subject: [PATCH 29/30] unify terminal representations --- pyformlang/cfg/cfg_variable_converter.py | 55 ++++++++++--------- pyformlang/cfg/tests/test_terminal.py | 2 +- pyformlang/objects/base_epsilon.py | 3 +- pyformlang/objects/base_terminal.py | 24 ++++++++ pyformlang/objects/cfg_objects/__init__.py | 4 +- pyformlang/objects/cfg_objects/cfg_object.py | 4 +- .../cfg_objects/cfg_object_convertible.py | 18 ------ pyformlang/objects/cfg_objects/terminal.py | 16 +----- .../objects/finite_automaton_objects/state.py | 3 +- .../finite_automaton_objects/symbol.py | 16 +----- pyformlang/objects/formal_object.py | 3 +- .../objects/pda_objects/stack_symbol.py | 3 +- pyformlang/objects/pda_objects/state.py | 3 +- pyformlang/objects/pda_objects/symbol.py | 16 +----- pyformlang/pda/tests/test_pda.py | 2 +- 15 files changed, 70 insertions(+), 102 deletions(-) create mode 100644 pyformlang/objects/base_terminal.py delete mode 100644 pyformlang/objects/cfg_objects/cfg_object_convertible.py diff --git a/pyformlang/cfg/cfg_variable_converter.py b/pyformlang/cfg/cfg_variable_converter.py index 35d33fc..bd8dd10 100644 --- a/pyformlang/cfg/cfg_variable_converter.py +++ b/pyformlang/cfg/cfg_variable_converter.py @@ -2,55 +2,56 @@ from typing import Dict, List, AbstractSet, Tuple, Optional, Hashable -from ..objects.cfg_objects import Variable, CFGObjectConvertible +from ..objects.formal_object import FormalObject +from ..objects.cfg_objects import Variable class CFGVariableConverter: """A CFG Variable Converter""" def __init__(self, - states: AbstractSet[CFGObjectConvertible], - stack_symbols: AbstractSet[CFGObjectConvertible]) -> None: + states: AbstractSet[FormalObject], + stack_symbols: AbstractSet[FormalObject]) -> None: self._counter = 0 - self._inverse_states_d: Dict[CFGObjectConvertible, int] = {} + self._inverse_states_d: Dict[FormalObject, int] = {} self._counter_state = 0 for self._counter_state, state in enumerate(states): self._inverse_states_d[state] = self._counter_state - state.index_cfg_converter = self._counter_state + state.index = self._counter_state self._counter_state += 1 - self._inverse_stack_symbol_d: Dict[CFGObjectConvertible, int] = {} + self._inverse_stack_symbol_d: Dict[FormalObject, int] = {} self._counter_symbol = 0 for self._counter_symbol, symbol in enumerate(stack_symbols): self._inverse_stack_symbol_d[symbol] = self._counter_symbol - symbol.index_cfg_converter = self._counter_symbol + symbol.index = self._counter_symbol self._counter_symbol += 1 self._conversions: List[List[List[Tuple[bool, Optional[Variable]]]]] \ = [[[(False, None) for _ in range(len(states))] for _ in range(len(stack_symbols))] for _ in range(len(states))] - def _get_state_index(self, state: CFGObjectConvertible) -> int: + def _get_state_index(self, state: FormalObject) -> int: """Get the state index""" - if state.index_cfg_converter is None: + if state.index is None: if state not in self._inverse_states_d: self._inverse_states_d[state] = self._counter_state self._counter_state += 1 - state.index_cfg_converter = self._inverse_states_d[state] - return state.index_cfg_converter + state.index = self._inverse_states_d[state] + return state.index - def _get_symbol_index(self, symbol: CFGObjectConvertible) -> int: + def _get_symbol_index(self, symbol: FormalObject) -> int: """Get the symbol index""" - if symbol.index_cfg_converter is None: + if symbol.index is None: if symbol not in self._inverse_stack_symbol_d: self._inverse_stack_symbol_d[symbol] = self._counter_symbol self._counter_symbol += 1 - symbol.index_cfg_converter = self._inverse_stack_symbol_d[symbol] - return symbol.index_cfg_converter + symbol.index = self._inverse_stack_symbol_d[symbol] + return symbol.index def to_cfg_combined_variable(self, - state0: CFGObjectConvertible, - stack_symbol: CFGObjectConvertible, - state1: CFGObjectConvertible) -> Variable: + state0: FormalObject, + stack_symbol: FormalObject, + state1: FormalObject) -> Variable: """ Conversion used in the to_pda method """ i_stack_symbol, i_state0, i_state1 = self._get_indexes( stack_symbol, state0, state1) @@ -74,9 +75,9 @@ def _create_new_variable(self, return temp def set_valid(self, - state0: CFGObjectConvertible, - stack_symbol: CFGObjectConvertible, - state1: CFGObjectConvertible) -> None: + state0: FormalObject, + stack_symbol: FormalObject, + state1: FormalObject) -> None: """Set valid""" i_stack_symbol, i_state0, i_state1 = self._get_indexes( stack_symbol, state0, state1) @@ -84,9 +85,9 @@ def set_valid(self, self._conversions[i_state0][i_stack_symbol][i_state1] = (True, prev[1]) def is_valid_and_get(self, - state0: CFGObjectConvertible, - stack_symbol: CFGObjectConvertible, - state1: CFGObjectConvertible) -> Optional[Variable]: + state0: FormalObject, + stack_symbol: FormalObject, + state1: FormalObject) -> Optional[Variable]: """Check if valid and get""" i_state0 = self._get_state_index(state0) i_stack_symbol = self._get_symbol_index(stack_symbol) @@ -102,9 +103,9 @@ def is_valid_and_get(self, return current[1] def _get_indexes(self, - stack_symbol: CFGObjectConvertible, - state0: CFGObjectConvertible, - state1: CFGObjectConvertible) \ + stack_symbol: FormalObject, + state0: FormalObject, + state1: FormalObject) \ -> Tuple[int, int, int]: i_state0 = self._get_state_index(state0) i_stack_symbol = self._get_symbol_index(stack_symbol) diff --git a/pyformlang/cfg/tests/test_terminal.py b/pyformlang/cfg/tests/test_terminal.py index ee44461..7cd9a0e 100644 --- a/pyformlang/cfg/tests/test_terminal.py +++ b/pyformlang/cfg/tests/test_terminal.py @@ -34,5 +34,5 @@ def test_eq(self): assert "A" == Terminal("A") assert Variable(1) == 1 assert Epsilon() == FAEpsilon() - assert Terminal("ABC") != Symbol("ABC") + assert Terminal("ABC") == Symbol("ABC") assert State("S") != Variable("S") diff --git a/pyformlang/objects/base_epsilon.py b/pyformlang/objects/base_epsilon.py index 1d5aa99..926c859 100644 --- a/pyformlang/objects/base_epsilon.py +++ b/pyformlang/objects/base_epsilon.py @@ -3,11 +3,12 @@ from typing import Any from .formal_object import FormalObject +from .base_terminal import BaseTerminal EPSILON_SYMBOLS = ["epsilon", "ɛ"] -class BaseEpsilon(FormalObject): +class BaseEpsilon(BaseTerminal): """ An epsilon transition Examples diff --git a/pyformlang/objects/base_terminal.py b/pyformlang/objects/base_terminal.py new file mode 100644 index 0000000..3f5d48b --- /dev/null +++ b/pyformlang/objects/base_terminal.py @@ -0,0 +1,24 @@ +""" General terminal representation """ + +from typing import Any +from abc import abstractmethod + +from .formal_object import FormalObject + + +class BaseTerminal(FormalObject): + """ General terminal representation """ + + def __eq__(self, other: Any) -> bool: + if isinstance(other, BaseTerminal): + return self.value == other.value + if isinstance(other, FormalObject): + return False + return self.value == other + + def __hash__(self) -> int: + return super().__hash__() + + @abstractmethod + def __repr__(self): + raise NotImplementedError diff --git a/pyformlang/objects/cfg_objects/__init__.py b/pyformlang/objects/cfg_objects/__init__.py index 726db30..a999974 100644 --- a/pyformlang/objects/cfg_objects/__init__.py +++ b/pyformlang/objects/cfg_objects/__init__.py @@ -5,12 +5,10 @@ from .terminal import Terminal from .epsilon import Epsilon from .production import Production -from .cfg_object_convertible import CFGObjectConvertible __all__ = ["CFGObject", "Variable", "Terminal", "Epsilon", - "Production", - "CFGObjectConvertible"] + "Production"] diff --git a/pyformlang/objects/cfg_objects/cfg_object.py b/pyformlang/objects/cfg_objects/cfg_object.py index 1e27841..e347e1e 100644 --- a/pyformlang/objects/cfg_objects/cfg_object.py +++ b/pyformlang/objects/cfg_objects/cfg_object.py @@ -2,10 +2,10 @@ from abc import abstractmethod -from .cfg_object_convertible import CFGObjectConvertible +from ..formal_object import FormalObject -class CFGObject(CFGObjectConvertible): +class CFGObject(FormalObject): """ An object in a CFG Parameters diff --git a/pyformlang/objects/cfg_objects/cfg_object_convertible.py b/pyformlang/objects/cfg_objects/cfg_object_convertible.py deleted file mode 100644 index dbd6b8e..0000000 --- a/pyformlang/objects/cfg_objects/cfg_object_convertible.py +++ /dev/null @@ -1,18 +0,0 @@ -""" Interface representing the ability of conversion to cfg object """ - -from typing import Optional, Any -from abc import abstractmethod - -from ..formal_object import FormalObject - - -class CFGObjectConvertible(FormalObject): - """ Interface representing the ability of conversion to cfg object """ - - def __init__(self, *args: Any, **kwargs: Any) -> None: - super().__init__(*args, **kwargs) - self.index_cfg_converter: Optional[int] = None - - @abstractmethod - def __repr__(self) -> str: - raise NotImplementedError diff --git a/pyformlang/objects/cfg_objects/terminal.py b/pyformlang/objects/cfg_objects/terminal.py index 9a8a595..68f4852 100644 --- a/pyformlang/objects/cfg_objects/terminal.py +++ b/pyformlang/objects/cfg_objects/terminal.py @@ -1,24 +1,12 @@ """ A terminal in a CFG """ -from typing import Any - from .cfg_object import CFGObject -from ..formal_object import FormalObject +from ..base_terminal import BaseTerminal -class Terminal(CFGObject): +class Terminal(BaseTerminal, CFGObject): """ A terminal in a CFG """ - def __eq__(self, other: Any) -> bool: - if isinstance(other, Terminal): - return self.value == other.value - if isinstance(other, FormalObject): - return False - return self.value == other - - def __hash__(self) -> int: - return super().__hash__() - def __repr__(self) -> str: return f"Terminal({self})" diff --git a/pyformlang/objects/finite_automaton_objects/state.py b/pyformlang/objects/finite_automaton_objects/state.py index ad22c2e..d8fb45c 100644 --- a/pyformlang/objects/finite_automaton_objects/state.py +++ b/pyformlang/objects/finite_automaton_objects/state.py @@ -5,11 +5,10 @@ from typing import Any from .finite_automaton_object import FiniteAutomatonObject -from ..cfg_objects import CFGObjectConvertible from ..formal_object import FormalObject -class State(CFGObjectConvertible, FiniteAutomatonObject): +class State(FiniteAutomatonObject): """ A state in a finite automaton Parameters diff --git a/pyformlang/objects/finite_automaton_objects/symbol.py b/pyformlang/objects/finite_automaton_objects/symbol.py index 5db815a..090692d 100644 --- a/pyformlang/objects/finite_automaton_objects/symbol.py +++ b/pyformlang/objects/finite_automaton_objects/symbol.py @@ -2,13 +2,11 @@ This module describe a symbol in a finite automaton. """ -from typing import Any - from .finite_automaton_object import FiniteAutomatonObject -from ..formal_object import FormalObject +from ..base_terminal import BaseTerminal -class Symbol(FiniteAutomatonObject): +class Symbol(BaseTerminal, FiniteAutomatonObject): """ A symbol in a finite automaton Parameters @@ -23,15 +21,5 @@ class Symbol(FiniteAutomatonObject): A """ - def __eq__(self, other: Any) -> bool: - if isinstance(other, Symbol): - return self.value == other.value - if isinstance(other, FormalObject): - return False - return self.value == other - - def __hash__(self) -> int: - return super().__hash__() - def __repr__(self) -> str: return f"Symbol({self})" diff --git a/pyformlang/objects/formal_object.py b/pyformlang/objects/formal_object.py index 3ec8cc7..4af8560 100644 --- a/pyformlang/objects/formal_object.py +++ b/pyformlang/objects/formal_object.py @@ -1,6 +1,6 @@ """ General object representation """ -from typing import Hashable, Any +from typing import Hashable, Optional, Any from abc import abstractmethod @@ -10,6 +10,7 @@ class FormalObject: def __init__(self, value: Hashable) -> None: self._value = value self._hash = None + self.index: Optional[int] = None @property def value(self) -> Hashable: diff --git a/pyformlang/objects/pda_objects/stack_symbol.py b/pyformlang/objects/pda_objects/stack_symbol.py index 156e49a..0fb4c91 100644 --- a/pyformlang/objects/pda_objects/stack_symbol.py +++ b/pyformlang/objects/pda_objects/stack_symbol.py @@ -3,11 +3,10 @@ from typing import Any from .symbol import Symbol -from ..cfg_objects import CFGObjectConvertible from ..formal_object import FormalObject -class StackSymbol(CFGObjectConvertible, Symbol): +class StackSymbol(Symbol): """ A StackSymbol in a pushdown automaton Parameters diff --git a/pyformlang/objects/pda_objects/state.py b/pyformlang/objects/pda_objects/state.py index 2a8034b..b174354 100644 --- a/pyformlang/objects/pda_objects/state.py +++ b/pyformlang/objects/pda_objects/state.py @@ -3,11 +3,10 @@ from typing import Any from .pda_object import PDAObject -from ..cfg_objects import CFGObjectConvertible from ..formal_object import FormalObject -class State(CFGObjectConvertible, PDAObject): +class State(PDAObject): """ A State in a pushdown automaton Parameters diff --git a/pyformlang/objects/pda_objects/symbol.py b/pyformlang/objects/pda_objects/symbol.py index 9b853d8..78843de 100644 --- a/pyformlang/objects/pda_objects/symbol.py +++ b/pyformlang/objects/pda_objects/symbol.py @@ -1,12 +1,10 @@ """ A Symbol in a pushdown automaton """ -from typing import Any - from .pda_object import PDAObject -from ..formal_object import FormalObject +from ..base_terminal import BaseTerminal -class Symbol(PDAObject): +class Symbol(BaseTerminal, PDAObject): """ A Symbol in a pushdown automaton Parameters @@ -16,15 +14,5 @@ class Symbol(PDAObject): """ - def __eq__(self, other: Any) -> bool: - if isinstance(other, Symbol): - return self.value == other.value - if isinstance(other, FormalObject): - return False - return self.value == other - - def __hash__(self) -> int: - return super().__hash__() - def __repr__(self) -> str: return f"Symbol({self})" diff --git a/pyformlang/pda/tests/test_pda.py b/pyformlang/pda/tests/test_pda.py index e4e0640..e0f7f17 100644 --- a/pyformlang/pda/tests/test_pda.py +++ b/pyformlang/pda/tests/test_pda.py @@ -388,7 +388,7 @@ def test_object_eq(self): assert Symbol("A") != StackSymbol("A") assert StackSymbol("ABC") != Symbol("ABC") assert State("ABC") != FAState("ABC") - assert Symbol("s") != Terminal("s") + assert Symbol("s") == Terminal("s") def test_contains(self, pda_example: PDA): """ Tests the transition containment checks """ From 770cecf040c0fcb99aa6f2348b47cf9e53c3473b Mon Sep 17 00:00:00 2001 From: bygu4 Date: Sun, 29 Dec 2024 18:02:45 +0300 Subject: [PATCH 30/30] correct formal objects equality --- pyformlang/objects/base_epsilon.py | 3 +++ pyformlang/objects/base_terminal.py | 14 +++----------- pyformlang/objects/cfg_objects/variable.py | 14 +++----------- .../objects/finite_automaton_objects/state.py | 15 +++------------ pyformlang/objects/formal_object.py | 9 +++++++-- pyformlang/objects/pda_objects/stack_symbol.py | 15 +++------------ pyformlang/objects/pda_objects/state.py | 15 +++------------ pyformlang/pda/tests/test_pda.py | 2 ++ 8 files changed, 27 insertions(+), 60 deletions(-) diff --git a/pyformlang/objects/base_epsilon.py b/pyformlang/objects/base_epsilon.py index 926c859..17c06b2 100644 --- a/pyformlang/objects/base_epsilon.py +++ b/pyformlang/objects/base_epsilon.py @@ -30,3 +30,6 @@ def __hash__(self) -> int: def __repr__(self) -> str: return "epsilon" + + def _is_equal_to(self, other: FormalObject) -> bool: + return isinstance(other, BaseEpsilon) diff --git a/pyformlang/objects/base_terminal.py b/pyformlang/objects/base_terminal.py index 3f5d48b..1b37802 100644 --- a/pyformlang/objects/base_terminal.py +++ b/pyformlang/objects/base_terminal.py @@ -1,6 +1,5 @@ """ General terminal representation """ -from typing import Any from abc import abstractmethod from .formal_object import FormalObject @@ -9,16 +8,9 @@ class BaseTerminal(FormalObject): """ General terminal representation """ - def __eq__(self, other: Any) -> bool: - if isinstance(other, BaseTerminal): - return self.value == other.value - if isinstance(other, FormalObject): - return False - return self.value == other - - def __hash__(self) -> int: - return super().__hash__() - @abstractmethod def __repr__(self): raise NotImplementedError + + def _is_equal_to(self, other: FormalObject) -> bool: + return isinstance(other, BaseTerminal) and self.value == other.value diff --git a/pyformlang/objects/cfg_objects/variable.py b/pyformlang/objects/cfg_objects/variable.py index 46a031a..85c78f5 100644 --- a/pyformlang/objects/cfg_objects/variable.py +++ b/pyformlang/objects/cfg_objects/variable.py @@ -1,6 +1,5 @@ """ A variable in a CFG """ -from typing import Any from string import ascii_uppercase from .cfg_object import CFGObject @@ -16,16 +15,6 @@ class Variable(CFGObject): The value of the variable """ - def __eq__(self, other: Any) -> bool: - if isinstance(other, Variable): - return self.value == other.value - if isinstance(other, FormalObject): - return False - return self.value == other - - def __hash__(self) -> int: - return super().__hash__() - def __repr__(self) -> str: return f"Variable({self})" @@ -34,3 +23,6 @@ def to_text(self) -> str: if text and text[0] not in ascii_uppercase: return '"VAR:' + text + '"' return text + + def _is_equal_to(self, other: FormalObject) -> bool: + return isinstance(other, Variable) and self.value == other.value diff --git a/pyformlang/objects/finite_automaton_objects/state.py b/pyformlang/objects/finite_automaton_objects/state.py index d8fb45c..41244c3 100644 --- a/pyformlang/objects/finite_automaton_objects/state.py +++ b/pyformlang/objects/finite_automaton_objects/state.py @@ -2,8 +2,6 @@ Representation of a state in a finite state automaton """ -from typing import Any - from .finite_automaton_object import FiniteAutomatonObject from ..formal_object import FormalObject @@ -24,15 +22,8 @@ class State(FiniteAutomatonObject): """ - def __eq__(self, other: Any) -> bool: - if isinstance(other, State): - return self.value == other.value - if isinstance(other, FormalObject): - return False - return self.value == other - - def __hash__(self) -> int: - return super().__hash__() - def __repr__(self) -> str: return f"State({self})" + + def _is_equal_to(self, other: FormalObject) -> bool: + return isinstance(other, State) and self.value == other.value diff --git a/pyformlang/objects/formal_object.py b/pyformlang/objects/formal_object.py index 4af8560..31d88f5 100644 --- a/pyformlang/objects/formal_object.py +++ b/pyformlang/objects/formal_object.py @@ -23,9 +23,10 @@ def value(self) -> Hashable: """ return self._value - @abstractmethod def __eq__(self, other: Any) -> bool: - raise NotImplementedError + if not isinstance(other, FormalObject): + return self.value == other + return self._is_equal_to(other) and other._is_equal_to(self) def __hash__(self) -> int: if self._hash is None: @@ -38,3 +39,7 @@ def __str__(self) -> str: @abstractmethod def __repr__(self) -> str: raise NotImplementedError + + @abstractmethod + def _is_equal_to(self, other: "FormalObject") -> bool: + raise NotImplementedError diff --git a/pyformlang/objects/pda_objects/stack_symbol.py b/pyformlang/objects/pda_objects/stack_symbol.py index 0fb4c91..c22f29e 100644 --- a/pyformlang/objects/pda_objects/stack_symbol.py +++ b/pyformlang/objects/pda_objects/stack_symbol.py @@ -1,7 +1,5 @@ """ A StackSymbol in a pushdown automaton """ -from typing import Any - from .symbol import Symbol from ..formal_object import FormalObject @@ -16,15 +14,8 @@ class StackSymbol(Symbol): """ - def __eq__(self, other: Any) -> bool: - if isinstance(other, StackSymbol): - return self.value == other.value - if isinstance(other, FormalObject): - return False - return self.value == other - - def __hash__(self) -> int: - return super().__hash__() - def __repr__(self) -> str: return f"StackSymbol({self})" + + def _is_equal_to(self, other: FormalObject) -> bool: + return isinstance(other, StackSymbol) and self.value == other.value diff --git a/pyformlang/objects/pda_objects/state.py b/pyformlang/objects/pda_objects/state.py index b174354..8b0a385 100644 --- a/pyformlang/objects/pda_objects/state.py +++ b/pyformlang/objects/pda_objects/state.py @@ -1,7 +1,5 @@ """ A State in a pushdown automaton """ -from typing import Any - from .pda_object import PDAObject from ..formal_object import FormalObject @@ -16,15 +14,8 @@ class State(PDAObject): """ - def __eq__(self, other: Any) -> bool: - if isinstance(other, State): - return self.value == other.value - if isinstance(other, FormalObject): - return False - return self.value == other - - def __hash__(self) -> int: - return super().__hash__() - def __repr__(self) -> str: return f"State({self})" + + def _is_equal_to(self, other: FormalObject) -> bool: + return isinstance(other, State) and self.value == other.value diff --git a/pyformlang/pda/tests/test_pda.py b/pyformlang/pda/tests/test_pda.py index e0f7f17..03173cd 100644 --- a/pyformlang/pda/tests/test_pda.py +++ b/pyformlang/pda/tests/test_pda.py @@ -389,6 +389,8 @@ def test_object_eq(self): assert StackSymbol("ABC") != Symbol("ABC") assert State("ABC") != FAState("ABC") assert Symbol("s") == Terminal("s") + assert Terminal(1) != StackSymbol(1) + assert StackSymbol(42) != FAState(42) def test_contains(self, pda_example: PDA): """ Tests the transition containment checks """