diff --git a/pyformlang/__init__.py b/pyformlang/__init__.py index dc80d0a..cd1b1cc 100644 --- a/pyformlang/__init__.py +++ b/pyformlang/__init__.py @@ -26,10 +26,21 @@ """ +from . import finite_automaton +from . import regular_expression +from . import cfg +from . import fst +from . import indexed_grammar +from . import pda +from . import rsa +from . import fcfg + + __all__ = ["finite_automaton", "regular_expression", "cfg", "fst", "indexed_grammar", "pda", - "rsa"] + "rsa", + "fcfg"] diff --git a/pyformlang/cfg/__init__.py b/pyformlang/cfg/__init__.py index 0fd4888..7e3e283 100644 --- a/pyformlang/cfg/__init__.py +++ b/pyformlang/cfg/__init__.py @@ -20,16 +20,15 @@ """ -from .variable import Variable -from .terminal import Terminal -from .production import Production -from .cfg import CFG -from .epsilon import Epsilon -from .llone_parser import LLOneParser +from .cfg import CFG, CFGObject, Variable, Terminal, Epsilon, Production +from .parse_tree import ParseTree, DerivationDoesNotExist -__all__ = ["Variable", + +__all__ = ["CFGObject", + "Variable", "Terminal", + "Epsilon", "Production", "CFG", - "Epsilon", - "LLOneParser"] + "ParseTree", + "DerivationDoesNotExist"] diff --git a/pyformlang/cfg/cfg.py b/pyformlang/cfg/cfg.py index 50af3fc..1517440 100644 --- a/pyformlang/cfg/cfg.py +++ b/pyformlang/cfg/cfg.py @@ -1,43 +1,30 @@ """ A context free grammar """ -import string -from copy import deepcopy -from typing import AbstractSet, Iterable, Tuple, Dict, Any, Union - -import networkx as nx - -# pylint: disable=cyclic-import -from pyformlang import pda -from pyformlang.finite_automaton import DeterministicFiniteAutomaton -# pylint: disable=cyclic-import -from pyformlang.pda import cfg_variable_converter as cvc -from .cfg_object import CFGObject -# pylint: disable=cyclic-import -from .cyk_table import CYKTable, DerivationDoesNotExist -from .epsilon import Epsilon -from .pda_object_creator import PDAObjectCreator -from .production import Production -from .terminal import Terminal -from .utils import to_variable, to_terminal -from .utils_cfg import remove_nullable_production, get_productions_d -from .variable import Variable -EPSILON_SYMBOLS = ["epsilon", "$", "ε", "ϵ", "Є"] +from string import ascii_uppercase +from copy import deepcopy +from typing import Dict, List, Set, AbstractSet, Iterable, Tuple, Hashable -SUBS_SUFFIX = "#SUBS#" +from networkx import DiGraph, find_cycle +from networkx.exception import NetworkXNoCycle +from pyformlang.finite_automaton import DeterministicFiniteAutomaton, State -class NotParsableException(Exception): - """When the grammar cannot be parsed (parser not powerful enough)""" +from .formal_grammar import FormalGrammar +from .parse_tree import ParseTree +from .cyk_table import CYKTable +from .cfg_variable_converter import CFGVariableConverter +from .utils import remove_nullable_production, get_productions_d, \ + is_special_text +from ..objects.cfg_objects import CFGObject, \ + Variable, Terminal, Epsilon, Production +from ..objects.cfg_objects.utils import to_variable, to_terminal +EPSILON_SYMBOLS = ["epsilon", "$", "ε", "ϵ", "Є"] -def is_special_text(text): - """ Check if the input is given an explicit type """ - return len(text) > 5 and \ - (text[0:5] == '"VAR:' or text[0:5] == '"TER:') and \ - text[-1] == '"' +SUBS_SUFFIX = "#SUBS#" -class CFG: +class CFG(FormalGrammar): """ A class representing a context free grammar Parameters @@ -55,43 +42,29 @@ class CFG: # pylint: disable=too-many-instance-attributes def __init__(self, - variables: AbstractSet[Union[Variable, str]] = None, - terminals: AbstractSet[Union[Terminal, str]] = None, - start_symbol: Union[Variable, str] = None, - productions: Iterable[Production] = None): + variables: AbstractSet[Hashable] = None, + terminals: AbstractSet[Hashable] = None, + start_symbol: Hashable = None, + productions: Iterable[Production] = None) -> None: + super().__init__() if variables is not None: variables = {to_variable(x) for x in variables} self._variables = variables or set() - self._variables = set(self._variables) if terminals is not None: terminals = {to_terminal(x) for x in terminals} self._terminals = terminals or set() - self._terminals = set(self._terminals) if start_symbol is not None: start_symbol = to_variable(start_symbol) - self._start_symbol = start_symbol - if start_symbol is not None: self._variables.add(start_symbol) - self._productions = productions or set() - self._productions = self._productions - for production in self._productions: - self.__initialize_production_in_cfg(production) - self._normal_form = None - self._generating_symbols = None - self._nullable_symbols = None - self._impacts = None - self._remaining_lists = None - self._added_impacts = None - - def __initialize_production_in_cfg(self, production): - self._variables.add(production.head) - for cfg_object in production.body: - if isinstance(cfg_object, Terminal): - self._terminals.add(cfg_object) - else: - self._variables.add(cfg_object) - - def get_generating_symbols(self) -> AbstractSet[CFGObject]: + self._start_symbol = start_symbol + self._productions = set() + for production in productions or set(): + self.add_production(production) + self._impacts: Dict[CFGObject, List[Tuple[CFGObject, int]]] = {} + self._remaining_lists: Dict[CFGObject, List[int]] = {} + self._added_impacts: Set[CFGObject] = set() + + def get_generating_symbols(self) -> Set[CFGObject]: """ Gives the objects which are generating in the CFG Returns @@ -99,14 +72,23 @@ def get_generating_symbols(self) -> AbstractSet[CFGObject]: generating_symbols : set of :class:`~pyformlang.cfg.CFGObject` The generating symbols of the CFG """ - if self._generating_symbols is None: - self._generating_symbols = self._get_generating_or_nullable(False) - return self._generating_symbols + return self._get_generating_or_nullable(False) + + def get_nullable_symbols(self) -> Set[CFGObject]: + """ Gives the objects which are nullable in the CFG - def _get_generating_or_nullable(self, nullable=False): + Returns + ---------- + nullable_symbols : set of :class:`~pyformlang.cfg.CFGObject` + The nullable symbols of the CFG + """ + return self._get_generating_or_nullable(True) + + def _get_generating_or_nullable(self, nullable: bool = False) \ + -> Set[CFGObject]: """ Merge of nullable and generating """ - to_process = [Epsilon()] - g_symbols = {Epsilon()} + to_process: List[CFGObject] = [Epsilon()] + g_symbols: Set[CFGObject] = {Epsilon()} self._set_impacts_and_remaining_lists() @@ -138,8 +120,8 @@ def _get_generating_or_nullable(self, nullable=False): g_symbols.remove(Epsilon()) return g_symbols - def _set_impacts_and_remaining_lists(self): - if self._impacts is not None: + def _set_impacts_and_remaining_lists(self) -> None: + if self._impacts: return self._added_impacts = set() self._remaining_lists = {} @@ -157,7 +139,7 @@ def _set_impacts_and_remaining_lists(self): self._impacts.setdefault(symbol, []).append( (head, index_impact)) - def generate_epsilon(self): + def generate_epsilon(self) -> bool: """ Whether the grammar generates epsilon or not Returns @@ -165,8 +147,8 @@ def generate_epsilon(self): generate_epsilon : bool Whether epsilon is generated or not by the CFG """ - generate_epsilon = {Epsilon()} - to_process = [Epsilon()] + generate_epsilon: Set[CFGObject] = {Epsilon()} + to_process: List[CFGObject] = [Epsilon()] self._set_impacts_and_remaining_lists() @@ -193,7 +175,7 @@ def generate_epsilon(self): to_process.append(symbol_impact) return False - def get_reachable_symbols(self) -> AbstractSet[CFGObject]: + def get_reachable_symbols(self) -> Set[CFGObject]: """ Gives the objects which are reachable in the CFG Returns @@ -201,8 +183,10 @@ def get_reachable_symbols(self) -> AbstractSet[CFGObject]: reachable_symbols : set of :class:`~pyformlang.cfg.CFGObject` The reachable symbols of the CFG """ - r_symbols = set() - r_symbols.add(self._start_symbol) + if not self.start_symbol: + return set() + r_symbols: Set[CFGObject] = set() + r_symbols.add(self.start_symbol) reachable_transition_d = {} for production in self._productions: temp = reachable_transition_d.setdefault(production.head, []) @@ -240,18 +224,6 @@ def remove_useless_symbols(self) -> "CFG": new_ter = new_ter.intersection(reachables) return CFG(new_var, new_ter, self._start_symbol, productions) - def get_nullable_symbols(self) -> AbstractSet[CFGObject]: - """ Gives the objects which are nullable in the CFG - - Returns - ---------- - nullable_symbols : set of :class:`~pyformlang.cfg.CFGObject` - The nullable symbols of the CFG - """ - if self._nullable_symbols is None: - self._nullable_symbols = self._get_generating_or_nullable(True) - return self._nullable_symbols - def remove_epsilon(self) -> "CFG": """ Removes the epsilon of a cfg @@ -270,7 +242,7 @@ def remove_epsilon(self) -> "CFG": self._start_symbol, new_productions) - def get_unit_pairs(self) -> AbstractSet[Tuple[Variable, Variable]]: + def get_unit_pairs(self) -> Set[Tuple[Variable, Variable]]: """ Finds all the unit pairs Returns @@ -318,7 +290,7 @@ def eliminate_unit_productions(self) -> "CFG": self._start_symbol, productions) - def _get_productions_with_only_single_terminals(self): + def _get_productions_with_only_single_terminals(self) -> List[Production]: """ Remove the terminals involved in a body of length more than 1 """ term_to_var = {} new_productions = [] @@ -345,7 +317,8 @@ def _get_productions_with_only_single_terminals(self): Production(term_to_var[terminal], [terminal])) return new_productions - def _get_next_free_variable(self, idx, prefix): + def _get_next_free_variable(self, idx: int, prefix: str) \ + -> Tuple[int, Variable]: idx += 1 temp = Variable(prefix + str(idx)) while temp in self._variables: @@ -353,7 +326,8 @@ def _get_next_free_variable(self, idx, prefix): temp = Variable(prefix + str(idx)) return idx, temp - def _decompose_productions(self, productions): + def _decompose_productions(self, productions: Iterable[Production]) \ + -> List[Production]: """ Decompose productions """ idx = 0 new_productions = [] @@ -400,8 +374,6 @@ def to_normal_form(self) -> "CFG": contains the same word as before, except the epsilon word. """ - if self._normal_form is not None: - return self._normal_form nullables = self.get_nullable_symbols() unit_pairs = self.get_unit_pairs() generating = self.get_generating_symbols() @@ -412,67 +384,18 @@ def to_normal_form(self) -> "CFG": len(reachables) != len(self._variables) + len(self._terminals)): if len(self._productions) == 0: - self._normal_form = self return self new_cfg = self.remove_useless_symbols() \ .remove_epsilon() \ .remove_useless_symbols() \ .eliminate_unit_productions() \ .remove_useless_symbols() - cfg = new_cfg.to_normal_form() - self._normal_form = cfg - return cfg + return new_cfg.to_normal_form() # Remove terminals from body new_productions = self._get_productions_with_only_single_terminals() new_productions = self._decompose_productions(new_productions) - cfg = CFG(start_symbol=self._start_symbol, - productions=set(new_productions)) - self._normal_form = cfg - return cfg - - @property - def variables(self) -> AbstractSet[Variable]: - """ Gives the variables - - Returns - ---------- - variables : set of :class:`~pyformlang.cfg.Variable` - The variables of the CFG - """ - return self._variables - - @property - def terminals(self) -> AbstractSet[Terminal]: - """ Gives the terminals - - Returns - ---------- - terminals : set of :class:`~pyformlang.cfg.Terminal` - The terminals of the CFG - """ - return self._terminals - - @property - def productions(self) -> AbstractSet[Production]: - """ Gives the productions - - Returns - ---------- - productions : set of :class:`~pyformlang.cfg.Production` - The productions of the CFG - """ - return self._productions - - @property - def start_symbol(self) -> Variable: - """ Gives the start symbol - - Returns - ---------- - start_variable : :class:`~pyformlang.cfg.Variable` - The start symbol of the CFG - """ - return self._start_symbol + return CFG(start_symbol=self._start_symbol, + productions=set(new_productions)) def substitute(self, substitution: Dict[Terminal, "CFG"]) -> "CFG": """ Substitutes CFG to terminals in the current CFG @@ -492,7 +415,7 @@ def substitute(self, substitution: Dict[Terminal, "CFG"]) -> "CFG": new_variables_d = {} new_vars = set() for variable in self._variables: - temp = Variable(variable.value + SUBS_SUFFIX + str(idx)) + temp = Variable(str(variable) + SUBS_SUFFIX + str(idx)) new_variables_d[variable] = temp new_vars.add(temp) idx += 1 @@ -502,18 +425,18 @@ def substitute(self, substitution: Dict[Terminal, "CFG"]) -> "CFG": for ter, cfg in substitution.items(): new_variables_d_local = {} for variable in cfg.variables: - temp = Variable(variable.value + SUBS_SUFFIX + str(idx)) + temp = Variable(str(variable) + SUBS_SUFFIX + str(idx)) new_variables_d_local[variable] = temp new_vars.add(temp) idx += 1 # Add rules of the new cfg for production in cfg.productions: body = [] - for cfgobj in production.body: - if cfgobj in new_variables_d_local: - body.append(new_variables_d_local[cfgobj]) + for cfg_obj in production.body: + if cfg_obj in new_variables_d_local: + body.append(new_variables_d_local[cfg_obj]) else: - body.append(cfgobj) + body.append(cfg_obj) productions.append( Production(new_variables_d_local[production.head], body)) @@ -521,13 +444,13 @@ def substitute(self, substitution: Dict[Terminal, "CFG"]) -> "CFG": terminals = terminals.union(cfg.terminals) for production in self._productions: body = [] - for cfgobj in production.body: - if cfgobj in new_variables_d: - body.append(new_variables_d[cfgobj]) - elif cfgobj in final_replacement: - body.append(final_replacement[cfgobj]) + for cfg_obj in production.body: + if cfg_obj in new_variables_d: + body.append(new_variables_d[cfg_obj]) + elif cfg_obj in final_replacement: + body.append(final_replacement[cfg_obj]) else: - body.append(cfgobj) + body.append(cfg_obj) productions.append(Production(new_variables_d[production.head], body)) return CFG(new_vars, None, new_variables_d[self._start_symbol], @@ -561,7 +484,7 @@ def union(self, other: "CFG") -> "CFG": return cfg_temp.substitute({temp_0: self, temp_1: other}) - def __or__(self, other): + def __or__(self, other: "CFG") -> "CFG": """ Makes the union of two CFGs Parameters @@ -603,7 +526,7 @@ def concatenate(self, other: "CFG") -> "CFG": return cfg_temp.substitute({temp_0: self, temp_1: other}) - def __add__(self, other): + def __add__(self, other: "CFG") -> "CFG": """ Makes the concatenation of two CFGs Parameters @@ -678,7 +601,7 @@ def reverse(self) -> "CFG": self.start_symbol, productions) - def __invert__(self): + def __invert__(self) -> "CFG": """ Reverse the current CFG Returns @@ -702,13 +625,10 @@ def is_empty(self) -> bool: """ return self._start_symbol not in self.get_generating_symbols() - def __bool__(self): + def __bool__(self) -> bool: return not self.is_empty() - def __contains__(self, word: Iterable[Union[Terminal, str]]) -> bool: - return self.contains(word) - - def contains(self, word: Iterable[Union[Terminal, str]]) -> bool: + def contains(self, word: Iterable[Hashable]) -> bool: """ Gives the membership of a word to the grammar Parameters @@ -723,12 +643,13 @@ def contains(self, word: Iterable[Union[Terminal, str]]) -> bool: """ # Remove epsilons word = [to_terminal(x) for x in word if x != Epsilon()] - if not word: - return self.generate_epsilon() cyk_table = CYKTable(self, word) return cyk_table.generate_word() - def get_cnf_parse_tree(self, word): + def __contains__(self, word: Iterable[Hashable]) -> bool: + return self.contains(word) + + def get_cnf_parse_tree(self, word: Iterable[Hashable]) -> ParseTree: """ Get a parse tree of the CNF of this grammar @@ -744,49 +665,9 @@ def get_cnf_parse_tree(self, word): """ word = [to_terminal(x) for x in word if x != Epsilon()] - if not word and not self.generate_epsilon(): - raise DerivationDoesNotExist cyk_table = CYKTable(self, word) return cyk_table.get_parse_tree() - def to_pda(self) -> "pda.PDA": - """ Converts the CFG to a PDA that generates on empty stack an \ - equivalent language - - Returns - ---------- - new_pda : :class:`~pyformlang.pda.PDA` - The equivalent PDA when accepting on empty stack - """ - state = pda.State("q") - pda_object_creator = PDAObjectCreator(self._terminals, self._variables) - input_symbols = {pda_object_creator.get_symbol_from(x) - for x in self._terminals} - stack_alphabet = {pda_object_creator.get_stack_symbol_from(x) - for x in self._terminals.union(self._variables)} - start_stack_symbol = pda_object_creator.get_stack_symbol_from( - self._start_symbol) - new_pda = pda.PDA(states={state}, - input_symbols=input_symbols, - stack_alphabet=stack_alphabet, - start_state=state, - start_stack_symbol=start_stack_symbol) - for production in self._productions: - new_pda.add_transition(state, pda.Epsilon(), - pda_object_creator.get_stack_symbol_from( - production.head), - state, - [pda_object_creator.get_stack_symbol_from(x) - for x in production.body]) - for terminal in self._terminals: - new_pda.add_transition(state, - pda_object_creator.get_symbol_from( - terminal), - pda_object_creator.get_stack_symbol_from( - terminal), - state, []) - return new_pda - def intersection(self, other: DeterministicFiniteAutomaton) -> "CFG": """ Gives the intersection of the current CFG with an other object @@ -809,13 +690,12 @@ def intersection(self, other: DeterministicFiniteAutomaton) -> "CFG": When trying to intersect with something else than a regex or a finite automaton """ - if other.is_empty(): + if self.is_empty() or other.is_empty(): return CFG() generate_empty = self.contains([]) and other.accepts([]) cfg = self.to_normal_form() - states = list(other.states) - cv_converter = \ - cvc.CFGVariableConverter(states, cfg.variables) + states = set(other.states) + cv_converter = CFGVariableConverter(states, cfg.variables) new_productions = [] for production in cfg.productions: if len(production.body) == 2: @@ -825,46 +705,47 @@ def intersection(self, other: DeterministicFiniteAutomaton) -> "CFG": new_productions += self._intersection_when_terminal( other, production, - cv_converter, - states) + states, + cv_converter) + start = Variable("Start") new_productions += self._intersection_starting_rules(cfg, + start, other, cv_converter) - start = Variable("Start") if generate_empty: new_productions.append(Production(start, [])) res_cfg = CFG(start_symbol=start, productions=new_productions) return res_cfg @staticmethod - def _intersection_starting_rules(cfg: "CFG", - other: DeterministicFiniteAutomaton, - cv_converter): - start = Variable("Start") - productions_temp = [] - start_other = other.start_state - for final_state in other.final_states: - new_body = [ - cv_converter.to_cfg_combined_variable( - start_other, - cfg.start_symbol, - final_state)] - productions_temp.append( - Production(start, new_body, filtering=False)) - return productions_temp + def _intersection_starting_rules( + cfg: "CFG", + start: Variable, + other: DeterministicFiniteAutomaton, + cv_converter: CFGVariableConverter) \ + -> List[Production]: + if not cfg.start_symbol or not other.start_state: + return [] + return [Production(start, + [cv_converter.to_cfg_combined_variable( + other.start_state, + cfg.start_symbol, + final_state)]) + for final_state in other.final_states] @staticmethod - def _intersection_when_terminal(other: DeterministicFiniteAutomaton, - production, - cv_converter, states): + def _intersection_when_terminal( + other: DeterministicFiniteAutomaton, + production: Production, + states: Iterable[State], + cv_converter: CFGVariableConverter) \ + -> List[Production]: productions_temp = [] for state_p in states: - next_state = other.get_next_state( - state_p, production.body[0].value) + next_state = other.get_next_state(state_p, production.body[0].value) if next_state: - new_head = \ - cv_converter.to_cfg_combined_variable( - state_p, production.head, next_state) + new_head = cv_converter.to_cfg_combined_variable( + state_p, production.head, next_state) productions_temp.append( Production(new_head, [production.body[0]], @@ -872,17 +753,19 @@ def _intersection_when_terminal(other: DeterministicFiniteAutomaton, return productions_temp @staticmethod - def _intersection_when_two_non_terminals(production, states, - cv_converter): + def _intersection_when_two_non_terminals( + production: Production, + states: Iterable[State], + cv_converter: CFGVariableConverter) \ + -> List[Production]: productions_temp = [] for state_p in states: for state_r in states: bodies = CFG._get_all_bodies(production, state_p, state_r, states, cv_converter) - new_head = \ - cv_converter.to_cfg_combined_variable( - state_p, production.head, state_r) + new_head = cv_converter.to_cfg_combined_variable( + state_p, production.head, state_r) productions_temp += [Production(new_head, body, filtering=False) @@ -890,14 +773,21 @@ def _intersection_when_two_non_terminals(production, states, return productions_temp @staticmethod - def _get_all_bodies(production, state_p, state_r, states, cv_converter): + def _get_all_bodies(production: Production, + state_p: State, + state_r: State, + states: Iterable[State], + cv_converter: CFGVariableConverter) \ + -> List[List[CFGObject]]: return [ - [cv_converter.to_cfg_combined_variable(state_p, - production.body[0], - state_q), - cv_converter.to_cfg_combined_variable(state_q, - production.body[1], - state_r)] + [cv_converter.to_cfg_combined_variable( + state_p, + production.body[0], + state_q), + cv_converter.to_cfg_combined_variable( + state_q, + production.body[1], + state_r)] for state_q in states] def __and__(self, other: DeterministicFiniteAutomaton) -> "CFG": @@ -918,7 +808,7 @@ def __and__(self, other: DeterministicFiniteAutomaton) -> "CFG": """ return self.intersection(other) - def get_words(self, max_length: int = -1): + def get_words(self, max_length: int = -1) -> Iterable[List[Terminal]]: """ Get the words generated by the CFG Parameters @@ -933,7 +823,7 @@ def get_words(self, max_length: int = -1): return cfg = self.to_normal_form() productions = cfg.productions - gen_d = {} + gen_d: Dict[CFGObject, List[List[List[Terminal]]]] = {} # Look for Epsilon Transitions for production in productions: if production.head not in gen_d: @@ -945,13 +835,14 @@ def get_words(self, max_length: int = -1): # To a single terminal for production in productions: body = production.body - if len(body) == 1: + if len(body) == 1 and isinstance(body[0], Terminal): + word = [body[0]] if len(gen_d[production.head]) == 1: gen_d[production.head].append([]) - if body not in gen_d[production.head][-1]: - gen_d[production.head][-1].append(list(body)) + if word not in gen_d[production.head][-1]: + gen_d[production.head][-1].append(word) if production.head == cfg.start_symbol: - yield list(body) + yield word # Complete what is missing current_length = 2 total_no_modification = 0 @@ -993,76 +884,28 @@ def is_finite(self) -> bool: Whether the grammar is finite or not """ normal = self.to_normal_form() - di_graph = nx.DiGraph() + di_graph = DiGraph() for production in normal.productions: body = production.body if len(body) == 2: di_graph.add_edge(production.head, body[0]) di_graph.add_edge(production.head, body[1]) try: - nx.find_cycle(di_graph, orientation="original") - except nx.exception.NetworkXNoCycle: + find_cycle(di_graph, orientation="original") + except NetworkXNoCycle: return True return False - def to_text(self): - """ - Turns the grammar into its string representation. This might lose some\ - type information and the start_symbol. - Returns - ------- - text : str - The grammar as a string. - """ - res = [] - for production in self._productions: - res.append(str(production.head) + " -> " + - " ".join([x.to_text() for x in production.body])) - return "\n".join(res) + "\n" + def copy(self) -> "CFG": + """ Copies the Context Free Grammar """ + return CFG._copy_from(self) @classmethod - def from_text(cls, text, start_symbol=Variable("S")): - """ - Read a context free grammar from a text. - The text contains one rule per line. - The structure of a production is: - head -> body1 | body2 | ... | bodyn - where | separates the bodies. - A variable (or non terminal) begins by a capital letter. - A terminal begins by a non-capital character - Terminals and Variables are separated by spaces. - An epsilon symbol can be represented by epsilon, $, ε, ϵ or Є. - If you want to have a variable name starting with a non-capital \ - letter or a terminal starting with a capital letter, you can \ - explicitly give the type of your symbol with "VAR:yourVariableName" \ - or "TER:yourTerminalName" (with the quotation marks). For example: - S -> "TER:John" "VAR:d" a b - - Parameters - ---------- - text : str - The text of transform - start_symbol : str, optional - The start symbol, S by default - - Returns - ------- - cfg : :class:`~pyformlang.cfg.CFG` - A context free grammar. - """ - variables = set() - productions = set() - terminals = set() - for line in text.splitlines(): - line = line.strip() - if not line: - continue - cls._read_line(line, productions, terminals, variables) - return cls(variables=variables, terminals=terminals, - productions=productions, start_symbol=start_symbol) - - @classmethod - def _read_line(cls, line, productions, terminals, variables): + def _read_line(cls, + line: str, + productions: Set[Production], + terminals: Set[Terminal], + variables: Set[Variable]) -> None: head_s, body_s = line.split("->") head_text = head_s.strip() if is_special_text(head_text): @@ -1077,7 +920,7 @@ def _read_line(cls, line, productions, terminals, variables): body_component = body_component[5:-1] else: type_component = "" - if body_component[0] in string.ascii_uppercase or \ + if body_component[0] in ascii_uppercase or \ type_component == "VAR": body_var = Variable(body_component) variables.add(body_var) @@ -1088,15 +931,3 @@ def _read_line(cls, line, productions, terminals, variables): terminals.add(body_ter) body.append(body_ter) productions.add(Production(head, body)) - - def is_normal_form(self): - """ - Tells is the current grammar is in Chomsky Normal Form or not - - Returns - ------- - is_normal_form : bool - If the current grammar is in CNF - """ - return all( - production.is_normal_form() for production in self._productions) diff --git a/pyformlang/cfg/cfg_object.py b/pyformlang/cfg/cfg_object.py deleted file mode 100644 index ed454b8..0000000 --- a/pyformlang/cfg/cfg_object.py +++ /dev/null @@ -1,28 +0,0 @@ -""" An object in a CFG (Variable and Terminal)""" - -from typing import Any - - -class CFGObject: # pylint: disable=too-few-public-methods - """ An object in a CFG - - Parameters - ----------- - value : any - The value of the object - """ - - __slots__ = ["_value", "_hash"] - - def __init__(self, value: Any): - self._value = value - self._hash = None - - @property - def value(self) -> Any: - """Gets the value of the object""" - return self._value - - def to_text(self) -> str: - """ Turns the object into a text format """ - raise NotImplementedError diff --git a/pyformlang/cfg/cfg_variable_converter.py b/pyformlang/cfg/cfg_variable_converter.py new file mode 100644 index 0000000..bd8dd10 --- /dev/null +++ b/pyformlang/cfg/cfg_variable_converter.py @@ -0,0 +1,113 @@ +"""A CFG Variable Converter""" + +from typing import Dict, List, AbstractSet, Tuple, Optional, Hashable + +from ..objects.formal_object import FormalObject +from ..objects.cfg_objects import Variable + + +class CFGVariableConverter: + """A CFG Variable Converter""" + + def __init__(self, + states: AbstractSet[FormalObject], + stack_symbols: AbstractSet[FormalObject]) -> None: + self._counter = 0 + self._inverse_states_d: Dict[FormalObject, int] = {} + self._counter_state = 0 + for self._counter_state, state in enumerate(states): + self._inverse_states_d[state] = self._counter_state + state.index = self._counter_state + self._counter_state += 1 + self._inverse_stack_symbol_d: Dict[FormalObject, int] = {} + self._counter_symbol = 0 + for self._counter_symbol, symbol in enumerate(stack_symbols): + self._inverse_stack_symbol_d[symbol] = self._counter_symbol + symbol.index = self._counter_symbol + self._counter_symbol += 1 + self._conversions: List[List[List[Tuple[bool, Optional[Variable]]]]] \ + = [[[(False, None) for _ in range(len(states))] + for _ in range(len(stack_symbols))] for _ in + range(len(states))] + + def _get_state_index(self, state: FormalObject) -> int: + """Get the state index""" + if state.index is None: + if state not in self._inverse_states_d: + self._inverse_states_d[state] = self._counter_state + self._counter_state += 1 + state.index = self._inverse_states_d[state] + return state.index + + def _get_symbol_index(self, symbol: FormalObject) -> int: + """Get the symbol index""" + if symbol.index is None: + if symbol not in self._inverse_stack_symbol_d: + self._inverse_stack_symbol_d[symbol] = self._counter_symbol + self._counter_symbol += 1 + symbol.index = self._inverse_stack_symbol_d[symbol] + return symbol.index + + def to_cfg_combined_variable(self, + state0: FormalObject, + stack_symbol: FormalObject, + state1: FormalObject) -> Variable: + """ Conversion used in the to_pda method """ + i_stack_symbol, i_state0, i_state1 = self._get_indexes( + stack_symbol, state0, state1) + prev = self._conversions[i_state0][i_stack_symbol][i_state1] + if prev[1] is None: + return self._create_new_variable( + i_stack_symbol, i_state0, i_state1, prev)[1] + return prev[1] + + def _create_new_variable(self, + i_stack_symbol: int, + i_state0: int, + i_state1: int, + prev: Tuple, + value: Hashable = None) -> Tuple[bool, Variable]: + if value is None: + value = self._counter + temp = (prev[0], Variable(value)) + self._counter += 1 + self._conversions[i_state0][i_stack_symbol][i_state1] = temp + return temp + + def set_valid(self, + state0: FormalObject, + stack_symbol: FormalObject, + state1: FormalObject) -> None: + """Set valid""" + i_stack_symbol, i_state0, i_state1 = self._get_indexes( + stack_symbol, state0, state1) + prev = self._conversions[i_state0][i_stack_symbol][i_state1] + self._conversions[i_state0][i_stack_symbol][i_state1] = (True, prev[1]) + + def is_valid_and_get(self, + state0: FormalObject, + stack_symbol: FormalObject, + state1: FormalObject) -> Optional[Variable]: + """Check if valid and get""" + i_state0 = self._get_state_index(state0) + i_stack_symbol = self._get_symbol_index(stack_symbol) + i_state1 = self._get_state_index(state1) + current = self._conversions[i_state0][i_stack_symbol][i_state1] + if not current[0]: + return None + if current[1] is None: + return self._create_new_variable(i_stack_symbol, + i_state0, + i_state1, + current)[1] + return current[1] + + def _get_indexes(self, + stack_symbol: FormalObject, + state0: FormalObject, + state1: FormalObject) \ + -> Tuple[int, int, int]: + i_state0 = self._get_state_index(state0) + i_stack_symbol = self._get_symbol_index(stack_symbol) + i_state1 = self._get_state_index(state1) + return i_stack_symbol, i_state0, i_state1 diff --git a/pyformlang/cfg/cyk_table.py b/pyformlang/cfg/cyk_table.py index 1fe68ad..eb2b692 100644 --- a/pyformlang/cfg/cyk_table.py +++ b/pyformlang/cfg/cyk_table.py @@ -2,7 +2,14 @@ Representation of a CYK table """ -from pyformlang.cfg.parse_tree import ParseTree +from typing import Dict, List, Set, Iterable, Tuple, Any + +from .formal_grammar import FormalGrammar +from .parse_tree import ParseTree, DerivationDoesNotExist +from ..objects.cfg_objects import CFGObject, Terminal + +ProductionsDict = Dict[Tuple[CFGObject, ...], List[CFGObject]] +ParsingTable = Dict[Tuple[int, int], Set["CYKNode"]] class CYKTable: @@ -16,43 +23,45 @@ class CYKTable: The word from which we construct the CYK table """ - def __init__(self, cfg, word): - self._cnf = cfg.to_normal_form() - self._word = word - self._productions_d = {} + def __init__(self, grammar: FormalGrammar, word: List[Terminal]) -> None: + self._normal_form: FormalGrammar = grammar.to_normal_form() + self._generate_epsilon: bool = grammar.generate_epsilon() + self._word: List[Terminal] = word + self._productions_d: ProductionsDict = {} + self._cyk_table: ParsingTable = {} self._set_productions_by_body() - self._cyk_table = {} if not self._generates_all_terminals(): self._cyk_table[(0, len(self._word))] = set() else: self._set_cyk_table() - def _set_productions_by_body(self): + def _set_productions_by_body(self) -> None: # Organize productions - for production in self._cnf.productions: + for production in self._normal_form.productions: temp = tuple(production.body) if temp in self._productions_d: self._productions_d[temp].append(production.head) else: self._productions_d[temp] = [production.head] - def _set_cyk_table(self): + def _set_cyk_table(self) -> None: self._initialize_cyk_table() self._propagate_in_cyk_table() - def _get_windows(self): + def _get_windows(self) -> Iterable[Tuple[int, int]]: # The windows must in order by length for window_size in range(2, len(self._word) + 1): for start_window in range(len(self._word) - window_size + 1): yield start_window, start_window + window_size - def _get_all_window_pairs(self, start_window, end_window): + def _get_all_window_pairs(self, start_window: int, end_window: int) \ + -> Iterable[Tuple["CYKNode", "CYKNode"]]: for mid_window in range(start_window + 1, end_window): for var_b in self._cyk_table[(start_window, mid_window)]: for var_c in self._cyk_table[(mid_window, end_window)]: yield var_b, var_c - def _propagate_in_cyk_table(self): + def _propagate_in_cyk_table(self) -> None: for start_window, end_window in self._get_windows(): for var_b, var_c in self._get_all_window_pairs(start_window, end_window): @@ -61,7 +70,7 @@ def _propagate_in_cyk_table(self): self._cyk_table[(start_window, end_window)].add( CYKNode(var_a, var_b, var_c)) - def _initialize_cyk_table(self): + def _initialize_cyk_table(self) -> None: for i, terminal in enumerate(self._word): self._cyk_table[(i, i + 1)] = \ {CYKNode(x, CYKNode(terminal)) @@ -73,7 +82,7 @@ def _initialize_cyk_table(self): self._cyk_table[ (start_window, start_window + window_size)] = set() - def generate_word(self): + def generate_word(self) -> bool: """ Checks is the word is generated Returns @@ -81,16 +90,19 @@ def generate_word(self): is_generated : bool """ - return self._cnf.start_symbol in self._cyk_table[(0, len(self._word))] + if not self._word: + return self._generate_epsilon + return self._normal_form.start_symbol \ + in self._cyk_table[(0, len(self._word))] - def _generates_all_terminals(self): + def _generates_all_terminals(self) -> bool: generate_all_terminals = True for terminal in self._word: if (terminal,) not in self._productions_d: generate_all_terminals = False return generate_all_terminals - def get_parse_tree(self): + def get_parse_tree(self) -> ParseTree: """ Give the parse tree associated with this CYK Table @@ -98,21 +110,24 @@ def get_parse_tree(self): ------- parse_tree : :class:`~pyformlang.cfg.ParseTree` """ - if self._word and not self.generate_word(): + if not self._normal_form.start_symbol or not self.generate_word(): raise DerivationDoesNotExist if not self._word: - return CYKNode(self._cnf.start_symbol) + return ParseTree(self._normal_form.start_symbol) root = [ x for x in self._cyk_table[(0, len(self._word))] - if x == self._cnf.start_symbol][0] + if x == self._normal_form.start_symbol][0] return root class CYKNode(ParseTree): """A node in the CYK table""" - def __init__(self, value, left_son=None, right_son=None): + def __init__(self, + value: CFGObject, + left_son: "CYKNode" = None, + right_son: "CYKNode" = None) -> None: super().__init__(value) self.value = value self.left_son = left_son @@ -122,14 +137,10 @@ def __init__(self, value, left_son=None, right_son=None): if right_son is not None: self.sons.append(right_son) - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: if isinstance(other, CYKNode): return self.value == other.value return self.value == other - def __hash__(self): + def __hash__(self) -> int: return hash(self.value) - - -class DerivationDoesNotExist(Exception): - """Exception raised when the word cannot be derived""" diff --git a/pyformlang/cfg/epsilon.py b/pyformlang/cfg/epsilon.py deleted file mode 100644 index cfab093..0000000 --- a/pyformlang/cfg/epsilon.py +++ /dev/null @@ -1,14 +0,0 @@ -""" An epsilon terminal """ - -from .terminal import Terminal - - -class Epsilon(Terminal): - """ An epsilon terminal """ - # pylint: disable=too-few-public-methods - - def __init__(self): - super().__init__("epsilon") - - def to_text(self) -> str: - return "epsilon" diff --git a/pyformlang/cfg/formal_grammar.py b/pyformlang/cfg/formal_grammar.py new file mode 100644 index 0000000..75300d8 --- /dev/null +++ b/pyformlang/cfg/formal_grammar.py @@ -0,0 +1,200 @@ +""" Basic grammar representation """ + +from typing import Set, AbstractSet, Iterable, Optional, Hashable, TypeVar, Type +from abc import abstractmethod + +from ..objects.cfg_objects import Variable, Terminal, Production +from ..objects.cfg_objects.utils import to_variable + +GrammarT = TypeVar("GrammarT", bound="FormalGrammar") + + +class FormalGrammar: + """ Basic grammar representation """ + + @abstractmethod + def __init__(self, + variables: AbstractSet[Hashable] = None, + terminals: AbstractSet[Hashable] = None, + start_symbol: Hashable = None, + productions: Iterable[Production] = None) -> None: + self._variables: Set[Variable] + self._terminals: Set[Terminal] + self._start_symbol: Optional[Variable] + self._productions: Set[Production] + + @property + def variables(self) -> Set[Variable]: + """ Gives the variables + + Returns + ---------- + variables : set of :class:`~pyformlang.cfg.Variable` + The variables of the CFG + """ + return self._variables + + @property + def terminals(self) -> Set[Terminal]: + """ Gives the terminals + + Returns + ---------- + terminals : set of :class:`~pyformlang.cfg.Terminal` + The terminals of the CFG + """ + return self._terminals + + @property + def productions(self) -> Set[Production]: + """ Gives the productions + + Returns + ---------- + productions : set of :class:`~pyformlang.cfg.Production` + The productions of the CFG + """ + return self._productions + + @property + def start_symbol(self) -> Optional[Variable]: + """ Gives the start symbol + + Returns + ---------- + start_variable : :class:`~pyformlang.cfg.Variable` + The start symbol of the CFG + """ + return self._start_symbol + + def add_production(self, production: Production) -> None: + """ Adds the given production to the grammar """ + self.variables.update(production.variables) + self.terminals.update(production.terminals) + self.productions.add(production) + + def add_start_symbol(self, symbol: Hashable) -> None: + """ Adds the start symbol to the grammar """ + symbol = to_variable(symbol) + self.variables.add(symbol) + self._start_symbol = symbol + + def remove_start_symbol(self) -> None: + """ Removes the start symbol from the grammar """ + self._start_symbol = None + + @abstractmethod + def copy(self: GrammarT) -> GrammarT: + """ Copies the grammar """ + raise NotImplementedError + + def __copy__(self: GrammarT) -> GrammarT: + return self.copy() + + @classmethod + def _copy_from(cls: Type[GrammarT], other: GrammarT) -> GrammarT: + return cls(variables=other.variables, + terminals=other.terminals, + productions=other.productions, + start_symbol=other.start_symbol) + + @abstractmethod + def generate_epsilon(self) -> bool: + """ Whether the grammar generates epsilon or not """ + raise NotImplementedError + + @abstractmethod + def to_normal_form(self) -> "FormalGrammar": + """ Gets Chomsky normal form of the grammar """ + raise NotImplementedError + + def is_normal_form(self) -> bool: + """ + Whether the current grammar is in Chomsky Normal Form + + Returns + ------- + is_normal_form : bool + If the current grammar is in CNF + """ + return all( + production.is_normal_form() for production in self._productions) + + def to_text(self) -> str: + """ + Turns the grammar into its string representation. This might lose some\ + type information and the start_symbol. + Returns + ------- + text : str + The grammar as a string. + """ + res = [] + for production in self._productions: + res.append(str(production.head) + " -> " + + " ".join([x.to_text() for x in production.body])) + return "\n".join(res) + "\n" + + @classmethod + def from_text( + cls: Type[GrammarT], + text: str, + start_symbol: Optional[Hashable] = Variable("S")) \ + -> GrammarT: + """ + Read a context free grammar from a text. + The text contains one rule per line. + The structure of a production is: + head -> body1 | body2 | ... | bodyn + where | separates the bodies. + A variable (or non terminal) begins by a capital letter. + A terminal begins by a non-capital character + Terminals and Variables are separated by spaces. + An epsilon symbol can be represented by epsilon, $, ε, ϵ or Є. + If you want to have a variable name starting with a non-capital \ + letter or a terminal starting with a capital letter, you can \ + explicitly give the type of your symbol with "VAR:yourVariableName" \ + or "TER:yourTerminalName" (with the quotation marks). For example: + S -> "TER:John" "VAR:d" a b + + Parameters + ---------- + text : str + The text of transform + start_symbol : str, optional + The start symbol, S by default + + Returns + ------- + cfg : :class:`~pyformlang.cfg.CFG` + A context free grammar. + """ + variables = set() + productions = set() + terminals = set() + cls._read_text(text, productions, terminals, variables) + return cls(variables=variables, + terminals=terminals, + productions=productions, + start_symbol=start_symbol) + + @classmethod + def _read_text(cls, + text: str, + productions: Set[Production], + terminals: Set[Terminal], + variables: Set[Variable]) -> None: + for line in text.splitlines(): + line = line.strip() + if not line: + continue + cls._read_line(line, productions, terminals, variables) + + @classmethod + @abstractmethod + def _read_line(cls, + line: str, + productions: Set[Production], + terminals: Set[Terminal], + variables: Set[Variable]) -> None: + raise NotImplementedError diff --git a/pyformlang/cfg/llone_parser.py b/pyformlang/cfg/llone_parser.py index 1a29f63..661cf68 100644 --- a/pyformlang/cfg/llone_parser.py +++ b/pyformlang/cfg/llone_parser.py @@ -1,12 +1,17 @@ """ LL(1) Parser """ +from typing import Dict, List, Set, Iterable, Tuple, Hashable -from pyformlang.cfg.epsilon import Epsilon -from pyformlang.cfg.cfg import NotParsableException -from pyformlang.cfg.parse_tree import ParseTree -from pyformlang.cfg.set_queue import SetQueue -from pyformlang.cfg.utils import to_terminal -from pyformlang.cfg.utils_cfg import get_productions_d +from .cfg import CFG, Production +from .parse_tree import ParseTree, NotParsableException +from .set_queue import SetQueue +from .utils import get_productions_d +from ..objects.cfg_objects import CFGObject, Epsilon +from ..objects.cfg_objects.utils import to_terminal + +ParserSet = Dict[CFGObject, Set[CFGObject]] +Triggers = Dict[CFGObject, List[CFGObject]] +ParsingTable = Dict[CFGObject, Dict[CFGObject, List[Production]]] class LLOneParser: @@ -19,10 +24,10 @@ class LLOneParser: A context-free Grammar """ - def __init__(self, cfg): + def __init__(self, cfg: CFG) -> None: self._cfg = cfg - def get_first_set(self): + def get_first_set(self) -> ParserSet: """ Used in LL(1) """ # Algorithm from: # https://www.geeksforgeeks.org/first-set-in-syntax-analysis/ @@ -46,7 +51,8 @@ def get_first_set(self): return first_set @staticmethod - def _get_first_set_production(production, first_set): + def _get_first_set_production(production: Production, + first_set: ParserSet) -> Set[CFGObject]: first_not_containing_epsilon = 0 first_set_temp = set() for body_component in production.body: @@ -62,10 +68,12 @@ def _get_first_set_production(production, first_set): first_set_temp.remove(Epsilon()) return first_set_temp - def _initialize_first_set(self, triggers): + def _initialize_first_set(self, + triggers: Triggers) \ + -> Tuple[ParserSet, SetQueue]: + first_set: ParserSet = {} to_process = SetQueue() - first_set = {} - # Initialisation + # Initialization for terminal in self._cfg.terminals: first_set[terminal] = {terminal} for triggered in triggers.get(terminal, []): @@ -78,8 +86,8 @@ def _initialize_first_set(self, triggers): to_process.append(triggered) return first_set, to_process - def _get_triggers(self): - triggers = {} + def _get_triggers(self) -> Triggers: + triggers: Triggers = {} for production in self._cfg.productions: for body_component in production.body: if body_component not in triggers: @@ -87,7 +95,7 @@ def _get_triggers(self): triggers[body_component].append(production.head) return triggers - def get_follow_set(self): + def get_follow_set(self) -> ParserSet: """ Get follow set """ first_set = self.get_first_set() triggers = self._get_triggers_follow_set(first_set) @@ -103,7 +111,9 @@ def get_follow_set(self): to_process.append(triggered) return follow_set - def _initialize_follow_set(self, first_set): + def _initialize_follow_set(self, + first_set: ParserSet) \ + -> Tuple[ParserSet, SetQueue]: to_process = SetQueue() follow_set = {} follow_set[self._cfg.start_symbol] = {"$"} @@ -123,11 +133,13 @@ def _initialize_follow_set(self, first_set): to_process.append(component) return follow_set, to_process - def _get_triggers_follow_set(self, first_set): - triggers = {} + def _get_triggers_follow_set(self, + first_set: ParserSet) \ + -> ParserSet: + follow_set: ParserSet = {} for production in self._cfg.productions: - if production.head not in triggers: - triggers[production.head] = set() + if production.head not in follow_set: + follow_set[production.head] = set() for i, component in enumerate(production.body): all_epsilon = True for component_next in production.body[i + 1:]: @@ -135,10 +147,10 @@ def _get_triggers_follow_set(self, first_set): all_epsilon = False break if all_epsilon: - triggers[production.head].add(component) - return triggers + follow_set[production.head].add(component) + return follow_set - def get_llone_parsing_table(self): + def get_llone_parsing_table(self) -> ParsingTable: """ Get the LL(1) parsing table From: https://www.slideshare.net/MahbuburRahman273/ll1-parser-in-compilers @@ -153,7 +165,7 @@ def get_llone_parsing_table(self): nullable_productions.append(production) else: non_nullable_productions.append(production) - llone_parsing_table = {} + llone_parsing_table: ParsingTable = {} for production in nullable_productions: if production.head not in llone_parsing_table: llone_parsing_table[production.head] = {} @@ -175,7 +187,7 @@ def get_llone_parsing_table(self): ) return llone_parsing_table - def is_llone_parsable(self): + def is_llone_parsable(self) -> bool: """ Checks whether the grammar can be parse with the LL(1) parser. @@ -190,7 +202,7 @@ def is_llone_parsable(self): return False return True - def get_llone_parse_tree(self, word): + def get_llone_parse_tree(self, word: Iterable[Hashable]) -> ParseTree: """ Get LL(1) parse Tree @@ -210,21 +222,24 @@ def get_llone_parse_tree(self, word): When the word cannot be parsed """ + if not self._cfg.start_symbol: + raise NotParsableException word = [to_terminal(x) for x in word if x != Epsilon()] - word.append("$") + word.append("$") # type: ignore word = word[::-1] parsing_table = self.get_llone_parsing_table() parse_tree = ParseTree(self._cfg.start_symbol) stack = ["$", parse_tree] while stack: current = stack.pop() - if current == "$" and word[-1] == "$": - return parse_tree - if current.value == word[-1]: + if isinstance(current, str): + if current == "$" and word[-1] == "$": + return parse_tree + elif current.value == word[-1]: word.pop() else: - rule_applied = list(parsing_table.get(current.value, {}) - .get(word[-1], [])) + rule_applied = parsing_table.get(current.value, {}) \ + .get(word[-1], []) if len(rule_applied) == 1: for component in rule_applied[0].body[::-1]: new_node = ParseTree(component) diff --git a/pyformlang/cfg/parse_tree.py b/pyformlang/cfg/parse_tree.py index e343302..74c6b29 100644 --- a/pyformlang/cfg/parse_tree.py +++ b/pyformlang/cfg/parse_tree.py @@ -1,22 +1,24 @@ """ A parse Tree """ -import networkx as nx +from typing import List + +from networkx import DiGraph from networkx.drawing.nx_pydot import write_dot -from pyformlang.cfg.variable import Variable +from ..objects.cfg_objects import CFGObject, Variable class ParseTree: """ A parse tree """ - def __init__(self, value): + def __init__(self, value: CFGObject) -> None: self.value = value - self.sons = [] + self.sons: List[ParseTree] = [] - def __repr__(self): + def __repr__(self) -> str: return "ParseTree(" + str(self.value) + ", " + str(self.sons) + ")" - def get_leftmost_derivation(self): + def get_leftmost_derivation(self) -> List[List[CFGObject]]: """ Get the leftmost derivation @@ -46,7 +48,7 @@ def get_leftmost_derivation(self): start.append(son.value) return res - def get_rightmost_derivation(self): + def get_rightmost_derivation(self) -> List[List[CFGObject]]: """ Get the leftmost derivation @@ -73,7 +75,7 @@ def get_rightmost_derivation(self): end = derivation + end return res - def to_networkx(self): + def to_networkx(self) -> DiGraph: """ Transforms the tree into a Networkx Directed Graph @@ -83,7 +85,7 @@ def to_networkx(self): The tree in Networkx format. """ - tree = nx.DiGraph() + tree = DiGraph() tree.add_node("ROOT", label=self.value.value) to_process = [("ROOT", son) for son in self.sons[::-1]] counter = 0 @@ -99,7 +101,7 @@ def to_networkx(self): to_process += [(new_node, son) for son in current_node.sons[::-1]] return tree - def write_as_dot(self, filename): + def write_as_dot(self, filename: str) -> None: """ Write the parse tree in dot format into a file @@ -110,3 +112,11 @@ def write_as_dot(self, filename): """ write_dot(self.to_networkx(), filename) + + +class DerivationDoesNotExist(Exception): + """Exception raised when the word cannot be derived""" + + +class NotParsableException(Exception): + """When the grammar cannot be parsed (parser not powerful enough)""" diff --git a/pyformlang/cfg/pda_object_creator.py b/pyformlang/cfg/pda_object_creator.py deleted file mode 100644 index 5553290..0000000 --- a/pyformlang/cfg/pda_object_creator.py +++ /dev/null @@ -1,41 +0,0 @@ -"""Creation of objects for PDA""" - -from pyformlang import cfg -from pyformlang import pda - - -class PDAObjectCreator: - """Creates Objects for a PDA""" - - def __init__(self, terminals, variables): - self._inverse_symbol = {} - self._inverse_stack_symbol = {} - for terminal in terminals: - self._inverse_symbol[terminal] = None - self._inverse_stack_symbol[terminal] = None - for variable in variables: - self._inverse_stack_symbol[variable] = None - - def get_symbol_from(self, symbol): - """Get a symbol""" - if isinstance(symbol, cfg.Epsilon): - return pda.Epsilon() - if self._inverse_symbol[symbol] is None: - value = str(symbol.value) - temp = pda.Symbol(value) - self._inverse_symbol[symbol] = temp - return temp - return self._inverse_symbol[symbol] - - def get_stack_symbol_from(self, stack_symbol): - """Get a stack symbol""" - if isinstance(stack_symbol, cfg.Epsilon): - return pda.Epsilon() - if self._inverse_stack_symbol[stack_symbol] is None: - value = str(stack_symbol.value) - if isinstance(stack_symbol, cfg.Terminal): - value = "#TERM#" + value - temp = pda.StackSymbol(value) - self._inverse_stack_symbol[stack_symbol] = temp - return temp - return self._inverse_stack_symbol[stack_symbol] diff --git a/pyformlang/cfg/recursive_decent_parser.py b/pyformlang/cfg/recursive_decent_parser.py index 8f10b2f..ad6e43c 100644 --- a/pyformlang/cfg/recursive_decent_parser.py +++ b/pyformlang/cfg/recursive_decent_parser.py @@ -2,13 +2,19 @@ A recursive decent parser. """ -from pyformlang.cfg import Variable, Epsilon -from pyformlang.cfg.cfg import NotParsableException -from pyformlang.cfg.parse_tree import ParseTree -from pyformlang.cfg.utils import to_terminal +from typing import List, Iterable, Tuple, Optional, Hashable +from .cfg import CFG +from .parse_tree import ParseTree, NotParsableException +from ..objects.cfg_objects import CFGObject, Variable, Terminal, Epsilon +from ..objects.cfg_objects.utils import to_terminal -def _get_index_to_extend(current_expansion, left): +ExpansionSymbol = Tuple[CFGObject, ParseTree] +Expansion = List[ExpansionSymbol] + + +def _get_index_to_extend(current_expansion: Expansion, left: bool) \ + -> Tuple[int, Optional[ExpansionSymbol]]: order = enumerate(current_expansion) if not left: order = reversed(list(order)) @@ -29,10 +35,11 @@ class RecursiveDecentParser: """ - def __init__(self, cfg): + def __init__(self, cfg: CFG) -> None: self._cfg = cfg - def get_parse_tree(self, word, left=True): + def get_parse_tree(self, word: Iterable[Hashable], left: bool = True) \ + -> ParseTree: """ Get a parse tree for a given word @@ -55,15 +62,20 @@ def get_parse_tree(self, word, left=True): When the word cannot be parsed """ + if not self._cfg.start_symbol: + raise NotParsableException word = [to_terminal(x) for x in word if x != Epsilon()] parse_tree = ParseTree(self._cfg.start_symbol) - starting_expansion = [(self._cfg.start_symbol, parse_tree)] + starting_expansion: Expansion = [(self._cfg.start_symbol, parse_tree)] if self._get_parse_tree_sub(word, starting_expansion, left): return parse_tree raise NotParsableException - def _match(self, word, current_expansion, idx_word=0, - idx_current_expansion=0): + def _match(self, + word: List[Terminal], + current_expansion: Expansion, + idx_word: int = 0, + idx_current_expansion: int = 0) -> bool: if idx_word == len(word) and \ idx_current_expansion == len(current_expansion): return True @@ -82,7 +94,10 @@ def _match(self, word, current_expansion, idx_word=0, idx_current_expansion + 1) return False - def _get_parse_tree_sub(self, word, current_expansion, left=True): + def _get_parse_tree_sub(self, + word: List[Terminal], + current_expansion: Expansion, + left: bool = True) -> bool: if not self._match(word, current_expansion): return False extend_idx, to_expand = _get_index_to_extend(current_expansion, left) @@ -100,7 +115,7 @@ def _get_parse_tree_sub(self, word, current_expansion, left=True): return True return False - def is_parsable(self, word, left=True): + def is_parsable(self, word: Iterable[Hashable], left: bool = True) -> bool: """ Whether a word is parsable or not diff --git a/pyformlang/cfg/set_queue.py b/pyformlang/cfg/set_queue.py index 8f01de2..b46f37b 100644 --- a/pyformlang/cfg/set_queue.py +++ b/pyformlang/cfg/set_queue.py @@ -1,24 +1,26 @@ """ A queue with non duplicate elements""" +from typing import Any + class SetQueue: """ A queue with non duplicate elements""" - def __init__(self): + def __init__(self) -> None: self._to_process = [] self._processing = set() - def append(self, value): + def append(self, value: Any) -> None: """ Append an element """ if value not in self._processing: self._to_process.append(value) self._processing.add(value) - def pop(self): + def pop(self) -> Any: """ Pop an element """ popped = self._to_process.pop() self._processing.remove(popped) return popped - def __bool__(self): + def __bool__(self) -> bool: return bool(self._to_process) diff --git a/pyformlang/cfg/terminal.py b/pyformlang/cfg/terminal.py deleted file mode 100644 index 230f646..0000000 --- a/pyformlang/cfg/terminal.py +++ /dev/null @@ -1,24 +0,0 @@ -""" A terminal in a CFG """ - -from .cfg_object import CFGObject - - -class Terminal(CFGObject): # pylint: disable=too-few-public-methods - """ A terminal in a CFG """ - - def __eq__(self, other): - return isinstance(other, Terminal) and self.value == other.value - - def __repr__(self): - return "Terminal(" + str(self.value) + ")" - - def __hash__(self): - if self._hash is None: - self._hash = hash(self.value) - return self._hash - - def to_text(self) -> str: - text = str(self._value) - if text and text[0].isupper(): - return '"TER:' + text + '"' - return text diff --git a/pyformlang/cfg/tests/test_cfg.py b/pyformlang/cfg/tests/test_cfg.py index 75c480a..dfb11e6 100644 --- a/pyformlang/cfg/tests/test_cfg.py +++ b/pyformlang/cfg/tests/test_cfg.py @@ -1,14 +1,14 @@ """ Tests the CFG """ -from pyformlang import pda +import pytest + +from pyformlang.pda import PDA from pyformlang.cfg import Production, Variable, Terminal, CFG, Epsilon from pyformlang.cfg.cyk_table import DerivationDoesNotExist -from pyformlang.cfg.pda_object_creator import PDAObjectCreator from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.finite_automaton import State from pyformlang.finite_automaton import Symbol from pyformlang.regular_expression import Regex -import pytest class TestCFG: @@ -225,7 +225,7 @@ def test_cnf(self): assert len(new_cfg.productions) == 41 assert not cfg.is_empty() new_cfg2 = cfg.to_normal_form() - assert new_cfg == new_cfg2 + assert new_cfg.productions == new_cfg2.productions cfg2 = CFG(start_symbol=var_e, productions={Production(var_e, [var_t])}) @@ -415,7 +415,7 @@ def test_to_pda(self): ter_par_close, ter_mult, ter_plus}, var_e, productions) - pda_equivalent = cfg.to_pda() + pda_equivalent = PDA.from_cfg(cfg) assert len(pda_equivalent.states) == 1 assert len(pda_equivalent.final_states) == 0 assert len(pda_equivalent.input_symbols) == 8 @@ -431,7 +431,7 @@ def test_conversions(self): productions = {Production(var_s, [ter_a, var_s, ter_b]), Production(var_s, [ter_c])} cfg = CFG(productions=productions, start_symbol=var_s) - cfg = cfg.to_pda().to_final_state().to_empty_stack().to_cfg() + cfg = PDA.from_cfg(cfg).to_final_state().to_empty_stack().to_cfg() assert cfg.contains([ter_c]) assert cfg.contains([ter_a, ter_c, ter_b]) assert cfg.contains([ter_a, ter_a, ter_c, ter_b, ter_b]) @@ -448,9 +448,9 @@ def test_profiling_conversions(): productions = {Production(var_s, [ter_a, var_s, ter_b]), Production(var_s, [ter_c])} cfg = CFG(productions=productions, start_symbol=var_s) - cfg = cfg.to_pda().to_final_state().to_empty_stack().to_cfg() - cfg = cfg.to_pda().to_final_state().to_empty_stack().to_cfg() - cfg.to_pda().to_final_state().to_empty_stack().to_cfg() + cfg = PDA.from_cfg(cfg).to_final_state().to_empty_stack().to_cfg() + cfg = PDA.from_cfg(cfg).to_final_state().to_empty_stack().to_cfg() + PDA.from_cfg(cfg).to_final_state().to_empty_stack().to_cfg() def test_generation_words(self): """ Tests the generation of word """ @@ -604,11 +604,11 @@ def test_intersection_with_epsilon(self): assert not cfg.is_empty() assert cfg.contains([ter_a]) - cfg_temp = cfg.to_pda().to_cfg() + cfg_temp = PDA.from_cfg(cfg).to_cfg() assert not cfg_temp.is_empty() assert cfg_temp.contains([ter_a]) - cfg_temp = cfg.to_pda().to_final_state().to_empty_stack().to_cfg() + cfg_temp = PDA.from_cfg(cfg).to_final_state().to_empty_stack().to_cfg() assert not cfg_temp.is_empty() assert cfg_temp.contains([ter_a]) @@ -675,12 +675,6 @@ def test_profiling_intersection(self): assert cfg_i.contains([ter_a] * size + [ter_b] * size) assert not cfg_i.contains([]) - def test_pda_object_creator(self): - pda_oc = PDAObjectCreator([], []) - assert pda_oc.get_symbol_from(Epsilon()) == pda.Epsilon() - assert pda_oc.get_stack_symbol_from(Epsilon()) == \ - pda.Epsilon() - def test_string_variable(self): var = Variable("A") assert repr(var) == "Variable(A)" @@ -843,16 +837,51 @@ def test_to_text_epsilon(self): cfg = CFG.from_text("S -> a S b | a b epsilon") assert cfg.contains(["a", "b"]) + def test_copy(self): + text_example = get_example_text_duplicate() + cfg = CFG.from_text(text_example) + cfg_copy = cfg.copy() + assert cfg.variables == cfg_copy.variables + assert cfg.terminals == cfg_copy.terminals + assert cfg.productions == cfg_copy.productions + assert cfg.start_symbol == cfg_copy.start_symbol + assert cfg is not cfg_copy + + def test_add_production(self): + text_example = get_example_text_duplicate() + cfg = CFG.from_text(text_example) + assert Epsilon() not in cfg.terminals + production = Production(Variable("K"), + [Epsilon(), Terminal("a"), Variable("B")]) + cfg.add_production(production) + assert production in cfg.productions + assert "K" in cfg.variables + assert "a" in cfg.terminals + assert "B" in cfg.variables + assert Epsilon() not in cfg.terminals + + def test_start_symbol(self): + cfg = CFG() + assert not cfg.variables + assert not cfg.start_symbol + cfg.add_start_symbol("S") + assert cfg.start_symbol == "S" + assert "S" in cfg.variables + cfg.remove_start_symbol() + assert not cfg.start_symbol + cfg.remove_start_symbol() + assert not cfg.start_symbol + def get_example_text_duplicate(): """ Duplicate text """ text = """ - E -> T E’ - E’ -> + T E’ | Є - T -> F T’ - T’ -> * F T’ | Є - F -> ( E ) | id - """ + E -> T E’ + E’ -> + T E’ | Є + T -> F T’ + T’ -> * F T’ | Є + F -> ( E ) | id + """ return text diff --git a/pyformlang/cfg/tests/test_llone_parser.py b/pyformlang/cfg/tests/test_llone_parser.py index a4a843a..e7bb2fa 100644 --- a/pyformlang/cfg/tests/test_llone_parser.py +++ b/pyformlang/cfg/tests/test_llone_parser.py @@ -88,13 +88,13 @@ def test_get_follow_set2(self): cfg = CFG.from_text(text) llone_parser = LLOneParser(cfg) follow_set = llone_parser.get_follow_set() - assert follow_set["S"] == \ + assert follow_set[Variable("S")] == \ {"$"} - assert follow_set["A"] == \ + assert follow_set[Variable("A")] == \ {"$", Terminal("h"), Terminal("g")} - assert follow_set["B"] == \ + assert follow_set[Variable("B")] == \ {"$", Terminal("h"), Terminal("g"), Terminal("a")} - assert follow_set["C"] == \ + assert follow_set[Variable("C")] == \ {"$", Terminal("h"), Terminal("g"), Terminal("b")} def test_get_llone_table(self): diff --git a/pyformlang/cfg/tests/test_recursive_decent_parser.py b/pyformlang/cfg/tests/test_recursive_decent_parser.py index b727a8e..18ce9e2 100644 --- a/pyformlang/cfg/tests/test_recursive_decent_parser.py +++ b/pyformlang/cfg/tests/test_recursive_decent_parser.py @@ -2,8 +2,8 @@ # pylint: disable=missing-class-docstring # pylint: disable=missing-function-docstring from pyformlang.cfg import CFG, Variable, Terminal -from pyformlang.cfg.cfg import NotParsableException -from pyformlang.cfg.recursive_decent_parser import RecursiveDecentParser +from pyformlang.cfg.recursive_decent_parser import \ + RecursiveDecentParser, NotParsableException import pytest diff --git a/pyformlang/cfg/tests/test_terminal.py b/pyformlang/cfg/tests/test_terminal.py index 358e38f..7cd9a0e 100644 --- a/pyformlang/cfg/tests/test_terminal.py +++ b/pyformlang/cfg/tests/test_terminal.py @@ -1,5 +1,6 @@ """ Tests the terminal """ -from pyformlang.cfg import Terminal, Epsilon +from pyformlang.cfg import Variable, Terminal, Epsilon +from pyformlang.finite_automaton import State, Symbol, Epsilon as FAEpsilon class TestTerminal: @@ -22,3 +23,16 @@ def test_creation(self): epsilon = Epsilon() assert epsilon.to_text() == "epsilon" assert Terminal("C").to_text() == '"TER:C"' + assert repr(Epsilon()) == "epsilon" + + def test_eq(self): + assert "epsilon" == Epsilon() + assert Epsilon() == "ɛ" + assert Terminal("A") != Variable("A") + assert Variable("S") == Variable("S") + assert Terminal("A") != Terminal("B") + assert "A" == Terminal("A") + assert Variable(1) == 1 + assert Epsilon() == FAEpsilon() + assert Terminal("ABC") == Symbol("ABC") + assert State("S") != Variable("S") diff --git a/pyformlang/cfg/tests/test_variable.py b/pyformlang/cfg/tests/test_variable.py index 2bdc78b..56c186e 100644 --- a/pyformlang/cfg/tests/test_variable.py +++ b/pyformlang/cfg/tests/test_variable.py @@ -19,3 +19,4 @@ def test_creation(self): assert str(variable0) == str(variable2) assert str(variable0) == str(variable3) assert str(variable0) != str(variable1) + assert "A" == Variable("A") diff --git a/pyformlang/cfg/utils.py b/pyformlang/cfg/utils.py index 86da22b..590ac39 100644 --- a/pyformlang/cfg/utils.py +++ b/pyformlang/cfg/utils.py @@ -1,18 +1,50 @@ -""" Useful functions """ +""" Internal Usage only """ -from .variable import Variable -from .terminal import Terminal +from typing import Dict, List, Iterable, AbstractSet +from ..objects.cfg_objects import CFGObject, Variable, Epsilon, Production -def to_variable(given): - """ Transformation into a variable """ - if isinstance(given, Variable): - return given - return Variable(given) +def is_special_text(text: str) -> bool: + """ Check if the input is given an explicit type """ + return len(text) > 5 and \ + (text[0:5] == '"VAR:' or text[0:5] == '"TER:') and \ + text[-1] == '"' -def to_terminal(given): - """ Transformation into a terminal """ - if isinstance(given, Terminal): - return given - return Terminal(given) + +def remove_nullable_production_sub(body: List[CFGObject], + nullables: AbstractSet[CFGObject]) \ + -> List[List[CFGObject]]: + """ Recursive sub function to remove nullable objects """ + if not body: + return [[]] + all_next = remove_nullable_production_sub(body[1:], nullables) + res = [] + for body_temp in all_next: + if body[0] in nullables: + res.append(body_temp) + if body[0] != Epsilon(): + res.append([body[0]] + body_temp.copy()) + return res + + +def remove_nullable_production(production: Production, + nullables: AbstractSet[CFGObject]) \ + -> List[Production]: + """ Get all combinations of productions rules after removing nullable """ + next_prod_l = remove_nullable_production_sub(production.body, + nullables) + res = [Production(production.head, prod_l) + for prod_l in next_prod_l + if prod_l] + return res + + +def get_productions_d(productions: Iterable[Production]) \ + -> Dict[Variable, List[Production]]: + """ Get productions as a dictionary """ + productions_d: Dict[Variable, List[Production]] = {} + for production in productions: + production_head = productions_d.setdefault(production.head, []) + production_head.append(production) + return productions_d diff --git a/pyformlang/cfg/utils_cfg.py b/pyformlang/cfg/utils_cfg.py deleted file mode 100644 index 4c3ff19..0000000 --- a/pyformlang/cfg/utils_cfg.py +++ /dev/null @@ -1,45 +0,0 @@ -""" Internal Usage only""" - - -from typing import List, AbstractSet - -from .production import Production -from .epsilon import Epsilon -from .cfg_object import CFGObject - - -def remove_nullable_production_sub(body: List[CFGObject], - nullables: AbstractSet[CFGObject]) \ - -> List[List[CFGObject]]: - """ Recursive sub function to remove nullable objects """ - if not body: - return [[]] - all_next = remove_nullable_production_sub(body[1:], nullables) - res = [] - for body_temp in all_next: - if body[0] in nullables: - res.append(body_temp) - if body[0] != Epsilon(): - res.append([body[0]] + body_temp.copy()) - return res - - -def remove_nullable_production(production: Production, - nullables: AbstractSet[CFGObject]) \ - -> List[Production]: - """ Get all combinations of productions rules after removing nullable """ - next_prod_l = remove_nullable_production_sub(production.body, - nullables) - res = [Production(production.head, prod_l) - for prod_l in next_prod_l - if prod_l] - return res - - -def get_productions_d(productions): - """ Get productions as a dictionary """ - productions_d = {} - for production in productions: - production_head = productions_d.setdefault(production.head, []) - production_head.append(production) - return productions_d diff --git a/pyformlang/cfg/variable.py b/pyformlang/cfg/variable.py deleted file mode 100644 index 808ebbd..0000000 --- a/pyformlang/cfg/variable.py +++ /dev/null @@ -1,44 +0,0 @@ -""" A variable in a CFG """ -import string - -from .cfg_object import CFGObject - - -class Variable(CFGObject): # pylint: disable=too-few-public-methods - """ An variable in a CFG - - Parameters - ----------- - value : any - The value of the variable - """ - - def __init__(self, value): - super().__init__(value) - self._hash = None - self.index_cfg_converter = None - - def __eq__(self, other): - if isinstance(other, CFGObject): - return self._value == other.value - return self._value == other - - def __str__(self): - return str(self.value) - - def __repr__(self): - return "Variable(" + str(self.value) + ")" - - def __hash__(self): - if self._hash is None: - self._hash = self._compute_new_hash() - return self._hash - - def _compute_new_hash(self): - return hash(self._value) - - def to_text(self) -> str: - text = str(self._value) - if text and text[0] not in string.ascii_uppercase: - return '"VAR:' + text + '"' - return text diff --git a/pyformlang/fcfg/__init__.py b/pyformlang/fcfg/__init__.py index ab4d04c..3824940 100644 --- a/pyformlang/fcfg/__init__.py +++ b/pyformlang/fcfg/__init__.py @@ -15,14 +15,24 @@ """ +from .fcfg import FCFG, CFGObject, \ + Variable, Terminal, Epsilon, ParseTree, NotParsableException +from .feature_production import FeatureProduction +from .feature_structure import FeatureStructure, \ + ContentAlreadyExistsException, \ + FeatureStructuresNotCompatibleException, \ + PathDoesNotExistsException + + __all__ = ["FCFG", "FeatureStructure", "FeatureProduction", + "CFGObject", + "Variable", + "Terminal", + "Epsilon", + "ParseTree", + "NotParsableException", "ContentAlreadyExistsException", "FeatureStructuresNotCompatibleException", "PathDoesNotExistsException"] - -from pyformlang.fcfg.fcfg import FCFG -from pyformlang.fcfg.feature_production import FeatureProduction -from pyformlang.fcfg.feature_structure import FeatureStructure, ContentAlreadyExistsException, \ - FeatureStructuresNotCompatibleException, PathDoesNotExistsException diff --git a/pyformlang/fcfg/fcfg.py b/pyformlang/fcfg/fcfg.py index c02da46..a2c82c3 100644 --- a/pyformlang/fcfg/fcfg.py +++ b/pyformlang/fcfg/fcfg.py @@ -1,14 +1,18 @@ """Feature Context-Free Grammar""" -import string -from typing import Iterable, AbstractSet, Union -from pyformlang.cfg import CFG, Terminal, Epsilon, Variable -from pyformlang.cfg.cfg import is_special_text, EPSILON_SYMBOLS, NotParsableException -from pyformlang.cfg.parse_tree import ParseTree -from pyformlang.cfg.utils import to_terminal -from pyformlang.fcfg.feature_production import FeatureProduction -from pyformlang.fcfg.feature_structure import FeatureStructure, FeatureStructuresNotCompatibleException -from pyformlang.fcfg.state import State, StateProcessed +from typing import List, Set, Tuple, AbstractSet, Iterable, Optional, Hashable +from string import ascii_uppercase + +from pyformlang.cfg import CFG, CFGObject, \ + Variable, Terminal, Epsilon, ParseTree, Production +from pyformlang.cfg.cfg import is_special_text, EPSILON_SYMBOLS +from pyformlang.cfg.llone_parser import NotParsableException + +from .feature_structure import FeatureStructure, \ + FeatureStructuresNotCompatibleException +from .feature_production import FeatureProduction +from .state import State, StateProcessed +from ..objects.cfg_objects.utils import to_terminal class FCFG(CFG): @@ -55,25 +59,28 @@ class FCFG(CFG): """ def __init__(self, - variables: AbstractSet[Variable] = None, - terminals: AbstractSet[Terminal] = None, - start_symbol: Variable = None, - productions: Iterable[FeatureProduction] = None): + variables: AbstractSet[Hashable] = None, + terminals: AbstractSet[Hashable] = None, + start_symbol: Hashable = None, + productions: Iterable[Production] = None) -> None: super().__init__(variables, terminals, start_symbol, productions) + self._productions: Set[FeatureProduction] - def __predictor(self, state, chart, processed): - # We have an incomplete state and the next token is a variable - # We must ask to process the variable with another rule - end_idx = state.positions[1] - next_var = state.production.body[state.positions[2]] - for production in self.productions: - if production.head == next_var: - new_state = State(production, (end_idx, end_idx, 0), - production.features, ParseTree(production.head)) - if processed.add(end_idx, new_state): - chart[end_idx].append(new_state) + @property + def feature_productions(self) -> Set[FeatureProduction]: + """ Gets the feature productions of the grammar """ + return self._productions - def contains(self, word: Iterable[Union[Terminal, str]]) -> bool: + def add_production(self, production: Production) -> None: + """ Adds given production to the grammar """ + if not isinstance(production, FeatureProduction): + production = FeatureProduction(production.head, + production.body, + FeatureStructure(), + [FeatureStructure()]) + super().add_production(production) + + def contains(self, word: Iterable[Hashable]) -> bool: """ Gives the membership of a word to the grammar Parameters @@ -86,9 +93,10 @@ def contains(self, word: Iterable[Union[Terminal, str]]) -> bool: contains : bool Whether word if in the FCFG or not """ + word = [to_terminal(x) for x in word if x != Epsilon()] return self._get_final_state(word) is not None - def get_parse_tree(self, word: Iterable[Union[Terminal, str]]) -> ParseTree: + def get_parse_tree(self, word: Iterable[Hashable]) -> ParseTree: """ Gives the parse tree for a sentence, if possible Parameters @@ -106,20 +114,30 @@ def get_parse_tree(self, word: Iterable[Union[Terminal, str]]) -> ParseTree: NotParsableException When the word is not parsable. """ + word = [to_terminal(x) for x in word if x != Epsilon()] final_state = self._get_final_state(word) if final_state is None: - raise NotParsableException() + raise NotParsableException return final_state.parse_tree - def _get_final_state(self, word: Iterable[Terminal]): - word = [to_terminal(x) for x in word if x != Epsilon()] - chart = [[] for _ in range(len(word) + 1)] - # Processed[i] contains all production rule that are currently working until i. + def _get_final_state(self, word: List[Terminal]) -> Optional[State]: + chart: List[List[State]] = [[] for _ in range(len(word) + 1)] + # Processed[i] contains all production rule \ + # that are currently working until i. processed = StateProcessed(len(word) + 1) gamma = Variable("Gamma") - dummy_rule = FeatureProduction(gamma, [self.start_symbol], FeatureStructure(), [FeatureStructure()]) + production_body: List[CFGObject] = [] + if self.start_symbol is not None: + production_body.append(self.start_symbol) + dummy_rule = FeatureProduction(gamma, + production_body, + FeatureStructure(), + [FeatureStructure()]) # State = (rule, [begin, end, dot position, diag) - first_state = State(dummy_rule, (0, 0, 0), dummy_rule.features, ParseTree("BEGIN")) + first_state = State(dummy_rule, + (0, 0, 0), + dummy_rule.features, + ParseTree(Variable("BEGIN"))) chart[0].append(first_state) processed.add(0, first_state) for i in range(len(chart) - 1): @@ -128,7 +146,7 @@ def _get_final_state(self, word: Iterable[Terminal]): if state.is_incomplete() and state.next_is_variable(): self.__predictor(state, chart, processed) elif state.is_incomplete(): - if state.next_is_word(word[i]): + if state.next_is_symbol(word[i]): _scanner(state, chart, processed) else: _completer(state, chart, processed) @@ -137,19 +155,30 @@ def _get_final_state(self, word: Iterable[Terminal]): if not state.is_incomplete(): _completer(state, chart, processed) for state in processed.generator(len(word)): - if state.positions[0] == 0 and not state.is_incomplete() and state.production.head == self.start_symbol: + if state.positions[0] == 0 \ + and not state.is_incomplete() \ + and state.production.head == self.start_symbol: return state return None + def copy(self) -> "FCFG": + """ Copies the FCFG """ + return FCFG._copy_from(self) + @classmethod - def _read_line(cls, line, productions, terminals, variables): + def _read_line(cls, + line: str, + productions: Set[Production], + terminals: Set[Terminal], + variables: Set[Variable]) -> None: structure_variables = {} head_s, body_s = line.split("->") head_text = head_s.strip() if is_special_text(head_text): head_text = head_text[5:-1] head_text, head_conditions = _split_text_conditions(head_text) - head_fs = FeatureStructure.from_text(head_conditions, structure_variables) + head_fs = FeatureStructure.from_text( + head_conditions, structure_variables) head = Variable(head_text) variables.add(head) all_body_fs = [] @@ -161,10 +190,12 @@ def _read_line(cls, line, productions, terminals, variables): body_component = body_component[5:-1] else: type_component = "" - if body_component[0] in string.ascii_uppercase or \ + if body_component[0] in ascii_uppercase or \ type_component == "VAR": - body_component, body_conditions = _split_text_conditions(body_component) - body_fs = FeatureStructure.from_text(body_conditions, structure_variables) + body_component, body_conditions = \ + _split_text_conditions(body_component) + body_fs = FeatureStructure.from_text( + body_conditions, structure_variables) all_body_fs.append(body_fs) body_var = Variable(body_component) variables.add(body_var) @@ -178,8 +209,25 @@ def _read_line(cls, line, productions, terminals, variables): production = FeatureProduction(head, body, head_fs, all_body_fs) productions.add(production) + def __predictor(self, + state: State, + chart: List[List[State]], + processed: StateProcessed) -> None: + # We have an incomplete state and the next token is a variable + # We must ask to process the variable with another rule + end_idx = state.positions[1] + next_var = state.production.body[state.positions[2]] + for production in self._productions: + if production.head == next_var: + new_state = State(production, + (end_idx, end_idx, 0), + production.features, + ParseTree(production.head)) + if processed.add(end_idx, new_state): + chart[end_idx].append(new_state) + -def _split_text_conditions(head_text): +def _split_text_conditions(head_text: str) -> Tuple[str, str]: if head_text[-1] != "]": return head_text, "" idx = head_text.find("[") @@ -188,36 +236,48 @@ def _split_text_conditions(head_text): return head_text[:idx], head_text[idx+1:-1] -def _scanner(state, chart, processed): +def _scanner(state: State, + chart: List[List[State]], + processed: StateProcessed) -> None: # We have an incomplete state and the next token is the word given as input # We move the end token and the dot token by one. end_idx = state.positions[1] - state.parse_tree.sons.append(ParseTree(state.production.body[state.positions[2]])) - new_state = State(state.production, (state.positions[0], end_idx + 1, state.positions[2] + 1), - state.feature_stucture, state.parse_tree) + state.parse_tree.sons.append( + ParseTree(state.production.body[state.positions[2]])) + new_state = State(state.production, + (state.positions[0], end_idx + 1, state.positions[2] + 1), + state.feature_stucture, + state.parse_tree) if processed.add(end_idx + 1, new_state): chart[end_idx + 1].append(new_state) -def _completer(state, chart, processed): - # We have a complete state. We must check if it helps to move another state forward. +def _completer(state: State, + chart: List[List[State]], + processed: StateProcessed) -> None: + # We have a complete state. + # We must check if it helps to move another state forward. begin_idx = state.positions[0] head = state.production.head for next_state in processed.generator(begin_idx): # next_state[1][1] == begin_idx always true - if next_state.is_incomplete() and next_state.production.body[next_state.positions[2]] == head: + if next_state.is_incomplete() \ + and next_state.production.body[next_state.positions[2]] == head: try: copy_left = state.feature_stucture.copy() copy_left = copy_left.get_feature_by_path(["head"]) copy_right = next_state.feature_stucture.copy() - copy_right_considered = copy_right.get_feature_by_path([str(next_state.positions[2])]) + copy_right_considered = copy_right.get_feature_by_path( + [str(next_state.positions[2])]) copy_right_considered.unify(copy_left) except FeatureStructuresNotCompatibleException: continue parse_tree = next_state.parse_tree parse_tree.sons.append(state.parse_tree) new_state = State(next_state.production, - (next_state.positions[0], state.positions[1], next_state.positions[2] + 1), + (next_state.positions[0], + state.positions[1], + next_state.positions[2] + 1), copy_right, parse_tree) if processed.add(state.positions[1], new_state): chart[state.positions[1]].append(new_state) diff --git a/pyformlang/fcfg/feature_production.py b/pyformlang/fcfg/feature_production.py index 587aba2..19c7b1b 100644 --- a/pyformlang/fcfg/feature_production.py +++ b/pyformlang/fcfg/feature_production.py @@ -1,9 +1,10 @@ """Production rules with features""" -from typing import List -from pyformlang.cfg import Production, Variable -from pyformlang.cfg.cfg_object import CFGObject -from pyformlang.fcfg.feature_structure import FeatureStructure +from typing import List, Iterable + +from pyformlang.cfg import CFGObject, Variable, Production + +from .feature_structure import FeatureStructure class FeatureProduction(Production): @@ -18,10 +19,16 @@ class FeatureProduction(Production): head_feature : :class:`~pyformlang.fcfg.FeatureStructure` The feature structure of the head body_features : Iterable of :class:`~pyformlang.fcfg.FeatureStructure` - The feature structures of the elements of the body. Must be the same size as the body. + The feature structures of the elements of the body. + Must be the same size as the body. """ - def __init__(self, head: Variable, body: List[CFGObject], head_feature, body_features, filtering=True): + def __init__(self, + head: Variable, + body: List[CFGObject], + head_feature: FeatureStructure, + body_features: Iterable[FeatureStructure], + filtering: bool = True) -> None: super().__init__(head, body, filtering) self._features = FeatureStructure() self._features.add_content("head", head_feature) @@ -29,11 +36,11 @@ def __init__(self, head: Variable, body: List[CFGObject], head_feature, body_fea self._features.add_content(str(i), feature_structure) @property - def features(self): + def features(self) -> FeatureStructure: """The merged features of the production rules""" return self._features - def __repr__(self): + def __repr__(self) -> str: res = [self.head.to_text()] cond_head = str(self._features.get_feature_by_path(["head"])) if cond_head: diff --git a/pyformlang/fcfg/feature_structure.py b/pyformlang/fcfg/feature_structure.py index b116de0..c7e7651 100644 --- a/pyformlang/fcfg/feature_structure.py +++ b/pyformlang/fcfg/feature_structure.py @@ -1,5 +1,6 @@ """Feature Structure""" -from typing import Any, List, Dict + +from typing import Dict, List, Iterable, Tuple, Optional, Hashable class ContentAlreadyExistsException(Exception): @@ -24,64 +25,39 @@ class FeatureStructure: """ - def __init__(self, value=None): - self._content = {} + def __init__(self, value: Hashable = None) -> None: + self._content: Dict[str, FeatureStructure] = {} self._value = value - self._pointer = None - - def copy(self, already_copied=None): - """Copies the current feature structure - - Parameters - ---------- - already_copied : dict - A dictionary containing the parts already copied. For internal usage. - - Returns - ---------- - fs : :class:`~pyformlang.fcfg.FeatureStructure` - The copied feature structure - """ - if already_copied is None: - already_copied = {} - if self in already_copied: - return already_copied[self] - new_fs = FeatureStructure(self.value) - if self._pointer is not None: - pointer_copy = self._pointer.copy(already_copied) - new_fs.pointer = pointer_copy - for feature, content in self._content.items(): - new_fs.content[feature] = content.copy(already_copied) - already_copied[self] = new_fs - return new_fs + self._pointer: Optional[FeatureStructure] = None @property - def content(self) -> Any: + def content(self) -> Dict[str, "FeatureStructure"]: """Gets the content of the current node""" return self._content @property - def pointer(self) -> Any: + def pointer(self) -> Optional["FeatureStructure"]: """Gets the pointer of the current node""" return self._pointer @pointer.setter - def pointer(self, new_pointer): + def pointer(self, new_pointer: "FeatureStructure") -> None: """Set the value of the pointer""" self._pointer = new_pointer - @property - def value(self) -> Any: + def value(self) -> Hashable: """Gets the value associated to the current node""" return self._value if self.pointer is None else self.pointer.value @value.setter - def value(self, new_value) -> Any: + def value(self, new_value: Hashable) -> None: """Gets the value associated to the current node""" self._value = new_value - def add_content(self, content_name: str, feature_structure: "FeatureStructure"): + def add_content(self, + content_name: str, + feature_structure: "FeatureStructure") -> None: """Add content to the current feature structure. Parameters @@ -100,7 +76,10 @@ def add_content(self, content_name: str, feature_structure: "FeatureStructure"): raise ContentAlreadyExistsException() self._content[content_name] = feature_structure - def add_content_path(self, content_name: str, feature_structure: "FeatureStructure", path: List[str]): + def add_content_path(self, + content_name: str, + feature_structure: "FeatureStructure", + path: List[str]) -> None: """Add content to the current feature structure at a specific path Parameters @@ -122,16 +101,20 @@ def add_content_path(self, content_name: str, feature_structure: "FeatureStructu to_modify = self.get_feature_by_path(path) to_modify.add_content(content_name, feature_structure) - def get_dereferenced(self): - """Get the dereferences version of the feature structure. For internal usage.""" - return self._pointer.get_dereferenced() if self._pointer is not None else self + def get_dereferenced(self) -> "FeatureStructure": + """ + Get the dereferences version of the feature structure. + For internal usage. + """ + return self._pointer.get_dereferenced() \ + if self._pointer is not None else self - def get_feature_by_path(self, path: List[str] = None): + def get_feature_by_path(self, path: List[str] = None) -> "FeatureStructure": """ Get a feature at a given path. Parameters ----------- - path : Iterable of str, optional + path : List of str, optional The path to the new feature. Returns @@ -152,7 +135,7 @@ def get_feature_by_path(self, path: List[str] = None): raise PathDoesNotExistsException() return current.content[path[0]].get_feature_by_path(path[1:]) - def unify(self, other: "FeatureStructure"): + def unify(self, other: "FeatureStructure") -> None: """Unify the current structure with another one. Modifies the current structure. @@ -171,7 +154,8 @@ def unify(self, other: "FeatureStructure"): other_dereferenced = other.get_dereferenced() if current_dereferenced == other_dereferenced: return - if len(current_dereferenced.content) == 0 and len(other_dereferenced.content) == 0: + if len(current_dereferenced.content) == 0 \ + and len(other_dereferenced.content) == 0: # We have a simple feature if current_dereferenced.value == other_dereferenced.value: current_dereferenced.pointer = other_dereferenced @@ -186,9 +170,10 @@ def unify(self, other: "FeatureStructure"): for feature in other_dereferenced.content: if feature not in current_dereferenced.content: current_dereferenced.content[feature] = FeatureStructure() - current_dereferenced.content[feature].unify(other_dereferenced.content[feature]) + current_dereferenced.content[feature].unify( + other_dereferenced.content[feature]) - def subsumes(self, other: "FeatureStructure"): + def subsumes(self, other: "FeatureStructure") -> bool: """Check whether the current feature structure subsumes another one. Parameters @@ -208,16 +193,17 @@ def subsumes(self, other: "FeatureStructure"): for feature in current_dereferenced.content: if feature not in other_dereferenced.content: return False - if not current_dereferenced.content[feature].subsumes(other_dereferenced.content[feature]): + if not current_dereferenced.content[feature].subsumes( + other_dereferenced.content[feature]): return False return True - def get_all_paths(self): + def get_all_paths(self) -> List[List[str]]: """ Get the list of all path in the feature structure Returns -------- - paths : Iterable of :class:`~pyformlang.fcfg.FeatureStructure` + paths : List of string lists The paths """ @@ -230,7 +216,7 @@ def get_all_paths(self): res.append([]) return res - def __repr__(self): + def __repr__(self) -> str: res = [] for path in self.get_all_paths(): if path: @@ -241,15 +227,48 @@ def __repr__(self): res.append(".".join(path) + "=" + str(value)) return " | ".join(res) + def copy(self, already_copied: Dict["FeatureStructure", + "FeatureStructure"] = None) \ + -> "FeatureStructure": + """Copies the current feature structure + + Parameters + ---------- + already_copied : dict + A dictionary containing the parts already copied. + For internal usage. + + Returns + ---------- + fs : :class:`~pyformlang.fcfg.FeatureStructure` + The copied feature structure + """ + if already_copied is None: + already_copied = {} + if self in already_copied: + return already_copied[self] + new_fs = FeatureStructure(self.value) + if self._pointer is not None: + pointer_copy = self._pointer.copy(already_copied) + new_fs.pointer = pointer_copy + for feature, content in self._content.items(): + new_fs.content[feature] = content.copy(already_copied) + already_copied[self] = new_fs + return new_fs + @classmethod - def from_text(cls, text: str, structure_variables: Dict[str, "FeatureStructure"] = None): + def from_text(cls, + text: str, + structure_variables: Dict[str, "FeatureStructure"] = None) \ + -> "FeatureStructure": """ Construct a feature structure from a text. Parameters ----------- text : str The text to parse - structure_variables : dict of (str, :class:`~pyformlang.fcfg.FeatureStructure`), optional + structure_variables : \ + dict of (str, :class:`~pyformlang.fcfg.FeatureStructure`), optional Existing structure variables. Returns @@ -261,10 +280,14 @@ def from_text(cls, text: str, structure_variables: Dict[str, "FeatureStructure"] if structure_variables is None: structure_variables = {} preprocessed_conditions = _preprocess_conditions(text) - return _create_feature_structure(preprocessed_conditions, structure_variables) + return _create_feature_structure( + preprocessed_conditions, structure_variables) -def _find_closing_bracket(condition, start, opening="[", closing="]"): +def _find_closing_bracket(condition: str, + start: int, + opening: str = "[", + closing: str = "]") -> int: counter = 0 pos = start for current_char in condition[start:]: @@ -282,7 +305,9 @@ class ParsingException(Exception): """When there is a problem during parsing.""" -def _preprocess_conditions(conditions, start=0, end=-1): +def _preprocess_conditions(conditions: str, + start: int = 0, + end: int = -1) -> List[Tuple[str, str, str]]: conditions = conditions.replace("->", "=") conditions = conditions.strip() res = [] @@ -304,7 +329,8 @@ def _preprocess_conditions(conditions, start=0, end=-1): end_bracket = _find_closing_bracket(conditions, pos) if end_bracket == -1: raise ParsingException() - current_value = _preprocess_conditions(conditions, pos + 1, end_bracket) + current_value = _preprocess_conditions( + conditions, pos + 1, end_bracket) pos = end_bracket + 1 elif current == "(": end_bracket = _find_closing_bracket(conditions, pos, "(", ")") @@ -322,7 +348,7 @@ def _preprocess_conditions(conditions, start=0, end=-1): reference = None pos += 1 else: - current_value += current + current_value += current # type: ignore pos += 1 if current_feature.strip(): if isinstance(current_value, str): @@ -331,7 +357,12 @@ def _preprocess_conditions(conditions, start=0, end=-1): return res -def _create_feature_structure(conditions, structure_variables, existing_references=None, feature_structure=None): +def _create_feature_structure( + conditions: Iterable[Tuple[str, str, str]], + structure_variables: Dict[str, FeatureStructure], + existing_references: Dict[str, FeatureStructure] = None, + feature_structure: FeatureStructure = None) \ + -> FeatureStructure: if existing_references is None: existing_references = {} if feature_structure is None: @@ -354,7 +385,8 @@ def _create_feature_structure(conditions, structure_variables, existing_referenc feature_structure.add_content(feature, new_fs) structure_variables[value[1:]] = new_fs elif not isinstance(value, str): - structure = _create_feature_structure(value, structure_variables, existing_references, new_fs) + structure = _create_feature_structure( + value, structure_variables, existing_references, new_fs) feature_structure.add_content(feature, structure) else: feature_structure.add_content(feature, new_fs) diff --git a/pyformlang/fcfg/state.py b/pyformlang/fcfg/state.py index fd5e2bd..b7768b2 100644 --- a/pyformlang/fcfg/state.py +++ b/pyformlang/fcfg/state.py @@ -1,10 +1,15 @@ """Internal usage states""" -from typing import Tuple -from pyformlang.cfg import Variable -from pyformlang.cfg.parse_tree import ParseTree -from pyformlang.fcfg.feature_production import FeatureProduction -from pyformlang.fcfg.feature_structure import FeatureStructure +from typing import Dict, List, Iterable, Tuple + +from pyformlang.cfg import Variable, Terminal, ParseTree + +from .feature_structure import FeatureStructure +from .feature_production import FeatureProduction + +Positions = Tuple[int, int, int] +StateKey = Tuple[FeatureProduction, Positions] +ProcessedStates = List[Dict[StateKey, List["State"]]] class State: @@ -12,39 +17,42 @@ class State: def __init__(self, production: FeatureProduction, - positions: Tuple[int, int, int], + positions: Positions, feature_stucture: FeatureStructure, - parse_tree: ParseTree): + parse_tree: ParseTree) -> None: self.production = production self.positions = positions self.feature_stucture = feature_stucture self.parse_tree = parse_tree - def get_key(self): + def get_key(self) -> StateKey: """Get the key of the state""" return self.production, self.positions - def is_incomplete(self): + def is_incomplete(self) -> bool: """Check if a state is incomplete""" return self.positions[2] < len(self.production.body) - def next_is_variable(self): + def next_is_variable(self) -> bool: """Check if the next symbol to process is a variable""" return isinstance(self.production.body[self.positions[2]], Variable) - def next_is_word(self, word): + def next_is_symbol(self, symbol: Terminal) -> bool: """Check if the next symbol matches a given word""" - return self.production.body[self.positions[2]] == word + return self.production.body[self.positions[2]] == symbol class StateProcessed: """For internal usage""" - def __init__(self, size: int): - self.processed = [{} for _ in range(size)] + def __init__(self, size: int) -> None: + self.processed: ProcessedStates = [{} for _ in range(size)] - def add(self, i: int, element: State): - """Add a state to the processed states. Returns if the insertion was successful or not.""" + def add(self, i: int, element: State) -> bool: + """ + Add a state to the processed states. + Returns if the insertion was successful or not. + """ key = element.get_key() if key not in self.processed[i]: self.processed[i][key] = [] @@ -54,8 +62,7 @@ def add(self, i: int, element: State): self.processed[i][key].append(element) return True - def generator(self, i: int): + def generator(self, i: int) -> Iterable[State]: """Generates a collection of all the states at a given position""" for states in self.processed[i].values(): - for state in states: - yield state + yield from states diff --git a/pyformlang/fcfg/tests/test_fcfg.py b/pyformlang/fcfg/tests/test_fcfg.py index 195e364..7163f35 100644 --- a/pyformlang/fcfg/tests/test_fcfg.py +++ b/pyformlang/fcfg/tests/test_fcfg.py @@ -1,8 +1,9 @@ """Test a FCFG""" -from pyformlang.cfg import Variable, Terminal -from pyformlang.cfg.cfg import NotParsableException +from pyformlang.cfg import Variable, Terminal, Production +from pyformlang.cfg import DerivationDoesNotExist from pyformlang.cfg.parse_tree import ParseTree +from pyformlang.cfg.llone_parser import NotParsableException from pyformlang.fcfg.fcfg import FCFG from pyformlang.fcfg.feature_production import FeatureProduction from pyformlang.fcfg.feature_structure import FeatureStructure @@ -10,9 +11,52 @@ import pytest +@pytest.fixture +def fcfg_text() -> str: + return """ + S -> NP[AGREEMENT=?a] VP[AGREEMENT=?a] + S -> Aux[AGREEMENT=?a] NP[AGREEMENT=?a] VP + NP[AGREEMENT=?a] -> Det[AGREEMENT=?a] Nominal[AGREEMENT=?a] + Aux[AGREEMENT=[NUMBER=pl, PERSON=3rd]] -> do + Aux[AGREEMENT=[NUMBER=sg, PERSON=3rd]] -> does + Det[AGREEMENT=[NUMBER=sg]] -> this + Det[AGREEMENT=[NUMBER=pl]] -> these + "VAR:VP[AGREEMENT=?a]" -> Verb[AGREEMENT=?a] + Verb[AGREEMENT=[NUMBER=pl]] -> serve + Verb[AGREEMENT=[NUMBER=sg, PERSON=3rd]] -> "TER:serves" + Noun[AGREEMENT=[NUMBER=sg]] -> flight + Noun[AGREEMENT=[NUMBER=pl]] -> flights + Nominal[AGREEMENT=?a] -> Noun[AGREEMENT=?a] + """ + + class TestFCFG: """Test a FCFG""" + def test_creation(self): + """ Tests creation of FCFG """ + variable0 = Variable(0) + terminal0 = Terminal("a") + prod0 = Production(variable0, [terminal0, Terminal("A"), Variable(1)]) + fcfg = FCFG({variable0}, {terminal0}, variable0, {prod0}) + assert fcfg is not None + assert len(fcfg.variables) == 2 + assert len(fcfg.terminals) == 2 + assert len(fcfg.productions) == 1 + assert len(fcfg.feature_productions) == 1 + assert fcfg.productions == fcfg.feature_productions + assert fcfg.is_empty() + assert all(isinstance(prod, FeatureProduction) + for prod in fcfg.productions) + + fcfg = FCFG() + assert fcfg is not None + assert len(fcfg.variables) == 0 + assert len(fcfg.terminals) == 0 + assert len(fcfg.productions) == 0 + assert len(fcfg.feature_productions) == 0 + assert fcfg.is_empty() + def test_contains(self): """Test containment""" # 1st: S -> NP VP @@ -182,31 +226,58 @@ def test_state(self): """Test functions on states""" fs1 = FeatureStructure() fs1.add_content("NUMBER", FeatureStructure("sg")) - state0 = State(FeatureProduction(Variable("S"), [], fs1, []), (0, 0, 0), fs1, ParseTree("S")) + state0 = State(FeatureProduction(Variable("S"), [], fs1, []), + (0, 0, 0), + fs1, + ParseTree(Variable("S"))) processed = StateProcessed(1) - state1 = State(FeatureProduction(Variable("S"), [], fs1, []), (0, 0, 0), fs1, ParseTree("S")) + state1 = State(FeatureProduction(Variable("S"), [], fs1, []), + (0, 0, 0), + fs1, + ParseTree(Variable("S"))) assert processed.add(0, state0) assert not processed.add(0, state1) - def test_from_text(self): + def test_from_text(self, fcfg_text: str): """Test containment from a text description""" - fcfg = FCFG.from_text(""" - S -> NP[AGREEMENT=?a] VP[AGREEMENT=?a] - S -> Aux[AGREEMENT=?a] NP[AGREEMENT=?a] VP - NP[AGREEMENT=?a] -> Det[AGREEMENT=?a] Nominal[AGREEMENT=?a] - Aux[AGREEMENT=[NUMBER=pl, PERSON=3rd]] -> do - Aux[AGREEMENT=[NUMBER=sg, PERSON=3rd]] -> does - Det[AGREEMENT=[NUMBER=sg]] -> this - Det[AGREEMENT=[NUMBER=pl]] -> these - "VAR:VP[AGREEMENT=?a]" -> Verb[AGREEMENT=?a] - Verb[AGREEMENT=[NUMBER=pl]] -> serve - Verb[AGREEMENT=[NUMBER=sg, PERSON=3rd]] -> "TER:serves" - Noun[AGREEMENT=[NUMBER=sg]] -> flight - Noun[AGREEMENT=[NUMBER=pl]] -> flights - Nominal[AGREEMENT=?a] -> Noun[AGREEMENT=?a] - """) + fcfg = FCFG.from_text(fcfg_text) self._sub_tests_contains1(fcfg) parse_tree = fcfg.get_parse_tree(["this", "flight", "serves"]) with pytest.raises(NotParsableException): fcfg.get_parse_tree(["these", "flight", "serves"]) assert "Det" in str(parse_tree) + + def test_copy(self, fcfg_text: str): + """Test copying of FCFG""" + fcfg = FCFG.from_text(fcfg_text) + fcfg_copy = fcfg.copy() + assert fcfg.variables == fcfg_copy.variables + assert fcfg.terminals == fcfg_copy.terminals + assert fcfg.productions == fcfg_copy.productions + assert fcfg.start_symbol == fcfg_copy.start_symbol + assert fcfg is not fcfg_copy + + def test_get_leftmost_derivation(self): + ter_a = Terminal("a") + ter_b = Terminal("b") + var_s = Variable("S") + var_a = Variable("A") + var_b = Variable("B") + var_c = Variable("C") + productions = [Production(var_s, [var_c, var_b]), + Production(var_c, [var_a, var_a]), + Production(var_a, [ter_a]), + Production(var_b, [ter_b]) + ] + fcfg = FCFG(productions=productions, start_symbol=var_s) + parse_tree = fcfg.get_cnf_parse_tree([ter_a, ter_a, ter_b]) + derivation = parse_tree.get_leftmost_derivation() + assert derivation == \ + [[var_s], + [var_c, var_b], + [var_a, var_a, var_b], + [ter_a, var_a, var_b], + [ter_a, ter_a, var_b], + [ter_a, ter_a, ter_b]] + with pytest.raises(DerivationDoesNotExist): + fcfg.get_cnf_parse_tree([]) diff --git a/pyformlang/finite_automaton/__init__.py b/pyformlang/finite_automaton/__init__.py index e6b7db8..6fec7db 100644 --- a/pyformlang/finite_automaton/__init__.py +++ b/pyformlang/finite_automaton/__init__.py @@ -34,13 +34,10 @@ """ -from .finite_automaton import FiniteAutomaton +from .finite_automaton import FiniteAutomaton, State, Symbol, Epsilon from .deterministic_finite_automaton import DeterministicFiniteAutomaton from .nondeterministic_finite_automaton import NondeterministicFiniteAutomaton from .epsilon_nfa import EpsilonNFA -from .state import State -from .symbol import Symbol -from .epsilon import Epsilon from .deterministic_transition_function import \ (DeterministicTransitionFunction, DuplicateTransitionError, @@ -48,6 +45,7 @@ from .nondeterministic_transition_function import \ NondeterministicTransitionFunction + __all__ = ["FiniteAutomaton", "DeterministicFiniteAutomaton", "NondeterministicFiniteAutomaton", diff --git a/pyformlang/finite_automaton/deterministic_finite_automaton.py b/pyformlang/finite_automaton/deterministic_finite_automaton.py index d695388..3856f72 100644 --- a/pyformlang/finite_automaton/deterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/deterministic_finite_automaton.py @@ -4,13 +4,14 @@ from typing import Iterable, AbstractSet, Optional, Hashable, Any -from .state import State from .deterministic_transition_function import DeterministicTransitionFunction from .epsilon_nfa import EpsilonNFA from .nondeterministic_finite_automaton import NondeterministicFiniteAutomaton from .hopcroft_processing_list import HopcroftProcessingList from .partition import Partition -from .utils import to_state, to_symbol, to_single_state, PreviousTransitions +from .utils import to_single_state, PreviousTransitions +from ..objects.finite_automaton_objects import State +from ..objects.finite_automaton_objects.utils import to_state, to_symbol class DeterministicFiniteAutomaton(NondeterministicFiniteAutomaton): diff --git a/pyformlang/finite_automaton/deterministic_transition_function.py b/pyformlang/finite_automaton/deterministic_transition_function.py index 39d0a8f..7b2f9a7 100644 --- a/pyformlang/finite_automaton/deterministic_transition_function.py +++ b/pyformlang/finite_automaton/deterministic_transition_function.py @@ -4,12 +4,10 @@ from typing import Optional -from .state import State -from .symbol import Symbol -from .epsilon import Epsilon from .nondeterministic_transition_function import \ NondeterministicTransitionFunction from .nondeterministic_finite_automaton import InvalidEpsilonTransition +from ..objects.finite_automaton_objects import State, Symbol, Epsilon class DeterministicTransitionFunction(NondeterministicTransitionFunction): diff --git a/pyformlang/finite_automaton/doubly_linked_list.py b/pyformlang/finite_automaton/doubly_linked_list.py index ce09237..d1345e9 100644 --- a/pyformlang/finite_automaton/doubly_linked_list.py +++ b/pyformlang/finite_automaton/doubly_linked_list.py @@ -1,6 +1,7 @@ """A doubly linked list""" from typing import Iterable, Optional, Any + from .doubly_linked_node import DoublyLinkedNode diff --git a/pyformlang/finite_automaton/epsilon.py b/pyformlang/finite_automaton/epsilon.py deleted file mode 100644 index 431a98c..0000000 --- a/pyformlang/finite_automaton/epsilon.py +++ /dev/null @@ -1,26 +0,0 @@ -""" -Represents an epsilon transition -""" - -from typing import Any -from .symbol import Symbol - - -class Epsilon(Symbol): # pylint: disable=too-few-public-methods - """ An epsilon transition - - Examples - -------- - - >>> epsilon = Epsilon() - - """ - - def __init__(self) -> None: - super().__init__("epsilon") - - def __hash__(self) -> int: - return hash("EPSILON TRANSITION") - - def __eq__(self, other: Any) -> bool: - return isinstance(other, Epsilon) diff --git a/pyformlang/finite_automaton/epsilon_nfa.py b/pyformlang/finite_automaton/epsilon_nfa.py index 626019d..e80a125 100644 --- a/pyformlang/finite_automaton/epsilon_nfa.py +++ b/pyformlang/finite_automaton/epsilon_nfa.py @@ -5,13 +5,11 @@ from typing import Iterable, Set, AbstractSet, Hashable from networkx import MultiDiGraph -from .state import State -from .symbol import Symbol -from .epsilon import Epsilon from .nondeterministic_transition_function import \ NondeterministicTransitionFunction from .finite_automaton import FiniteAutomaton -from .utils import to_state, to_symbol +from ..objects.finite_automaton_objects import State, Symbol, Epsilon +from ..objects.finite_automaton_objects.utils import to_state, to_symbol class EpsilonNFA(FiniteAutomaton): diff --git a/pyformlang/finite_automaton/finite_automaton.py b/pyformlang/finite_automaton/finite_automaton.py index efc7392..654a039 100644 --- a/pyformlang/finite_automaton/finite_automaton.py +++ b/pyformlang/finite_automaton/finite_automaton.py @@ -9,11 +9,9 @@ from pyformlang.fst import FST -from .state import State -from .symbol import Symbol -from .epsilon import Epsilon from .transition_function import TransitionFunction -from .utils import to_state, to_symbol +from ..objects.finite_automaton_objects import State, Symbol, Epsilon +from ..objects.finite_automaton_objects.utils import to_state, to_symbol AutomatonT = TypeVar("AutomatonT", bound="FiniteAutomaton") diff --git a/pyformlang/finite_automaton/finite_automaton_object.py b/pyformlang/finite_automaton/finite_automaton_object.py deleted file mode 100644 index ed80609..0000000 --- a/pyformlang/finite_automaton/finite_automaton_object.py +++ /dev/null @@ -1,38 +0,0 @@ -""" -Represents an object of a finite state automaton -""" - -from typing import Hashable - - -class FiniteAutomatonObject: # pylint: disable=too-few-public-methods - """ Represents an object in a finite state automaton - - Parameters - ---------- - value: any - The value of the object - """ - - def __init__(self, value: Hashable) -> None: - self._value = value - self._hash = None - - def __hash__(self) -> int: - if self._hash is None: - self._hash = hash(self._value) - return self._hash - - def __repr__(self) -> str: - return str(self._value) - - @property - def value(self) -> Hashable: - """ Gets the value of the object - - Returns - --------- - value : any - The value of the object - """ - return self._value diff --git a/pyformlang/finite_automaton/hopcroft_processing_list.py b/pyformlang/finite_automaton/hopcroft_processing_list.py index 2bc02f4..3997754 100644 --- a/pyformlang/finite_automaton/hopcroft_processing_list.py +++ b/pyformlang/finite_automaton/hopcroft_processing_list.py @@ -5,7 +5,7 @@ from typing import Dict, List, Set, Tuple from numpy import zeros -from .symbol import Symbol +from ..objects.finite_automaton_objects import Symbol class HopcroftProcessingList: diff --git a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py index 5d32776..7f56f9f 100644 --- a/pyformlang/finite_automaton/nondeterministic_finite_automaton.py +++ b/pyformlang/finite_automaton/nondeterministic_finite_automaton.py @@ -4,9 +4,9 @@ from typing import Iterable, Hashable -from .epsilon import Epsilon from .epsilon_nfa import EpsilonNFA -from .utils import to_symbol +from ..objects.finite_automaton_objects import Epsilon +from ..objects.finite_automaton_objects.utils import to_symbol class NondeterministicFiniteAutomaton(EpsilonNFA): diff --git a/pyformlang/finite_automaton/nondeterministic_transition_function.py b/pyformlang/finite_automaton/nondeterministic_transition_function.py index 030a605..4815cbe 100644 --- a/pyformlang/finite_automaton/nondeterministic_transition_function.py +++ b/pyformlang/finite_automaton/nondeterministic_transition_function.py @@ -5,9 +5,8 @@ from typing import Dict, Set, Iterable, Tuple from copy import deepcopy -from .state import State -from .symbol import Symbol from .transition_function import TransitionFunction +from ..objects.finite_automaton_objects import State, Symbol class NondeterministicTransitionFunction(TransitionFunction): diff --git a/pyformlang/finite_automaton/partition.py b/pyformlang/finite_automaton/partition.py index e292900..77be2d6 100644 --- a/pyformlang/finite_automaton/partition.py +++ b/pyformlang/finite_automaton/partition.py @@ -6,7 +6,7 @@ from .doubly_linked_list import DoublyLinkedList from .doubly_linked_node import DoublyLinkedNode -from .state import State +from ..objects.finite_automaton_objects import State class Partition: diff --git a/pyformlang/finite_automaton/state.py b/pyformlang/finite_automaton/state.py deleted file mode 100644 index 53ed6d9..0000000 --- a/pyformlang/finite_automaton/state.py +++ /dev/null @@ -1,36 +0,0 @@ -""" -Representation of a state in a finite state automaton -""" - -from typing import Hashable, Any -from .finite_automaton_object import FiniteAutomatonObject - - -class State(FiniteAutomatonObject): # pylint: disable=too-few-public-methods - """ A state in a finite automaton - - Parameters - ---------- - value : any - The value of the state - - Examples - ---------- - >>> from pyformlang.finite_automaton import State - >>> State("A") - A - - """ - - def __init__(self, value: Hashable) -> None: - super().__init__(value) - self.index = None - self.index_cfg_converter = None - - def __hash__(self) -> int: - return super().__hash__() - - def __eq__(self, other: Any) -> bool: - if isinstance(other, State): - return self._value == other._value - return self._value == other diff --git a/pyformlang/finite_automaton/tests/test_epsilon.py b/pyformlang/finite_automaton/tests/test_epsilon.py index 955ac6f..7dd5add 100644 --- a/pyformlang/finite_automaton/tests/test_epsilon.py +++ b/pyformlang/finite_automaton/tests/test_epsilon.py @@ -3,7 +3,7 @@ """ from pyformlang.finite_automaton import Epsilon -from pyformlang.finite_automaton import Symbol +from pyformlang.finite_automaton import State, Symbol class TestEpsilon: @@ -16,3 +16,7 @@ def test_epsilon(self): symb = Symbol(0) assert eps0 == eps1 assert eps0 != symb + assert "epsilon" == Epsilon() + assert Epsilon() == "ɛ" + assert Symbol("ɛ") != Epsilon() + assert Epsilon() != State("epsilon") diff --git a/pyformlang/finite_automaton/tests/test_state.py b/pyformlang/finite_automaton/tests/test_state.py index 0d3d150..8046f88 100644 --- a/pyformlang/finite_automaton/tests/test_state.py +++ b/pyformlang/finite_automaton/tests/test_state.py @@ -34,6 +34,7 @@ def test_eq(self): assert state2 != state3 assert state2 == 1 assert state1 != state2 + assert State("ABC") == "ABC" def test_hash(self): """ Tests the hashing of states @@ -44,4 +45,3 @@ def test_hash(self): assert isinstance(state1, int) assert state1 == state3 assert state2 != state3 - assert state1 != state2 diff --git a/pyformlang/finite_automaton/tests/test_symbol.py b/pyformlang/finite_automaton/tests/test_symbol.py index 5d7be9a..fcb114c 100644 --- a/pyformlang/finite_automaton/tests/test_symbol.py +++ b/pyformlang/finite_automaton/tests/test_symbol.py @@ -2,7 +2,7 @@ Tests for the symbols """ -from pyformlang.finite_automaton import Symbol +from pyformlang.finite_automaton import State, Symbol class TestSymbol: @@ -33,6 +33,8 @@ def test_eq(self): assert symbol2 == 1 assert symbol2 != symbol3 assert symbol1 != symbol2 + assert "A" == Symbol("A") + assert State("A") != Symbol("A") def test_hash(self): """ Tests the hashing of symbols diff --git a/pyformlang/finite_automaton/transition_function.py b/pyformlang/finite_automaton/transition_function.py index 4682e89..2a283aa 100644 --- a/pyformlang/finite_automaton/transition_function.py +++ b/pyformlang/finite_automaton/transition_function.py @@ -5,8 +5,7 @@ from typing import Dict, Set, Tuple, Iterable, Iterator from abc import abstractmethod -from .state import State -from .symbol import Symbol +from ..objects.finite_automaton_objects import State, Symbol class TransitionFunction(Iterable[Tuple[State, Symbol, State]]): diff --git a/pyformlang/finite_automaton/utils.py b/pyformlang/finite_automaton/utils.py index ed7e4a2..d488604 100644 --- a/pyformlang/finite_automaton/utils.py +++ b/pyformlang/finite_automaton/utils.py @@ -1,39 +1,9 @@ """ Utility for finite automata """ -from typing import Dict, List, AbstractSet, Iterable, Optional, Hashable +from typing import Dict, List, AbstractSet, Iterable, Optional from numpy import empty -from .state import State -from .symbol import Symbol -from .epsilon import Epsilon - - -def to_state(given: Hashable) -> State: - """ Transforms the input into a state - - Parameters - ---------- - given : any - What we want to transform - """ - if isinstance(given, State): - return given - return State(given) - - -def to_symbol(given: Hashable) -> Symbol: - """ Transforms the input into a symbol - - Parameters - ---------- - given : any - What we want to transform - """ - if isinstance(given, Symbol): - return given - if given in ("epsilon", "ɛ"): - return Epsilon() - return Symbol(given) +from ..objects.finite_automaton_objects import State, Symbol def to_single_state(l_states: Iterable[State]) -> State: diff --git a/pyformlang/fst/__init__.py b/pyformlang/fst/__init__.py index ebf1832..afd33d1 100644 --- a/pyformlang/fst/__init__.py +++ b/pyformlang/fst/__init__.py @@ -14,4 +14,5 @@ from .fst import FST + __all__ = ["FST"] diff --git a/pyformlang/indexed_grammar/__init__.py b/pyformlang/indexed_grammar/__init__.py index 06f5ea5..14da624 100644 --- a/pyformlang/indexed_grammar/__init__.py +++ b/pyformlang/indexed_grammar/__init__.py @@ -29,6 +29,7 @@ from .duplication_rule import DuplicationRule from .indexed_grammar import IndexedGrammar + __all__ = ["Rules", "ConsumptionRule", "EndRule", diff --git a/pyformlang/objects/__init__.py b/pyformlang/objects/__init__.py new file mode 100644 index 0000000..abc7ae7 --- /dev/null +++ b/pyformlang/objects/__init__.py @@ -0,0 +1,12 @@ +""" Collection of object representations """ + +from . import finite_automaton_objects +from . import cfg_objects +from . import regex_objects +from . import pda_objects + + +__all__ = ["finite_automaton_objects", + "cfg_objects", + "regex_objects", + "pda_objects"] diff --git a/pyformlang/objects/base_epsilon.py b/pyformlang/objects/base_epsilon.py new file mode 100644 index 0000000..17c06b2 --- /dev/null +++ b/pyformlang/objects/base_epsilon.py @@ -0,0 +1,35 @@ +""" General epsilon representation """ + +from typing import Any + +from .formal_object import FormalObject +from .base_terminal import BaseTerminal + +EPSILON_SYMBOLS = ["epsilon", "ɛ"] + + +class BaseEpsilon(BaseTerminal): + """ An epsilon transition + + Examples + -------- + + >>> epsilon = Epsilon() + + """ + + def __init__(self) -> None: + super().__init__("epsilon") + + def __eq__(self, other: Any) -> bool: + return isinstance(other, BaseEpsilon) \ + or not isinstance(other, FormalObject) and other in EPSILON_SYMBOLS + + def __hash__(self) -> int: + return super().__hash__() + + def __repr__(self) -> str: + return "epsilon" + + def _is_equal_to(self, other: FormalObject) -> bool: + return isinstance(other, BaseEpsilon) diff --git a/pyformlang/objects/base_terminal.py b/pyformlang/objects/base_terminal.py new file mode 100644 index 0000000..1b37802 --- /dev/null +++ b/pyformlang/objects/base_terminal.py @@ -0,0 +1,16 @@ +""" General terminal representation """ + +from abc import abstractmethod + +from .formal_object import FormalObject + + +class BaseTerminal(FormalObject): + """ General terminal representation """ + + @abstractmethod + def __repr__(self): + raise NotImplementedError + + def _is_equal_to(self, other: FormalObject) -> bool: + return isinstance(other, BaseTerminal) and self.value == other.value diff --git a/pyformlang/objects/cfg_objects/__init__.py b/pyformlang/objects/cfg_objects/__init__.py new file mode 100644 index 0000000..a999974 --- /dev/null +++ b/pyformlang/objects/cfg_objects/__init__.py @@ -0,0 +1,14 @@ +""" CFG object representations """ + +from .cfg_object import CFGObject +from .variable import Variable +from .terminal import Terminal +from .epsilon import Epsilon +from .production import Production + + +__all__ = ["CFGObject", + "Variable", + "Terminal", + "Epsilon", + "Production"] diff --git a/pyformlang/objects/cfg_objects/cfg_object.py b/pyformlang/objects/cfg_objects/cfg_object.py new file mode 100644 index 0000000..e347e1e --- /dev/null +++ b/pyformlang/objects/cfg_objects/cfg_object.py @@ -0,0 +1,20 @@ +""" An object in a CFG (Variable and Terminal)""" + +from abc import abstractmethod + +from ..formal_object import FormalObject + + +class CFGObject(FormalObject): + """ An object in a CFG + + Parameters + ----------- + value : any + The value of the object + """ + + @abstractmethod + def to_text(self) -> str: + """ Turns the object into a text format """ + raise NotImplementedError diff --git a/pyformlang/objects/cfg_objects/epsilon.py b/pyformlang/objects/cfg_objects/epsilon.py new file mode 100644 index 0000000..35eba40 --- /dev/null +++ b/pyformlang/objects/cfg_objects/epsilon.py @@ -0,0 +1,8 @@ +""" An epsilon terminal """ + +from .terminal import Terminal +from ..base_epsilon import BaseEpsilon + + +class Epsilon(BaseEpsilon, Terminal): + """ An epsilon terminal """ diff --git a/pyformlang/cfg/production.py b/pyformlang/objects/cfg_objects/production.py similarity index 62% rename from pyformlang/cfg/production.py rename to pyformlang/objects/cfg_objects/production.py index 77270d6..5e9bd2e 100644 --- a/pyformlang/cfg/production.py +++ b/pyformlang/objects/cfg_objects/production.py @@ -1,9 +1,10 @@ """ A production or rule of a CFG """ -from typing import List -from .terminal import Terminal -from .variable import Variable +from typing import List, Set, Any + from .cfg_object import CFGObject +from .variable import Variable +from .terminal import Terminal from .epsilon import Epsilon @@ -20,7 +21,10 @@ class Production: __slots__ = ["_body", "_head", "_hash"] - def __init__(self, head: Variable, body: List[CFGObject], filtering=True): + def __init__(self, + head: Variable, + body: List[CFGObject], + filtering: bool = True) -> None: if filtering: self._body = [x for x in body if not isinstance(x, Epsilon)] else: @@ -30,26 +34,40 @@ def __init__(self, head: Variable, body: List[CFGObject], filtering=True): @property def head(self) -> Variable: - """Get the head variable""" + """Gets the head variable""" return self._head @property def body(self) -> List[CFGObject]: - """Get the body objects""" + """Gets the body objects""" return self._body - def __repr__(self): - return str(self.head) + " -> " + " ".join([str(x) for x in self.body]) + @property + def variables(self) -> Set[Variable]: + """Gets variables used in the production""" + return {self.head} | {object for object in self.body + if isinstance(object, Variable)} - def __hash__(self): + @property + def terminals(self) -> Set[Terminal]: + """Gets terminals used in the production""" + return {object for object in self.body + if isinstance(object, Terminal) and object != Epsilon()} + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Production): + return False + return self.head == other.head and self.body == other.body + + def __hash__(self) -> int: if self._hash is None: self._hash = sum(map(hash, self._body)) + hash(self._head) return self._hash - def __eq__(self, other): - return self.head == other.head and self.body == other.body + def __repr__(self) -> str: + return str(self.head) + " -> " + " ".join([str(x) for x in self.body]) - def is_normal_form(self): + def is_normal_form(self) -> bool: """ Tells is the production is in Chomsky Normal Form diff --git a/pyformlang/objects/cfg_objects/terminal.py b/pyformlang/objects/cfg_objects/terminal.py new file mode 100644 index 0000000..68f4852 --- /dev/null +++ b/pyformlang/objects/cfg_objects/terminal.py @@ -0,0 +1,17 @@ +""" A terminal in a CFG """ + +from .cfg_object import CFGObject +from ..base_terminal import BaseTerminal + + +class Terminal(BaseTerminal, CFGObject): + """ A terminal in a CFG """ + + def __repr__(self) -> str: + return f"Terminal({self})" + + def to_text(self) -> str: + text = str(self._value) + if text and text[0].isupper(): + return '"TER:' + text + '"' + return text diff --git a/pyformlang/objects/cfg_objects/utils.py b/pyformlang/objects/cfg_objects/utils.py new file mode 100644 index 0000000..76dbb96 --- /dev/null +++ b/pyformlang/objects/cfg_objects/utils.py @@ -0,0 +1,23 @@ +""" Utility for cfg object creation """ + +from typing import Hashable + +from .variable import Variable +from .terminal import Terminal +from .epsilon import Epsilon + + +def to_variable(given: Hashable) -> Variable: + """ Transformation into a variable """ + if isinstance(given, Variable): + return given + return Variable(given) + + +def to_terminal(given: Hashable) -> Terminal: + """ Transformation into a terminal """ + if given == Epsilon(): + return Epsilon() + if isinstance(given, Terminal): + return given + return Terminal(given) diff --git a/pyformlang/objects/cfg_objects/variable.py b/pyformlang/objects/cfg_objects/variable.py new file mode 100644 index 0000000..85c78f5 --- /dev/null +++ b/pyformlang/objects/cfg_objects/variable.py @@ -0,0 +1,28 @@ +""" A variable in a CFG """ + +from string import ascii_uppercase + +from .cfg_object import CFGObject +from ..formal_object import FormalObject + + +class Variable(CFGObject): + """ An variable in a CFG + + Parameters + ----------- + value : any + The value of the variable + """ + + def __repr__(self) -> str: + return f"Variable({self})" + + def to_text(self) -> str: + text = str(self._value) + if text and text[0] not in ascii_uppercase: + return '"VAR:' + text + '"' + return text + + def _is_equal_to(self, other: FormalObject) -> bool: + return isinstance(other, Variable) and self.value == other.value diff --git a/pyformlang/objects/finite_automaton_objects/__init__.py b/pyformlang/objects/finite_automaton_objects/__init__.py new file mode 100644 index 0000000..b2d9074 --- /dev/null +++ b/pyformlang/objects/finite_automaton_objects/__init__.py @@ -0,0 +1,12 @@ +""" Finite automaton object representations """ + +from .finite_automaton_object import FiniteAutomatonObject +from .state import State +from .symbol import Symbol +from .epsilon import Epsilon + + +__all__ = ["FiniteAutomatonObject", + "State", + "Symbol", + "Epsilon"] diff --git a/pyformlang/objects/finite_automaton_objects/epsilon.py b/pyformlang/objects/finite_automaton_objects/epsilon.py new file mode 100644 index 0000000..1c4f887 --- /dev/null +++ b/pyformlang/objects/finite_automaton_objects/epsilon.py @@ -0,0 +1,17 @@ +""" +Represents an epsilon transition +""" + +from .symbol import Symbol +from ..base_epsilon import BaseEpsilon + + +class Epsilon(BaseEpsilon, Symbol): + """ An epsilon transition + + Examples + -------- + + >>> epsilon = Epsilon() + + """ diff --git a/pyformlang/objects/finite_automaton_objects/finite_automaton_object.py b/pyformlang/objects/finite_automaton_objects/finite_automaton_object.py new file mode 100644 index 0000000..4666636 --- /dev/null +++ b/pyformlang/objects/finite_automaton_objects/finite_automaton_object.py @@ -0,0 +1,21 @@ +""" +Represents an object of a finite state automaton +""" + +from abc import abstractmethod + +from ..formal_object import FormalObject + + +class FiniteAutomatonObject(FormalObject): + """ Represents an object in a finite state automaton + + Parameters + ---------- + value: any + The value of the object + """ + + @abstractmethod + def __repr__(self) -> str: + raise NotImplementedError diff --git a/pyformlang/objects/finite_automaton_objects/state.py b/pyformlang/objects/finite_automaton_objects/state.py new file mode 100644 index 0000000..41244c3 --- /dev/null +++ b/pyformlang/objects/finite_automaton_objects/state.py @@ -0,0 +1,29 @@ +""" +Representation of a state in a finite state automaton +""" + +from .finite_automaton_object import FiniteAutomatonObject +from ..formal_object import FormalObject + + +class State(FiniteAutomatonObject): + """ A state in a finite automaton + + Parameters + ---------- + value : any + The value of the state + + Examples + ---------- + >>> from pyformlang.finite_automaton import State + >>> State("A") + A + + """ + + def __repr__(self) -> str: + return f"State({self})" + + def _is_equal_to(self, other: FormalObject) -> bool: + return isinstance(other, State) and self.value == other.value diff --git a/pyformlang/finite_automaton/symbol.py b/pyformlang/objects/finite_automaton_objects/symbol.py similarity index 52% rename from pyformlang/finite_automaton/symbol.py rename to pyformlang/objects/finite_automaton_objects/symbol.py index 8599108..090692d 100644 --- a/pyformlang/finite_automaton/symbol.py +++ b/pyformlang/objects/finite_automaton_objects/symbol.py @@ -2,11 +2,11 @@ This module describe a symbol in a finite automaton. """ -from typing import Any from .finite_automaton_object import FiniteAutomatonObject +from ..base_terminal import BaseTerminal -class Symbol(FiniteAutomatonObject): # pylint: disable=too-few-public-methods +class Symbol(BaseTerminal, FiniteAutomatonObject): """ A symbol in a finite automaton Parameters @@ -21,10 +21,5 @@ class Symbol(FiniteAutomatonObject): # pylint: disable=too-few-public-methods A """ - def __hash__(self) -> int: - return super().__hash__() - - def __eq__(self, other: Any) -> bool: - if isinstance(other, Symbol): - return self._value == other.value - return self._value == other + def __repr__(self) -> str: + return f"Symbol({self})" diff --git a/pyformlang/objects/finite_automaton_objects/utils.py b/pyformlang/objects/finite_automaton_objects/utils.py new file mode 100644 index 0000000..462596e --- /dev/null +++ b/pyformlang/objects/finite_automaton_objects/utils.py @@ -0,0 +1,35 @@ +""" Utility for finite automaton object creation """ + +from typing import Hashable + +from .state import State +from .symbol import Symbol +from .epsilon import Epsilon + + +def to_state(given: Hashable) -> State: + """ Transforms the input into a state + + Parameters + ---------- + given : any + What we want to transform + """ + if isinstance(given, State): + return given + return State(given) + + +def to_symbol(given: Hashable) -> Symbol: + """ Transforms the input into a symbol + + Parameters + ---------- + given : any + What we want to transform + """ + if given == Epsilon(): + return Epsilon() + if isinstance(given, Symbol): + return given + return Symbol(given) diff --git a/pyformlang/objects/formal_object.py b/pyformlang/objects/formal_object.py new file mode 100644 index 0000000..31d88f5 --- /dev/null +++ b/pyformlang/objects/formal_object.py @@ -0,0 +1,45 @@ +""" General object representation """ + +from typing import Hashable, Optional, Any +from abc import abstractmethod + + +class FormalObject: + """ General object representation """ + + def __init__(self, value: Hashable) -> None: + self._value = value + self._hash = None + self.index: Optional[int] = None + + @property + def value(self) -> Hashable: + """ Gets the value of the object + + Returns + --------- + value : any + The value of the object + """ + return self._value + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, FormalObject): + return self.value == other + return self._is_equal_to(other) and other._is_equal_to(self) + + def __hash__(self) -> int: + if self._hash is None: + self._hash = hash(self._value) + return self._hash + + def __str__(self) -> str: + return str(self._value) + + @abstractmethod + def __repr__(self) -> str: + raise NotImplementedError + + @abstractmethod + def _is_equal_to(self, other: "FormalObject") -> bool: + raise NotImplementedError diff --git a/pyformlang/objects/pda_objects/__init__.py b/pyformlang/objects/pda_objects/__init__.py new file mode 100644 index 0000000..de26f00 --- /dev/null +++ b/pyformlang/objects/pda_objects/__init__.py @@ -0,0 +1,14 @@ +""" PDA object representations """ + +from .pda_object import PDAObject +from .state import State +from .symbol import Symbol +from .stack_symbol import StackSymbol +from .epsilon import Epsilon + + +__all__ = ["PDAObject", + "State", + "Symbol", + "StackSymbol", + "Epsilon"] diff --git a/pyformlang/objects/pda_objects/epsilon.py b/pyformlang/objects/pda_objects/epsilon.py new file mode 100644 index 0000000..710d922 --- /dev/null +++ b/pyformlang/objects/pda_objects/epsilon.py @@ -0,0 +1,8 @@ +""" An epsilon symbol """ + +from .stack_symbol import StackSymbol +from ..base_epsilon import BaseEpsilon + + +class Epsilon(BaseEpsilon, StackSymbol): + """ An epsilon symbol """ diff --git a/pyformlang/objects/pda_objects/pda_object.py b/pyformlang/objects/pda_objects/pda_object.py new file mode 100644 index 0000000..ce70176 --- /dev/null +++ b/pyformlang/objects/pda_objects/pda_object.py @@ -0,0 +1,13 @@ +""" Basic PDA object representation """ + +from abc import abstractmethod + +from ..formal_object import FormalObject + + +class PDAObject(FormalObject): + """ Basic PDA object representation """ + + @abstractmethod + def __repr__(self) -> str: + raise NotImplementedError diff --git a/pyformlang/objects/pda_objects/stack_symbol.py b/pyformlang/objects/pda_objects/stack_symbol.py new file mode 100644 index 0000000..c22f29e --- /dev/null +++ b/pyformlang/objects/pda_objects/stack_symbol.py @@ -0,0 +1,21 @@ +""" A StackSymbol in a pushdown automaton """ + +from .symbol import Symbol +from ..formal_object import FormalObject + + +class StackSymbol(Symbol): + """ A StackSymbol in a pushdown automaton + + Parameters + ---------- + value : any + The value of the state + + """ + + def __repr__(self) -> str: + return f"StackSymbol({self})" + + def _is_equal_to(self, other: FormalObject) -> bool: + return isinstance(other, StackSymbol) and self.value == other.value diff --git a/pyformlang/objects/pda_objects/state.py b/pyformlang/objects/pda_objects/state.py new file mode 100644 index 0000000..8b0a385 --- /dev/null +++ b/pyformlang/objects/pda_objects/state.py @@ -0,0 +1,21 @@ +""" A State in a pushdown automaton """ + +from .pda_object import PDAObject +from ..formal_object import FormalObject + + +class State(PDAObject): + """ A State in a pushdown automaton + + Parameters + ---------- + value : any + The value of the state + + """ + + def __repr__(self) -> str: + return f"State({self})" + + def _is_equal_to(self, other: FormalObject) -> bool: + return isinstance(other, State) and self.value == other.value diff --git a/pyformlang/objects/pda_objects/symbol.py b/pyformlang/objects/pda_objects/symbol.py new file mode 100644 index 0000000..78843de --- /dev/null +++ b/pyformlang/objects/pda_objects/symbol.py @@ -0,0 +1,18 @@ +""" A Symbol in a pushdown automaton """ + +from .pda_object import PDAObject +from ..base_terminal import BaseTerminal + + +class Symbol(BaseTerminal, PDAObject): + """ A Symbol in a pushdown automaton + + Parameters + ---------- + value : any + The value of the state + + """ + + def __repr__(self) -> str: + return f"Symbol({self})" diff --git a/pyformlang/objects/pda_objects/utils.py b/pyformlang/objects/pda_objects/utils.py new file mode 100644 index 0000000..6fc0f37 --- /dev/null +++ b/pyformlang/objects/pda_objects/utils.py @@ -0,0 +1,33 @@ +""" Utility for pda object creation """ + +from typing import Hashable + +from .state import State +from .symbol import Symbol +from .stack_symbol import StackSymbol +from .epsilon import Epsilon + + +def to_state(given: Hashable) -> State: + """ Convert to a state """ + if isinstance(given, State): + return given + return State(given) + + +def to_symbol(given: Hashable) -> Symbol: + """ Convert to a symbol """ + if given == Epsilon(): + return Epsilon() + if isinstance(given, Symbol): + return given + return Symbol(given) + + +def to_stack_symbol(given: Hashable) -> StackSymbol: + """ Convert to a stack symbol """ + if given == Epsilon(): + return Epsilon() + if isinstance(given, StackSymbol): + return given + return StackSymbol(given) diff --git a/pyformlang/objects/regex_objects/__init__.py b/pyformlang/objects/regex_objects/__init__.py new file mode 100644 index 0000000..f7d9944 --- /dev/null +++ b/pyformlang/objects/regex_objects/__init__.py @@ -0,0 +1,13 @@ +""" Regex object representations """ + +from .regex_objects import * + + +__all__ = ["Node", + "Operator", + "Symbol", + "Concatenation", + "Union", + "KleeneStar", + "Epsilon", + "Empty"] diff --git a/pyformlang/regular_expression/regex_objects.py b/pyformlang/objects/regex_objects/regex_objects.py similarity index 76% rename from pyformlang/regular_expression/regex_objects.py rename to pyformlang/objects/regex_objects/regex_objects.py index 65fc5b1..9522e05 100644 --- a/pyformlang/regular_expression/regex_objects.py +++ b/pyformlang/objects/regex_objects/regex_objects.py @@ -5,11 +5,11 @@ from typing import List, Iterable from abc import abstractmethod -from pyformlang.cfg import Production -from pyformlang.cfg.utils import to_variable, to_terminal +from ..cfg_objects.production import Production +from ..cfg_objects.utils import to_variable, to_terminal -class Node: # pylint: disable=too-few-public-methods +class Node: """ Represents a node in the tree representation of a regex Parameters @@ -58,39 +58,7 @@ def get_cfg_rules(self, current_symbol: str, sons: Iterable[str]) \ raise NotImplementedError -CONCATENATION_SYMBOLS = ["."] -UNION_SYMBOLS = ["|", "+"] -KLEENE_STAR_SYMBOLS = ["*"] -EPSILON_SYMBOLS = ["epsilon", "$"] -PARENTHESIS = ["(", ")"] - -SPECIAL_SYMBOLS = CONCATENATION_SYMBOLS + \ - UNION_SYMBOLS + \ - KLEENE_STAR_SYMBOLS + \ - EPSILON_SYMBOLS + \ - PARENTHESIS - - -def to_node(value: str) -> Node: - """ Transforms a given value into a node """ - if not value: - res = Empty() - elif value in CONCATENATION_SYMBOLS: - res = Concatenation() - elif value in UNION_SYMBOLS: - res = Union() - elif value in KLEENE_STAR_SYMBOLS: - res = KleeneStar() - elif value in EPSILON_SYMBOLS: - res = Epsilon() - elif value[0] == "\\": - res = Symbol(value[1:]) - else: - res = Symbol(value) - return res - - -class Operator(Node): # pylint: disable=too-few-public-methods +class Operator(Node): """ Represents an operator Parameters @@ -113,7 +81,7 @@ def get_cfg_rules(self, current_symbol: str, sons: Iterable[str]) \ raise NotImplementedError -class Symbol(Node): # pylint: disable=too-few-public-methods +class Symbol(Node): """ Represents a symbol Parameters @@ -137,7 +105,7 @@ def __repr__(self) -> str: return "Symbol(" + str(self._value) + ")" -class Concatenation(Operator): # pylint: disable=too-few-public-methods +class Concatenation(Operator): """ Represents a concatenation """ @@ -154,7 +122,7 @@ def __init__(self) -> None: super().__init__("Concatenation") -class Union(Operator): # pylint: disable=too-few-public-methods +class Union(Operator): """ Represents a union """ @@ -171,7 +139,7 @@ def __init__(self) -> None: super().__init__("Union") -class KleeneStar(Operator): # pylint: disable=too-few-public-methods +class KleeneStar(Operator): """ Represents an epsilon symbol """ @@ -195,7 +163,7 @@ def __init__(self) -> None: super().__init__("Kleene Star") -class Epsilon(Symbol): # pylint: disable=too-few-public-methods +class Epsilon(Symbol): """ Represents an epsilon symbol """ @@ -210,7 +178,7 @@ def __init__(self) -> None: super().__init__("Epsilon") -class Empty(Symbol): # pylint: disable=too-few-public-methods +class Empty(Symbol): """ Represents an empty symbol """ diff --git a/pyformlang/objects/regex_objects/utils.py b/pyformlang/objects/regex_objects/utils.py new file mode 100644 index 0000000..5c061f9 --- /dev/null +++ b/pyformlang/objects/regex_objects/utils.py @@ -0,0 +1,35 @@ +""" Utility for regex object creation """ + +from .regex_objects import Symbol, Node, \ + Empty, Concatenation, Union, KleeneStar, Epsilon + +CONCATENATION_SYMBOLS = ["."] +UNION_SYMBOLS = ["|", "+"] +KLEENE_STAR_SYMBOLS = ["*"] +EPSILON_SYMBOLS = ["epsilon", "$"] +PARENTHESIS = ["(", ")"] + +SPECIAL_SYMBOLS = CONCATENATION_SYMBOLS + \ + UNION_SYMBOLS + \ + KLEENE_STAR_SYMBOLS + \ + EPSILON_SYMBOLS + \ + PARENTHESIS + + +def to_node(value: str) -> Node: + """ Transforms a given value into a node """ + if not value: + res = Empty() + elif value in CONCATENATION_SYMBOLS: + res = Concatenation() + elif value in UNION_SYMBOLS: + res = Union() + elif value in KLEENE_STAR_SYMBOLS: + res = KleeneStar() + elif value in EPSILON_SYMBOLS: + res = Epsilon() + elif value[0] == "\\": + res = Symbol(value[1:]) + else: + res = Symbol(value) + return res diff --git a/pyformlang/pda/__init__.py b/pyformlang/pda/__init__.py index c77cfd0..201cffc 100644 --- a/pyformlang/pda/__init__.py +++ b/pyformlang/pda/__init__.py @@ -21,12 +21,12 @@ """ from .pda import PDA -from .state import State -from .symbol import Symbol -from .stack_symbol import StackSymbol -from .epsilon import Epsilon +from .transition_function import TransitionFunction +from ..objects.pda_objects import State, Symbol, StackSymbol, Epsilon + __all__ = ["PDA", + "TransitionFunction", "State", "Symbol", "StackSymbol", diff --git a/pyformlang/pda/cfg_variable_converter.py b/pyformlang/pda/cfg_variable_converter.py deleted file mode 100644 index e49d213..0000000 --- a/pyformlang/pda/cfg_variable_converter.py +++ /dev/null @@ -1,103 +0,0 @@ -"""A CFG Variable Converter""" - -from pyformlang import cfg - - -class CFGVariableConverter: - """A CFG Variable Converter""" - - def __init__(self, states, stack_symbols): - self._counter = 0 - self._inverse_states_d = {} - self._counter_state = 0 - for self._counter_state, state in enumerate(states): - self._inverse_states_d[state] = self._counter_state - state.index_cfg_converter = self._counter_state - self._counter_state += 1 - self._inverse_stack_symbol_d = {} - self._counter_symbol = 0 - for self._counter_symbol, symbol in enumerate(stack_symbols): - self._inverse_stack_symbol_d[symbol] = self._counter_symbol - symbol.index_cfg_converter = self._counter_symbol - self._counter_symbol += 1 - self._conversions = [[[(False, None) for _ in range(len(states))] - for _ in range(len(stack_symbols))] for _ in - range(len(states))] - - def _get_state_index(self, state): - """Get the state index""" - if state.index_cfg_converter is None: - self._set_index_state(state) - return state.index_cfg_converter - - def _set_index_state(self, state): - """Set the state index""" - if state not in self._inverse_states_d: - self._inverse_states_d[state] = self._counter_state - self._counter_state += 1 - state.index_cfg_converter = self._inverse_states_d[state] - - def _get_symbol_index(self, symbol): - """Get the symbol index""" - if symbol.index_cfg_converter is None: - self._set_index_symbol(symbol) - return symbol.index_cfg_converter - - def _set_index_symbol(self, symbol): - """ Set the symbol index """ - if symbol not in self._inverse_stack_symbol_d: - self._inverse_stack_symbol_d[symbol] = self._counter_symbol - self._counter_symbol += 1 - symbol.index_cfg_converter = self._inverse_stack_symbol_d[symbol] - - def to_cfg_combined_variable(self, state0, stack_symbol, state1): - """ Conversion used in the to_pda method """ - i_stack_symbol, i_state0, i_state1 = self._get_indexes( - stack_symbol, state0, state1) - prev = self._conversions[i_state0][i_stack_symbol][i_state1] - if prev[1] is None: - return self._create_new_variable( - i_stack_symbol, i_state0, i_state1, prev)[1] - return prev[1] - - def _create_new_variable(self, - i_stack_symbol, - i_state0, - i_state1, - prev, - value=None): - # pylint: disable=too-many-arguments - if value is None: - value = self._counter - temp = (prev[0], cfg.Variable(value)) - self._counter += 1 - self._conversions[i_state0][i_stack_symbol][i_state1] = temp - return temp - - def set_valid(self, state0, stack_symbol, state1): - """Set valid""" - i_stack_symbol, i_state0, i_state1 = self._get_indexes( - stack_symbol, state0, state1) - prev = self._conversions[i_state0][i_stack_symbol][i_state1] - self._conversions[i_state0][i_stack_symbol][i_state1] = (True, prev[1]) - - def is_valid_and_get(self, state0, stack_symbol, state1): - """Check if valid and get""" - i_state0 = self._get_state_index(state0) - i_stack_symbol = self._get_symbol_index(stack_symbol) - i_state1 = self._get_state_index(state1) - current = self._conversions[i_state0][i_stack_symbol][i_state1] - if not current[0]: - return None - if current[1] is None: - return self._create_new_variable(i_stack_symbol, - i_state0, - i_state1, - current)[1] - return current[1] - - def _get_indexes(self, stack_symbol, state0, state1): - i_state0 = self._get_state_index(state0) - i_stack_symbol = self._get_symbol_index(stack_symbol) - i_state1 = self._get_state_index(state1) - return i_stack_symbol, i_state0, i_state1 diff --git a/pyformlang/pda/epsilon.py b/pyformlang/pda/epsilon.py deleted file mode 100644 index 3e1f014..0000000 --- a/pyformlang/pda/epsilon.py +++ /dev/null @@ -1,11 +0,0 @@ -""" An epsilon symbol """ - -from .symbol import Symbol - - -class Epsilon(Symbol): - """ An epsilon symbol """ - # pylint: disable=too-few-public-methods - - def __init__(self): - super().__init__("epsilon") diff --git a/pyformlang/pda/pda.py b/pyformlang/pda/pda.py index 1deb75d..f170d02 100644 --- a/pyformlang/pda/pda.py +++ b/pyformlang/pda/pda.py @@ -1,22 +1,25 @@ """ We represent here a push-down automaton """ -import json -from itertools import product -from typing import AbstractSet, List, Iterable, Any -import networkx as nx -import numpy as np +from typing import Dict, List, Set, AbstractSet, \ + Iterator, Iterable, Tuple, Type, Optional, Hashable, Any +from json import dumps, loads +from itertools import product +from networkx import MultiDiGraph from networkx.drawing.nx_pydot import write_dot -from pyformlang import cfg -from pyformlang import finite_automaton from pyformlang.finite_automaton import DeterministicFiniteAutomaton -from pyformlang.pda.cfg_variable_converter import CFGVariableConverter -from .epsilon import Epsilon -from .stack_symbol import StackSymbol -from .state import State +from pyformlang.finite_automaton import Symbol as FASymbol +from pyformlang.finite_automaton import Epsilon as FAEpsilon +from pyformlang.cfg import CFG, CFGObject, Variable, Terminal, Production +from pyformlang.cfg.cfg_variable_converter import CFGVariableConverter + from .transition_function import TransitionFunction -from .utils import PDAObjectCreator -from ..finite_automaton import FiniteAutomaton +from .transition_function import TransitionKey, TransitionValues, Transition +from .utils import PDAStateConverter, PDASymbolConverter +from ..objects.pda_objects import State, StackSymbol +from ..objects.pda_objects import Symbol as PDASymbol +from ..objects.pda_objects import Epsilon as PDAEpsilon +from ..objects.pda_objects.utils import to_state, to_symbol, to_stack_symbol INPUT_SYMBOL = 1 @@ -30,8 +33,11 @@ OUTPUT = 1 +InputTransition = Tuple[Hashable, Hashable, Hashable, + Hashable, Iterable[Hashable]] -class PDA: + +class PDA(Iterable[Transition]): """ Representation of a pushdown automaton Parameters @@ -57,93 +63,42 @@ class PDA: # pylint: disable=too-many-instance-attributes def __init__(self, - states: AbstractSet[Any] = None, - input_symbols: AbstractSet[Any] = None, - stack_alphabet: AbstractSet[Any] = None, + states: AbstractSet[Hashable] = None, + input_symbols: AbstractSet[Hashable] = None, + stack_alphabet: AbstractSet[Hashable] = None, transition_function: TransitionFunction = None, - start_state: Any = None, - start_stack_symbol: Any = None, - final_states: AbstractSet[Any] = None): + start_state: Hashable = None, + start_stack_symbol: Hashable = None, + final_states: AbstractSet[Hashable] = None): # pylint: disable=too-many-arguments - self._pda_obj_creator = PDAObjectCreator() if states is not None: - states = {self._pda_obj_creator.to_state(x) for x in states} + states = {to_state(x) for x in states} if input_symbols is not None: - input_symbols = {self._pda_obj_creator.to_symbol(x) - for x in input_symbols} + input_symbols = {to_symbol(x) for x in input_symbols} if stack_alphabet is not None: - stack_alphabet = {self._pda_obj_creator.to_stack_symbol(x) - for x in stack_alphabet} + stack_alphabet = {to_stack_symbol(x) for x in stack_alphabet} if start_state is not None: - start_state = self._pda_obj_creator.to_state(start_state) + start_state = to_state(start_state) if start_stack_symbol is not None: - start_stack_symbol = \ - self._pda_obj_creator.to_stack_symbol(start_stack_symbol) + start_stack_symbol = to_stack_symbol(start_stack_symbol) if final_states is not None: - final_states = {self._pda_obj_creator.to_state(x) - for x in final_states} - self._states = states or set() - self._states = set(self._states) - self._input_symbols = input_symbols or set() - self._input_symbols = set(self._input_symbols) - self._stack_alphabet = stack_alphabet or set() - self._stack_alphabet = set(self._stack_alphabet) + final_states = {to_state(x) for x in final_states} + self._states: Set[State] = states or set() + self._input_symbols: Set[PDASymbol] = input_symbols or set() + self._stack_alphabet: Set[StackSymbol] = stack_alphabet or set() self._transition_function = transition_function or TransitionFunction() - self._start_state = start_state + self._start_state: Optional[State] = start_state if start_state is not None: self._states.add(start_state) - self._start_stack_symbol = start_stack_symbol + self._start_stack_symbol: Optional[StackSymbol] = start_stack_symbol if start_stack_symbol is not None: self._stack_alphabet.add(start_stack_symbol) - self._final_states = final_states or set() - self._final_states = set(self._final_states) + self._final_states: Set[State] = final_states or set() for state in self._final_states: self._states.add(state) - self._cfg_variable_converter = None - - def set_start_state(self, start_state: Any): - """ Sets the start state to the automaton - - Parameters - ---------- - start_state : :class:`~pyformlang.pda.State` - The start state - """ - start_state = self._pda_obj_creator.to_state(start_state) - self._states.add(start_state) - self._start_state = start_state - - def set_start_stack_symbol(self, start_stack_symbol: Any): - """ Sets the start stack symbol to the automaton - - Parameters - ---------- - start_stack_symbol : :class:`~pyformlang.pda.StackSymbol` - The start stack symbol - """ - start_stack_symbol = self._pda_obj_creator.to_stack_symbol( - start_stack_symbol) - self._stack_alphabet.add(start_stack_symbol) - self._start_stack_symbol = start_stack_symbol - - def add_final_state(self, state: Any): - """ Adds a final state to the automaton - - Parameters - ---------- - state : :class:`~pyformlang.pda.State` - The state to add - """ - state = self._pda_obj_creator.to_state(state) - self._final_states.add(state) @property - def start_state(self): - """ Get start state """ - return self._start_state - - @property - def states(self): + def states(self) -> Set[State]: """ Get the states fo the PDA Returns @@ -154,19 +109,7 @@ def states(self): return self._states @property - def final_states(self): - """ - The final states of the PDA - Returns - ------- - final_states : iterable of :class:`~pyformlang.pda.State` - The final states of the PDA - - """ - return self._final_states - - @property - def input_symbols(self): + def input_symbols(self) -> Set[PDASymbol]: """ The input symbols of the PDA @@ -178,7 +121,7 @@ def input_symbols(self): return self._input_symbols @property - def stack_symbols(self): + def stack_symbols(self) -> Set[StackSymbol]: """ The stack symbols of the PDA @@ -189,35 +132,79 @@ def stack_symbols(self): """ return self._stack_alphabet - def get_number_transitions(self) -> int: - """ Gets the number of transitions in the PDA + @property + def start_state(self) -> Optional[State]: + """ Get start state """ + return self._start_state + + @property + def start_stack_symbol(self) -> Optional[StackSymbol]: + """ Get start stack symbol """ + return self._start_stack_symbol + @property + def final_states(self) -> Set[State]: + """ + The final states of the PDA Returns + ------- + final_states : iterable of :class:`~pyformlang.pda.State` + The final states of the PDA + + """ + return self._final_states + + def set_start_state(self, start_state: Hashable) -> None: + """ Sets the start state to the automaton + + Parameters ---------- - n_transitions : int - The number of transitions + start_state : :class:`~pyformlang.pda.State` + The start state """ - return self._transition_function.get_number_transitions() + start_state = to_state(start_state) + self._states.add(start_state) + self._start_state = start_state - def add_transitions(self, transitions): + def set_start_stack_symbol(self, start_stack_symbol: Hashable) -> None: + """ Sets the start stack symbol to the automaton + + Parameters + ---------- + start_stack_symbol : :class:`~pyformlang.pda.StackSymbol` + The start stack symbol """ - Adds several transitions + start_stack_symbol = to_stack_symbol(start_stack_symbol) + self._stack_alphabet.add(start_stack_symbol) + self._start_stack_symbol = start_stack_symbol + + def add_final_state(self, state: Hashable) -> None: + """ Adds a final state to the automaton Parameters ---------- - transitions : - Transitions as they would be given to add_transition + state : :class:`~pyformlang.pda.State` + The state to add """ - for s_from, input_symbol, stack_from, s_to, stack_to in transitions: - self.add_transition(s_from, input_symbol, stack_from, - s_to, stack_to) + state = to_state(state) + self._final_states.add(state) + + def get_number_transitions(self) -> int: + """ Gets the number of transitions in the PDA + + Returns + ---------- + n_transitions : int + The number of transitions + """ + return self._transition_function.get_number_transitions() def add_transition(self, - s_from: Any, - input_symbol: Any, - stack_from: Any, - s_to: Any, - stack_to: Iterable[Any]): + s_from: Hashable, + input_symbol: Hashable, + stack_from: Hashable, + s_to: Hashable, + stack_to: Iterable[Hashable]) -> None: """ Add a transition to the PDA Parameters @@ -234,18 +221,18 @@ def add_transition(self, The string of stack symbol which replace the stack_from """ # pylint: disable=too-many-arguments - s_from = self._pda_obj_creator.to_state(s_from) - input_symbol = self._pda_obj_creator.to_symbol(input_symbol) - stack_from = self._pda_obj_creator.to_stack_symbol(stack_from) - s_to = self._pda_obj_creator.to_state(s_to) - stack_to = [self._pda_obj_creator.to_stack_symbol(x) for x in stack_to] + s_from = to_state(s_from) + input_symbol = to_symbol(input_symbol) + stack_from = to_stack_symbol(stack_from) + s_to = to_state(s_to) + stack_to = tuple(to_stack_symbol(x) for x in stack_to) self._states.add(s_from) self._states.add(s_to) - if input_symbol != Epsilon(): + if input_symbol != PDAEpsilon(): self._input_symbols.add(input_symbol) self._stack_alphabet.add(stack_from) for stack_symbol in stack_to: - if stack_symbol != Epsilon(): + if stack_symbol != PDAEpsilon(): self._stack_alphabet.add(stack_symbol) self._transition_function.add_transition(s_from, input_symbol, @@ -253,6 +240,61 @@ def add_transition(self, s_to, stack_to) + def add_transitions(self, transitions: Iterable[InputTransition]) -> None: + """ + Adds several transitions + + Parameters + ---------- + transitions : + Transitions as they would be given to add_transition + """ + for s_from, input_symbol, stack_from, s_to, stack_to in transitions: + self.add_transition(s_from, input_symbol, stack_from, + s_to, stack_to) + + def remove_transition(self, + s_from: Hashable, + input_symbol: Hashable, + stack_from: Hashable, + s_to: Hashable, + stack_to: Iterable[Hashable]) -> None: + """ Remove the given transition from the PDA """ + s_from = to_state(s_from) + input_symbol = to_symbol(input_symbol) + stack_from = to_stack_symbol(stack_from) + s_to = to_state(s_to) + stack_to = tuple(to_stack_symbol(x) for x in stack_to) + self._transition_function.remove_transition(s_from, + input_symbol, + stack_from, + s_to, + stack_to) + + def __call__(self, + s_from: Hashable, + input_symbol: Hashable, + stack_from: Hashable) -> TransitionValues: + """ Calls transition function with given arguments """ + s_from = to_state(s_from) + input_symbol = to_symbol(input_symbol) + stack_from = to_stack_symbol(stack_from) + return self._transition_function(s_from, input_symbol, stack_from) + + def __contains__(self, transition: InputTransition) -> bool: + """ Whether the given transition is present in the PDA """ + s_from, input_symbol, stack_from, s_to, stack_to = transition + s_from = to_state(s_from) + input_symbol = to_symbol(input_symbol) + stack_from = to_stack_symbol(stack_from) + s_to = to_state(s_to) + stack_to = tuple(to_stack_symbol(x) for x in stack_to) + return (s_to, stack_to) in self(s_from, input_symbol, stack_from) + + def __iter__(self) -> Iterator[Transition]: + """ Gets an iterator of transitions of the PDA """ + yield from self._transition_function + def to_final_state(self) -> "PDA": """ Turns the current PDA that accepts a language L by empty stack \ to another PDA that accepts the same language L by final state @@ -263,23 +305,28 @@ def to_final_state(self) -> "PDA": The new PDA which accepts by final state the language that \ was accepted by empty stack """ - new_start = get_next_free("#STARTTOFINAL#", State, self._states) - new_end = get_next_free("#ENDTOFINAL#", State, self._states) - new_stack_symbol = get_next_free("#BOTTOMTOFINAL#", - StackSymbol, - self._stack_alphabet) + new_start = self.__get_next_free("#STARTTOFINAL#", + State, + self._states) + new_end = self.__get_next_free("#ENDTOFINAL#", + State, + self._states) + new_stack_symbol = self.__get_next_free("#BOTTOMTOFINAL#", + StackSymbol, + self._stack_alphabet) new_states = self._states.copy() new_states.add(new_start) new_states.add(new_end) new_stack_alphabet = self._stack_alphabet.copy() new_stack_alphabet.add(new_stack_symbol) new_tf = self._transition_function.copy() - new_tf.add_transition(new_start, Epsilon(), new_stack_symbol, - self._start_state, [self._start_stack_symbol, - new_stack_symbol]) + if self.start_state and self.start_stack_symbol: + new_tf.add_transition(new_start, PDAEpsilon(), new_stack_symbol, + self.start_state, (self.start_stack_symbol, + new_stack_symbol)) for state in self._states: - new_tf.add_transition(state, Epsilon(), new_stack_symbol, - new_end, []) + new_tf.add_transition(state, PDAEpsilon(), new_stack_symbol, + new_end, tuple()) return PDA(new_states, self._input_symbols.copy(), new_stack_alphabet, @@ -298,27 +345,32 @@ def to_empty_stack(self) -> "PDA": The new PDA which accepts by empty stack the language that was \ accepted by final state """ - new_start = get_next_free("#STARTEMPTYS#", State, self._states) - new_end = get_next_free("#ENDEMPTYS#", State, self._states) - new_stack_symbol = get_next_free("#BOTTOMEMPTYS#", - StackSymbol, - self._stack_alphabet) + new_start = self.__get_next_free("#STARTEMPTYS#", + State, + self._states) + new_end = self.__get_next_free("#ENDEMPTYS#", + State, + self._states) + new_stack_symbol = self.__get_next_free("#BOTTOMEMPTYS#", + StackSymbol, + self._stack_alphabet) new_states = self._states.copy() new_states.add(new_start) new_states.add(new_end) new_stack_alphabet = self._stack_alphabet.copy() new_stack_alphabet.add(new_stack_symbol) new_tf = self._transition_function.copy() - new_tf.add_transition(new_start, Epsilon(), new_stack_symbol, - self._start_state, [self._start_stack_symbol, - new_stack_symbol]) + if self.start_state and self.start_stack_symbol: + new_tf.add_transition(new_start, PDAEpsilon(), new_stack_symbol, + self.start_state, (self.start_stack_symbol, + new_stack_symbol)) for state in self._final_states: for stack_symbol in new_stack_alphabet: - new_tf.add_transition(state, Epsilon(), stack_symbol, - new_end, []) + new_tf.add_transition(state, PDAEpsilon(), stack_symbol, + new_end, tuple()) for stack_symbol in new_stack_alphabet: - new_tf.add_transition(new_end, Epsilon(), stack_symbol, - new_end, []) + new_tf.add_transition(new_end, PDAEpsilon(), stack_symbol, + new_end, tuple()) return PDA(new_states, self._input_symbols.copy(), new_stack_alphabet, @@ -326,7 +378,7 @@ def to_empty_stack(self) -> "PDA": new_start, new_stack_symbol) - def to_cfg(self) -> "cfg.CFG": + def to_cfg(self) -> CFG: """ Turns the language L generated by this PDA when accepting \ on empty \ stack into a CFG that accepts the same language L @@ -336,60 +388,83 @@ def to_cfg(self) -> "cfg.CFG": new_cfg : :class:`~pyformlang.cfg.CFG` The equivalent CFG """ - self._cfg_variable_converter = \ - CFGVariableConverter(self._states, self._stack_alphabet) - start = cfg.Variable("#StartCFG#") - productions = self._initialize_production_from_start_in_to_cfg(start) + variable_converter = CFGVariableConverter(self._states, + self._stack_alphabet) + start = Variable("#StartCFG#") + productions = self._initialize_production_from_start_in_to_cfg( + start, variable_converter) states = self._states - for transition in self._transition_function: + for transition in self: for state in states: - self._cfg_variable_converter.set_valid( + variable_converter.set_valid( transition[INPUT][STATE], transition[INPUT][STACK_FROM], state) - for transition in self._transition_function: + for transition in self: for state in states: self._process_transition_and_state_to_cfg(productions, state, - transition) - return cfg.CFG(start_symbol=start, productions=productions) - - def _process_transition_and_state_to_cfg(self, - productions, - state, - transition): + transition, + variable_converter) + return CFG(start_symbol=start, productions=productions) + + def _process_transition_and_state_to_cfg( + self, + productions: List[Production], + state: State, + transition: Tuple[Tuple, Tuple], + variable_converter: CFGVariableConverter) \ + -> None: current_state_has_empty_new_stack = \ len(transition[OUTPUT][NEW_STACK]) == 0 and \ state != transition[OUTPUT][STATE] if not current_state_has_empty_new_stack: - self._process_transition_and_state_to_cfg_safe(productions, state, - transition) - - def _process_transition_and_state_to_cfg_safe(self, productions, state, - transition): - head = self._get_head_from_state_and_transition(state, transition) - bodies = self._get_all_bodies_from_state_and_transition(state, - transition) - if transition[INPUT][INPUT_SYMBOL] != Epsilon(): - _prepend_input_symbol_to_the_bodies(bodies, transition) + self._process_transition_and_state_to_cfg_safe(productions, + state, + transition, + variable_converter) + + def _process_transition_and_state_to_cfg_safe( + self, + productions: List[Production], + state: State, + transition: Tuple[Tuple, Tuple], + variable_converter: CFGVariableConverter) \ + -> None: + head = self._get_head_from_state_and_transition( + state, transition, variable_converter) + bodies = self._get_all_bodies_from_state_and_transition( + state, transition, variable_converter) + if transition[INPUT][INPUT_SYMBOL] != PDAEpsilon(): + self.__prepend_input_symbol_to_the_bodies(bodies, transition) for body in bodies: - productions.append(cfg.Production(head, body, filtering=False)) - - def _get_all_bodies_from_state_and_transition(self, state, transition): + productions.append(Production(head, body, filtering=False)) + + def _get_all_bodies_from_state_and_transition( + self, + state: State, + transition: Tuple[Tuple, Tuple], + variable_converter: CFGVariableConverter) \ + -> List[List[CFGObject]]: return self._generate_all_rules(transition[OUTPUT][STATE], state, - transition[OUTPUT][NEW_STACK]) - - def _generate_all_rules(self, s_from: State, s_to: State, - ss_by: List[StackSymbol]) \ - -> Iterable[Iterable["cfg.Variable"]]: + transition[OUTPUT][NEW_STACK], + variable_converter) + + def _generate_all_rules(self, + s_from: State, + s_to: State, + ss_by: List[StackSymbol], + variable_converter: CFGVariableConverter) \ + -> List[List[CFGObject]]: """ Generates the rules in the CFG conversion """ if not ss_by: return [[]] if len(ss_by) == 1: - return self._generate_length_one_rules(s_from, s_to, ss_by) + return self._generate_length_one_rules( + s_from, s_to, ss_by, variable_converter) res = [] - is_valid_and_get = self._cfg_variable_converter.is_valid_and_get + is_valid_and_get = variable_converter.is_valid_and_get append_to_res = res.append length_ss_by_minus_one = len(ss_by) - 1 for states in product(self._states, repeat=length_ss_by_minus_one): @@ -414,30 +489,83 @@ def _generate_all_rules(self, s_from: State, s_to: State, append_to_res(temp) return res - def _generate_length_one_rules(self, s_from, s_to, ss_by): - state = self._cfg_variable_converter.is_valid_and_get(s_from, ss_by[0], + def _generate_length_one_rules(self, + s_from: State, + s_to: State, + ss_by: List[StackSymbol], + variable_converter: CFGVariableConverter) \ + -> List[List[CFGObject]]: + state = variable_converter.is_valid_and_get(s_from, ss_by[0], s_to) if state is not None: return [[state]] return [] - def _get_head_from_state_and_transition(self, state, transition): - return self._cfg_variable_converter.to_cfg_combined_variable( + def _get_head_from_state_and_transition( + self, + state: State, + transition: Tuple[Tuple, Tuple], + variable_converter: CFGVariableConverter) \ + -> Variable: + return variable_converter.to_cfg_combined_variable( transition[INPUT][STATE], transition[INPUT][STACK_FROM], state) - def _initialize_production_from_start_in_to_cfg(self, start): - productions = [] - for state in self._states: - productions.append( - cfg.Production( - start, - [self._cfg_variable_converter.to_cfg_combined_variable( - self._start_state, - self._start_stack_symbol, - state)])) - return productions + def _initialize_production_from_start_in_to_cfg( + self, + start: Variable, + variable_converter: CFGVariableConverter) \ + -> List[Production]: + if not self.start_state or not self.start_stack_symbol: + return [] + return [Production(start, + [variable_converter.to_cfg_combined_variable( + self.start_state, + self.start_stack_symbol, + state)]) + for state in self.states] + + @classmethod + def from_cfg(cls, cfg: CFG) -> "PDA": + """ Converts the CFG to a PDA that generates on empty stack an \ + equivalent language + + Returns + ---------- + new_pda : :class:`~pyformlang.pda.PDA` + The equivalent PDA when accepting on empty stack + """ + state = State("q") + pda_symbol_converter = PDASymbolConverter(cfg.terminals, cfg.variables) + input_symbols = {pda_symbol_converter.get_symbol_from(x) + for x in cfg.terminals} + stack_alphabet = {pda_symbol_converter.get_stack_symbol_from(x) + for x in cfg.terminals.union(cfg.variables)} + start_stack_symbol = None + if cfg.start_symbol: + start_stack_symbol = pda_symbol_converter.get_stack_symbol_from( + cfg.start_symbol) + new_pda = PDA(states={state}, + input_symbols=input_symbols, + stack_alphabet=stack_alphabet, + start_state=state, + start_stack_symbol=start_stack_symbol) + for production in cfg.productions: + new_pda.add_transition(state, PDAEpsilon(), + pda_symbol_converter.get_stack_symbol_from( + production.head), + state, + [pda_symbol_converter.get_stack_symbol_from( + x) for x in production.body]) + for terminal in cfg.terminals: + new_pda.add_transition(state, + pda_symbol_converter.get_symbol_from( + terminal), + pda_symbol_converter.get_stack_symbol_from( + terminal), + state, []) + return new_pda def intersection(self, other: DeterministicFiniteAutomaton) -> "PDA": """ Gets the intersection of the language L generated by the \ @@ -467,19 +595,18 @@ def intersection(self, other: DeterministicFiniteAutomaton) -> "PDA": When intersecting with something else than a regex or a finite automaton """ - start_state_other = other.start_state - if not start_state_other: + if not self.start_state or not other.start_state or other.is_empty(): return PDA() - pda_state_converter = _PDAStateConverter(self._states, other.states) + pda_state_converter = PDAStateConverter(self._states, other.states) final_states_other = other.final_states - start = pda_state_converter.to_pda_combined_state(self._start_state, - start_state_other) + start = pda_state_converter.to_pda_combined_state(self.start_state, + other.start_state) pda = PDA(start_state=start, start_stack_symbol=self._start_stack_symbol) symbols = self._input_symbols.copy() - symbols.add(Epsilon()) - to_process = [(self._start_state, start_state_other)] - processed = {(self._start_state, start_state_other)} + symbols.add(PDAEpsilon()) + to_process = [(self.start_state, other.start_state)] + processed = {(self.start_state, other.start_state)} while to_process: state_in, state_dfa = to_process.pop() if (state_in in self._final_states and state_dfa in @@ -488,18 +615,16 @@ def intersection(self, other: DeterministicFiniteAutomaton) -> "PDA": pda_state_converter.to_pda_combined_state(state_in, state_dfa)) for symbol in symbols: - if symbol == Epsilon(): - symbol_dfa = finite_automaton.Epsilon() + if symbol == PDAEpsilon(): + symbol_dfa = FAEpsilon() next_state_dfa = state_dfa else: - symbol_dfa = finite_automaton.Symbol(symbol.value) + symbol_dfa = FASymbol(symbol.value) next_state_dfa = other.get_next_state(state_dfa, symbol_dfa) if not next_state_dfa: continue for stack_symbol in self._stack_alphabet: - next_states_self = self._transition_function(state_in, - symbol, - stack_symbol) + next_states_self = self(state_in, symbol, stack_symbol) for next_state, next_stack in next_states_self: pda.add_transition( pda_state_converter.to_pda_combined_state( @@ -540,7 +665,7 @@ def __and__(self, other: DeterministicFiniteAutomaton) -> "PDA": """ return self.intersection(other) - def to_dict(self): + def to_dict(self) -> Dict[TransitionKey, TransitionValues]: """ Get the transitions of the PDA as a dictionary Returns @@ -550,7 +675,7 @@ def to_dict(self): """ return self._transition_function.to_dict() - def to_networkx(self) -> nx.MultiDiGraph: + def to_networkx(self) -> MultiDiGraph: """ Transform the current pda into a networkx graph @@ -560,7 +685,7 @@ def to_networkx(self) -> nx.MultiDiGraph: A networkx MultiDiGraph representing the pda """ - graph = nx.MultiDiGraph() + graph = MultiDiGraph() for state in self._states: graph.add_node(state.value, is_start=state == self._start_state, @@ -571,23 +696,23 @@ def to_networkx(self) -> nx.MultiDiGraph: self.__add_start_state_to_graph(graph, state) if self._start_stack_symbol is not None: graph.add_node("INITIAL_STACK_HIDDEN", - label=json.dumps(self._start_stack_symbol.value), + label=dumps(self._start_stack_symbol.value), shape=None, height=.0, width=.0) - for key, value in self._transition_function: + for key, value in self: s_from, in_symbol, stack_from = key s_to, stack_to = value graph.add_edge( s_from.value, s_to.value, - label=(json.dumps(in_symbol.value) + " -> " + - json.dumps(stack_from.value) + " / " + - json.dumps([x.value for x in stack_to]))) + label=(dumps(in_symbol.value) + " -> " + + dumps(stack_from.value) + " / " + + dumps([x.value for x in stack_to]))) return graph @classmethod - def from_networkx(cls, graph): + def from_networkx(cls, graph: MultiDiGraph) -> "PDA": """ Import a networkx graph into a PDA. \ The imported graph requires to have the good format, i.e. to come \ @@ -616,10 +741,10 @@ def from_networkx(cls, graph): if "label" in transition: in_symbol, stack_info = transition["label"].split( " -> ") - in_symbol = json.loads(in_symbol) + in_symbol = loads(in_symbol) stack_from, stack_to = stack_info.split(" / ") - stack_from = json.loads(stack_from) - stack_to = json.loads(stack_to) + stack_from = loads(stack_from) + stack_to = loads(stack_to) pda.add_transition(s_from, in_symbol, stack_from, @@ -632,10 +757,10 @@ def from_networkx(cls, graph): pda.add_final_state(node) if "INITIAL_STACK_HIDDEN" in graph.nodes: pda.set_start_stack_symbol( - json.loads(graph.nodes["INITIAL_STACK_HIDDEN"]["label"])) + loads(graph.nodes["INITIAL_STACK_HIDDEN"]["label"])) return pda - def write_as_dot(self, filename): + def write_as_dot(self, filename: str) -> None: """ Write the PDA in dot format into a file @@ -647,52 +772,47 @@ def write_as_dot(self, filename): """ write_dot(self.to_networkx(), filename) + def copy(self) -> "PDA": + """ Copies the Push-down Automaton """ + return PDA(self.states, + self.input_symbols, + self.stack_symbols, + self._transition_function.copy(), + self.start_state, + self.start_stack_symbol, + self.final_states) + + def __copy__(self) -> "PDA": + return self.copy() + @staticmethod - def __add_start_state_to_graph(graph: nx.MultiDiGraph, state: State) -> None: + def __add_start_state_to_graph(graph: MultiDiGraph, + state: State) -> None: """ Adds a starting node to a given graph """ graph.add_node("starting_" + str(state.value), - label="", - shape=None, - height=.0, - width=.0) + label="", + shape=None, + height=.0, + width=.0) graph.add_edge("starting_" + str(state.value), - state.value) - - -def _prepend_input_symbol_to_the_bodies(bodies, transition): - to_prepend = cfg.Terminal(transition[INPUT][INPUT_SYMBOL].value) - for body in bodies: - body.insert(0, to_prepend) - - -class _PDAStateConverter: - # pylint: disable=too-few-public-methods - - def __init__(self, states_pda, states_dfa): - self._inverse_state_pda = {} - for i, state in enumerate(states_pda): - self._inverse_state_pda[state] = i - self._inverse_state_dfa = {} - for i, state in enumerate(states_dfa): - self._inverse_state_dfa[state] = i - self._conversions = np.empty((len(states_pda), len(states_dfa)), - dtype=object) - - def to_pda_combined_state(self, state_pda, state_other): - """ To PDA state in the intersection function """ - i_state_pda = self._inverse_state_pda[state_pda] - i_state_other = self._inverse_state_dfa[state_other] - if self._conversions[i_state_pda, i_state_other] is None: - self._conversions[i_state_pda, i_state_other] = State( - (state_pda, state_other)) - return self._conversions[i_state_pda, i_state_other] - - -def get_next_free(prefix, type_generating, to_check): - """ Get free next state or symbol """ - idx = 0 - new_var = type_generating(prefix) - while new_var in to_check: - new_var = type_generating(prefix + str(idx)) - idx += 1 - return new_var + state.value) + + @staticmethod + def __prepend_input_symbol_to_the_bodies(bodies: List[List[CFGObject]], + transition: Tuple[Tuple, Tuple]) \ + -> None: + to_prepend = Terminal(transition[INPUT][INPUT_SYMBOL].value) + for body in bodies: + body.insert(0, to_prepend) + + @staticmethod + def __get_next_free(prefix: str, + type_generating: Type, + to_check: Iterable[Any]) -> Any: + """ Get free next state or symbol """ + idx = 0 + new_var = type_generating(prefix) + while new_var in to_check: + new_var = type_generating(prefix + str(idx)) + idx += 1 + return new_var diff --git a/pyformlang/pda/stack_symbol.py b/pyformlang/pda/stack_symbol.py deleted file mode 100644 index 6d8dd7e..0000000 --- a/pyformlang/pda/stack_symbol.py +++ /dev/null @@ -1,39 +0,0 @@ -""" A StackSymbol in a pushdown automaton """ - - -class StackSymbol: - """ A StackSymbol in a pushdown automaton - - Parameters - ---------- - value : any - The value of the state - - """ - - def __init__(self, value): - self._value = value - self._hash = None - self.index_cfg_converter = None - - @property - def value(self): - """ Returns the value of the stack symbol - - Returns - ---------- - value: The value - any - """ - return self._value - - def __hash__(self): - if self._hash is None: - self._hash = hash(self._value) - return self._hash - - def __eq__(self, other): - return self._value == other.value - - def __repr__(self): - return "StackSymbol(" + str(self._value) + ")" diff --git a/pyformlang/pda/state.py b/pyformlang/pda/state.py deleted file mode 100644 index d69abfd..0000000 --- a/pyformlang/pda/state.py +++ /dev/null @@ -1,41 +0,0 @@ -""" A State in a pushdown automaton """ - - -class State: - """ A State in a pushdown automaton - - Parameters - ---------- - value : any - The value of the state - - """ - - def __init__(self, value): - self._value = value - self._hash = None - self.index_cfg_converter = None - - def __hash__(self): - if self._hash is None: - self._hash = hash(self._value) - return self._hash - - @property - def value(self): - """ Returns the value of the state - - Returns - ---------- - value: The value - any - """ - return self._value - - def __eq__(self, other): - if isinstance(other, State): - return self._value == other.value - return False - - def __repr__(self): - return "State(" + str(self._value) + ")" diff --git a/pyformlang/pda/symbol.py b/pyformlang/pda/symbol.py deleted file mode 100644 index 94c7cf8..0000000 --- a/pyformlang/pda/symbol.py +++ /dev/null @@ -1,37 +0,0 @@ -""" A Symbol in a pushdown automaton """ - - -class Symbol: - """ A Symbol in a pushdown automaton - - Parameters - ---------- - value : any - The value of the state - - """ - - def __init__(self, value): - self._value = value - - def __hash__(self): - return hash(str(self._value)) - - @property - def value(self): - """ Returns the value of the symbol - - Returns - ---------- - value: The value - any - """ - return self._value - - def __eq__(self, other): - if isinstance(other, Symbol): - return self._value == other.value - return False - - def __repr__(self): - return "Symbol(" + str(self._value) + ")" diff --git a/pyformlang/pda/tests/test_pda.py b/pyformlang/pda/tests/test_pda.py index 569415e..03173cd 100644 --- a/pyformlang/pda/tests/test_pda.py +++ b/pyformlang/pda/tests/test_pda.py @@ -1,14 +1,29 @@ """ Tests the PDA """ + +import pytest from os import path from pyformlang.pda import PDA, State, StackSymbol, Symbol, Epsilon -from pyformlang.cfg import Terminal +from pyformlang.cfg import Terminal, Epsilon as CFGEpsilon from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.finite_automaton import State as FAState, Symbol as FASymbol -from pyformlang.pda.utils import PDAObjectCreator from pyformlang.regular_expression import Regex +@pytest.fixture +def pda_example() -> PDA: + pda = PDA() + pda.add_transitions([ + ("q0", "0", "Z0", "q1", ("Z1", "Z0")), + ("q1", "1", "Z1", "q2", []), + ("q0", "epsilon", "Z1", "q2", []) + ]) + pda.set_start_state("q0") + pda.set_start_stack_symbol("Z0") + pda.add_final_state("q2") + return pda + + class TestPDA: """ Tests the pushdown automata """ @@ -31,16 +46,16 @@ def test_creation(self): pda = PDA(final_states={State("A"), State("A"), State("B"), Symbol("B")}) assert pda is not None - assert len(pda.states) == 3 + assert len(pda.states) == 2 assert len(pda.input_symbols) == 0 assert len(pda.stack_symbols) == 0 - assert len(pda.final_states) == 3 + assert len(pda.final_states) == 2 pda = PDA(input_symbols={Symbol("A"), Symbol("B"), Symbol("A"), State("A")}) assert pda is not None assert len(pda.states) == 0 - assert len(pda.input_symbols) == 3 + assert len(pda.input_symbols) == 2 assert len(pda.stack_symbols) == 0 assert len(pda.final_states) == 0 @@ -52,7 +67,7 @@ def test_creation(self): assert len(pda.final_states) == 0 pda = PDA(stack_alphabet={StackSymbol("A"), StackSymbol("A"), - StackSymbol("B")}) + StackSymbol("B"), Symbol("B")}) assert pda is not None assert len(pda.states) == 0 assert len(pda.input_symbols) == 0 @@ -70,11 +85,13 @@ def test_creation(self): def test_represent(self): """ Tests representations """ symb = Symbol("S") - assert str(symb) == "Symbol(S)" + assert repr(symb) == "Symbol(S)" state = State("T") - assert str(state) == "State(T)" + assert repr(state) == "State(T)" stack_symb = StackSymbol("U") - assert str(stack_symb) == "StackSymbol(U)" + assert repr(stack_symb) == "StackSymbol(U)" + assert repr(Epsilon()) == "epsilon" + assert str(Epsilon()) == "epsilon" def test_transition(self): """ Tests the creation of transition """ @@ -326,24 +343,9 @@ def test_intersection_regex(self): cfg = pda_es.to_cfg() assert not cfg - def test_pda_object_creator_epsilon(self): - """ Test creation objects """ - poc = PDAObjectCreator() - assert poc.to_stack_symbol(Epsilon()) == Epsilon() - - def test_pda_paper(self): + def test_pda_paper(self, pda_example: PDA): """ Code in the paper """ - pda = PDA() - pda.add_transitions( - [ - ("q0", "0", "Z0", "q1", ("Z1", "Z0")), - ("q1", "1", "Z1", "q2", []), - ("q0", "epsilon", "Z1", "q2", []) - ] - ) - pda.set_start_state("q0") - pda.set_start_stack_symbol("Z0") - pda.add_final_state("q2") + pda = pda_example pda_final_state = pda.to_final_state() assert pda_final_state is not None cfg = pda.to_empty_stack().to_cfg() @@ -358,3 +360,63 @@ def test_pda_paper(self): pda_networkx.write_as_dot("pda.dot") assert cfg.contains(["0", "1"]) assert path.exists("pda.dot") + + def test_copy(self, pda_example: PDA): + """ Tests the copying of PDA """ + pda = pda_example + pda_copy = pda.copy() + assert pda.states == pda_copy.states + assert pda.input_symbols == pda_copy.input_symbols + assert pda.stack_symbols == pda_copy.stack_symbols + assert pda.to_dict() == pda_copy.to_dict() + assert pda.start_state == pda_copy.start_state + assert pda.start_stack_symbol == pda_copy.start_stack_symbol + assert pda.final_states == pda_copy.final_states + assert pda is not pda_copy + + def test_object_eq(self): + """ Tests the equality of pda objects """ + assert StackSymbol("c") == StackSymbol("c") + assert State("a") == "a" + assert "C" == Symbol("C") + assert Epsilon() != Symbol("epsilon") + assert Epsilon() == CFGEpsilon() + assert "epsilon" == Epsilon() + assert Epsilon() == "ɛ" + assert State("A") != State("B") + assert State("A") != Symbol("A") + assert Symbol("A") != StackSymbol("A") + assert StackSymbol("ABC") != Symbol("ABC") + assert State("ABC") != FAState("ABC") + assert Symbol("s") == Terminal("s") + assert Terminal(1) != StackSymbol(1) + assert StackSymbol(42) != FAState(42) + + def test_contains(self, pda_example: PDA): + """ Tests the transition containment checks """ + pda = pda_example + assert ("q1", "1", "Z1", "q2", []) in pda + assert ("q0", "epsilon", "Z1", "q2", tuple()) in pda + assert ("a", "b", "c", "d", ["e"]) not in pda + pda.add_transition("q1", "1", "Z1", "q5", ["a"]) + assert ("q1", "1", "Z1", "q5", ["a"]) in pda + + def test_remove_transition(self, pda_example: PDA): + """ Tests the pda transition removal """ + pda = pda_example + assert ("q0", "0", "Z0", "q1", ("Z1", "Z0")) in pda + pda.remove_transition("q0", "0", "Z0", "q1", ("Z1", "Z0")) + assert ("q0", "0", "Z0", "q1", ("Z1", "Z0")) not in pda + pda.remove_transition("q0", "0", "Z0", "q1", ("Z1", "Z0")) + assert ("q0", "0", "Z0", "q1", ("Z1", "Z0")) not in pda + pda.remove_transition("a", "b", "c", "d", ["e"]) + assert pda.get_number_transitions() == 2 + + def test_iteration(self, pda_example: PDA): + """ Tests the iteration of pda transitions """ + pda = pda_example + transitions = list(iter(pda)) + assert (("q0", "0", "Z0"), ("q1", ("Z1", "Z0"))) in transitions + assert (("q1", "1", "Z1"), ("q2", tuple())) in transitions + assert (("q0", "epsilon", "Z1"), ("q2", tuple())) in transitions + assert len(transitions) == 3 diff --git a/pyformlang/pda/transition_function.py b/pyformlang/pda/transition_function.py index a458cd1..fa52bec 100644 --- a/pyformlang/pda/transition_function.py +++ b/pyformlang/pda/transition_function.py @@ -1,22 +1,23 @@ """ A transition function in a pushdown automaton """ -from typing import List +from copy import deepcopy +from typing import Dict, Set, Iterator, Iterable, Tuple -from .stack_symbol import StackSymbol -from .state import State -from .symbol import Symbol +from ..objects.pda_objects import State, Symbol, StackSymbol +TransitionKey = Tuple[State, Symbol, StackSymbol] +TransitionValue = Tuple[State, Tuple[StackSymbol, ...]] +TransitionValues = Set[TransitionValue] +Transition = Tuple[TransitionKey, TransitionValue] -class TransitionFunction: + +class TransitionFunction(Iterable[Transition]): """ A transition function in a pushdown automaton """ - def __init__(self): - self._transitions = {} - self._iter_key = None - self._current_key = None - self._iter_inside = None + def __init__(self) -> None: + self._transitions: Dict[TransitionKey, TransitionValues] = {} - def get_number_transitions(self): + def get_number_transitions(self) -> int: """ Gets the number of transitions Returns @@ -32,7 +33,7 @@ def add_transition(self, input_symbol: Symbol, stack_from: StackSymbol, s_to: State, - stack_to: List[StackSymbol]): + stack_to: Tuple[StackSymbol, ...]) -> None: """ Add a transition to the function Parameters @@ -49,12 +50,23 @@ def add_transition(self, The string of stack symbol which replace the stack_from """ temp_in = (s_from, input_symbol, stack_from) - temp_out = (s_to, tuple(stack_to)) + temp_out = (s_to, stack_to) if temp_in in self._transitions: self._transitions[temp_in].add(temp_out) else: self._transitions[temp_in] = {temp_out} + def remove_transition(self, + s_from: State, + input_symbol: Symbol, + stack_from: StackSymbol, + s_to: State, + stack_to: Tuple[StackSymbol, ...]) -> None: + """ Remove the given transition from the function """ + key = (s_from, input_symbol, stack_from) + if key in self._transitions: + self._transitions[key].discard((s_to, stack_to)) + def copy(self) -> "TransitionFunction": """ Copy the current transition function @@ -66,35 +78,27 @@ def copy(self) -> "TransitionFunction": new_tf = TransitionFunction() for temp_in, transition in self._transitions.items(): for temp_out in transition: - new_tf.add_transition(temp_in[0], temp_in[1], temp_in[2], - temp_out[0], temp_out[1]) + new_tf.add_transition(*temp_in, *temp_out) return new_tf - def __iter__(self): - self._iter_key = iter(self._transitions.keys()) - self._current_key = None - self._iter_inside = None - return self - - def __next__(self): - if self._iter_inside is None: - next_key = next(self._iter_key) - self._current_key = next_key - self._iter_inside = iter(self._transitions[next_key]) - try: - next_value = next(self._iter_inside) - return self._current_key, next_value - except StopIteration: - next_key = next(self._iter_key) - self._current_key = next_key - self._iter_inside = iter(self._transitions[next_key]) - return next(self) - - def __call__(self, s_from: State, + def __copy__(self) -> "TransitionFunction": + return self.copy() + + def __call__(self, + s_from: State, input_symbol: Symbol, - stack_from: StackSymbol): - return self._transitions.get((s_from, input_symbol, stack_from), {}) + stack_from: StackSymbol) -> TransitionValues: + return self._transitions.get((s_from, input_symbol, stack_from), set()) + + def __contains__(self, transition: Transition) -> bool: + key, value = transition + return value in self(*key) + + def __iter__(self) -> Iterator[Transition]: + for key, values in self._transitions.items(): + for value in values: + yield key, value - def to_dict(self): + def to_dict(self) -> Dict[TransitionKey, TransitionValues]: """Get the dictionary representation of the transitions""" - return self._transitions + return deepcopy(self._transitions) diff --git a/pyformlang/pda/utils.py b/pyformlang/pda/utils.py index 3fcbb35..e1b0dea 100644 --- a/pyformlang/pda/utils.py +++ b/pyformlang/pda/utils.py @@ -1,57 +1,81 @@ """ Useful functions for a PDA """ -from .state import State -from .symbol import Symbol -from .stack_symbol import StackSymbol -from .epsilon import Epsilon - - -class PDAObjectCreator: - """ - A Object in a PDA - """ - - def __init__(self): - self._state_creator = {} - self._symbol_creator = {} - self._stack_symbol_creator = {} - - def to_state(self, given): - """ Convert to a state """ - if isinstance(given, State): - return _get_object_from_known(given, self._state_creator) - return _get_object_from_raw(given, self._state_creator, State) - - def to_symbol(self, given): - """ Convert to a symbol """ - if isinstance(given, Symbol): - return _get_object_from_known(given, self._symbol_creator) - if given == "epsilon": - return Epsilon() - return _get_object_from_raw(given, self._symbol_creator, Symbol) - - def to_stack_symbol(self, given): - """ Convert to a stack symbol """ - if isinstance(given, StackSymbol): - return _get_object_from_known(given, - self._stack_symbol_creator) - if isinstance(given, Epsilon): - return given - return _get_object_from_raw(given, - self._stack_symbol_creator, - StackSymbol) - - -def _get_object_from_known(given, obj_converter): - if given.value in obj_converter: - return obj_converter[given.value] - obj_converter[given.value] = given - return given - - -def _get_object_from_raw(given, obj_converter, to_type): - if given in obj_converter: - return obj_converter[given] - temp = to_type(given) - obj_converter[given] = temp - return temp +from typing import Dict, Set, Iterable, Optional +from numpy import empty + +from pyformlang.cfg import CFGObject, Variable, Terminal, Epsilon as CFGEpsilon +from pyformlang.finite_automaton import State as FAState + +from ..objects.pda_objects import Symbol, StackSymbol +from ..objects.pda_objects import State as PDAState +from ..objects.pda_objects import Epsilon as PDAEpsilon + + +class PDAStateConverter: + """Combines PDA and FA states""" + # pylint: disable=too-few-public-methods + + def __init__(self, + states_pda: Set[PDAState], + states_dfa: Set[FAState]) -> None: + self._inverse_state_pda = {} + for i, state in enumerate(states_pda): + self._inverse_state_pda[state] = i + self._inverse_state_dfa = {} + for i, state in enumerate(states_dfa): + self._inverse_state_dfa[state] = i + self._conversions = empty((len(states_pda), len(states_dfa)), + dtype=PDAState) + + def to_pda_combined_state(self, + state_pda: PDAState, + state_other: FAState) -> PDAState: + """ To PDA state in the intersection function """ + i_state_pda = self._inverse_state_pda[state_pda] + i_state_other = self._inverse_state_dfa[state_other] + if self._conversions[i_state_pda, i_state_other] is None: + self._conversions[i_state_pda, i_state_other] = \ + [PDAState((state_pda, state_other))] + return self._conversions[i_state_pda, i_state_other][0] + + +class PDASymbolConverter: + """Creates Objects for a PDA""" + + def __init__(self, + terminals: Iterable[Terminal], + variables: Iterable[Variable]) -> None: + self._inverse_symbol: Dict[CFGObject, Optional[Symbol]] = {} + self._inverse_stack_symbol: Dict[CFGObject, Optional[StackSymbol]] = {} + for terminal in terminals: + self._inverse_symbol[terminal] = None + self._inverse_stack_symbol[terminal] = None + for variable in variables: + self._inverse_stack_symbol[variable] = None + + def get_symbol_from(self, symbol: CFGObject) -> Symbol: + """Get a symbol""" + if isinstance(symbol, CFGEpsilon): + return PDAEpsilon() + inverse_symbol = self._inverse_symbol[symbol] + if inverse_symbol is None: + value = str(symbol.value) + temp = Symbol(value) + self._inverse_symbol[symbol] = temp + return temp + return inverse_symbol + + def get_stack_symbol_from(self, stack_symbol: CFGObject) \ + -> StackSymbol: + """Get a stack symbol""" + if isinstance(stack_symbol, CFGEpsilon): + return PDAEpsilon() + inverse_stack_symbol = self._inverse_stack_symbol[stack_symbol] + if inverse_stack_symbol is None: + value = str(stack_symbol.value) + if isinstance(stack_symbol, Terminal): + value = "#TERM#" + value + temp = StackSymbol(value) + self._inverse_stack_symbol[stack_symbol] = temp + return temp + return inverse_stack_symbol diff --git a/pyformlang/regular_expression/__init__.py b/pyformlang/regular_expression/__init__.py index 72435d1..5b8e6fe 100644 --- a/pyformlang/regular_expression/__init__.py +++ b/pyformlang/regular_expression/__init__.py @@ -19,9 +19,8 @@ """ - from .regex import Regex -from .regex_objects import MisformedRegexError -from .python_regex import PythonRegex +from .python_regex import PythonRegex, MisformedRegexError + __all__ = ["Regex", "PythonRegex", "MisformedRegexError"] diff --git a/pyformlang/regular_expression/python_regex.py b/pyformlang/regular_expression/python_regex.py index 3f6f48f..0d446eb 100644 --- a/pyformlang/regular_expression/python_regex.py +++ b/pyformlang/regular_expression/python_regex.py @@ -7,9 +7,9 @@ from string import printable from unicodedata import lookup -from .regex_objects import MisformedRegexError from .regex_reader import WRONG_PARENTHESIS_MESSAGE from .regex import Regex +from ..objects.regex_objects import MisformedRegexError PRINTABLES = list(printable) diff --git a/pyformlang/regular_expression/regex.py b/pyformlang/regular_expression/regex.py index 80274b0..9fee921 100644 --- a/pyformlang/regular_expression/regex.py +++ b/pyformlang/regular_expression/regex.py @@ -7,12 +7,12 @@ from pyformlang.finite_automaton import FiniteAutomaton, EpsilonNFA from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.finite_automaton import State, Symbol, Epsilon as FAEpsilon -from pyformlang.cfg.cfg import CFG, Production -from pyformlang.cfg.utils import to_variable +from pyformlang.cfg import CFG, Production from .regex_reader import RegexReader -from .regex_objects import Epsilon as RegexEpsilon, \ - Empty, Concatenation, Union, KleeneStar +from ..objects.regex_objects import \ + Epsilon as RegexEpsilon, Empty, Concatenation, Union, KleeneStar +from ..objects.cfg_objects.utils import to_variable class Regex(RegexReader): diff --git a/pyformlang/regular_expression/regex_reader.py b/pyformlang/regular_expression/regex_reader.py index 11d86fa..157847b 100644 --- a/pyformlang/regular_expression/regex_reader.py +++ b/pyformlang/regular_expression/regex_reader.py @@ -5,8 +5,10 @@ from typing import List, Optional from re import sub -from .regex_objects import to_node, Node, Operator, Symbol, Empty, \ - Concatenation, Union, KleeneStar, MisformedRegexError, SPECIAL_SYMBOLS +from ..objects.regex_objects import \ + Node, Operator, Symbol, Empty, Concatenation, Union, KleeneStar, \ + MisformedRegexError +from ..objects.regex_objects.utils import to_node, SPECIAL_SYMBOLS MISFORMED_MESSAGE = "The regex is misformed here." diff --git a/pyformlang/rsa/__init__.py b/pyformlang/rsa/__init__.py index dda8323..66d25d2 100644 --- a/pyformlang/rsa/__init__.py +++ b/pyformlang/rsa/__init__.py @@ -22,8 +22,8 @@ """ - from .recursive_automaton import RecursiveAutomaton from .box import Box + __all__ = ["RecursiveAutomaton", "Box"] diff --git a/pyformlang/rsa/box.py b/pyformlang/rsa/box.py index 4f792b9..d1cdf55 100644 --- a/pyformlang/rsa/box.py +++ b/pyformlang/rsa/box.py @@ -6,7 +6,8 @@ from pyformlang.finite_automaton import DeterministicFiniteAutomaton from pyformlang.finite_automaton import State, Symbol -from pyformlang.finite_automaton.utils import to_symbol + +from ..objects.finite_automaton_objects.utils import to_symbol class Box: diff --git a/pyformlang/rsa/recursive_automaton.py b/pyformlang/rsa/recursive_automaton.py index d823dc8..703bbfa 100644 --- a/pyformlang/rsa/recursive_automaton.py +++ b/pyformlang/rsa/recursive_automaton.py @@ -5,11 +5,11 @@ from typing import Dict, Set, AbstractSet, Optional, Hashable, Any from pyformlang.finite_automaton import Symbol -from pyformlang.finite_automaton.utils import to_symbol from pyformlang.regular_expression import Regex from pyformlang.cfg import Epsilon from .box import Box +from ..objects.finite_automaton_objects.utils import to_symbol class RecursiveAutomaton: diff --git a/pyformlang/rsa/tests/test_rsa.py b/pyformlang/rsa/tests/test_rsa.py index 1fa3162..bba62d2 100644 --- a/pyformlang/rsa/tests/test_rsa.py +++ b/pyformlang/rsa/tests/test_rsa.py @@ -1,5 +1,5 @@ """ Tests for RSA """ -from pyformlang.finite_automaton.symbol import Symbol +from pyformlang.finite_automaton import Symbol from pyformlang.regular_expression import Regex from pyformlang.rsa.recursive_automaton import RecursiveAutomaton