From 52b09efffe95799492475f53a1ab33ea0e616cac Mon Sep 17 00:00:00 2001 From: "Robert P. Goldman" Date: Mon, 24 Mar 2025 09:00:19 -0500 Subject: [PATCH 01/18] Linter tweaks. Code formatting issues from the linter. Pruned imports per linter Provide more information in error-handler. Replace error print statements with logging messages at error level. Tweak error handling. --- popper.py | 2 +- popper/bkcons.py | 27 ++--- popper/combine.py | 2 +- popper/gen2.py | 62 ++++++----- popper/loop.py | 265 +++++++++++++++++++++++++--------------------- popper/tester.py | 94 ++++++++-------- popper/util.py | 183 +++++++++++++++++++------------- 7 files changed, 357 insertions(+), 278 deletions(-) diff --git a/popper.py b/popper.py index 7061e831..78df9430 100755 --- a/popper.py +++ b/popper.py @@ -6,7 +6,7 @@ if __name__ == '__main__': settings = Settings(cmd_line=True) prog, score, stats = learn_solution(settings) - if prog != None: + if prog is not None: settings.print_prog_score(prog, score) else: print('NO SOLUTION') diff --git a/popper/bkcons.py b/popper/bkcons.py index 7ce7a752..4f3e97a4 100644 --- a/popper/bkcons.py +++ b/popper/bkcons.py @@ -1,16 +1,16 @@ +import numbers +import sys +import traceback +from collections import defaultdict +from itertools import product +from traceback import format_tb + import clingo import clingo.script -import numbers -import operator -import pkg_resources -import time -from . util import rule_is_recursive, format_rule, Constraint, order_prog, Literal, suppress_stdout_stderr from clingo import Function, Number, Tuple_ -from collections import defaultdict -from itertools import permutations, product -from pysat.card import * -from pysat.formula import CNF -from pysat.solvers import Solver + +from .util import suppress_stdout_stderr + clingo.script.enable_python() tmp_map = {} @@ -52,7 +52,8 @@ # return (head_pred, arity), body_preds -from itertools import permutations, combinations +from itertools import permutations + all_myvars = ['A','B','C','D','E','F','G','H'] def connected(xs, ys): @@ -628,7 +629,6 @@ def atom_to_symbol(pred, args): return Function(name = pred, arguments = xs) def deduce_bk_cons(settings, tester): - import re prog = [] lookup2 = {k: f'({v})' for k,v in tmp_map.items()} lookup1 = {k:v for k,v in lookup2.items()} @@ -755,7 +755,8 @@ def deduce_recalls(settings): solver.add('base', [], bk) solver.ground([('base', [])]) except Exception as Err: - print('ERROR deducing recalls', Err) + settings.logger.error(f'ERROR deducing recalls: {Err}') + traceback.print_exc(None, file=sys.stderr) return None for pred, arity in settings.body_preds: diff --git a/popper/combine.py b/popper/combine.py index 4d095a03..e42d10c6 100644 --- a/popper/combine.py +++ b/popper/combine.py @@ -258,7 +258,7 @@ def find_combination(self, timeout): else: if not self.settings.lex: - print("ERROR: Combining rec or pi programs not supported with MDL objective. Exiting.") + self.settings.logger.error("ERROR: Combining rec or pi programs not supported with MDL objective. Exiting.") assert(False) model_inconsistent = self.tester.test_prog_inconsistent(model_prog) diff --git a/popper/gen2.py b/popper/gen2.py index 214334b4..ce7a4f98 100644 --- a/popper/gen2.py +++ b/popper/gen2.py @@ -1,12 +1,15 @@ -import time +import numbers import re +from itertools import permutations +from typing import Any, Set, Tuple + import clingo -import numbers import clingo.script import pkg_resources -from . util import Constraint, Literal from clingo import Function, Number, Tuple_ -from itertools import permutations + +from .util import Constraint, Literal + def arg_to_symbol(arg): if isinstance(arg, tuple): @@ -16,9 +19,11 @@ def arg_to_symbol(arg): if isinstance(arg, str): return Function(arg) + def atom_to_symbol(pred, args): xs = tuple(arg_to_symbol(arg) for arg in args) - return Function(name = pred, arguments = xs) + return Function(name=pred, arguments=xs) + DEFAULT_HEURISTIC = """ #heuristic size(N). [1000-N,true] @@ -29,6 +34,7 @@ def atom_to_symbol(pred, args): program_size_at_least(M):- size(N), program_bounds(M), M <= N. """ + class Generator: def __init__(self, settings, bkcons=[]): @@ -44,12 +50,12 @@ def __init__(self, settings, bkcons=[]): with open(settings.bias_file) as f: bias_text = f.read() - bias_text = re.sub(r'max_vars\(\d*\).','', bias_text) - bias_text = re.sub(r'max_body\(\d*\).','', bias_text) - bias_text = re.sub(r'max_clauses\(\d*\).','', bias_text) + bias_text = re.sub(r'max_vars\(\d*\).', '', bias_text) + bias_text = re.sub(r'max_body\(\d*\).', '', bias_text) + bias_text = re.sub(r'max_clauses\(\d*\).', '', bias_text) - for p,a in settings.pointless: - bias_text = re.sub(rf'body_pred\({p},{a}\).','', bias_text) + for p, a in settings.pointless: + bias_text = re.sub(rf'body_pred\({p},{a}\).', '', bias_text) bias_text = re.sub(rf'constant\({p},.*?\).*', '', bias_text, flags=re.MULTILINE) encoding.append(bias_text) @@ -71,13 +77,13 @@ def __init__(self, settings, bkcons=[]): type_encoding = set() if self.settings.head_types: types = tuple(self.settings.head_types) - str_types = str(types).replace("'","") + str_types = str(types).replace("'", "") for i, x in enumerate(self.settings.head_types): type_encoding.add(f'type_pos({str_types}, {i}, {x}).') for pred, types in self.settings.body_types.items(): types = tuple(types) - str_types = str(types).replace("'","") + str_types = str(types).replace("'", "") for i, x in enumerate(types): type_encoding.add(f'type_pos({str_types}, {i}, {x}).') encoding.extend(type_encoding) @@ -104,10 +110,10 @@ def __init__(self, settings, bkcons=[]): encoding = '\n'.join(encoding) # with open('ENCODING-GEN.pl', 'w') as f: - # f.write(encoding) + # f.write(encoding) if self.settings.single_solve: - solver = clingo.Control(['--heuristic=Domain','-Wnone']) + solver = clingo.Control(['--heuristic=Domain', '-Wnone']) solver.configuration.solve.models = 0 solver.add('base', [], encoding) @@ -120,12 +126,12 @@ def update_solver(self, size): def get_prog(self): if self.handle is None: - self.handle = iter(self.solver.solve(yield_ = True)) + self.handle = iter(self.solver.solve(yield_=True)) self.model = next(self.handle, None) if self.model is None: return None - return self.parse_model_single_rule(self.model.symbols(shown = True)) + return self.parse_model_single_rule(self.model.symbols(shown=True)) def parse_model_single_rule(self, model): settings = self.settings @@ -154,13 +160,13 @@ def constrain(self, tmp_new_cons): if con_type == Constraint.GENERALISATION or con_type == Constraint.BANISH: con_size = None - if self.settings.noisy and len(xs)>2: + if self.settings.noisy and len(xs) > 2: con_size = xs[2] ground_rules2 = tuple(self.build_generalisation_constraint3(con_prog, con_size)) new_ground_cons.update(ground_rules2) elif con_type == Constraint.SPECIALISATION: con_size = None - if self.settings.noisy and len(xs)>2: + if self.settings.noisy and len(xs) > 2: con_size = xs[2] ground_rules2 = tuple(self.build_specialisation_constraint3(con_prog, con_size)) new_ground_cons.update(ground_rules2) @@ -241,12 +247,12 @@ def find_deep_bindings4(self, body): if len(body_types) == 0 or head_types is None: num_vars = len({var for atom in body for var in atom.arguments}) for xs in permutations(range(self.settings.max_vars), num_vars): - x = {i:xs[i] for i in range(num_vars)} + x = {i: xs[i] for i in range(num_vars)} yield x return # if there are types, only find type-safe permutations - var_type_lookup = {i:head_type for i, head_type in enumerate(head_types)} + var_type_lookup = {i: head_type for i, head_type in enumerate(head_types)} head_vars = set(range(len(self.settings.head_literal.arguments))) body_vars = set() @@ -272,7 +278,7 @@ def find_deep_bindings4(self, body): k = (x, y) bad_type_matching.add(k) - lookup = {x:i for i, x in enumerate(body_vars)} + lookup = {x: i for i, x in enumerate(body_vars)} for xs in permutations(range(self.settings.max_vars), len(lookup)): assignment = {} @@ -287,15 +293,17 @@ def find_deep_bindings4(self, body): continue yield assignment -def remap_variables(rule): + +def remap_variables(rule: Tuple[Literal,Any]): + head: Literal head, body = rule - head_vars = set() + head_vars: Set = set() if head: - head_vars.extend(head.arguments) + head_vars |= head.arguments next_var = len(head_vars) - lookup = {i:i for i in head_vars} + lookup = {i: i for i in head_vars} new_body = set() for pred, args in body: @@ -303,9 +311,9 @@ def remap_variables(rule): for var in args: if var not in lookup: lookup[var] = next_var - next_var+=1 + next_var += 1 new_args.append(lookup[var]) new_atom = Literal(pred, tuple(new_args)) new_body.add(new_atom) - return head, frozenset(new_body) \ No newline at end of file + return head, frozenset(new_body) diff --git a/popper/loop.py b/popper/loop.py index 30a401c8..649cc03d 100644 --- a/popper/loop.py +++ b/popper/loop.py @@ -1,12 +1,15 @@ import time from collections import defaultdict -from bitarray.util import subset, any_and, ones from functools import cache from itertools import chain, combinations, permutations -from . util import timeout, format_rule, rule_is_recursive, prog_is_recursive, prog_has_invention, calc_prog_size, format_literal, Constraint, mdl_score, suppress_stdout_stderr, get_raw_prog, Literal, remap_variables, format_prog -from . tester import Tester -from . bkcons import deduce_bk_cons, deduce_recalls, deduce_type_cons, deduce_non_singletons -from . combine import Combiner + +from bitarray.util import any_and, ones, subset + +from .bkcons import deduce_bk_cons, deduce_non_singletons, deduce_recalls, deduce_type_cons +from .combine import Combiner +from .tester import Tester +from .util import Constraint, Literal, calc_prog_size, format_literal, format_prog, format_rule, get_raw_prog, \ + mdl_score, order_prog, prog_has_invention, prog_is_recursive, remap_variables, rule_is_recursive, timeout def load_solver(settings, tester, coverage_pos, coverage_neg, prog_lookup): @@ -24,12 +27,12 @@ def load_solver(settings, tester, coverage_pos, coverage_neg, prog_lookup): settings.exact_maxsat_solver = 'rc2' settings.old_format = False elif settings.solver == 'uwr': - settings.exact_maxsat_solver='uwrmaxsat' - settings.exact_maxsat_solver_params="-v0 -no-sat -no-bin -m -bm" + settings.exact_maxsat_solver = 'uwrmaxsat' + settings.exact_maxsat_solver_params = "-v0 -no-sat -no-bin -m -bm" settings.old_format = False else: - settings.exact_maxsat_solver='wmaxcdcl' - settings.exact_maxsat_solver_params="" + settings.exact_maxsat_solver = 'wmaxcdcl' + settings.exact_maxsat_solver_params = "" settings.old_format = True if settings.noisy: @@ -62,6 +65,7 @@ def load_solver(settings, tester, coverage_pos, coverage_neg, prog_lookup): return Combiner(settings, tester, coverage_pos, coverage_neg, prog_lookup) + class Popper(): def __init__(self, settings, tester): self.settings = settings @@ -99,11 +103,14 @@ def run(self, bkcons): # AC: all very hacky until the refactoring is complete with settings.stats.duration('init'): if settings.single_solve: - from . gen2 import Generator + from .gen2 import Generator + print("using generator 2") elif settings.max_rules == 2 and not settings.pi_enabled: - from . gen3 import Generator + from .gen3 import Generator + print("using generator 3") else: - from . generate import Generator + from .generate import Generator + print("using generate") generator = self.generator = Generator(settings, bkcons) # track the success sets of tested hypotheses @@ -146,7 +153,7 @@ def run(self, bkcons): min_coverage = self.settings.min_coverage = 1 - for size in range(2, max_size+1): + for size in range(2, max_size + 1): if size > settings.max_literals: continue @@ -166,6 +173,8 @@ def run(self, bkcons): # generate a program with settings.stats.duration('generate'): prog = generator.get_prog() + print("Generated prog: {}".format(prog)) + settings.logger.info("Generated prog: {}".format(prog)) if prog is None: break @@ -190,14 +199,16 @@ def run(self, bkcons): with settings.stats.duration('test'): if settings.noisy: - pos_covered, neg_covered, inconsistent, skipped, skip_early_neg = tester.test_prog_noisy(prog, prog_size) + pos_covered, neg_covered, inconsistent, skipped, skip_early_neg = tester.test_prog_noisy(prog, + prog_size) else: pos_covered, inconsistent = tester.test_prog(prog) + print(f"pos_covered: {pos_covered}, inconsistent: {inconsistent}") # @CH: can you explain these? skipped, skip_early_neg = False, False tp = pos_covered.count(1) - fn = num_pos-tp + fn = num_pos - tp # if non-separable program covers all examples, stop if not inconsistent and tp == num_pos and not skipped: @@ -209,7 +220,7 @@ def run(self, bkcons): if settings.noisy and not skipped: fp = neg_covered.count(1) - tn = num_neg-fp + tn = num_neg - fp score = tp, fn, tn, fp, prog_size mdl = mdl_score(fn, fp, prog_size) if settings.debug: @@ -226,7 +237,7 @@ def run(self, bkcons): settings.best_prog_score = score settings.solution = prog settings.best_mdl = mdl - settings.max_literals = mdl-1 + settings.max_literals = mdl - 1 settings.print_incomplete_solution2(prog, tp, fn, tn, fp, prog_size) new_cons.extend(self.build_constraints_previous_hypotheses(mdl, prog_size)) @@ -255,17 +266,19 @@ def run(self, bkcons): new_cons.extend(cons_) pruned_more_general = len(cons_) > 0 - if tp > 0 and success_sets and (not settings.noisy or (settings.noisy and fp==0)): + if tp > 0 and success_sets and (not settings.noisy or (settings.noisy and fp == 0)): with settings.stats.duration('check subsumed and covers_too_few'): subsumed = pos_covered in success_sets or any(subset(pos_covered, xs) for xs in success_sets) subsumed_by_two = not subsumed and self.check_subsumed_by_two(pos_covered, prog_size) - covers_too_few = not subsumed and not subsumed_by_two and not settings.noisy and self.check_covers_too_few(prog_size, pos_covered) + covers_too_few = not subsumed and not subsumed_by_two and not settings.noisy and self.check_covers_too_few( + prog_size, pos_covered) if subsumed or subsumed_by_two or covers_too_few: add_spec = True noisy_subsumed = True - if not settings.noisy and not has_invention and not is_recursive and (subsumed or subsumed_by_two or covers_too_few): + if not settings.noisy and not has_invention and not is_recursive and ( + subsumed or subsumed_by_two or covers_too_few): # TODO: FIND MOST GENERAL SUBSUMED RECURSIVE PROGRAM # xs = self.subsumed_or_covers_too_few2(prog, check_coverage=False, check_subsumed=True) @@ -278,7 +291,6 @@ def run(self, bkcons): # print('\t', 'moo', format_rule(rule)) # new_cons.append((Constraint.SPECIALISATION, [functional_rename_vars(rule) for rule in x])) - # If a program is subsumed or dioesn't cover enough examples, we search for the most general subprogram that also is also subsumed or doesn't cover enough examples # only applies to non-recursive and non-PI programs subsumed_progs = [] @@ -339,7 +351,8 @@ def run(self, bkcons): spec_size_ = min([tp, fp + prog_size]) if spec_size_ <= prog_size: add_spec = True - elif len(prog) == 1 and spec_size_ < settings.max_body + 1 and spec_size_ < settings.max_literals: + elif len( + prog) == 1 and spec_size_ < settings.max_body + 1 and spec_size_ < settings.max_literals: spec_size = spec_size_ elif len(prog) > 1 and spec_size_ < settings.max_literals: spec_size = spec_size_ @@ -353,7 +366,7 @@ def run(self, bkcons): gen_size = gen_size_ else: # only prune if the generalisation bounds are smaller than existing bounds - gen_size_ = min([fn + prog_size, num_pos-fp, settings.best_mdl - mdl + num_pos + prog_size]) + gen_size_ = min([fn + prog_size, num_pos - fp, settings.best_mdl - mdl + num_pos + prog_size]) if gen_size_ <= prog_size: add_gen = True if gen_size_ < settings.max_literals: @@ -411,17 +424,17 @@ def run(self, bkcons): add_spec = True add_to_combiner = False - if settings.noisy and not skipped and not skip_early_neg and not is_recursive and not has_invention and tp > prog_size+fp and fp+prog_size < settings.best_mdl and not noisy_subsumed: + if settings.noisy and not skipped and not skip_early_neg and not is_recursive and not has_invention and tp > prog_size + fp and fp + prog_size < settings.best_mdl and not noisy_subsumed: local_delete = set() ignore_this_prog = (pos_covered, neg_covered) in success_sets_noise - # if pos_covered is a subset and neg_covered is a superset of a previously seen program (which must be of equal size or smaller) then we can ignore this program if not ignore_this_prog: # find all progs where pos_covered is a subset of pos_covered_ of the other prog # s_pos_ = set.intersection(*(covered_by_pos[ex] for ex in pos_covered)) - s_pos = set.intersection(*(covered_by_pos[ex] for ex, ex_covered_ in enumerate(pos_covered) if ex_covered_ == 1)) + s_pos = set.intersection( + *(covered_by_pos[ex] for ex, ex_covered_ in enumerate(pos_covered) if ex_covered_ == 1)) # now check whether neg_covered is a superset of the other program for prog1 in s_pos: n1 = coverage_neg[prog1] @@ -431,9 +444,10 @@ def run(self, bkcons): # print('skip1') break - if not ignore_this_prog and (inconsistent or fp>0): + if not ignore_this_prog and (inconsistent or fp > 0): # neg_covered is a subset of all programs in s_neg - s_neg = set.intersection(*(covered_by_neg[ex] for ex, ex_covered_ in enumerate(neg_covered) if ex_covered_ == 1)) + s_neg = set.intersection( + *(covered_by_neg[ex] for ex, ex_covered_ in enumerate(neg_covered) if ex_covered_ == 1)) # if neg_covered(new) ⊆ neg_covered(old) for prog1 in s_neg: # if pos_covered(old) ⊆ pos_covered(new) @@ -445,21 +459,21 @@ def run(self, bkcons): local_delete.add(prog1) continue - if fp == fp1 and (tp-prog_size) < (tp1-size1): + if fp == fp1 and (tp - prog_size) < (tp1 - size1): # NEW tp:50 fp:1 size:6 memberofdomainregion(V0,V1):- synsetdomaintopicof(V2,V3),synsetdomaintopicof(V1,V3),hypernym(V2,V4),membermeronym(V0,V5),synsetdomaintopicof(V2,V4). # OLD tp:49 fp:1 size:4 memberofdomainregion(V0,V1):- synsetdomaintopicof(V1,V3),instancehypernym(V2,V3),membermeronym(V0,V4). # print('skip2') ignore_this_prog = True break - if tp == tp1 and (fp+prog_size) >= (fp1+size1): + if tp == tp1 and (fp + prog_size) >= (fp1 + size1): # NEW tp:10 fp:1 mdl:350 less_toxic(V0,V1):- ring_subst_3(V1,V4),ring_substitutions(V1,V3),alk_groups(V0,V3),x_subst(V0,V2,V5). # OLD tp:10 fp:2 mdl:350 less_toxic(V0,V1):- ring_substitutions(V0,V4),x_subst(V0,V3,V2),ring_subst_3(V1,V5). # print('skip3') ignore_this_prog = True break - if (tp-fp-prog_size) <= (tp1-fp1-size1): + if (tp - fp - prog_size) <= (tp1 - fp1 - size1): # NEW tp:12 fp:3 size:7 less_toxic(V0,V1):- gt(V2,V5),gt(V2,V3),ring_subst_2(V1,V4),ring_substitutions(V0,V3),alk_groups(V0,V2),ring_substitutions(V1,V5). # OLD tp:11 fp:4 size:3 less_toxic(V0,V1):- ring_subst_2(V1,V3),r_subst_3(V0,V2). # print('skip4') @@ -471,8 +485,11 @@ def run(self, bkcons): # new is consistent # if pos_covered(old) ⊆ pos_covered(new) and size(old) >= size(new) then ignore old not_covered = tester.pos_examples_ ^ pos_covered - progs_not_subsumed = set.union(*(covered_by_pos[ex] for ex, ex_covered_ in enumerate(not_covered) if ex_covered_ == 1)) - all_progs = set.union(*(covered_by_pos[ex] for ex, ex_covered_ in enumerate(tester.pos_examples_) if ex_covered_ == 1)) + progs_not_subsumed = set.union( + *(covered_by_pos[ex] for ex, ex_covered_ in enumerate(not_covered) if ex_covered_ == 1)) + all_progs = set.union( + *(covered_by_pos[ex] for ex, ex_covered_ in enumerate(tester.pos_examples_) if + ex_covered_ == 1)) s_pos = all_progs.difference(progs_not_subsumed) for prog1 in s_pos: size1, tp1, fp1 = scores[prog1] @@ -480,7 +497,7 @@ def run(self, bkcons): local_delete.add(prog1) for k in local_delete: - assert(k in combiner.saved_progs|to_combine) + assert (k in combiner.saved_progs | to_combine) if k in combiner.saved_progs: combiner.saved_progs.remove(k) elif k in to_combine: @@ -524,7 +541,7 @@ def run(self, bkcons): if p == pos_covered: continue # print(p, pos_covered, p|pos_covered) - paired_success_sets[s+prog_size].add(p|pos_covered) + paired_success_sets[s + prog_size].add(p | pos_covered) elif not settings.noisy: # if consistent, covers at least one example, is not subsumed, and has no redundancy, try to find a solution @@ -546,7 +563,7 @@ def run(self, bkcons): elif prog2_hash in combiner.saved_progs: combiner.saved_progs.remove(prog2_hash) else: - assert(False) + assert (False) del success_sets[pos_covered2] del success_sets_aux[pos_covered2] del coverage_pos[prog2_hash] @@ -564,7 +581,7 @@ def run(self, bkcons): for p, s in success_sets.items(): if p == pos_covered: continue - paired_success_sets[s+prog_size].add(p|pos_covered) + paired_success_sets[s + prog_size].add(p | pos_covered) if self.min_size is None: self.min_size = prog_size @@ -585,7 +602,7 @@ def run(self, bkcons): else: settings.solution = prog uncovered = uncovered & ~pos_covered - tp = num_pos- uncovered.count(1) + tp = num_pos - uncovered.count(1) fn = uncovered.count(1) tn = num_neg fp = 0 @@ -595,14 +612,14 @@ def run(self, bkcons): if not uncovered.any(): settings.solution_found = True - settings.max_literals = hypothesis_size-1 + settings.max_literals = hypothesis_size - 1 min_coverage = settings.min_coverage = 2 # if we have a solution with two rules then a new rule must entail all the examples if min_coverage != num_pos and len(settings.solution) == 2: min_coverage = settings.min_coverage = num_pos # AC: sometimes adding these size constraints can take longer - for i in range(settings.max_literals+1, max_size+1): + for i in range(settings.max_literals + 1, max_size + 1): generator.prune_size(i) call_combine = not uncovered.any() @@ -616,7 +633,7 @@ def run(self, bkcons): with settings.stats.duration('combine'): is_new_solution_found = combiner.update_best_prog(to_combine) - to_combine=set() + to_combine = set() new_hypothesis_found = is_new_solution_found != None @@ -624,14 +641,14 @@ def run(self, bkcons): # if only adding nogoods, eliminate larger programs if new_hypothesis_found: # if settings.noisy: - # print('new_hypothesis_found', settings.best_mdl, combiner.best_cost) + # print('new_hypothesis_found', settings.best_mdl, combiner.best_cost) new_hypothesis, conf_matrix = is_new_solution_found tp, fn, tn, fp, hypothesis_size = conf_matrix settings.best_prog_score = conf_matrix settings.solution = new_hypothesis best_score = mdl_score(fn, fp, hypothesis_size) # if settings.noisy: - # print('new_hypothesis_found', settings.best_mdl, best_score) + # print('new_hypothesis_found', settings.best_mdl, best_score) # print('here???') settings.print_incomplete_solution2(new_hypothesis, tp, fn, tn, fp, hypothesis_size) @@ -642,12 +659,12 @@ def run(self, bkcons): new_cons.extend(self.build_constraints_previous_hypotheses(settings.best_mdl, prog_size)) if settings.single_solve: # AC: sometimes adding these size constraints can take longer - for i in range(best_score, max_size+1): + for i in range(best_score, max_size + 1): generator.prune_size(i) # print("HERE!!!", tp, fn, tn, fp) if not settings.noisy and fp == 0 and fn == 0: settings.solution_found = True - settings.max_literals = hypothesis_size-1 + settings.max_literals = hypothesis_size - 1 min_coverage = settings.min_coverage = 2 # if we have a solution with two rules then a new rule must entail all the examples @@ -660,7 +677,7 @@ def run(self, bkcons): return # AC: sometimes adding these size constraints can take longer - for i in range(hypothesis_size, max_size+1): + for i in range(hypothesis_size, max_size + 1): generator.prune_size(i) # BUILD CONSTRAINTS @@ -669,9 +686,10 @@ def run(self, bkcons): if not skipped: if settings.noisy and not add_spec and spec_size and not pruned_more_general: - if spec_size <= settings.max_literals and ((is_recursive or has_invention or spec_size <= settings.max_body)): + if spec_size <= settings.max_literals and ( + (is_recursive or has_invention or spec_size <= settings.max_body)): new_cons.append((Constraint.SPECIALISATION, prog, spec_size)) - self.seen_hyp_spec[fp+prog_size+mdl].append([prog, tp, fn, tn, fp, prog_size]) + self.seen_hyp_spec[fp + prog_size + mdl].append([prog, tp, fn, tn, fp, prog_size]) if add_gen and not pruned_sub_inconsistent: if settings.noisy or settings.recursion_enabled or settings.pi_enabled: @@ -682,9 +700,10 @@ def run(self, bkcons): new_cons.append((Constraint.GENERALISATION, prog)) if settings.noisy and not add_gen and gen_size and not pruned_sub_inconsistent: - if gen_size <= settings.max_literals and (settings.recursion_enabled or settings.pi_enabled) and not pruned_more_general: + if gen_size <= settings.max_literals and ( + settings.recursion_enabled or settings.pi_enabled) and not pruned_more_general: new_cons.append((Constraint.GENERALISATION, prog, gen_size)) - self.seen_hyp_gen[fn+prog_size+mdl].append([prog, tp, fn, tn, fp, prog_size]) + self.seen_hyp_gen[fn + prog_size + mdl].append([prog, tp, fn, tn, fp, prog_size]) if add_redund1 and not pruned_more_general: new_cons.append((Constraint.REDUNDANCY_CONSTRAINT1, prog)) @@ -707,7 +726,7 @@ def run(self, bkcons): # COMBINE with settings.stats.duration('combine'): is_new_solution_found = combiner.update_best_prog(to_combine) - to_combine=set() + to_combine = set() new_hypothesis_found = is_new_solution_found != None @@ -723,7 +742,7 @@ def run(self, bkcons): if not settings.noisy and fp == 0 and fn == 0: settings.solution_found = True - settings.max_literals = hypothesis_size-1 + settings.max_literals = hypothesis_size - 1 min_coverage = settings.min_coverage = 2 # if we have a solution with two rules then a new rule must entail all the examples if min_coverage != num_pos and len(settings.solution) == 2: @@ -731,10 +750,10 @@ def run(self, bkcons): # if size >= settings.max_literals and not settings.order_space: if size >= settings.max_literals: - assert(False) + assert (False) if settings.single_solve: break - assert(len(to_combine) == 0) + assert (len(to_combine) == 0) def check_redundant_literal(self, prog): tester, settings = self.tester, self.settings @@ -754,7 +773,6 @@ def check_redundant_literal(self, prog): # loop through each body literal for i in range(len(body)): - # potential redundant literal redundant_literal = body[i] @@ -777,7 +795,6 @@ def check_redundant_literal_aux(self, body, literal, allsat1, not_all_sat1, dept if len(body) == 0: return out - # print('\tA \t\t\t',format_prog(prog), '\t\t', format_literal(literal)) # prog_key = (prog, literal) prog_key = (body, literal) @@ -799,9 +816,9 @@ def check_redundant_literal_aux(self, body, literal, allsat1, not_all_sat1, dept return out if not connected(body | frozenset([literal])): - for new_body in combinations(body, len(body)-1): + for new_body in combinations(body, len(body) - 1): new_body = frozenset(new_body) - out.extend(self.check_redundant_literal_aux(new_body, literal, allsat1, not_all_sat1, depth+1)) + out.extend(self.check_redundant_literal_aux(new_body, literal, allsat1, not_all_sat1, depth + 1)) return out if any(body.issubset(seen_body) for seen_body in not_all_sat1): @@ -821,9 +838,9 @@ def check_redundant_literal_aux(self, body, literal, allsat1, not_all_sat1, dept allsat1.add(body) - for new_body in combinations(body, len(body)-1): + for new_body in combinations(body, len(body) - 1): new_body = frozenset(new_body) - out.extend(self.check_redundant_literal_aux(new_body, literal, allsat1, not_all_sat1, depth+1)) + out.extend(self.check_redundant_literal_aux(new_body, literal, allsat1, not_all_sat1, depth + 1)) if len(out) > 0: return out @@ -849,7 +866,8 @@ def check_neg_reducible(self, prog): if len(body) == 1: # AC: SPECIAL CASE FOR A SINGLE BODY LITERAL IMPLIED BY THE HEAD - if settings.non_datalog_flag and literal_args.issubset(head_vars) and tester.diff_subs_single(literal): + if settings.non_datalog_flag and literal_args.issubset(head_vars) and tester.diff_subs_single( + literal): bad_rule = (head, frozenset([literal])) bad_prog = frozenset([bad_rule]) return bad_prog @@ -882,7 +900,7 @@ def filter_combine_programs(self, combiner, to_combine): # assert(False) xs = combiner.saved_progs | to_combine min_size = min(self.cached_prog_size[prog] for prog in xs) - must_beat = self.settings.best_mdl-min_size + must_beat = self.settings.best_mdl - min_size to_delete = set() # FILTER COMBINE PROGRAMS @@ -900,7 +918,7 @@ def filter_combine_programs(self, combiner, to_combine): def check_subsumed_by_two(self, pos_covered, prog_size): paired_success_sets = self.paired_success_sets - for i in range(2, prog_size+2): + for i in range(2, prog_size + 2): if pos_covered in paired_success_sets[i]: return True for x in paired_success_sets[i]: @@ -909,7 +927,7 @@ def check_subsumed_by_two(self, pos_covered, prog_size): return False def check_subsumed_by_two_v2(self, prog_size, prog2_size, pos_covered, pos_covered2): - space = prog2_size-prog_size + space = prog2_size - prog_size if space < self.min_size: return False @@ -959,8 +977,8 @@ def check_covers_too_few_(self, prog_size, pos_covered): return True # MAX RULES = 2 - elif ((prog_size + (min_size*2)) > max_literals): - space_remaining = max_literals-prog_size + elif ((prog_size + (min_size * 2)) > max_literals): + space_remaining = max_literals - prog_size if space_remaining > self.settings.search_depth: return False @@ -974,15 +992,16 @@ def check_covers_too_few_(self, prog_size, pos_covered): return True # # MAX RULES = 3 - elif ((prog_size + (min_size*3)) > max_literals): - space_remaining = max_literals-prog_size + elif ((prog_size + (min_size * 3)) > max_literals): + space_remaining = max_literals - prog_size - if space_remaining-min_size > self.settings.search_depth: + if space_remaining - min_size > self.settings.search_depth: return False # uncovered = self.tester.pos_examples-pos_covered uncovered = self.tester.pos_examples_ & ~pos_covered - success_sets = sorted(((pos_covered_, size) for (pos_covered_, size) in self.success_sets.items()), key=lambda x: x[1]) + success_sets = sorted(((pos_covered_, size) for (pos_covered_, size) in self.success_sets.items()), + key=lambda x: x[1]) n = len(success_sets) for i in range(n): @@ -991,11 +1010,11 @@ def check_covers_too_few_(self, prog_size, pos_covered): break if subset(uncovered, pos_covered2): return False - space_remaining_ = space_remaining-size2 + space_remaining_ = space_remaining - size2 if space_remaining_ < min_size: continue uncovered2 = uncovered & ~pos_covered2 - for j in range(i+1, n): + for j in range(i + 1, n): pos_covered3, size3 = success_sets[j] if size3 > space_remaining_: break @@ -1004,17 +1023,18 @@ def check_covers_too_few_(self, prog_size, pos_covered): return True # # MAX RULES = 4 - elif prog_size + (min_size*4) > max_literals: - space_remaining = max_literals-prog_size - space_remaining -= (min_size*2) + elif prog_size + (min_size * 4) > max_literals: + space_remaining = max_literals - prog_size + space_remaining -= (min_size * 2) if space_remaining > self.settings.search_depth: return False missing = self.tester.pos_examples_ & ~pos_covered - success_sets = sorted(((pos_covered_, size) for (pos_covered_, size) in self.success_sets.items()), key=lambda x: x[1]) - space_remaining = max_literals-prog_size + success_sets = sorted(((pos_covered_, size) for (pos_covered_, size) in self.success_sets.items()), + key=lambda x: x[1]) + space_remaining = max_literals - prog_size n = len(success_sets) @@ -1024,23 +1044,23 @@ def check_covers_too_few_(self, prog_size, pos_covered): break if subset(missing, pos_covered2): return False - space_remaining_ = space_remaining-size2 + space_remaining_ = space_remaining - size2 if space_remaining_ < min_size: continue missing2 = missing & ~pos_covered2 - for j in range(i+1, n): + for j in range(i + 1, n): pos_covered3, size3 = success_sets[j] if size3 > space_remaining_: break # if pos_covered3.isdisjoint(missing2): - # continue + # continue if subset(missing2, pos_covered3): return False - space_remaining__ = space_remaining_-size3 + space_remaining__ = space_remaining_ - size3 if space_remaining__ < min_size: continue missing3 = missing2 & ~pos_covered3 - for k in range(j+1, n): + for k in range(j + 1, n): pos_covered4, size4 = success_sets[k] if size4 > space_remaining__: break @@ -1048,11 +1068,10 @@ def check_covers_too_few_(self, prog_size, pos_covered): return False return True # elif ((prog_size + (min_size*5)) > max_literals): - # print('MAX RULES IS 5!') + # print('MAX RULES IS 5!') return False - # find unsat cores def explain_incomplete(self, prog): @@ -1120,13 +1139,13 @@ def build_constraints_previous_hypotheses(self, score, best_size): seen_hyp_spec, seen_hyp_gen = self.seen_hyp_spec, self.seen_hyp_gen cons = [] # print(f"new best score {score}") - for k in [k for k in seen_hyp_spec if k > score+num_pos+best_size]: + for k in [k for k in seen_hyp_spec if k > score + num_pos + best_size]: to_delete = [] for prog, tp, fn, tn, fp, size in seen_hyp_spec[k]: # mdl = mdl_score(tuple((tp, fn, tn, fp, size))) mdl = mdl_score(fn, fp, size) - if score+num_pos+best_size < fp+size+mdl: - spec_size = score-mdl+num_pos+best_size + if score + num_pos + best_size < fp + size + mdl: + spec_size = score - mdl + num_pos + best_size if spec_size <= size: to_delete.append([prog, tp, fn, tn, fp, size]) # _, con = generator.build_specialisation_constraint(prog, rule_ordering, spec_size=spec_size) @@ -1165,7 +1184,7 @@ def subsumed_or_covers_too_few(self, prog, seen=set()): # try the body with one literal removed (the literal at position i) for i in range(len(body)): - new_body = body[:i] + body[i+1:] + new_body = body[:i] + body[i + 1:] new_body = frozenset(new_body) if len(new_body) == 0: @@ -1181,7 +1200,8 @@ def subsumed_or_covers_too_few(self, prog, seen=set()): new_prog = frozenset({new_rule}) # ensure at least one head variable is in the body - if not settings.non_datalog_flag and not any(x in head_vars for literal in new_body for x in literal.arguments): + if not settings.non_datalog_flag and not any( + x in head_vars for literal in new_body for x in literal.arguments): continue # check whether we have pruned any subset (HORRIBLE CODE) @@ -1207,9 +1227,11 @@ def subsumed_or_covers_too_few(self, prog, seen=set()): sub_prog_pos_covered = tester.get_pos_covered(new_prog) # with self.settings.stats.duration('old'): - subsumed = sub_prog_pos_covered in success_sets or any(subset(sub_prog_pos_covered, xs) for xs in success_sets) + subsumed = sub_prog_pos_covered in success_sets or any( + subset(sub_prog_pos_covered, xs) for xs in success_sets) subsumed_by_two = not subsumed and self.check_subsumed_by_two(sub_prog_pos_covered, new_prog_size) - covers_too_few = not subsumed and not subsumed_by_two and self.check_covers_too_few(new_prog_size, sub_prog_pos_covered) + covers_too_few = not subsumed and not subsumed_by_two and self.check_covers_too_few(new_prog_size, + sub_prog_pos_covered) if not (subsumed or subsumed_by_two or covers_too_few): continue @@ -1231,7 +1253,7 @@ def subsumed_or_covers_too_few(self, prog, seen=set()): elif covers_too_few: out.add((new_prog, ('COVERS TOO FEW (GENERALISATION)'))) else: - assert(False) + assert (False) return out def find_variants(self, rule): @@ -1281,7 +1303,7 @@ def explain_totally_incomplete_aux2(self, prog, unsat2=set(), unsat=set()): # raw_prog2 = get_raw_prog2(subprog) # if raw_prog2 in self.seen_prog: - # continue + # continue subprog = frozenset(subprog) if hash(subprog) in self.seen_prog: @@ -1292,11 +1314,11 @@ def explain_totally_incomplete_aux2(self, prog, unsat2=set(), unsat=set()): self.seen_prog.add(hash(subprog)) self.seen_prog.add(hash(raw_prog)) - # self.seen_prog.add(raw_prog2) + # self.seen_prog.add(raw_prog2) # for rule in subprog: - # print('\t', 'B', format_rule(rule)) + # print('\t', 'B', format_rule(rule)) def should_skip(): if len(subprog) > 0: @@ -1335,8 +1357,7 @@ def should_skip(): # # pass # for rule in subprog: - # print('\t', 'C', format_rule(rule)) - + # print('\t', 'C', format_rule(rule)) if not self.prog_is_ok(subprog): xs = self.explain_totally_incomplete_aux2(subprog, unsat2, unsat) @@ -1344,14 +1365,13 @@ def should_skip(): continue # for rule in subprog: - # print('\t', 'D', format_rule(rule)) + # print('\t', 'D', format_rule(rule)) if self.tester.has_redundant_literal(frozenset(subprog)): xs = self.explain_totally_incomplete_aux2(subprog, unsat2, unsat) out.extend(xs) continue - # if len(subprog) > 2 and self.tester.has_redundant_rule(subprog): # xs = self.explain_totally_incomplete_aux2(subprog, directions, sat, unsat, noisy) # out.extend(xs) @@ -1362,7 +1382,6 @@ def should_skip(): # headless = is_headless(subprog) headless = any(head is None for head, body in subprog) - # print('\t\t\t testing',format_prog(subprog)) if headless: @@ -1389,7 +1408,6 @@ def should_skip(): out.append((subprog, headless)) return out - def has_valid_directions(self, rule): if self.settings.has_directions: return self.has_valid_directions_(rule) @@ -1493,22 +1511,21 @@ def prog_is_ok(self, prog): if not has_recursion: return False - if self.needs_datalog(prog) and not tmp(prog): return False return True def print_incomplete_solution2(self, prog, tp, fn, tn, fp, size): - self.logger.info('*'*20) + self.logger.info('*' * 20) self.logger.info('New best hypothesis:') if self.noisy: - self.logger.info(f'tp:{tp} fn:{fn} tn:{tn} fp:{fp} size:{size} mdl:{size+fn+fp}') + self.logger.info(f'tp:{tp} fn:{fn} tn:{tn} fp:{fp} size:{size} mdl:{size + fn + fp}') else: self.logger.info(f'tp:{tp} fn:{fn} tn:{tn} fp:{fp} size:{size}') for rule in order_prog(prog): - self.logger.info(format_rule(order_rule(rule))) - self.logger.info('*'*20) + self.logger.info(format_rule(self.settings.order_rule(rule))) + self.logger.info('*' * 20) def needs_datalog(self, prog): if not self.settings.has_directions: @@ -1531,19 +1548,21 @@ def needs_datalog(self, prog): return True return False + def popper(settings, tester, bkcons): Popper(settings, tester).run(bkcons) + def get_bk_cons(settings, tester): bkcons = [] with settings.stats.duration('find_pointless_relations'): pointless = settings.pointless = tester.find_pointless_relations() - for p,a in pointless: + for p, a in pointless: if settings.showcons: print('remove pointless relation', p, a) - settings.body_preds.remove((p,a)) + settings.body_preds.remove((p, a)) # if settings.datalog: settings.logger.debug(f'Loading recalls') @@ -1567,14 +1586,12 @@ def get_bk_cons(settings, tester): print('singletons', x) bkcons.extend(xs) - type_cons = tuple(deduce_type_cons(settings)) if settings.showcons: for x in type_cons: print('type_con', x) bkcons.extend(type_cons) - if not settings.datalog: settings.logger.debug(f'Loading recalls FAILURE') else: @@ -1600,6 +1617,7 @@ def handler(signum, frame): bkcons.extend(xs) return bkcons + def learn_solution(settings): t1 = time.time() settings.nonoise = not settings.noisy @@ -1609,12 +1627,13 @@ def learn_solution(settings): tester = Tester(settings) bkcons = get_bk_cons(settings, tester) - time_so_far = time.time()-t1 - timeout(settings, popper, (settings, tester, bkcons), timeout_duration=int(settings.timeout-time_so_far),) + print("Have incorporated bk cons.") + time_so_far = time.time() - t1 + timeout(settings, popper, (settings, tester, bkcons), timeout_duration=int(settings.timeout - time_so_far), ) return settings.solution, settings.best_prog_score, settings.stats -def generalisations(prog, allow_headless=True, recursive=False): +def generalisations(prog, allow_headless=True, recursive=False): if len(prog) == 1: rule = list(prog)[0] head, body = rule @@ -1631,7 +1650,7 @@ def generalisations(prog, allow_headless=True, recursive=False): # do not remove recursive literals if recursive and body[i].predicate == head.predicate: continue - new_body = body[:i] + body[i+1:] + new_body = body[:i] + body[i + 1:] new_rule = (head, frozenset(new_body)) new_prog = [new_rule] yield new_prog @@ -1642,9 +1661,10 @@ def generalisations(prog, allow_headless=True, recursive=False): subrule = prog[i] recursive = rule_is_recursive(subrule) for new_subrule in generalisations([subrule], allow_headless=False, recursive=recursive): - new_prog = prog[:i] + new_subrule + prog[i+1:] + new_prog = prog[:i] + new_subrule + prog[i + 1:] yield new_prog + def tmp(prog): for rule in prog: head, body = rule @@ -1654,6 +1674,7 @@ def tmp(prog): return False return True + def explain_none_functional(settings, tester, prog): new_cons = [] @@ -1683,12 +1704,13 @@ def explain_none_functional(settings, tester, prog): for r1 in base: for r2 in rec: - subprog = frozenset([r1,r2]) + subprog = frozenset([r1, r2]) if tester.is_non_functional(subprog): new_cons.append((Constraint.GENERALISATION, subprog)) return new_cons + # TODO: THIS CHECK IS NOT COMPLETE # IT DOES NOT ACCOUNT FOR VARIABLE RENAMING # R1 = (None, frozenset({('c3', ('A',)), ('c2', ('A',))})) @@ -1701,13 +1723,16 @@ def rule_subsumes(r1, r2): return False return b1.issubset(b2) + # P1 subsumes P2 if for every rule R2 in P2 there is a rule R1 in P1 such that R1 subsumes R2 def theory_subsumes(prog1, prog2): return all(any(rule_subsumes(r1, r2) for r1 in prog1) for r2 in prog2) + def seen_more_general_unsat(prog, unsat): return any(theory_subsumes(seen, prog) for seen in unsat) + def head_connected(rule): head, body = rule head_connected_vars = set(head.arguments) @@ -1716,7 +1741,7 @@ def head_connected(rule): while body_literals: connected = [] for literal in body_literals: - if any (x in head_connected_vars for x in literal.arguments): + if any(x in head_connected_vars for x in literal.arguments): head_connected_vars.update(literal.arguments) connected.append(literal) if not connected and body_literals: @@ -1724,6 +1749,7 @@ def head_connected(rule): body_literals.difference_update(connected) return True + def connected(body): if len(body) == 1: return True @@ -1735,7 +1761,7 @@ def connected(body): while body_literals: connected = [] for literal in body_literals: - if any (x in connected_vars for x in literal.arguments): + if any(x in connected_vars for x in literal.arguments): connected_vars.update(literal.arguments) connected.append(literal) if not connected and body_literals: @@ -1743,6 +1769,7 @@ def connected(body): body_literals.difference_update(connected) return True + def non_empty_powerset(iterable): s = tuple(iterable) - return chain.from_iterable(combinations(s, r) for r in range(1, len(s)+1)) + return chain.from_iterable(combinations(s, r) for r in range(1, len(s) + 1)) diff --git a/popper/tester.py b/popper/tester.py index 49a9ca5f..57b442d1 100644 --- a/popper/tester.py +++ b/popper/tester.py @@ -1,14 +1,17 @@ import os -import time -import pkg_resources -from janus_swi import query_once, consult -from functools import cache +from collections import defaultdict from contextlib import contextmanager -from . util import order_prog, prog_is_recursive, rule_is_recursive, calc_rule_size, calc_prog_size, prog_hash, format_rule, format_literal, Literal +from functools import cache +from itertools import product +from typing import Tuple + +import pkg_resources from bitarray import bitarray, frozenbitarray from bitarray.util import ones -from collections import defaultdict -from itertools import product +from janus_swi import consult, query_once + +from .util import Literal, calc_prog_size, calc_rule_size, format_rule, order_prog, prog_hash, prog_is_recursive + def format_literal_janus(literal): args = ','.join(f'_V{i}' for i in literal.arguments) @@ -84,7 +87,7 @@ def test_prog_noisy(self, prog, prog_size): settings = self.settings neg_covered = None skipped, skip_early_neg = False, False - inconsistent = False + inconsistent: bool = False if settings.recursion_enabled or settings.pi_enabled: pos_covered, neg_covered = self.test_prog_all(prog) @@ -111,8 +114,9 @@ def test_prog_noisy(self, prog, prog_size): return pos_covered, neg_covered, inconsistent, skipped, skip_early_neg - def test_prog(self, prog): + def test_prog(self, prog) -> Tuple[frozenbitarray, bool]: + # sourcery skip: no-conditionals-in-tests if self.settings.recursion_enabled or self.settings.pi_enabled: if len(prog) == 1: @@ -156,7 +160,7 @@ def test_prog(self, prog): self.cached_pos_covered[hash(prog)] = pos_covered return pos_covered, inconsistent - def test_prog_all(self, prog): + def test_prog_all(self, prog) -> Tuple[frozenbitarray, frozenbitarray]: if len(prog) == 1: atom_str, body_str = self.parse_single_rule(prog) @@ -174,15 +178,15 @@ def test_prog_all(self, prog): pos_covered_bits = bitarray(self.num_pos) pos_covered_bits[pos_covered] = 1 - pos_covered = frozenbitarray(pos_covered_bits) + pos_covered_arr: frozenbitarray = frozenbitarray(pos_covered_bits) neg_covered_bits = bitarray(self.num_neg) neg_covered_bits[neg_covered] = 1 - neg_covered = frozenbitarray(neg_covered_bits) + neg_covered_arr: frozenbitarray = frozenbitarray(neg_covered_bits) - return pos_covered, neg_covered + return pos_covered_arr, neg_covered_arr - def test_prog_pos(self, prog): + def test_prog_pos(self, prog) -> frozenbitarray: if len(prog) == 1: atom_str, body_str = self.parse_single_rule(prog) @@ -192,7 +196,7 @@ def test_prog_pos(self, prog): with self.using(prog): pos_covered = query_once('pos_covered(S)')['S'] - pos_covered_bits = bitarray(self.num_pos) + pos_covered_bits: bitarray = bitarray(self.num_pos) pos_covered_bits[pos_covered] = 1 pos_covered = frozenbitarray(pos_covered_bits) return pos_covered @@ -374,35 +378,35 @@ def has_redundant_literal(self, prog): # print(q, False) return False - # # WE ASSUME THAT THERE IS A REUNDANT RULE - def find_redundant_rule_(self, prog): - prog_ = [] - for i, (head, body) in enumerate(prog): - c = f"{i}-[{','.join(('not_'+ format_literal(head),) + tuple(format_literal(lit) for lit in body))}]" - prog_.append(c) - prog_ = f"[{','.join(prog_)}]" - prog_ = janus_format_rule(prog_) - q = f'find_redundant_rule({prog_}, K1, K2)' - res = query_once(q) - k1 = res['K1'] - k2 = res['K2'] - return prog[k1], prog[k2] - - def find_redundant_rules(self, prog): - # assert(False) - # AC: if the overhead of this call becomes too high, such as when learning programs with lots of clauses, we can improve it by not comparing already compared clauses - base = [] - step = [] - for rule in prog: - if rule_is_recursive(rule): - step.append(rule) - else: - base.append(rule) - if len(base) > 1 and self.has_redundant_rule(base): - return self.find_redundant_rule_(base) - if len(step) > 1 and self.has_redundant_rule(step): - return self.find_redundant_rule_(step) - return None + # # WE ASSUME THAT THERE IS A REDUNDANT RULE + # def find_redundant_rule_(self, prog): + # prog_ = [] + # for i, (head, body) in enumerate(prog): + # c = f"{i}-[{','.join(('not_'+ format_literal(head),) + tuple(format_literal(lit) for lit in body))}]" + # prog_.append(c) + # prog_ = f"[{','.join(prog_)}]" + # prog_ = janus_format_rule(prog_) + # q = f'find_redundant_rule({prog_}, K1, K2)' + # res = query_once(q) + # k1 = res['K1'] + # k2 = res['K2'] + # return prog[k1], prog[k2] + # + # def find_redundant_rules(self, prog): + # # assert(False) + # # AC: if the overhead of this call becomes too high, such as when learning programs with lots of clauses, we can improve it by not comparing already compared clauses + # base = [] + # step = [] + # for rule in prog: + # if rule_is_recursive(rule): + # step.append(rule) + # else: + # base.append(rule) + # if len(base) > 1 and self.has_redundant_rule(base): + # return self.find_redundant_rule_(base) + # if len(step) > 1 and self.has_redundant_rule(step): + # return self.find_redundant_rule_(step) + # return None def find_pointless_relations(self): settings = self.settings @@ -452,7 +456,7 @@ def find_pointless_relations(self): if query_once(query1)['truth'] or query_once(query2)['truth']: continue except Exception as Err: - print('ERROR detecting pointless relations', Err) + settings.logger.error(f'ERROR detecting pointless relations: {Err}') return pointless a, b = (p,pa), (q,qa) diff --git a/popper/util.py b/popper/util.py index a756c051..cf53cf41 100644 --- a/popper/util.py +++ b/popper/util.py @@ -1,36 +1,38 @@ -import clingo -import clingo.script -import signal import argparse -import os import logging -from itertools import permutations, chain, combinations +import signal from collections import defaultdict -from typing import NamedTuple -from time import perf_counter from contextlib import contextmanager +from itertools import permutations +from time import perf_counter +from typing import NamedTuple + +import clingo +import clingo.script + class Literal(NamedTuple): predicate: str arguments: tuple + clingo.script.enable_python() -TIMEOUT=1200 -EVAL_TIMEOUT=0.001 -MAX_LITERALS=40 -MAX_SOLUTIONS=1 -CLINGO_ARGS='' -MAX_RULES=2 -MAX_VARS=6 -MAX_BODY=6 -MAX_EXAMPLES=10000 -BATCH_SIZE=20000 -ANYTIME_TIMEOUT=10 -BKCONS_TIMEOUT=10 +TIMEOUT = 1200 +EVAL_TIMEOUT = 0.001 +MAX_LITERALS = 40 +MAX_SOLUTIONS = 1 +CLINGO_ARGS = '' +MAX_RULES = 2 +MAX_VARS = 6 +MAX_BODY = 6 +MAX_EXAMPLES = 10000 +BATCH_SIZE = 20000 +ANYTIME_TIMEOUT = 10 +BKCONS_TIMEOUT = 10 +savings = 0 -savings=0 class Constraint: GENERALISATION = 1 @@ -41,26 +43,38 @@ class Constraint: TMP_ANDY = 6 BANISH = 7 + def parse_args(): parser = argparse.ArgumentParser(description='Popper is an ILP system based on learning from failures') parser.add_argument('kbpath', help='Path to files to learn from') parser.add_argument('--noisy', default=False, action='store_true', help='tell Popper that there is noise') # parser.add_argument('--bkcons', default=False, action='store_true', help='deduce background constraints from Datalog background (EXPERIMENTAL!)') - parser.add_argument('--timeout', type=float, default=TIMEOUT, help=f'Overall timeout in seconds (default: {TIMEOUT})') - parser.add_argument('--max-literals', type=int, default=MAX_LITERALS, help=f'Maximum number of literals allowed in program (default: {MAX_LITERALS})') - parser.add_argument('--max-body', type=int, default=None, help=f'Maximum number of body literals allowed in rule (default: {MAX_BODY})') - parser.add_argument('--max-vars', type=int, default=None, help=f'Maximum number of variables allowed in rule (default: {MAX_VARS})') - parser.add_argument('--max-rules', type=int, default=None, help=f'Maximum number of rules allowed in a recursive program (default: {MAX_RULES})') - parser.add_argument('--eval-timeout', type=float, default=EVAL_TIMEOUT, help=f'Prolog evaluation timeout in seconds (default: {EVAL_TIMEOUT})') + parser.add_argument('--timeout', type=float, default=TIMEOUT, + help=f'Overall timeout in seconds (default: {TIMEOUT})') + parser.add_argument('--max-literals', type=int, default=MAX_LITERALS, + help=f'Maximum number of literals allowed in program (default: {MAX_LITERALS})') + parser.add_argument('--max-body', type=int, default=None, + help=f'Maximum number of body literals allowed in rule (default: {MAX_BODY})') + parser.add_argument('--max-vars', type=int, default=None, + help=f'Maximum number of variables allowed in rule (default: {MAX_VARS})') + parser.add_argument('--max-rules', type=int, default=None, + help=f'Maximum number of rules allowed in a recursive program (default: {MAX_RULES})') + parser.add_argument('--eval-timeout', type=float, default=EVAL_TIMEOUT, + help=f'Prolog evaluation timeout in seconds (default: {EVAL_TIMEOUT})') parser.add_argument('--stats', default=False, action='store_true', help='Print statistics at end of execution') parser.add_argument('--quiet', '-q', default=False, action='store_true', help='Hide information during learning') parser.add_argument('--debug', default=False, action='store_true', help='Print debugging information to stderr') - parser.add_argument('--showcons', default=False, action='store_true', help='Show constraints deduced during the search') - parser.add_argument('--solver', default='rc2', choices=['rc2', 'uwr', 'wmaxcdcl'], help='Select a solver for the combine stage (default: rc2)') - parser.add_argument('--anytime-solver', default=None, choices=['wmaxcdcl', 'nuwls'], help='Select an anytime MaxSAT solver (default: None)') - parser.add_argument('--anytime-timeout', type=int, default=ANYTIME_TIMEOUT, help=f'Maximum timeout (seconds) for each anytime MaxSAT call (default: {ANYTIME_TIMEOUT})') - parser.add_argument('--batch-size', type=int, default=BATCH_SIZE, help=f'Combine batch size (default: {BATCH_SIZE})') + parser.add_argument('--showcons', default=False, action='store_true', + help='Show constraints deduced during the search') + parser.add_argument('--solver', default='rc2', choices=['clingo', 'rc2', 'uwr', 'wmaxcdcl'], + help='Select a solver for the combine stage (default: rc2)') + parser.add_argument('--anytime-solver', default=None, choices=['wmaxcdcl', 'nuwls'], + help='Select an anytime MaxSAT solver (default: None)') + parser.add_argument('--anytime-timeout', type=int, default=ANYTIME_TIMEOUT, + help=f'Maximum timeout (seconds) for each anytime MaxSAT call (default: {ANYTIME_TIMEOUT})') + parser.add_argument('--batch-size', type=int, default=BATCH_SIZE, + help=f'Combine batch size (default: {BATCH_SIZE})') parser.add_argument('--functional-test', default=False, action='store_true', help='Run functional test') # parser.add_argument('--datalog', default=False, action='store_true', help='EXPERIMENTAL FEATURE: use recall to order literals in rules') # parser.add_argument('--no-bias', default=False, action='store_true', help='EXPERIMENTAL FEATURE: do not use language bias') @@ -68,8 +82,10 @@ def parse_args(): return parser.parse_args() + def timeout(settings, func, args=(), kwargs={}, timeout_duration=1): result = None + class TimeoutError(Exception): pass @@ -94,14 +110,17 @@ def handler(signum, frame): return result + def load_kbpath(kbpath): def fix_path(filename): full_filename = os.path.join(kbpath, filename) return full_filename.replace('\\', '\\\\') if os.name == 'nt' else full_filename + return fix_path("bk.pl"), fix_path("exs.pl"), fix_path("bias.pl") + class Stats: - def __init__(self, info = False, debug = False): + def __init__(self, info=False, debug=False): self.exec_start = perf_counter() self.total_programs = 0 self.durations = {} @@ -114,7 +133,7 @@ def show(self): total_op_time = sum(summary.total for summary in self.duration_summary()) for summary in self.duration_summary(): - percentage = int((summary.total/total_op_time)*100) + percentage = int((summary.total / total_op_time) * 100) message += f'{summary.operation}:\n\tCalled: {summary.called} times \t ' + \ f'Total: {summary.total:0.2f} \t Mean: {summary.mean:0.4f} \t ' + \ f'Max: {summary.maximum:0.3f} \t Percentage: {percentage}%\n' @@ -125,11 +144,11 @@ def show(self): def duration_summary(self): summary = [] - stats = sorted(self.durations.items(), key = lambda x: sum(x[1]), reverse=True) + stats = sorted(self.durations.items(), key=lambda x: sum(x[1]), reverse=True) for operation, durations in stats: called = len(durations) total = sum(durations) - mean = sum(durations)/len(durations) + mean = sum(durations) / len(durations) maximum = max(durations) summary.append(DurationSummary(operation.title(), called, total, mean, maximum)) return summary @@ -150,13 +169,14 @@ def duration(self, operation): # def format_prog2(prog): - # return '\n'.join(format_rule(order_rule2(rule)) for rule in order_prog(prog)) +# return '\n'.join(format_rule(order_rule2(rule)) for rule in order_prog(prog)) def format_literal(literal): pred, args = literal args = ','.join(f'V{i}' for i in args) return f'{pred}({args})' + def format_rule(rule): head, body = rule head_str = '' @@ -165,13 +185,16 @@ def format_rule(rule): body_str = ','.join(format_literal(literal) for literal in body) return f'{head_str}:- {body_str}.' + def calc_prog_size(prog): return sum(calc_rule_size(rule) for rule in prog) + def calc_rule_size(rule): head, body = rule return 1 + len(body) + def reduce_prog(prog): reduced = {} for rule in prog: @@ -180,9 +203,11 @@ def reduce_prog(prog): reduced[k] = rule return reduced.values() + def order_prog(prog): return sorted(list(prog), key=lambda rule: (rule_is_recursive(rule), len(rule[1]))) + def rule_is_recursive(rule): head, body = rule head_pred, _head_args = head @@ -190,16 +215,19 @@ def rule_is_recursive(rule): return False return any(head_pred == pred for pred, _args in body) + def prog_is_recursive(prog): if len(prog) < 2: return False return any(rule_is_recursive(rule) for rule in prog) + def prog_has_invention(prog): if len(prog) < 2: return False return any(rule_is_invented(rule) for rule in prog) + def rule_is_invented(rule): head, body = rule if not head: @@ -207,9 +235,11 @@ def rule_is_invented(rule): head_pred, _head_arg = head return head_pred.startswith('inv') + def mdl_score(fn, fp, size): return fn + fp + size + class DurationSummary: def __init__(self, operation, called, total, mean, maximum): self.operation = operation @@ -218,11 +248,17 @@ def __init__(self, operation, called, total, mean, maximum): self.mean = mean self.maximum = maximum + def flatten(xs): return [item for sublist in xs for item in sublist] + class Settings: - def __init__(self, cmd_line=False, info=True, debug=False, show_stats=True, max_literals=MAX_LITERALS, timeout=TIMEOUT, quiet=False, eval_timeout=EVAL_TIMEOUT, max_examples=MAX_EXAMPLES, max_body=None, max_rules=None, max_vars=None, functional_test=False, kbpath=False, ex_file=False, bk_file=False, bias_file=False, showcons=False, no_bias=False, order_space=False, noisy=False, batch_size=BATCH_SIZE, solver='rc2', anytime_solver=None, anytime_timeout=ANYTIME_TIMEOUT): + def __init__(self, cmd_line=False, info=True, debug=False, show_stats=True, max_literals=MAX_LITERALS, + timeout=TIMEOUT, quiet=False, eval_timeout=EVAL_TIMEOUT, max_examples=MAX_EXAMPLES, max_body=None, + max_rules=None, max_vars=None, functional_test=False, kbpath=False, ex_file=False, bk_file=False, + bias_file=False, showcons=False, no_bias=False, order_space=False, noisy=False, batch_size=BATCH_SIZE, + solver='rc2', anytime_solver=None, anytime_timeout=ANYTIME_TIMEOUT): if cmd_line: args = parse_args() @@ -331,8 +367,6 @@ def __init__(self, cmd_line=False, info=True, debug=False, show_stats=True, max_ for x in solver.symbolic_atoms.by_signature('non_datalog', arity=0): self.non_datalog_flag = True - - # read directions from bias file when there is no PI # if not self.pi_enabled: self.directions = directions = defaultdict(dict) @@ -348,7 +382,6 @@ def __init__(self, cmd_line=False, info=True, debug=False, show_stats=True, max_ arg_dir = '-' directions[pred][i] = arg_dir - self.max_arity = 0 for x in solver.symbolic_atoms.by_signature('head_pred', arity=2): self.max_arity = max(self.max_arity, x.symbol.arguments[1].number) @@ -395,13 +428,12 @@ def __init__(self, cmd_line=False, info=True, debug=False, show_stats=True, max_ exit() # self.body_modes[pred] = tuple(directions[pred][i] for i in range(arity)) - # TODO: EVENTUALLY # print(directions) self.cached_atom_args = {} - for i in range(1, self.max_arity+1): + for i in range(1, self.max_arity + 1): for args in permutations(range(0, self.max_vars), i): k = tuple(clingo.Number(x) for x in args) self.cached_atom_args[k] = args @@ -426,11 +458,13 @@ def __init__(self, cmd_line=False, info=True, debug=False, show_stats=True, max_ literal = Literal(pred, args) self.cached_literals[(pred, k)] = literal if self.has_directions: - self.literal_inputs[(pred, args)] = frozenset(arg for i, arg in enumerate(args) if directions[pred][i] == '+') - self.literal_outputs[(pred, args)] = frozenset(arg for i, arg in enumerate(args) if directions[pred][i] == '-') + self.literal_inputs[(pred, args)] = frozenset( + arg for i, arg in enumerate(args) if directions[pred][i] == '+') + self.literal_outputs[(pred, args)] = frozenset( + arg for i, arg in enumerate(args) if directions[pred][i] == '-') # for k, vs in self.literal_inputs.items(): - # print(k, vs) + # print(k, vs) # print('head_inputs', head_inputs) # print('head_outputs', head_outputs) # exit() @@ -452,18 +486,15 @@ def __init__(self, cmd_line=False, info=True, debug=False, show_stats=True, max_ self.head_types, self.body_types = load_types(self) - if len(self.body_types) > 0 or not self.head_types is None: if self.head_types is None: print('WARNING: MISSING HEAD TYPE') # exit() - for p,a in self.body_preds: + for p, a in self.body_preds: if p not in self.body_types: print(f'WARNING: MISSING BODY TYPE FOR {p}') # exit() - - self.single_solve = not (self.recursion_enabled or self.pi_enabled) self.logger.debug(f'Max rules: {self.max_rules}') @@ -473,34 +504,35 @@ def __init__(self, cmd_line=False, info=True, debug=False, show_stats=True, max_ self.single_solve = not (self.recursion_enabled or self.pi_enabled) def print_incomplete_solution2(self, prog, tp, fn, tn, fp, size): - self.logger.info('*'*20) + self.logger.info('*' * 20) self.logger.info('New best hypothesis:') if self.noisy: - self.logger.info(f'tp:{tp} fn:{fn} tn:{tn} fp:{fp} size:{size} mdl:{size+fn+fp}') + self.logger.info(f'tp:{tp} fn:{fn} tn:{tn} fp:{fp} size:{size} mdl:{size + fn + fp}') else: self.logger.info(f'tp:{tp} fn:{fn} tn:{tn} fp:{fp} size:{size}') for rule in order_prog(prog): self.logger.info(format_rule(self.order_rule(rule))) - self.logger.info('*'*20) + self.logger.info('*' * 20) def print_prog_score(self, prog, score): tp, fn, tn, fp, size = score precision = 'n/a' - if (tp+fp) > 0: - precision = f'{tp / (tp+fp):0.2f}' + if (tp + fp) > 0: + precision = f'{tp / (tp + fp):0.2f}' recall = 'n/a' - if (tp+fn) > 0: - recall = f'{tp / (tp+fn):0.2f}' - print('*'*10 + ' SOLUTION ' + '*'*10) + if (tp + fn) > 0: + recall = f'{tp / (tp + fn):0.2f}' + print('*' * 10 + ' SOLUTION ' + '*' * 10) if self.noisy: - print(f'Precision:{precision} Recall:{recall} TP:{tp} FN:{fn} TN:{tn} FP:{fp} Size:{size} MDL:{size+fn+fp}') + print( + f'Precision:{precision} Recall:{recall} TP:{tp} FN:{fn} TN:{tn} FP:{fp} Size:{size} MDL:{size + fn + fp}') else: - print(f'Precision:{precision} Recall:{recall} TP:{tp} FN:{fn} TN:{tn} FP:{fp} Size:{size}') + print(f'Precision:{precision} Recall:{recall} TP:{tp} FN:{fn} TN:{tn} FP:{fp} Size:{size}') # print(self.format_prog(order_prog(prog))) for rule in order_prog(prog): print(format_rule(self.order_rule(rule))) # print(self.format_prog(order_prog(prog))) - print('*'*30) + print('*' * 30) def order_rule(self, rule): head, body = rule @@ -514,7 +546,6 @@ def order_rule(self, rule): if not self.has_directions: return rule - ordered_body = [] grounded_variables = set() @@ -527,7 +558,6 @@ def order_rule(self, rule): body_literals = set(body) - while body_literals: selected_literal = None for literal in body_literals: @@ -595,6 +625,7 @@ def tmp_score_(self, seen_vars, literal): pred, args = literal return self.recall[pred, tuple(1 if x in seen_vars else 0 for x in args)] + # def non_empty_powerset(iterable): # s = tuple(iterable) # return chain.from_iterable(combinations(s, r) for r in range(1, len(s)+1)) @@ -632,6 +663,7 @@ def load_types(settings): return head_types, body_types + # def bias_order(settings, max_size): # if not (settings.no_bias or settings.order_space): @@ -704,6 +736,8 @@ def load_types(settings): # return True import os + + # AC: I do not know what this code below really does, but it works class suppress_stdout_stderr(object): ''' @@ -715,25 +749,27 @@ class suppress_stdout_stderr(object): exited (at least, I think that is why it lets exceptions through). ''' + def __init__(self): # Open a pair of null files - self.null_fds = [os.open(os.devnull,os.O_RDWR) for x in range(2)] + self.null_fds = [os.open(os.devnull, os.O_RDWR) for x in range(2)] # Save the actual stdout (1) and stderr (2) file descriptors. self.save_fds = [os.dup(1), os.dup(2)] def __enter__(self): # Assign the null pointers to stdout and stderr. - os.dup2(self.null_fds[0],1) - os.dup2(self.null_fds[1],2) + os.dup2(self.null_fds[0], 1) + os.dup2(self.null_fds[1], 2) def __exit__(self, *_): # Re-assign the real stdout/stderr back to (1) and (2) - os.dup2(self.save_fds[0],1) - os.dup2(self.save_fds[1],2) + os.dup2(self.save_fds[0], 1) + os.dup2(self.save_fds[1], 2) # Close all file descriptors for fd in self.null_fds + self.save_fds: os.close(fd) + def rename_variables(rule): head, body = rule if head: @@ -751,11 +787,12 @@ def rename_variables(rule): continue elif var not in lookup: lookup[var] = next_var - next_var+=1 + next_var += 1 new_args.append(lookup[var]) new_body.append((pred, tuple(new_args))) return (head, new_body) + def get_raw_prog(prog): xs = set() for rule in prog: @@ -763,18 +800,19 @@ def get_raw_prog(prog): xs.add((h, frozenset(b))) return frozenset(xs) + def prog_hash(prog): new_prog = get_raw_prog(prog) return hash(new_prog) + def remap_variables(rule): head, body = rule head_vars = frozenset(head.arguments) if head else frozenset() - next_var = len(head_vars) - lookup = {i:i for i in head_vars} + lookup = {i: i for i in head_vars} new_body = [] for pred, args in body: @@ -782,12 +820,13 @@ def remap_variables(rule): for var in args: if var not in lookup: lookup[var] = next_var - next_var+=1 + next_var += 1 new_args.append(lookup[var]) new_atom = Literal(pred, tuple(new_args)) new_body.append(new_atom) return head, frozenset(new_body) + def format_prog(prog): - return '\n'.join(format_rule(rule) for rule in prog) \ No newline at end of file + return '\n'.join(format_rule(rule) for rule in prog) From 9b4fabbf8883032df507bdffdf6411eeeb8396a0 Mon Sep 17 00:00:00 2001 From: "Robert P. Goldman" Date: Tue, 13 May 2025 13:28:39 -0500 Subject: [PATCH 02/18] Additional linter repairs Removed mutable default values. Removed equality comparisons against None. --- popper/loop.py | 17 ++++++++++++----- popper/util.py | 6 ++++-- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/popper/loop.py b/popper/loop.py index 649cc03d..8d6aeefc 100644 --- a/popper/loop.py +++ b/popper/loop.py @@ -291,7 +291,8 @@ def run(self, bkcons): # print('\t', 'moo', format_rule(rule)) # new_cons.append((Constraint.SPECIALISATION, [functional_rename_vars(rule) for rule in x])) - # If a program is subsumed or dioesn't cover enough examples, we search for the most general subprogram that also is also subsumed or doesn't cover enough examples + # If a program is subsumed or doesn't cover enough examples, we search for the most general + # subprogram that is also subsumed or doesn't cover enough examples # only applies to non-recursive and non-PI programs subsumed_progs = [] with settings.stats.duration('find most general subsumed/covers_too_few'): @@ -635,7 +636,7 @@ def run(self, bkcons): to_combine = set() - new_hypothesis_found = is_new_solution_found != None + new_hypothesis_found = is_new_solution_found is not None # if we find a new solution, update the maximum program size # if only adding nogoods, eliminate larger programs @@ -728,7 +729,7 @@ def run(self, bkcons): is_new_solution_found = combiner.update_best_prog(to_combine) to_combine = set() - new_hypothesis_found = is_new_solution_found != None + new_hypothesis_found = is_new_solution_found is not None # if we find a new solution, update the maximum program size # if only adding nogoods, eliminate larger programs @@ -1171,7 +1172,7 @@ def build_constraints_previous_hypotheses(self, score, best_size): seen_hyp_gen[k].remove(to_del) return cons - def subsumed_or_covers_too_few(self, prog, seen=set()): + def subsumed_or_covers_too_few(self, prog, seen:Optional[Set] = None): tester, success_sets, settings = self.tester, self.success_sets, self.settings head, body = list(prog)[0] body = list(body) @@ -1180,6 +1181,8 @@ def subsumed_or_covers_too_few(self, prog, seen=set()): return [] out = set() + if seen is None: + seen = set() head_vars = set(head.arguments) # try the body with one literal removed (the literal at position i) @@ -1290,10 +1293,14 @@ def build_test_prog(self, subprog): def explain_totally_incomplete(self, prog): return list(self.explain_totally_incomplete_aux2(prog, set(), set())) - def explain_totally_incomplete_aux2(self, prog, unsat2=set(), unsat=set()): + def explain_totally_incomplete_aux2(self, prog, unsat2: Optional[Set] = None, unsat: Optional[Set] = None): has_recursion = prog_is_recursive(prog) out = [] + if unsat is None: + unsat = set() + if unsat2 is None: + unsat2 = set() for subprog in generalisations(prog, allow_headless=True, recursive=has_recursion): # print('---') diff --git a/popper/util.py b/popper/util.py index cf53cf41..d5ed21ed 100644 --- a/popper/util.py +++ b/popper/util.py @@ -5,7 +5,7 @@ from contextlib import contextmanager from itertools import permutations from time import perf_counter -from typing import NamedTuple +from typing import NamedTuple, Optional, Dict import clingo import clingo.script @@ -83,8 +83,10 @@ def parse_args(): return parser.parse_args() -def timeout(settings, func, args=(), kwargs={}, timeout_duration=1): +def timeout(settings, func, args=(), kwargs: Optional[Dict] = None, timeout_duration=1): result = None + if kwargs is None: + kwargs = {} class TimeoutError(Exception): pass From 8b63e255e33d2c2cbff5ce286451dadd6495dfe3 Mon Sep 17 00:00:00 2001 From: "Robert P. Goldman" Date: Tue, 13 May 2025 14:12:11 -0500 Subject: [PATCH 03/18] Fix README code snippet. Looking at the README in the PyCharm IDE, discovered that it incorrectly referenced `print_prog_score`. This is not a function, but a method on `Settings`. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c2bb6df3..1db3c0f6 100644 --- a/README.md +++ b/README.md @@ -159,11 +159,11 @@ You can download and compile these solvers from the [MaxSAT 2023 evaluation](htt You can import Popper and use it in your Python code like so: ```python -from popper.util import Settings, print_prog_score +from popper.util import Settings from popper.loop import learn_solution settings = Settings(kbpath='input_dir') prog, score, stats = learn_solution(settings) if prog != None: - print_prog_score(prog, score) + settings.print_prog_score(prog, score) ``` From d34ce59718bb674151f0fb16b5e83564c40d2dba Mon Sep 17 00:00:00 2001 From: "Robert P. Goldman" Date: Mon, 12 May 2025 15:11:50 -0500 Subject: [PATCH 04/18] Modify logging. Change to explicitly make the default logging level be WARNING. --- popper/util.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/popper/util.py b/popper/util.py index d5ed21ed..9c0d24e3 100644 --- a/popper/util.py +++ b/popper/util.py @@ -256,11 +256,16 @@ def flatten(xs): class Settings: + + showcons: bool + datalog: bool + show_failures: bool # display detailed FP and FN information + def __init__(self, cmd_line=False, info=True, debug=False, show_stats=True, max_literals=MAX_LITERALS, timeout=TIMEOUT, quiet=False, eval_timeout=EVAL_TIMEOUT, max_examples=MAX_EXAMPLES, max_body=None, max_rules=None, max_vars=None, functional_test=False, kbpath=False, ex_file=False, bk_file=False, bias_file=False, showcons=False, no_bias=False, order_space=False, noisy=False, batch_size=BATCH_SIZE, - solver='rc2', anytime_solver=None, anytime_timeout=ANYTIME_TIMEOUT): + solver='rc2', anytime_solver=None, anytime_timeout=ANYTIME_TIMEOUT, show_failures=False): if cmd_line: args = parse_args() @@ -299,14 +304,15 @@ def __init__(self, cmd_line=False, info=True, debug=False, show_stats=True, max_ self.logger = logging.getLogger("popper") if quiet: - pass + log_level = logging.ERROR elif debug: log_level = logging.DEBUG # logging.basicConfig(format='%(asctime)s %(message)s', level=log_level, datefmt='%H:%M:%S') logging.basicConfig(format='%(message)s', level=log_level, datefmt='%H:%M:%S') elif info: log_level = logging.INFO - logging.basicConfig(format='%(asctime)s %(message)s', level=log_level, datefmt='%H:%M:%S') + else: + log_level = logging.WARNING self.info = info self.debug = debug @@ -516,9 +522,9 @@ def print_incomplete_solution2(self, prog, tp, fn, tn, fp, size): self.logger.info(format_rule(self.order_rule(rule))) self.logger.info('*' * 20) - def print_prog_score(self, prog, score): + def print_prog_score(self, prog, score: Tuple[int, int, int, int, int]) -> None: tp, fn, tn, fp, size = score - precision = 'n/a' + precision: str = 'n/a' if (tp + fp) > 0: precision = f'{tp / (tp + fp):0.2f}' recall = 'n/a' From 93cbd8d0d023f8fcb267c6de003d98b782275b6d Mon Sep 17 00:00:00 2001 From: "Robert P. Goldman" Date: Mon, 12 May 2025 15:24:10 -0500 Subject: [PATCH 05/18] Refactor generators. --- popper/abs_generate.py | 37 +++++++++++++++++++++++++++ popper/gen2.py | 14 ++++++----- popper/gen3.py | 4 ++- popper/generate.py | 3 ++- popper/loop.py | 57 ++++++++++++++++++++++++++++-------------- 5 files changed, 88 insertions(+), 27 deletions(-) create mode 100644 popper/abs_generate.py diff --git a/popper/abs_generate.py b/popper/abs_generate.py new file mode 100644 index 00000000..55de0e6b --- /dev/null +++ b/popper/abs_generate.py @@ -0,0 +1,37 @@ +import abc + + +class Generator(abc.ABC): + + # @profile + def get_prog(self): + pass + + def gen_symbol(self, literal, backend): + pass + + def update_solver(self, size): + pass + self.update_number_of_literals(size) + + def update_number_of_literals(self, size): + pass + + def update_number_of_vars(self, size): + pass + + def update_number_of_rules(self, size): + pass + + def prune_size(self, size): + pass + + # @profile + def get_ground_rules(self, rule): + pass + + def parse_handles(self, new_handles): + pass + + def constrain(self, tmp_new_cons): + pass \ No newline at end of file diff --git a/popper/gen2.py b/popper/gen2.py index ce7a4f98..60fec8e4 100644 --- a/popper/gen2.py +++ b/popper/gen2.py @@ -8,7 +8,8 @@ import pkg_resources from clingo import Function, Number, Tuple_ -from .util import Constraint, Literal +from .util import Constraint, Literal, Settings +from . abs_generate import Generator as AbstractGenerator def arg_to_symbol(arg): @@ -20,7 +21,7 @@ def arg_to_symbol(arg): return Function(arg) -def atom_to_symbol(pred, args): +def atom_to_symbol(pred, args) -> Function: xs = tuple(arg_to_symbol(arg) for arg in args) return Function(name=pred, arguments=xs) @@ -35,9 +36,10 @@ def atom_to_symbol(pred, args): """ -class Generator: +class Generator(AbstractGenerator): + settings: Settings - def __init__(self, settings, bkcons=[]): + def __init__(self, settings: Settings, bkcons=[]): self.savings = 0 self.settings = settings self.cached_clingo_atoms = {} @@ -109,8 +111,8 @@ def __init__(self, settings, bkcons=[]): encoding = '\n'.join(encoding) - # with open('ENCODING-GEN.pl', 'w') as f: - # f.write(encoding) + with open('/tmp/ENCODING-GEN.pro', 'w') as f: + f.write(encoding) if self.settings.single_solve: solver = clingo.Control(['--heuristic=Domain', '-Wnone']) diff --git a/popper/gen3.py b/popper/gen3.py index 116ff5c8..76ae4932 100644 --- a/popper/gen3.py +++ b/popper/gen3.py @@ -10,6 +10,8 @@ import pkg_resources from collections import defaultdict from . util import rule_is_recursive, Constraint, Literal +from . abs_generate import Generator as AbstractGenerator + clingo.script.enable_python() from clingo import Function, Number, Tuple_ from itertools import permutations @@ -30,7 +32,7 @@ def atom_to_symbol(pred, args): xs = tuple(arg_to_symbol(arg) for arg in args) return Function(name = pred, arguments = xs) -class Generator: +class Generator(AbstractGenerator): def __init__(self, settings, bkcons=[]): self.settings = settings diff --git a/popper/generate.py b/popper/generate.py index 6a98646d..68ffb4b8 100644 --- a/popper/generate.py +++ b/popper/generate.py @@ -14,6 +14,7 @@ from clingo import Function, Number, Tuple_ from itertools import permutations import dataclasses +from . abs_generate import Generator as AbstractGenerator @dataclasses.dataclass(frozen=True) class Var: @@ -75,7 +76,7 @@ def build_rule_literals(rule, rule_var, pi=False): if rule_is_recursive(rule): yield gteq(rule_var, 1) -class Generator: +class Generator(AbstractGenerator): def __init__(self, settings, bkcons=[]): self.savings = 0 diff --git a/popper/loop.py b/popper/loop.py index 8d6aeefc..560aab6f 100644 --- a/popper/loop.py +++ b/popper/loop.py @@ -2,17 +2,21 @@ from collections import defaultdict from functools import cache from itertools import chain, combinations, permutations +from typing import Any, List, Optional, Set, Tuple from bitarray.util import any_and, ones, subset from .bkcons import deduce_bk_cons, deduce_non_singletons, deduce_recalls, deduce_type_cons from .combine import Combiner from .tester import Tester -from .util import Constraint, Literal, calc_prog_size, format_literal, format_prog, format_rule, get_raw_prog, \ +from .util import Constraint, Literal, Settings, calc_prog_size, format_literal, format_prog, format_rule, get_raw_prog, \ mdl_score, order_prog, prog_has_invention, prog_is_recursive, remap_variables, rule_is_recursive, timeout +from .abs_generate import Generator +from .generate import Generator as Generator1 +from .gen2 import Generator as Generator2 +from .gen3 import Generator as Generator3 - -def load_solver(settings, tester, coverage_pos, coverage_neg, prog_lookup): +def load_solver(settings: Settings, tester: Tester, coverage_pos, coverage_neg, prog_lookup): if settings.debug: settings.logger.debug(f'Load exact solver: {settings.solver}') @@ -67,7 +71,12 @@ def load_solver(settings, tester, coverage_pos, coverage_neg, prog_lookup): class Popper(): - def __init__(self, settings, tester): + settings: Settings + tester: Tester + # the following 2 type hints are conjectural -- rpg + num_pos: int + num_neg: int + def __init__(self, settings: Settings, tester: Tester): self.settings = settings self.tester = tester self.pruned2 = set() @@ -78,6 +87,21 @@ def __init__(self, settings, tester): self.tmp = {} self.seen_allsat = set() + def select_generator(self, bkcons): + settings = self.settings + gen: type[Generator] + if settings.single_solve: + gen = Generator2 + settings.logger.debug("using generator 2") + + elif settings.max_rules == 2 and not settings.pi_enabled: + gen = Generator3 + settings.logger.debug("using generator 3") + else: + gen = Generator1 + settings.logger.debug("using generate") + return gen(settings, bkcons) + def run(self, bkcons): settings, tester = self.settings, self.tester @@ -102,16 +126,7 @@ def run(self, bkcons): # generator that builds programs # AC: all very hacky until the refactoring is complete with settings.stats.duration('init'): - if settings.single_solve: - from .gen2 import Generator - print("using generator 2") - elif settings.max_rules == 2 and not settings.pi_enabled: - from .gen3 import Generator - print("using generator 3") - else: - from .generate import Generator - print("using generate") - generator = self.generator = Generator(settings, bkcons) + generator = self.generator = self.select_generator(bkcons) # track the success sets of tested hypotheses @@ -173,7 +188,6 @@ def run(self, bkcons): # generate a program with settings.stats.duration('generate'): prog = generator.get_prog() - print("Generated prog: {}".format(prog)) settings.logger.info("Generated prog: {}".format(prog)) if prog is None: break @@ -203,7 +217,7 @@ def run(self, bkcons): prog_size) else: pos_covered, inconsistent = tester.test_prog(prog) - print(f"pos_covered: {pos_covered}, inconsistent: {inconsistent}") + settings.logger.debug(f"pos_covered: {pos_covered}, inconsistent: {inconsistent}") # @CH: can you explain these? skipped, skip_early_neg = False, False @@ -258,7 +272,8 @@ def run(self, bkcons): if tp == num_pos: add_gen = True - # if the program does not cover any positive examples, check whether it is has an unsat core + # if the program does not cover any positive examples, check whether + # it has an unsat core if not has_invention: if tp < min_coverage or (settings.noisy and tp <= prog_size): with settings.stats.duration('find mucs'): @@ -1560,8 +1575,10 @@ def popper(settings, tester, bkcons): Popper(settings, tester).run(bkcons) -def get_bk_cons(settings, tester): +def get_bk_cons(settings: Settings, tester: Tester): bkcons = [] + recalls: Optional[List[str]] + pointless: Set[Tuple[str, Any]] with settings.stats.duration('find_pointless_relations'): pointless = settings.pointless = tester.find_pointless_relations() @@ -1576,9 +1593,11 @@ def get_bk_cons(settings, tester): with settings.stats.duration('recalls'): recalls = deduce_recalls(settings) - if recalls == None: + if recalls is None: + settings.logger.debug('No recalls; datalog is False') settings.datalog = False else: + settings.logger.debug('Non-empty recalls: datalog is True') settings.datalog = True if settings.showcons: for x in recalls: From 52ff2a89e0ba54a5e9ed4e602976a947c57e7d67 Mon Sep 17 00:00:00 2001 From: "Robert P. Goldman" Date: Thu, 15 May 2025 13:57:55 -0500 Subject: [PATCH 06/18] Incremental Popper fixes. --- popper/abs_generate.py | 9 ++++++++- popper/generate.py | 39 ++++++++++++++++++++++----------------- popper/loop.py | 1 + popper/util.py | 7 +++++-- 4 files changed, 36 insertions(+), 20 deletions(-) diff --git a/popper/abs_generate.py b/popper/abs_generate.py index 55de0e6b..57c059df 100644 --- a/popper/abs_generate.py +++ b/popper/abs_generate.py @@ -1,10 +1,17 @@ import abc +from typing import FrozenSet, Tuple, TYPE_CHECKING +if TYPE_CHECKING: + from . util import Literal, Settings + +Rule = Tuple['Literal', FrozenSet['Literal']] +RuleBase = FrozenSet[Rule] class Generator(abc.ABC): + settings: 'Settings' # @profile - def get_prog(self): + def get_prog(self) -> RuleBase: pass def gen_symbol(self, literal, backend): diff --git a/popper/generate.py b/popper/generate.py index 68ffb4b8..325c0090 100644 --- a/popper/generate.py +++ b/popper/generate.py @@ -1,20 +1,24 @@ -import time import re -from pysat.formula import CNF -from pysat.solvers import Solver -from pysat.card import * -import clingo -import operator +from typing import Optional, Sequence, Set + import numbers +import operator +import re +from collections import defaultdict +from typing import Optional, Sequence, Set + +import clingo import clingo.script import pkg_resources -from collections import defaultdict -from . util import rule_is_recursive, Constraint, Literal, format_rule, remap_variables + +from .util import rule_is_recursive, Constraint, Literal, remap_variables + clingo.script.enable_python() -from clingo import Function, Number, Tuple_ +from clingo import Function, Number, Tuple_, Model, Symbol from itertools import permutations import dataclasses from . abs_generate import Generator as AbstractGenerator +from . abs_generate import Rule, RuleBase @dataclasses.dataclass(frozen=True) class Var: @@ -78,6 +82,7 @@ def build_rule_literals(rule, rule_var, pi=False): class Generator(AbstractGenerator): + model: Optional[Model] def __init__(self, settings, bkcons=[]): self.savings = 0 self.settings = settings @@ -245,13 +250,13 @@ def __init__(self, settings, bkcons=[]): self.solver = solver # @profile - def get_prog(self): + def get_prog(self) -> Optional[RuleBase]: if self.handle is None: self.handle = iter(self.solver.solve(yield_ = True)) self.model = next(self.handle, None) if self.model is None: return None - atoms = self.model.symbols(shown = True) + atoms: Sequence[Symbol] = self.model.symbols(shown = True) if self.settings.single_solve: return self.parse_model_single_rule(atoms) @@ -271,7 +276,7 @@ def gen_symbol(self, literal, backend): self.seen_symbols[k] = symbol return symbol - def parse_model_recursion(self, model): + def parse_model_recursion(self, model) -> RuleBase: settings = self.settings rule_index_to_body = defaultdict(set) head = settings.head_literal @@ -293,10 +298,10 @@ def parse_model_recursion(self, model): return frozenset(prog) - def parse_model_single_rule(self, model): + def parse_model_single_rule(self, model: Sequence[Symbol]) -> RuleBase: settings = self.settings - head = settings.head_literal - body = set() + head: Literal = settings.head_literal + body: Set[Literal] = set() cached_literals = settings.cached_literals for atom in model: args = atom.arguments @@ -304,10 +309,10 @@ def parse_model_single_rule(self, model): atom_args = tuple(args[3].arguments) literal = cached_literals[(predicate, atom_args)] body.add(literal) - rule = head, frozenset(body) + rule: Rule = head, frozenset(body) return frozenset([rule]) - def parse_model_pi(self, model): + def parse_model_pi(self, model) -> RuleBase: settings = self.settings # directions = defaultdict(lambda: defaultdict(lambda: '?')) rule_index_to_body = defaultdict(set) diff --git a/popper/loop.py b/popper/loop.py index 560aab6f..507a9668 100644 --- a/popper/loop.py +++ b/popper/loop.py @@ -186,6 +186,7 @@ def run(self, bkcons): new_cons = [] # generate a program + # prog: Optional[Program] with settings.stats.duration('generate'): prog = generator.get_prog() settings.logger.info("Generated prog: {}".format(prog)) diff --git a/popper/util.py b/popper/util.py index 9c0d24e3..2c32f537 100644 --- a/popper/util.py +++ b/popper/util.py @@ -5,11 +5,13 @@ from contextlib import contextmanager from itertools import permutations from time import perf_counter -from typing import NamedTuple, Optional, Dict +from typing import NamedTuple, Optional, Dict, Tuple, Any import clingo import clingo.script +from popper.abs_generate import Rule + class Literal(NamedTuple): predicate: str @@ -260,6 +262,7 @@ class Settings: showcons: bool datalog: bool show_failures: bool # display detailed FP and FN information + cached_literals: Dict[Tuple[str, Any], Literal] def __init__(self, cmd_line=False, info=True, debug=False, show_stats=True, max_literals=MAX_LITERALS, timeout=TIMEOUT, quiet=False, eval_timeout=EVAL_TIMEOUT, max_examples=MAX_EXAMPLES, max_body=None, @@ -814,7 +817,7 @@ def prog_hash(prog): return hash(new_prog) -def remap_variables(rule): +def remap_variables(rule: Tuple[Any, Any]) -> Rule: head, body = rule head_vars = frozenset(head.arguments) if head else frozenset() From 70bbb854f517dd5a8993f9369d81cbe5a3c7d41d Mon Sep 17 00:00:00 2001 From: "Robert P. Goldman" Date: Thu, 29 May 2025 12:47:39 -0500 Subject: [PATCH 07/18] Handle deprecation/removal of pkg_resources. Add `resource_string.py` that provides a compatibility layer between versions of Python that do and don't have the `pkg_resources` library, removed in version 3.12. --- popper/gen2.py | 4 ++-- popper/gen3.py | 6 +++--- popper/generate.py | 6 +++--- popper/resources.py | 26 ++++++++++++++++++++++++++ popper/tester.py | 8 +++++--- 5 files changed, 39 insertions(+), 11 deletions(-) create mode 100644 popper/resources.py diff --git a/popper/gen2.py b/popper/gen2.py index 30c2444d..f59a3a79 100644 --- a/popper/gen2.py +++ b/popper/gen2.py @@ -5,11 +5,11 @@ import clingo import clingo.script -import pkg_resources from clingo import Function, Number, Tuple_ from .util import Constraint, Literal, Settings from . abs_generate import Generator as AbstractGenerator +from .resources import resource_string def arg_to_symbol(arg): @@ -47,7 +47,7 @@ def __init__(self, settings: Settings, bkcons=[]): self.pruned_sizes = set() encoding = [] - alan = pkg_resources.resource_string(__name__, "lp/alan.pl").decode() + alan = resource_string(__name__, "lp/alan.pl").decode() encoding.append(alan) with open(settings.bias_file) as f: diff --git a/popper/gen3.py b/popper/gen3.py index 76ae4932..540a1f77 100644 --- a/popper/gen3.py +++ b/popper/gen3.py @@ -7,10 +7,10 @@ import operator import numbers import clingo.script -import pkg_resources from collections import defaultdict from . util import rule_is_recursive, Constraint, Literal from . abs_generate import Generator as AbstractGenerator +from .resources import resource_string clingo.script.enable_python() from clingo import Function, Number, Tuple_ @@ -56,7 +56,7 @@ def __init__(self, settings, bkcons=[]): self.new_ground_cons = set() encoding = [] - alan = pkg_resources.resource_string(__name__, "lp/alan-old.pl").decode() + alan = resource_string(__name__, "lp/alan-old.pl").decode() encoding.append(alan) with open(settings.bias_file) as f: @@ -717,4 +717,4 @@ def remap_variables(rule): new_atom = Literal(atom.predicate, tuple(new_args)) new_body.add(new_atom) - return head, frozenset(new_body) \ No newline at end of file + return head, frozenset(new_body) diff --git a/popper/generate.py b/popper/generate.py index 325c0090..c21ec76c 100644 --- a/popper/generate.py +++ b/popper/generate.py @@ -9,7 +9,6 @@ import clingo import clingo.script -import pkg_resources from .util import rule_is_recursive, Constraint, Literal, remap_variables @@ -19,6 +18,7 @@ import dataclasses from . abs_generate import Generator as AbstractGenerator from . abs_generate import Rule, RuleBase +from .resources import resource_string @dataclasses.dataclass(frozen=True) class Var: @@ -109,7 +109,7 @@ def __init__(self, settings, bkcons=[]): self.new_ground_cons = set() encoding = [] - alan = pkg_resources.resource_string(__name__, "lp/alan-old.pl").decode() + alan = resource_string(__name__, "lp/alan-old.pl").decode() # alan = pkg_resources.resource_string(__name__, "lp/alan.pl").decode() encoding.append(alan) @@ -1204,4 +1204,4 @@ def gteq(a, b): return Literal('>=', (a,b)) def body_size_literal(clause_var, body_size): - return Literal('body_size', (clause_var, body_size)) \ No newline at end of file + return Literal('body_size', (clause_var, body_size)) diff --git a/popper/resources.py b/popper/resources.py new file mode 100644 index 00000000..1fd0de74 --- /dev/null +++ b/popper/resources.py @@ -0,0 +1,26 @@ +from typing import Dict, Any + + +try: + from pkg_resources import resource_string, resource_filename + def close_resource_file(ref: Any): + pass +except: + import importlib.resources + from contextlib import ExitStack + from importlib.resources.abc import Traversable + def resource_string(package: str, path: str) -> bytes: + ref = importlib.resources.files(package).joinpath(path) + with ref.open('rb') as fp: + my_bytes = fp.read() + return my_bytes + managers: Dict[Traversable, ExitStack] = {} + def resource_filename(pkg: str, path: str) -> Traversable: + manager = ExitStack() + ref = manager.enter_context(importlib.resources.files(pkg) / path) + managers[ref] = manager + return ref + def close_resource_file(ref: Traversable): + if ref in managers: + managers[ref].close() + del managers[ref] diff --git a/popper/tester.py b/popper/tester.py index 57b442d1..2b65249f 100644 --- a/popper/tester.py +++ b/popper/tester.py @@ -5,12 +5,12 @@ from itertools import product from typing import Tuple -import pkg_resources from bitarray import bitarray, frozenbitarray from bitarray.util import ones from janus_swi import consult, query_once from .util import Literal, calc_prog_size, calc_rule_size, format_rule, order_prog, prog_hash, prog_is_recursive +from .resources import resource_string, resource_filename, close_resource_file def format_literal_janus(literal): @@ -27,7 +27,8 @@ def __init__(self, settings): bk_pl_path = self.settings.bk_file exs_pl_path = self.settings.ex_file - test_pl_path = pkg_resources.resource_filename(__name__, "lp/test.pl") + test_pl_path = str(resource_filename(__name__, "lp/test.pl")) + assert isinstance(test_pl_path, str) if not settings.pi_enabled: consult('prog', f':- dynamic {settings.head_literal.predicate}/{len(settings.head_literal.arguments)}.') @@ -36,6 +37,7 @@ def __init__(self, settings): if os.name == 'nt': # if on Windows, SWI requires escaped directory separators x = x.replace('\\', '\\\\') consult(x) + close_resource_file(x) query_once('load_examples') @@ -521,4 +523,4 @@ def deduce_neg_example_recalls(settings, atoms): return all_recalls def generate_binary_strings(bit_count): - return list(product((0,1), repeat=bit_count))[1:-1] \ No newline at end of file + return list(product((0,1), repeat=bit_count))[1:-1] From be1b476f5db776873c376df2c38c2e0868b702bc Mon Sep 17 00:00:00 2001 From: "Robert P. Goldman" Date: Wed, 4 Jun 2025 08:13:53 -0500 Subject: [PATCH 08/18] Factor out ASP solver initialization. Create two new methods, `build_encoding` and `init_solver` that encapsulate the set up of the solver. Did this for my own purposes, but it simplifies the code, as well, and could be used later to reduce code duplication. --- popper/abs_generate.py | 16 +++++- popper/gen2.py | 50 ++++++++++--------- popper/gen3.py | 67 ++++++++++++------------- popper/generate.py | 109 +++++++++++++++++++---------------------- 4 files changed, 122 insertions(+), 120 deletions(-) diff --git a/popper/abs_generate.py b/popper/abs_generate.py index 57c059df..6cdc19e7 100644 --- a/popper/abs_generate.py +++ b/popper/abs_generate.py @@ -1,8 +1,10 @@ import abc -from typing import FrozenSet, Tuple, TYPE_CHECKING +from typing import FrozenSet, Tuple, TYPE_CHECKING, List + +import clingo if TYPE_CHECKING: - from . util import Literal, Settings + from . util import Settings Rule = Tuple['Literal', FrozenSet['Literal']] RuleBase = FrozenSet[Rule] @@ -41,4 +43,14 @@ def parse_handles(self, new_handles): pass def constrain(self, tmp_new_cons): + pass + + @abc.abstractmethod + def build_encoding(self, bkcons: List, settings: "Settings") -> str: + """Build and return a string for an ASP solver, used to generate hypotheses.""" + pass + + @abc.abstractmethod + def init_solver(self, encoding: str) -> clingo.Control: + """Incorporate the `encoding` into a new solver (`clingo.Control`) and return it.""" pass \ No newline at end of file diff --git a/popper/gen2.py b/popper/gen2.py index f59a3a79..ef06a644 100644 --- a/popper/gen2.py +++ b/popper/gen2.py @@ -1,7 +1,7 @@ import numbers import re from itertools import permutations -from typing import Any, Set, Tuple +from typing import Any, Set, Tuple, Optional import clingo import clingo.script @@ -19,6 +19,7 @@ def arg_to_symbol(arg): return Number(arg) if isinstance(arg, str): return Function(arg) + raise TypeError(f"Cannot translate {arg} to clingo Symbol") def atom_to_symbol(pred, args) -> Function: @@ -38,6 +39,9 @@ def atom_to_symbol(pred, args) -> Function: class Generator(AbstractGenerator): settings: Settings + model: Optional[clingo.Model] + solver: Optional[clingo.Control] + handler: Optional[clingo.SolveHandle] def __init__(self, settings: Settings, bkcons=[]): self.savings = 0 @@ -46,26 +50,41 @@ def __init__(self, settings: Settings, bkcons=[]): self.handle = None self.pruned_sizes = set() + encoding = self.build_encoding(bkcons, settings) + + with open('/tmp/ENCODING-GEN.pro', 'w') as f: + f.write(encoding) + + solver = self.init_solver(encoding) + self.solver = solver + + self.model = None + + def init_solver(self, encoding): + if self.settings.single_solve: + solver = clingo.Control(['--heuristic=Domain', '-Wnone']) + solver.configuration.solve.models = 0 + solver.add('base', [], encoding) + solver.ground([('base', [])]) + return solver + + def build_encoding(self, bkcons, settings): encoding = [] alan = resource_string(__name__, "lp/alan.pl").decode() encoding.append(alan) - with open(settings.bias_file) as f: bias_text = f.read() bias_text = re.sub(r'max_vars\(\d*\).', '', bias_text) bias_text = re.sub(r'max_body\(\d*\).', '', bias_text) bias_text = re.sub(r'max_clauses\(\d*\).', '', bias_text) - # AC: NEED TO COMPLETELY REFACTOR THIS CODE - for p,a in settings.pointless: + for p, a in settings.pointless: bias_text = re.sub(rf'body_pred\({p},\s*{a}\)\.', '', bias_text) bias_text = re.sub(rf'constant\({p},.*?\).*', '', bias_text, flags=re.MULTILINE) - encoding.append(bias_text) encoding.append(f'max_clauses({settings.max_rules}).') encoding.append(f'max_body({settings.max_body}).') encoding.append(f'max_vars({settings.max_vars}).') - # ADD VARS, DIRECTIONS, AND TYPES head_arity = len(settings.head_literal.arguments) encoding.append(f'head_vars({head_arity}, {tuple(range(head_arity))}).') @@ -76,7 +95,6 @@ def __init__(self, settings: Settings, bkcons=[]): encoding.append(f'vars({arity}, {tuple(xs)}).') for i, x in enumerate(xs): encoding.append(f'var_pos({x}, {tuple(xs)}, {i}).') - type_encoding = set() if self.settings.head_types: types = tuple(self.settings.head_types) @@ -90,38 +108,22 @@ def __init__(self, settings: Settings, bkcons=[]): for i, x in enumerate(types): type_encoding.add(f'type_pos({str_types}, {i}, {x}).') encoding.extend(type_encoding) - for pred, xs in self.settings.directions.items(): for i, v in xs.items(): if v == '+': encoding.append(f'direction_({pred}, {i}, in).') if v == '-': encoding.append(f'direction_({pred}, {i}, out).') - max_size = (1 + settings.max_body) * settings.max_rules if settings.max_literals < max_size: encoding.append(f'custom_max_size({settings.max_literals}).') - if settings.noisy: encoding.append(NOISY_ENCODING) - encoding.extend(bkcons) - if settings.single_solve: encoding.append(DEFAULT_HEURISTIC) - encoding = '\n'.join(encoding) - - with open('/tmp/ENCODING-GEN.pro', 'w') as f: - f.write(encoding) - - if self.settings.single_solve: - solver = clingo.Control(['--heuristic=Domain', '-Wnone']) - - solver.configuration.solve.models = 0 - solver.add('base', [], encoding) - solver.ground([('base', [])]) - self.solver = solver + return encoding def update_solver(self, size): # not used when learning programs without pi or recursion diff --git a/popper/gen3.py b/popper/gen3.py index 540a1f77..cd118672 100644 --- a/popper/gen3.py +++ b/popper/gen3.py @@ -55,20 +55,45 @@ def __init__(self, settings, bkcons=[]): self.new_seen_rules = set() self.new_ground_cons = set() + encoding = self.build_encoding(bkcons, settings) + + # with open('ENCODING-GEN.pl', 'w') as f: + # f.write(encoding) + + solver = self.init_solver(encoding) + self.solver = solver + + def init_solver(self, encoding): + if self.settings.single_solve: + solver = clingo.Control(['--heuristic=Domain', '-Wnone']) + else: + solver = clingo.Control(['-Wnone']) + NUM_OF_LITERALS = """ + %%% External atom for number of literals in the program %%%%% + #external size_in_literals(n). + :- + size_in_literals(n), + #sum{K+1,Clause : body_size(Clause,K)} != n. + """ + solver.add('number_of_literals', ['n'], NUM_OF_LITERALS) + solver.configuration.solve.models = 0 + solver.add('base', [], encoding) + solver.ground([('base', [])]) + return solver + + def build_encoding(self, bkcons, settings): encoding = [] alan = resource_string(__name__, "lp/alan-old.pl").decode() encoding.append(alan) - with open(settings.bias_file) as f: bias_text = f.read() - bias_text = re.sub(r'max_vars\(\d*\).','', bias_text) - bias_text = re.sub(r'max_body\(\d*\).','', bias_text) - bias_text = re.sub(r'max_clauses\(\d*\).','', bias_text) + bias_text = re.sub(r'max_vars\(\d*\).', '', bias_text) + bias_text = re.sub(r'max_body\(\d*\).', '', bias_text) + bias_text = re.sub(r'max_clauses\(\d*\).', '', bias_text) encoding.append(bias_text) encoding.append(f'max_clauses({settings.max_rules}).') encoding.append(f'max_body({settings.max_body}).') encoding.append(f'max_vars({settings.max_vars}).') - # ADD VARS, DIRECTIONS, AND TYPES head_arity = len(settings.head_literal.arguments) encoding.append(f'head_vars({head_arity}, {tuple(range(head_arity))}).') @@ -80,68 +105,38 @@ def __init__(self, settings, bkcons=[]): for i, x in enumerate(xs): encoding.append(f'var_pos({x}, {tuple(xs)}, {i}).') - # types = tuple(self.settings.head_types) # str_types = str(types).replace("'","") # for x, i in enumerate(self.settings.head_types): # encoding.append(f'type_pos({str_types}, {i}, {x}).') - # for pred, types in self.settings.body_types.items(): # types = tuple(types) # str_types = str(types).replace("'","") # for i, x in enumerate(types): - # encoding.append(f'type_pos({str_types}, {i}, {x}).') - # for pred, xs in self.settings.directions.items(): # for i, v in xs.items(): # if v == '+': # encoding.append(f'direction_({pred}, {i}, in).') # if v == '-': # encoding.append(f'direction_({pred}, {i}, out).') - max_size = (1 + settings.max_body) * settings.max_rules if settings.max_literals < max_size: encoding.append(f'custom_max_size({settings.max_literals}).') - if settings.pi_enabled: encoding.append(f'#show head_literal/4.') - if settings.noisy: encoding.append(""" program_bounds(0..K):- max_size(K). program_size_at_least(M):- size(N), program_bounds(M), M <= N. """) - # if settings.bkcons: encoding.extend(bkcons) - if settings.single_solve: if settings.order_space: encoding.append(DEFAULT_HEURISTIC) - encoding = '\n'.join(encoding) - - # with open('ENCODING-GEN.pl', 'w') as f: - # f.write(encoding) - - if self.settings.single_solve: - solver = clingo.Control(['--heuristic=Domain','-Wnone']) - else: - solver = clingo.Control(['-Wnone']) - NUM_OF_LITERALS = """ - %%% External atom for number of literals in the program %%%%% - #external size_in_literals(n). - :- - size_in_literals(n), - #sum{K+1,Clause : body_size(Clause,K)} != n. - """ - solver.add('number_of_literals', ['n'], NUM_OF_LITERALS) - - solver.configuration.solve.models = 0 - solver.add('base', [], encoding) - solver.ground([('base', [])]) - self.solver = solver + return encoding def get_prog(self): if self.handle is None: diff --git a/popper/generate.py b/popper/generate.py index c21ec76c..5cad3148 100644 --- a/popper/generate.py +++ b/popper/generate.py @@ -108,21 +108,65 @@ def __init__(self, settings, bkcons=[]): # TODO: dunno self.new_ground_cons = set() + encoding = self.build_encoding(bkcons, settings) + + # with open('ENCODING-GEN.pl', 'w') as f: + # f.write(encoding) + + solver = self.init_solver(encoding) + self.solver = solver + + def init_solver(self, encoding): + if self.settings.single_solve: + solver = clingo.Control(['--heuristic=Domain', '-Wnone']) + else: + solver = clingo.Control(['-Wnone']) + NUM_OF_LITERALS = """ + %%% External atom for number of literals in the program %%%%% + #external size_in_literals(n). + :- + size_in_literals(n), + #sum{K+1,Clause : body_size(Clause,K)} != n. + """ + solver.add('number_of_literals', ['n'], NUM_OF_LITERALS) + + if self.settings.no_bias: + NUM_OF_VARS = """ + %%% External atom for number of variables in the program %%%%% + #external size_in_vars(v). + :- + size_in_vars(v), + #max{V : clause_var(_,V)} != v - 1. + """ + solver.add('number_of_vars', ['v'], NUM_OF_VARS) + + NUM_OF_RULES = """ + %%% External atom for number of rules in the program %%%%% + #external size_in_rules(r). + :- + size_in_rules(r), + #max{R : clause(R)} != r - 1. + """ + solver.add('number_of_rules', ['r'], NUM_OF_RULES) + solver.configuration.solve.models = 0 + solver.add('base', [], encoding) + solver.ground([('base', [])]) + return solver + + def build_encoding(self, bkcons, settings): encoding = [] alan = resource_string(__name__, "lp/alan-old.pl").decode() # alan = pkg_resources.resource_string(__name__, "lp/alan.pl").decode() encoding.append(alan) - with open(settings.bias_file) as f: bias_text = f.read() - bias_text = re.sub(r'max_vars\(\d*\).','', bias_text) - bias_text = re.sub(r'max_body\(\d*\).','', bias_text) - bias_text = re.sub(r'max_clauses\(\d*\).','', bias_text) + bias_text = re.sub(r'max_vars\(\d*\).', '', bias_text) + bias_text = re.sub(r'max_body\(\d*\).', '', bias_text) + bias_text = re.sub(r'max_clauses\(\d*\).', '', bias_text) encoding.append(bias_text) encoding.append(f'max_clauses({settings.max_rules}).') encoding.append(f'max_body({settings.max_body}).') encoding.append(f'max_vars({settings.max_vars}).') - # ADD VARS, DIRECTIONS, AND TYPES head_arity = len(settings.head_literal.arguments) encoding.append(f'head_vars({head_arity}, {tuple(range(head_arity))}).') @@ -134,48 +178,39 @@ def __init__(self, settings, bkcons=[]): for i, x in enumerate(xs): encoding.append(f'var_pos({x}, {tuple(xs)}, {i}).') - # types = tuple(self.settings.head_types) # str_types = str(types).replace("'","") # for x, i in enumerate(self.settings.head_types): # encoding.append(f'type_pos({str_types}, {i}, {x}).') - # for pred, types in self.settings.body_types.items(): # types = tuple(types) # str_types = str(types).replace("'","") # for i, x in enumerate(types): - # encoding.append(f'type_pos({str_types}, {i}, {x}).') - # for pred, xs in self.settings.directions.items(): # for i, v in xs.items(): # if v == '+': # encoding.append(f'direction_({pred}, {i}, in).') # if v == '-': # encoding.append(f'direction_({pred}, {i}, out).') - max_size = (1 + settings.max_body) * settings.max_rules if settings.max_literals < max_size: encoding.append(f'custom_max_size({settings.max_literals}).') - if settings.pi_enabled: encoding.append(f'#show head_literal/4.') - if settings.noisy: encoding.append(""" program_bounds(0..K):- max_size(K). program_size_at_least(M):- size(N), program_bounds(M), M <= N. """) - # if settings.bkcons: encoding.extend(bkcons) - # FG Heuristic for single solve # - considering a default order of minimum rules, then minimum literals, and then minimum variables # - considering a preference for minimum hspace size parameters configuration if settings.single_solve: if settings.order_space: - assert(False) + assert (False) pass # horder = bias_order(settings, max_size) # iorder = 1 @@ -204,50 +239,8 @@ def __init__(self, settings, bkcons=[]): #heuristic size(N). [1000-N,true] """ encoding.append(DEFAULT_HEURISTIC) - encoding = '\n'.join(encoding) - - # with open('ENCODING-GEN.pl', 'w') as f: - # f.write(encoding) - - if self.settings.single_solve: - solver = clingo.Control(['--heuristic=Domain','-Wnone']) - else: - solver = clingo.Control(['-Wnone']) - NUM_OF_LITERALS = """ - %%% External atom for number of literals in the program %%%%% - #external size_in_literals(n). - :- - size_in_literals(n), - #sum{K+1,Clause : body_size(Clause,K)} != n. - """ - solver.add('number_of_literals', ['n'], NUM_OF_LITERALS) - - if self.settings.no_bias: - NUM_OF_VARS = """ - %%% External atom for number of variables in the program %%%%% - #external size_in_vars(v). - :- - size_in_vars(v), - #max{V : clause_var(_,V)} != v - 1. - """ - solver.add('number_of_vars', ['v'], NUM_OF_VARS) - - NUM_OF_RULES = """ - %%% External atom for number of rules in the program %%%%% - #external size_in_rules(r). - :- - size_in_rules(r), - #max{R : clause(R)} != r - 1. - """ - solver.add('number_of_rules', ['r'], NUM_OF_RULES) - - - - solver.configuration.solve.models = 0 - solver.add('base', [], encoding) - solver.ground([('base', [])]) - self.solver = solver + return encoding # @profile def get_prog(self) -> Optional[RuleBase]: From 8adc7d7bc603cd3e4247ff2dca8f1c5f7335e579 Mon Sep 17 00:00:00 2001 From: "Robert P. Goldman" Date: Fri, 6 Jun 2025 14:17:39 -0500 Subject: [PATCH 09/18] Fix bug in logger configuration. --- popper/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/popper/util.py b/popper/util.py index 2c32f537..3910ffc4 100644 --- a/popper/util.py +++ b/popper/util.py @@ -311,11 +311,11 @@ def __init__(self, cmd_line=False, info=True, debug=False, show_stats=True, max_ elif debug: log_level = logging.DEBUG # logging.basicConfig(format='%(asctime)s %(message)s', level=log_level, datefmt='%H:%M:%S') - logging.basicConfig(format='%(message)s', level=log_level, datefmt='%H:%M:%S') elif info: log_level = logging.INFO else: log_level = logging.WARNING + logging.basicConfig(format='%(message)s', level=log_level, datefmt='%H:%M:%S') self.info = info self.debug = debug From 67ca3c57b5b8c759b3ba1421000275a1e9acc839 Mon Sep 17 00:00:00 2001 From: "Robert P. Goldman" Date: Mon, 9 Jun 2025 20:26:53 -0500 Subject: [PATCH 10/18] Clean up imports: linter fixes. --- popper/gen3.py | 20 ++++++++++---------- popper/generate.py | 3 --- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/popper/gen3.py b/popper/gen3.py index cd118672..6bd57f10 100644 --- a/popper/gen3.py +++ b/popper/gen3.py @@ -1,19 +1,18 @@ -import time +import numbers +import operator import re -from pysat.formula import CNF -from pysat.solvers import Solver -from pysat.card import * +from collections import defaultdict +from typing import Optional + import clingo -import operator -import numbers import clingo.script -from collections import defaultdict -from . util import rule_is_recursive, Constraint, Literal -from . abs_generate import Generator as AbstractGenerator + +from .abs_generate import Generator as AbstractGenerator from .resources import resource_string +from .util import rule_is_recursive, Constraint, Literal clingo.script.enable_python() -from clingo import Function, Number, Tuple_ +from clingo import Function, Number, Tuple_, Model from itertools import permutations DEFAULT_HEURISTIC = """ @@ -33,6 +32,7 @@ def atom_to_symbol(pred, args): return Function(name = pred, arguments = xs) class Generator(AbstractGenerator): + model: Optional[Model] def __init__(self, settings, bkcons=[]): self.settings = settings diff --git a/popper/generate.py b/popper/generate.py index 5cad3148..574ed197 100644 --- a/popper/generate.py +++ b/popper/generate.py @@ -1,6 +1,3 @@ -import re -from typing import Optional, Sequence, Set - import numbers import operator import re From d5a3c4bf93ff1579644478c96de2abf2ac880b94 Mon Sep 17 00:00:00 2001 From: "Robert P. Goldman" Date: Wed, 18 Jun 2025 15:50:46 -0400 Subject: [PATCH 11/18] Sort format_prog output. --- popper/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/popper/util.py b/popper/util.py index 3910ffc4..a80fe3b9 100644 --- a/popper/util.py +++ b/popper/util.py @@ -840,4 +840,4 @@ def remap_variables(rule: Tuple[Any, Any]) -> Rule: def format_prog(prog): - return '\n'.join(format_rule(rule) for rule in prog) + return '\n'.join(sorted(format_rule(rule) for rule in prog)) From 88e14bdfa51b4d325bee76c339213d715b26fba3 Mon Sep 17 00:00:00 2001 From: "Robert P. Goldman" Date: Thu, 19 Jun 2025 17:47:44 -0500 Subject: [PATCH 12/18] Reorganize type declarations. To avoid circularities introduced by type checking, move some type declarations up into a file, type_defs.py, that can be loaded first. --- popper/abs_generate.py | 8 +++++--- popper/generate.py | 4 +++- popper/type_defs.py | 10 ++++++++++ popper/util.py | 9 ++------- 4 files changed, 20 insertions(+), 11 deletions(-) create mode 100644 popper/type_defs.py diff --git a/popper/abs_generate.py b/popper/abs_generate.py index 6cdc19e7..a65ca815 100644 --- a/popper/abs_generate.py +++ b/popper/abs_generate.py @@ -1,13 +1,15 @@ import abc -from typing import FrozenSet, Tuple, TYPE_CHECKING, List +from collections import defaultdict +from typing import Set, TYPE_CHECKING, List import clingo +from . type_defs import Literal, RuleBase + + if TYPE_CHECKING: from . util import Settings -Rule = Tuple['Literal', FrozenSet['Literal']] -RuleBase = FrozenSet[Rule] class Generator(abc.ABC): settings: 'Settings' diff --git a/popper/generate.py b/popper/generate.py index 574ed197..42fa6439 100644 --- a/popper/generate.py +++ b/popper/generate.py @@ -10,11 +10,13 @@ from .util import rule_is_recursive, Constraint, Literal, remap_variables clingo.script.enable_python() +# ruff: noqa: E402 +# flake8: noqa: E402 from clingo import Function, Number, Tuple_, Model, Symbol from itertools import permutations import dataclasses from . abs_generate import Generator as AbstractGenerator -from . abs_generate import Rule, RuleBase +from . type_defs import Rule, RuleBase from .resources import resource_string @dataclasses.dataclass(frozen=True) diff --git a/popper/type_defs.py b/popper/type_defs.py new file mode 100644 index 00000000..0e6b1d24 --- /dev/null +++ b/popper/type_defs.py @@ -0,0 +1,10 @@ +from typing import FrozenSet, NamedTuple, Tuple + + +class Literal(NamedTuple): + predicate: str + arguments: tuple + + +Rule = Tuple[Literal, FrozenSet[Literal]] +RuleBase = FrozenSet[Rule] diff --git a/popper/util.py b/popper/util.py index a80fe3b9..b056008e 100644 --- a/popper/util.py +++ b/popper/util.py @@ -5,17 +5,12 @@ from contextlib import contextmanager from itertools import permutations from time import perf_counter -from typing import NamedTuple, Optional, Dict, Tuple, Any +from typing import Any, Dict, Optional, Set, Tuple import clingo import clingo.script -from popper.abs_generate import Rule - - -class Literal(NamedTuple): - predicate: str - arguments: tuple +from popper.type_defs import Literal, Rule clingo.script.enable_python() From 66bbb94a0a85c3872e2a2c87f3e129d0564fe088 Mon Sep 17 00:00:00 2001 From: "Robert P. Goldman" Date: Thu, 19 Jun 2025 17:49:36 -0500 Subject: [PATCH 13/18] Move parse_model_pi method. This method definition appeared only in generate.py, but it was also invoked in gen3.py, where it was not defined. Added it to the abstract parent class of all the generators. --- popper/abs_generate.py | 43 +++++++++++++++++++++++++++++++- popper/generate.py | 56 ------------------------------------------ 2 files changed, 42 insertions(+), 57 deletions(-) diff --git a/popper/abs_generate.py b/popper/abs_generate.py index a65ca815..fbd88a44 100644 --- a/popper/abs_generate.py +++ b/popper/abs_generate.py @@ -55,4 +55,45 @@ def build_encoding(self, bkcons: List, settings: "Settings") -> str: @abc.abstractmethod def init_solver(self, encoding: str) -> clingo.Control: """Incorporate the `encoding` into a new solver (`clingo.Control`) and return it.""" - pass \ No newline at end of file + + def parse_model_pi(self, model) -> RuleBase: + settings = self.settings + # directions = defaultdict(lambda: defaultdict(lambda: '?')) + rule_index_to_body = defaultdict(set) + rule_index_to_head = {} + # rule_index_ordering = defaultdict(set) + + for atom in model: + args = atom.arguments + name = atom.name + + if name == 'body_literal': + rule_index = args[0].number + predicate = args[1].name + atom_args = args[3].arguments + atom_args = settings.cached_atom_args[tuple(atom_args)] + arity = len(atom_args) + body_literal = (predicate, atom_args, arity) + rule_index_to_body[rule_index].add(body_literal) + + elif name == 'head_literal': + rule_index = args[0].number + predicate = args[1].name + atom_args = args[3].arguments + atom_args = settings.cached_atom_args[tuple(atom_args)] + arity = len(atom_args) + head_literal = (predicate, atom_args, arity) + rule_index_to_head[rule_index] = head_literal + + prog = [] + + for rule_index in rule_index_to_head: # pylint: ignore=C0206 + head_pred, head_args, _head_arity = rule_index_to_head[rule_index] + head = Literal(head_pred, head_args) + body: Set[Literal] = set() + for (body_pred, body_args, _body_arity) in rule_index_to_body[rule_index]: + body.add(Literal(body_pred, body_args)) + rule = head, frozenset(body) + prog.append((rule)) + + return frozenset(prog) diff --git a/popper/generate.py b/popper/generate.py index 42fa6439..a30a3f7b 100644 --- a/popper/generate.py +++ b/popper/generate.py @@ -304,62 +304,6 @@ def parse_model_single_rule(self, model: Sequence[Symbol]) -> RuleBase: rule: Rule = head, frozenset(body) return frozenset([rule]) - def parse_model_pi(self, model) -> RuleBase: - settings = self.settings - # directions = defaultdict(lambda: defaultdict(lambda: '?')) - rule_index_to_body = defaultdict(set) - rule_index_to_head = {} - # rule_index_ordering = defaultdict(set) - - for atom in model: - args = atom.arguments - name = atom.name - - if name == 'body_literal': - rule_index = args[0].number - predicate = args[1].name - atom_args = args[3].arguments - atom_args = settings.cached_atom_args[tuple(atom_args)] - arity = len(atom_args) - body_literal = (predicate, atom_args, arity) - rule_index_to_body[rule_index].add(body_literal) - - elif name == 'head_literal': - rule_index = args[0].number - predicate = args[1].name - atom_args = args[3].arguments - atom_args = settings.cached_atom_args[tuple(atom_args)] - arity = len(atom_args) - head_literal = (predicate, atom_args, arity) - rule_index_to_head[rule_index] = head_literal - - # # TODO AC: STOP READING THESE THE MODELS - # elif name == 'direction_': - # pred_name = args[0].name - # arg_index = args[1].number - # arg_dir_str = args[2].name - - # if arg_dir_str == 'in': - # arg_dir = '+' - # elif arg_dir_str == 'out': - # arg_dir = '-' - # else: - # raise Exception(f'Unrecognised argument direction "{arg_dir_str}"') - # directions[pred_name][arg_index] = arg_dir - - prog = [] - - for rule_index in rule_index_to_head: - head_pred, head_args, head_arity = rule_index_to_head[rule_index] - head = Literal(head_pred, head_args) - body = set() - for (body_pred, body_args, body_arity) in rule_index_to_body[rule_index]: - body.add(Literal(body_pred, body_args)) - body = frozenset(body) - rule = head, body - prog.append((rule)) - - return frozenset(prog) def update_solver(self, size): self.update_number_of_literals(size) From 88e289aeede2e5ba2c925deec5a134a6a77544de Mon Sep 17 00:00:00 2001 From: "Robert P. Goldman" Date: Thu, 19 Jun 2025 17:50:02 -0500 Subject: [PATCH 14/18] Fix abstract method declarations. --- popper/abs_generate.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/popper/abs_generate.py b/popper/abs_generate.py index fbd88a44..3df16a8b 100644 --- a/popper/abs_generate.py +++ b/popper/abs_generate.py @@ -14,47 +14,53 @@ class Generator(abc.ABC): settings: 'Settings' - # @profile + @abc.abstractmethod def get_prog(self) -> RuleBase: pass + @abc.abstractmethod def gen_symbol(self, literal, backend): pass + @abc.abstractmethod def update_solver(self, size): pass - self.update_number_of_literals(size) + @abc.abstractmethod def update_number_of_literals(self, size): pass + @abc.abstractmethod def update_number_of_vars(self, size): pass + @abc.abstractmethod def update_number_of_rules(self, size): pass + @abc.abstractmethod def prune_size(self, size): pass - # @profile + @abc.abstractmethod def get_ground_rules(self, rule): pass + @abc.abstractmethod def parse_handles(self, new_handles): pass + @abc.abstractmethod def constrain(self, tmp_new_cons): pass @abc.abstractmethod def build_encoding(self, bkcons: List, settings: "Settings") -> str: - """Build and return a string for an ASP solver, used to generate hypotheses.""" - pass + """Build and return a string for an ASP solver.""" @abc.abstractmethod def init_solver(self, encoding: str) -> clingo.Control: - """Incorporate the `encoding` into a new solver (`clingo.Control`) and return it.""" + """Incorporate the `encoding` into a new solver and return it.""" def parse_model_pi(self, model) -> RuleBase: settings = self.settings From 66a0e6edfaa921b9a2632e715c4c02585fd1bcc1 Mon Sep 17 00:00:00 2001 From: "Robert P. Goldman" Date: Thu, 19 Jun 2025 17:50:24 -0500 Subject: [PATCH 15/18] Add property declarations to classes. --- popper/loop.py | 3 +++ popper/util.py | 26 +++++++++++++++++++++++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/popper/loop.py b/popper/loop.py index bb99e7a1..9ba490fa 100644 --- a/popper/loop.py +++ b/popper/loop.py @@ -2,6 +2,7 @@ from collections import defaultdict from functools import cache from itertools import chain, combinations, permutations +from logging import Logger from typing import Any, List, Optional, Set, Tuple from bitarray.util import any_and, ones, subset @@ -73,6 +74,8 @@ def load_solver(settings: Settings, tester: Tester, coverage_pos, coverage_neg, class Popper(): settings: Settings tester: Tester + logger: Logger + noisy: bool # the following 2 type hints are conjectural -- rpg num_pos: int num_neg: int diff --git a/popper/util.py b/popper/util.py index b056008e..5e8b6c74 100644 --- a/popper/util.py +++ b/popper/util.py @@ -119,6 +119,9 @@ def fix_path(filename): class Stats: + logger: logging.Logger + maxsat_calls: int + def __init__(self, info=False, debug=False): self.exec_start = perf_counter() self.total_programs = 0 @@ -254,10 +257,27 @@ def flatten(xs): class Settings: - showcons: bool + anytime_maxsat_solver: str + anytime_maxsat_solver_params: str + anytime_maxsat_solver_signal: int + batch_size: int + best_mdl: int # best model description length + cached_literals: Dict[Tuple[str, Any], Literal] datalog: bool + exact_maxsat_solver: str + exact_maxsat_solver_params: str + last_combine_stage: Any + lex: bool + lex_via_weights: bool + max_size: int + maxsat_timeout: Optional[int] + min_coverage: int + old_format: bool + pointless: Set[Tuple[str, Any]] + search_depth: int + showcons: bool + solution_found: Any show_failures: bool # display detailed FP and FN information - cached_literals: Dict[Tuple[str, Any], Literal] def __init__(self, cmd_line=False, info=True, debug=False, show_stats=True, max_literals=MAX_LITERALS, timeout=TIMEOUT, quiet=False, eval_timeout=EVAL_TIMEOUT, max_examples=MAX_EXAMPLES, max_body=None, @@ -286,7 +306,7 @@ def __init__(self, cmd_line=False, info=True, debug=False, show_stats=True, max_ # no_bias = args.no_bias # order_space = args.order_space noisy = args.noisy - batch_size = args.batch_size + self.batch_size = args.batch_size solver = args.solver anytime_solver = args.anytime_solver anytime_timeout = args.anytime_timeout From 2bd9c8ffa4e41e8bc68f6fe2c1d69e7721449f7b Mon Sep 17 00:00:00 2001 From: "Robert P. Goldman" Date: Fri, 27 Jun 2025 15:26:23 -0500 Subject: [PATCH 16/18] Import Dan Bryce's SWI prolog error-handling. We are getting some errors in returns from SWI prolog. Catch them and treat them as query failures. --- popper/tester.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/popper/tester.py b/popper/tester.py index 2b65249f..b2ec5e14 100644 --- a/popper/tester.py +++ b/popper/tester.py @@ -1,30 +1,42 @@ +import logging import os from collections import defaultdict from contextlib import contextmanager from functools import cache from itertools import product -from typing import Tuple +from typing import Optional, Tuple from bitarray import bitarray, frozenbitarray from bitarray.util import ones from janus_swi import consult, query_once +from janus_swi.janus import PrologError from .util import Literal, calc_prog_size, calc_rule_size, format_rule, order_prog, prog_hash, prog_is_recursive from .resources import resource_string, resource_filename, close_resource_file +logger: Optional[logging.Logger] = None + def format_literal_janus(literal): args = ','.join(f'_V{i}' for i in literal.arguments) return f'{literal.predicate}({args})' def bool_query(query): - return query_once(query)['truth'] + try: + return query_once(query)['truth'] + except PrologError as e: + if logger is not None: + logger.debug(f"Error in SWI bool_query {query}: {e}") + return False class Tester(): def __init__(self, settings): + global logger self.settings = settings + logger = self.settings.logger + bk_pl_path = self.settings.bk_file exs_pl_path = self.settings.ex_file test_pl_path = str(resource_filename(__name__, "lp/test.pl")) From aca027d4cab5a04d2b0ecff0af12e3a8adfad768 Mon Sep 17 00:00:00 2001 From: "Robert P. Goldman" Date: Fri, 27 Jun 2025 18:36:10 -0500 Subject: [PATCH 17/18] Fix abstract Generator class. There were some abstract methods declared that are not present in Generator 2 (gen2.py). Removed them from the ABC definition. --- popper/abs_generate.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/popper/abs_generate.py b/popper/abs_generate.py index 3df16a8b..8b7f6312 100644 --- a/popper/abs_generate.py +++ b/popper/abs_generate.py @@ -18,37 +18,37 @@ class Generator(abc.ABC): def get_prog(self) -> RuleBase: pass - @abc.abstractmethod - def gen_symbol(self, literal, backend): - pass + # @abc.abstractmethod + # def gen_symbol(self, literal, backend): + # pass @abc.abstractmethod def update_solver(self, size): pass - @abc.abstractmethod - def update_number_of_literals(self, size): - pass + # @abc.abstractmethod + # def update_number_of_literals(self, size): + # pass - @abc.abstractmethod - def update_number_of_vars(self, size): - pass + # @abc.abstractmethod + # def update_number_of_vars(self, size): + # pass - @abc.abstractmethod - def update_number_of_rules(self, size): - pass + # @abc.abstractmethod + # def update_number_of_rules(self, size): + # pass @abc.abstractmethod def prune_size(self, size): pass - @abc.abstractmethod - def get_ground_rules(self, rule): - pass + # @abc.abstractmethod + # def get_ground_rules(self, rule): + # pass - @abc.abstractmethod - def parse_handles(self, new_handles): - pass + # @abc.abstractmethod + # def parse_handles(self, new_handles): + # pass @abc.abstractmethod def constrain(self, tmp_new_cons): From 9d3d64f89d973d9c9b02e72bee1fc8080274582f Mon Sep 17 00:00:00 2001 From: "Robert P. Goldman" Date: Tue, 8 Jul 2025 15:00:54 -0500 Subject: [PATCH 18/18] Fixes to generator properties and init_solver(). Added property type hints to the abstract `Generator` class. Stumbled on these when checking some calling code with mypy. This caused me to fix the `init_solver` method in gen2.py. Previously, this would only set the solver property if `settings.single_solve` was true. But gen2 is only used if `single_solve` is true, so I removed that condition. If that condition was false, then `init_solver` would fail in odd ways because it would set properties on the `solver` but `solver` would be unbound. --- popper/abs_generate.py | 5 ++++- popper/gen2.py | 9 ++++----- popper/gen3.py | 5 ++++- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/popper/abs_generate.py b/popper/abs_generate.py index 8b7f6312..91ee58fa 100644 --- a/popper/abs_generate.py +++ b/popper/abs_generate.py @@ -1,6 +1,6 @@ import abc from collections import defaultdict -from typing import Set, TYPE_CHECKING, List +from typing import Set, TYPE_CHECKING, List, Optional import clingo @@ -13,6 +13,9 @@ class Generator(abc.ABC): settings: 'Settings' + solver: clingo.Control + handle: Optional[clingo.SolveHandle] + model: Optional[clingo.Model] @abc.abstractmethod def get_prog(self) -> RuleBase: diff --git a/popper/gen2.py b/popper/gen2.py index ef06a644..9baeafcd 100644 --- a/popper/gen2.py +++ b/popper/gen2.py @@ -40,8 +40,8 @@ def atom_to_symbol(pred, args) -> Function: class Generator(AbstractGenerator): settings: Settings model: Optional[clingo.Model] - solver: Optional[clingo.Control] - handler: Optional[clingo.SolveHandle] + solver: clingo.Control + handle: Optional[clingo.SolveHandle] def __init__(self, settings: Settings, bkcons=[]): self.savings = 0 @@ -60,9 +60,8 @@ def __init__(self, settings: Settings, bkcons=[]): self.model = None - def init_solver(self, encoding): - if self.settings.single_solve: - solver = clingo.Control(['--heuristic=Domain', '-Wnone']) + def init_solver(self, encoding) -> clingo.Control: + solver = clingo.Control(['--heuristic=Domain', '-Wnone']) solver.configuration.solve.models = 0 solver.add('base', [], encoding) solver.ground([('base', [])]) diff --git a/popper/gen3.py b/popper/gen3.py index 6bd57f10..9d2249d6 100644 --- a/popper/gen3.py +++ b/popper/gen3.py @@ -6,6 +6,7 @@ import clingo import clingo.script +import clingo.configuration from .abs_generate import Generator as AbstractGenerator from .resources import resource_string @@ -63,7 +64,7 @@ def __init__(self, settings, bkcons=[]): solver = self.init_solver(encoding) self.solver = solver - def init_solver(self, encoding): + def init_solver(self, encoding) -> clingo.Control: if self.settings.single_solve: solver = clingo.Control(['--heuristic=Domain', '-Wnone']) else: @@ -76,6 +77,8 @@ def init_solver(self, encoding): #sum{K+1,Clause : body_size(Clause,K)} != n. """ solver.add('number_of_literals', ['n'], NUM_OF_LITERALS) + assert isinstance(solver.configuration.solve, + clingo.configuration.Configuration) solver.configuration.solve.models = 0 solver.add('base', [], encoding) solver.ground([('base', [])])