Support loading *.aig files in binary format

masinag · masinag · commit cf925818f96d · 2023-01-14T18:40:35.000+01:00
diff --git a/aiger/parser.py b/aiger/parser.py
@@ -1,30 +1,31 @@
-import io
 import re
 from collections import defaultdict
 from functools import reduce
 from typing import Mapping, List, Optional
+from uuid import uuid1
 
 import attr
 import funcy as fn
 from bidict import bidict
+from sortedcontainers import SortedDict
 from toposort import toposort_flatten
-from uuid import uuid1
-from sortedcontainers import SortedList, SortedSet, SortedDict
 
 import aiger as A
 
 
 @attr.s(auto_attribs=True, repr=False)
 class Header:
+    binary_mode: bool  # True for an 'aig' header, False for 'aag'
     max_var_index: int
     num_inputs: int
     num_latches: int
     num_outputs: int
     num_ands: int
 
     def __repr__(self):
-        return f"aag {self.max_var_index} {self.num_inputs} " \
-            f"{self.num_latches} {self.num_outputs} {self.num_ands}"
+        mode = 'aig' if self.binary_mode else 'aag'
+        return f"{mode} {self.max_var_index} {self.num_inputs} " \
+               f"{self.num_latches} {self.num_outputs} {self.num_ands}"
 
 
 NOT_DONE_PARSING_ERROR = "Parsing rules exhausted at line {}!\n{}"
@@ -79,28 +80,41 @@ def remaining_inputs(self):
         return self.header.num_inputs - len(self.inputs)
 
 
-HEADER_PATTERN = re.compile(r"aag (\d+) (\d+) (\d+) (\d+) (\d+)\n")
+def _consume_stream(stream, delim) -> str:
+    """Read bytes through the next delim and return them ASCII-decoded."""
+    stop = ord(delim)
+    buf = bytearray()
+    # An exhausted stream yields the delimiter, so the loop always ends.
+    while not buf or buf[-1] != stop:
+        buf.append(next(stream, stop))
+    return buf.decode('ascii')
+
 
+HEADER_PATTERN = re.compile(r"(a[ai]g) (\d+) (\d+) (\d+) (\d+) (\d+)\n")
 
-def parse_header(state, line) -> bool:
+
+def parse_header(state, stream) -> bool:
     if state.header is not None:
         return False
 
+    line = _consume_stream(stream, '\n')
     match = HEADER_PATTERN.match(line)
     if not match:
-        raise ValueError(f"Failed to parse aag HEADER. {line}")
+        raise ValueError(f"Failed to parse aag/aig HEADER. {line}")
 
     try:
-        ids = fn.lmap(int, match.groups())
+        binary_mode = match.group(1) == 'aig'
+        ids = fn.lmap(int, match.groups()[1:])
 
         if any(x < 0 for x in ids):
-            raise ValueError("Indicies must be positive!")
+            raise ValueError("Indices must be positive!")
 
         max_idx, nin, nlatch, nout, nand = ids
         if nin + nlatch + nand > max_idx:
             raise ValueError("Sum of claimed indices greater than max.")
 
         state.header = Header(
+            binary_mode=binary_mode,
             max_var_index=max_idx,
             num_inputs=nin,
             num_latches=nlatch,
@@ -116,21 +130,38 @@ def parse_header(state, line) -> bool:
 IO_PATTERN = re.compile(r"(\d+)\s*\n")
 
 
-def parse_input(state, line) -> bool:
-    match = IO_PATTERN.match(line)
-
-    if match is None or state.remaining_inputs <= 0:
-        return False
-    lit = int(line)
+def _add_input(state, lit):
     state.inputs.append(lit)
     state.nodes[lit] = set()
-    return True
 
 
-def parse_output(state, line) -> bool:
+def parse_input(state, stream) -> bool:
+    if state.remaining_inputs <= 0:
+        return False
+
+    if state.header.binary_mode:  # no input lines: literals are 2, 4, ...
+        for lit in range(2, 2 * (state.header.num_inputs + 1), 2):
+            _add_input(state, lit)
+        return False  # nothing consumed; move on to the next rule
+
+    line = _consume_stream(stream, '\n')
     match = IO_PATTERN.match(line)
-    if match is None or state.remaining_outputs <= 0:
+
+    if match is None:
+        raise ValueError(f"Expecting an input: {line}")
+
+    _add_input(state, int(line))
+    return True
+
+
+def parse_output(state, stream) -> bool:
+    if state.remaining_outputs <= 0:
         return False
+
+    line = _consume_stream(stream, '\n')
+    match = IO_PATTERN.match(line)
+    if match is None:
+        raise ValueError(f"Expecting an output: {line}")
     lit = int(line)
     state.outputs.append(lit)
     if lit & 1:
@@ -139,17 +170,28 @@ def parse_output(state, line) -> bool:
 
 
 LATCH_PATTERN = re.compile(r"(\d+) (\d+)(?: (\d+))?\n")
+LATCH_PATTERN_BINARY = re.compile(r"(\d+)(?: (\d+))?\n")
 
 
-def parse_latch(state, line) -> bool:
+def parse_latch(state, stream) -> bool:
     if state.remaining_latches <= 0:
         return False
 
-    match = LATCH_PATTERN.match(line)
-    if match is None:
-        raise ValueError("Expecting a latch: {line}")
+    line = _consume_stream(stream, '\n')
+
+    if state.header.binary_mode:
+        match = LATCH_PATTERN_BINARY.match(line)
+        if match is None:
+            raise ValueError(f"Expecting a latch: {line}")
+        idx = state.header.num_inputs + len(state.latches) + 1
+        lit = 2 * idx
+        elems = (lit,) + match.groups()
+    else:
+        match = LATCH_PATTERN.match(line)
+        if match is None:
+            raise ValueError(f"Expecting a latch: {line}")
+        elems = match.groups()
 
-    elems = match.groups()
     if elems[2] is None:
         elems = elems[:2] + (0,)
     elems = fn.lmap(int, elems)
@@ -165,30 +207,71 @@ def parse_latch(state, line) -> bool:
 AND_PATTERN = re.compile(r"(\d+) (\d+) (\d+)\s*\n")
 
 
-def parse_and(state, line) -> bool:
-    if state.header.num_ands <= 0:
-        return False
+def _read_delta(data):
+    """Decode one delta: 7 payload bits per byte, low group first."""
+    delta = 0
+    for i in range(6):
+        ch = next(data, None)
+        if ch is None:  # truncated input is a parse error, not StopIteration
+            raise ValueError("Unexpected end of stream in delta encoding")
+        if (ch & 0x80) == 0:  # high bit clear: this is the final byte
+            break
+        if i == 5:
+            raise ValueError("Invalid byte in delta encoding")
+        delta |= (ch & 0x7f) << (7 * i)
+    if i == 5 and ch >= 8:
+        raise ValueError("Invalid byte in delta encoding")
+    return delta | (ch << (7 * i))
 
-    match = AND_PATTERN.match(line)
-    if match is None:
-        return False
 
-    elems = fn.lmap(int, match.groups())
+def _add_and(state, elems):
+    elems = fn.lmap(int, elems)
     state.header.num_ands -= 1
     deps = set(elems[1:])
     state.nodes[elems[0]] = deps
     for dep in deps:
         if dep & 1:
             state.nodes[dep] = {dep ^ 1}
+
+
+def parse_and(state, stream) -> bool:
+    """Parse and-gates: one ASCII line, or every delta-coded binary gate."""
+    header = state.header
+    if header.num_ands <= 0:
+        return False
+
+    if not header.binary_mode:
+        line = _consume_stream(stream, '\n')
+        match = AND_PATTERN.match(line)
+        if match is None:
+            raise ValueError(f"Expecting an and: {line}")
+        _add_and(state, match.groups())
+    else:
+        # Binary gates carry no lhs: literals continue after inputs/latches.
+        first = 2 * (header.num_inputs + header.num_latches) + 2
+        # _add_and decrements num_ands, so fix the range before looping.
+        for lhs in range(first, first + 2 * header.num_ands, 2):
+            delta0 = _read_delta(stream)
+            if delta0 > lhs:
+                raise ValueError(f"Invalid lhs {lhs} or delta {delta0}")
+            rhs0 = lhs - delta0
+            delta1 = _read_delta(stream)
+            if delta1 > rhs0:
+                raise ValueError(f"Invalid rhs0 {rhs0} or delta {delta1}")
+            _add_and(state, (lhs, rhs0, rhs0 - delta1))
     return True
 
 
 SYM_PATTERN = re.compile(r"([ilo])(\d+) (.*)\s*\n")
 
 
-def parse_symbol(state, line) -> bool:
+def parse_symbol(state, stream) -> bool:
+    line = _consume_stream(stream, '\n')
     match = SYM_PATTERN.match(line)
     if match is None:
+        # We might have consumed the 'c' starting the comments section
+        if line.rstrip() == 'c':
+            state.comments = []
         return False
 
     kind, idx, name = match.groups()
@@ -202,7 +285,8 @@ def parse_symbol(state, line) -> bool:
     return True
 
 
-def parse_comment(state, line) -> bool:
+def parse_comment(state, stream) -> bool:
+    line = _consume_stream(stream, '\n')
     if state.comments is not None:
         state.comments.append(line.rstrip())
     elif line.rstrip() == 'c':
@@ -227,20 +311,25 @@ def finish_table(table, keys):
     return {table[i]: key for i, key in enumerate(keys)}
 
 
-def parse(lines, to_aig: bool = True):
-    if isinstance(lines, str):
-        lines = io.StringIO(lines)
+def parse(stream):
+    if isinstance(stream, list):
+        stream = ''.join(stream)
+    if isinstance(stream, str):
+        stream = bytes(stream, 'ascii')
+    stream = iter(stream)
 
     state = State()
     parsers = parse_seq()
     parser = next(parsers)
 
-    for i, line in enumerate(lines):
-        while not parser(state, line):
+    i = 0
+    while stream.__length_hint__() > 0:
+        i += 1
+        while not parser(state, stream):
             parser = next(parsers, None)
 
             if parser is None:
-                raise ValueError(NOT_DONE_PARSING_ERROR.format(i + 1, state))
+                raise ValueError(NOT_DONE_PARSING_ERROR.format(i, state))
 
     if parser not in (parse_header, parse_output, parse_comment, parse_symbol):
         raise ValueError(DONE_PARSING_ERROR.format(state))
@@ -284,9 +373,9 @@ def parse(lines, to_aig: bool = True):
     )
 
 
-def load(path: str, to_aig: bool = True):
-    with open(path, 'r') as f:
-        return parse(''.join(f.readlines()), to_aig=to_aig)
+def load(path: str):  # NOTE(review): to_aig was dropped; check callers
+    with open(path, 'rb') as f:  # raw bytes: parse() iterates byte values
+        return parse(f.read())
 
 
 __all__ = ['load', 'parse']
diff --git a/tests/test_parser.py b/tests/test_parser.py
@@ -142,10 +142,45 @@ def test_io_order():
 o1 oa
 """)
 
-
     data = {'a': False, 'b': True}
     assert circ1(data) \
-        == circ2(data) \
-        == circ3(data) \
-        == circ4(data)
+           == circ2(data) \
+           == circ3(data) \
+           == circ4(data)
+
 
+# ----------- BINARY FILE PARSER TESTS ----------------
+
+@given(st.data())
+def test_smoke1_aig(data):
+    """Loaded tests/aig/test1.aig must evaluate like the parsed TEST1."""
+    expected = aigp.parse(TEST1)
+    inputs = {f'{i}': data.draw(st.booleans()) for i in expected.inputs}
+    assert expected(inputs) == aigp.load("tests/aig/test1.aig")(inputs)
+
+
+@given(st.data())
+def test_smoke2_aig(data):
+    """Loaded tests/aig/test2.aig must evaluate like the parsed TEST2."""
+    expected = aigp.parse(TEST2)
+    inputs = {f'{i}': data.draw(st.booleans()) for i in expected.inputs}
+    assert expected(inputs) == aigp.load("tests/aig/test2.aig")(inputs)
+
+
+def test_mutex_example_smoke_aig():
+    aigp.load('tests/aig/mutex_converted.aig')  # smoke: must not raise
+
+
+def test_degenerate_smoke_aig():
+    """Degenerate binary files: constant outputs and an empty circuit."""
+    import aiger as A
+
+    const_false = A.BoolExpr(A.load("tests/aig/test_degenerate1.aig"))
+    assert const_false({}) is False
+    const_true = A.BoolExpr(A.load("tests/aig/test_degenerate2.aig"))
+    assert const_true({}) is True
+    empty = A.load("tests/aig/test_degenerate3.aig")
+    assert len(empty.node_map) == 0
+    assert empty.inputs == empty.outputs == empty.latches == set()
+    circ4 = A.load("tests/aig/test_degenerate4.aig")
+    assert not any(circ4({})[0].values())