From 4b3b5fe2ffa1becd4d715db13871c9df039a6afd Mon Sep 17 00:00:00 2001
From: Robert Kern
Date: Thu, 27 Feb 2014 17:15:28 +0000
Subject: [PATCH 1/2] ENH: Allow one to replace the annotation functions.

---
Note: the line breaks and indentation of this series were restored from a
whitespace-collapsed copy. Context-line whitespace is inferred, so use
`git apply --ignore-whitespace` if a hunk fails to match.

 refcycle/object_graph.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/refcycle/object_graph.py b/refcycle/object_graph.py
index 35b8578..10a073b 100755
--- a/refcycle/object_graph.py
+++ b/refcycle/object_graph.py
@@ -246,11 +246,20 @@ def __new__(cls, objects=()):
     ### Annotations.
     ###########################################################################
 
-    def annotated(self):
+    def annotated(self, annotated_references=annotated_references,
+                  object_annotation=object_annotation,
+                  graph_class=AnnotatedGraph):
         """
         Annotate this graph, returning an AnnotatedGraph object
         with the same structure.
 
+        annotated_references takes an object and a mapping from
+        referents of that object to edge annotations for that reference.
+        object_annotation takes an object and returns the annotation for
+        that object.
+        graph_class is the (sub)class of the ``AnnotatedGraph`` to
+        construct.
+
         """
         # Build up dictionary of edge annotations.
         edge_annotations = {}
@@ -285,7 +294,7 @@ def annotated(self):
             for edge in self.edges
         ]
 
-        return AnnotatedGraph(
+        return graph_class(
             vertices=annotated_vertices,
             edges=annotated_edges,
         )
From cf355fe31b8d61b5fffba6836d6ac83e65316cc3 Mon Sep 17 00:00:00 2001
From: Robert Kern
Date: Fri, 28 Mar 2014 13:30:03 +0000
Subject: [PATCH 2/2] ENH: JSONL export/import.

---
Changes relative to the original submission:
  - export_jsonl now encodes each line to UTF-8 itself: json.dump emits
    text, but the target file is opened in binary mode, which raises
    TypeError on Python 3.
  - from_jsonl_file decodes bytes lines before parsing and skips blank
    lines.
  - import_jsonl closes the GzipFile wrapper it creates.
  - Added test_from_jsonl_file_bytes_and_blank_lines.
The blob ids on the "index" lines are those of the original submission.
Context-line whitespace is inferred (see the note in patch 1/2).

 refcycle/annotated_graph.py           | 111 +++++++++++++++++++++++
 refcycle/test/test_annotated_graph.py | 124 ++++++++++++++++++++++++++
 2 files changed, 235 insertions(+)

diff --git a/refcycle/annotated_graph.py b/refcycle/annotated_graph.py
index 092fba6..814e7e2 100644
--- a/refcycle/annotated_graph.py
+++ b/refcycle/annotated_graph.py
@@ -14,6 +14,7 @@ from __future__ import unicode_literals
 
 
 import collections
+import gzip
 import json
 import os
 import subprocess
@@ -222,6 +223,50 @@ def to_json(self):
         # Ensure that we always return unicode output on Python 2.
         return six.text_type(json.dumps(obj, ensure_ascii=False))
 
+    def _jsonl_lines(self):
+        """
+        Generate the JSONL form of this graph as text, one
+        newline-terminated JSON object at a time.
+
+        """
+        def encode(obj):
+            text = json.dumps(
+                obj,
+                ensure_ascii=True,
+                separators=(",", ":"),
+                check_circular=False,
+            )
+            # json.dumps returns a byte string on Python 2; always hand
+            # back text so that callers can choose the encoding.
+            return six.text_type(text) + "\n"
+
+        # Edges are distinguished from vertices only by the presence of
+        # 'head' and 'tail' keys.
+        for vertex in self.vertices:
+            yield encode({
+                "id": vertex.id,
+                "annotation": vertex.annotation,
+            })
+        for edge in self._edges:
+            yield encode({
+                "id": edge.id,
+                "annotation": edge.annotation,
+                "head": edge.head,
+                "tail": edge.tail,
+            })
+
+    def to_jsonl_file(self, fp):
+        """
+        Write JSONL (one JSON object per line) to a file-like object.
+
+        The file-like object is given text (``str`` on Python 3,
+        ``unicode`` on Python 2); use ``export_jsonl`` to write bytes
+        to disk.
+
+        """
+        for line in self._jsonl_lines():
+            fp.write(line)
+
     @classmethod
     def from_json(cls, json_graph):
         """
@@ -250,6 +295,39 @@ def from_json(cls, json_graph):
 
         return cls(vertices=vertices, edges=edges)
 
+    @classmethod
+    def from_jsonl_file(cls, fp):
+        """
+        Reconstruct the graph from a graph exported to JSONL.
+
+        ``fp`` may yield either text or UTF-8 encoded bytes lines.
+        Blank lines are ignored.
+
+        """
+        vertices = []
+        edges = []
+        for line in fp:
+            if isinstance(line, bytes):
+                # Binary files yield bytes, which json.loads does not
+                # accept on every supported Python version.
+                line = line.decode('utf-8')
+            if not line.strip():
+                continue
+            obj = json.loads(line)
+            if "head" in obj:
+                edges.append(AnnotatedEdge(
+                    id=obj["id"],
+                    annotation=obj["annotation"],
+                    head=obj["head"],
+                    tail=obj["tail"],
+                ))
+            else:
+                vertices.append(AnnotatedVertex(
+                    id=obj["id"],
+                    annotation=obj["annotation"],
+                ))
+        return cls(vertices=vertices, edges=edges)
+
     def export_json(self, filename):
         """
         Export graph in JSON form to the given file.
@@ -270,6 +348,39 @@ def import_json(cls, filename):
             json_graph = f.read().decode('utf-8')
         return cls.from_json(json_graph)
 
+    def export_jsonl(self, filename):
+        """
+        Export graph in JSONL form to the given file.
+
+        If the file name ends in ``.gz``, then it will be written
+        gzip-encoded.
+
+        """
+        if filename.endswith('.gz'):
+            opener = gzip.GzipFile
+        else:
+            opener = open
+        with opener(filename, 'wb') as f:
+            # The file is binary, so encode each line explicitly.
+            for line in self._jsonl_lines():
+                f.write(line.encode('utf-8'))
+
+    @classmethod
+    def import_jsonl(cls, filename):
+        """
+        Import graph from the given file. The file is expected
+        to contain UTF-8 encoded JSONL data. It may be gzip-encoded.
+
+        """
+        with open(filename, 'rb') as f:
+            perhaps_magic = f.read(2)
+            f.seek(0, 0)
+            if perhaps_magic != b'\037\213':
+                return cls.from_jsonl_file(f)
+            # This is a gzip file.
+            with gzip.GzipFile(fileobj=f) as fp:
+                return cls.from_jsonl_file(fp)
+
     ###########################################################################
     ### Graphviz output.
     ###########################################################################
diff --git a/refcycle/test/test_annotated_graph.py b/refcycle/test/test_annotated_graph.py
index b0e5c7c..7903699 100644
--- a/refcycle/test/test_annotated_graph.py
+++ b/refcycle/test/test_annotated_graph.py
@@ -148,3 +148,127 @@ def test_dot_quoting(self):
         self.assertIsInstance(dot, six.text_type)
         self.assertIn(r'"vertex \"1\""', dot)
         self.assertIn(r'"from \"1\" to \"2\""', dot)
+
+    def test_to_from_jsonl(self):
+        graph = AnnotatedGraph(
+            vertices=[
+                AnnotatedVertex(id=0, annotation={'label': 'vertex "1"'}),
+                AnnotatedVertex(id=1, annotation={'label': 'vertex "2"'}),
+            ],
+            edges=[
+                AnnotatedEdge(
+                    id=3,
+                    annotation={'label': 'from "1" to "2"'},
+                    head=0,
+                    tail=1,
+                ),
+            ],
+        )
+        out_fp = six.StringIO()
+        graph.to_jsonl_file(out_fp)
+        jsonl_text = out_fp.getvalue()
+        in_fp = six.StringIO(jsonl_text)
+        reconstructed = AnnotatedGraph.from_jsonl_file(in_fp)
+
+        vertex_labels = set(v.annotation['label']
+                            for v in reconstructed.vertices)
+        edge_labels = set(e.annotation['label']
+                          for e in reconstructed.edges)
+
+        self.assertIsInstance(reconstructed, AnnotatedGraph)
+        self.assertEqual(reconstructed.vertices, graph.vertices)
+        self.assertEqual(reconstructed.edges, graph.edges)
+        self.assertEqual(vertex_labels, {'vertex "1"', 'vertex "2"'})
+        self.assertEqual(edge_labels, {'from "1" to "2"'})
+
+    def test_from_jsonl_file_bytes_and_blank_lines(self):
+        in_fp = six.BytesIO(
+            b'{"id":0,"annotation":{"label":"a"}}\n'
+            b'\n'
+            b'{"id":1,"annotation":{"label":"b"}}\n'
+            b'{"id":2,"annotation":{"label":"a-b"},"head":0,"tail":1}\n'
+        )
+        reconstructed = AnnotatedGraph.from_jsonl_file(in_fp)
+
+        vertex_labels = set(v.annotation['label']
+                            for v in reconstructed.vertices)
+        edge_labels = set(e.annotation['label']
+                          for e in reconstructed.edges)
+
+        self.assertEqual(vertex_labels, {'a', 'b'})
+        self.assertEqual(edge_labels, {'a-b'})
+
+    def test_export_import_jsonl(self):
+        graph = AnnotatedGraph(
+            vertices=[
+                AnnotatedVertex(id=0, annotation={'label': 'vertex "1"'}),
+                AnnotatedVertex(id=1, annotation={'label': 'vertex "2"'}),
+            ],
+            edges=[
+                AnnotatedEdge(
+                    id=3,
+                    annotation={'label': 'from "1" to "2"'},
+                    head=0,
+                    tail=1,
+                ),
+            ],
+        )
+
+        tempdir = tempfile.mkdtemp()
+        try:
+            filename = os.path.join(tempdir, 'output.jsonl')
+            graph.export_jsonl(filename)
+            self.assertTrue(os.path.exists(filename))
+            reconstructed = AnnotatedGraph.import_jsonl(filename)
+        finally:
+            shutil.rmtree(tempdir)
+
+        vertex_labels = set(v.annotation['label']
+                            for v in reconstructed.vertices)
+        edge_labels = set(e.annotation['label']
+                          for e in reconstructed.edges)
+
+        self.assertIsInstance(reconstructed, AnnotatedGraph)
+        self.assertEqual(reconstructed.vertices, graph.vertices)
+        self.assertEqual(reconstructed.edges, graph.edges)
+        self.assertEqual(vertex_labels, {'vertex "1"', 'vertex "2"'})
+        self.assertEqual(edge_labels, {'from "1" to "2"'})
+
+    def test_export_import_jsonl_gz(self):
+        graph = AnnotatedGraph(
+            vertices=[
+                AnnotatedVertex(id=0, annotation={'label': 'vertex "1"'}),
+                AnnotatedVertex(id=1, annotation={'label': 'vertex "2"'}),
+            ],
+            edges=[
+                AnnotatedEdge(
+                    id=3,
+                    annotation={'label': 'from "1" to "2"'},
+                    head=0,
+                    tail=1,
+                ),
+            ],
+        )
+
+        tempdir = tempfile.mkdtemp()
+        try:
+            filename = os.path.join(tempdir, 'output.jsonl.gz')
+            graph.export_jsonl(filename)
+            self.assertTrue(os.path.exists(filename))
+            with open(filename, 'rb') as f:
+                perhaps_magic = f.read(2)
+            self.assertEqual(perhaps_magic, b'\037\213')
+            reconstructed = AnnotatedGraph.import_jsonl(filename)
+        finally:
+            shutil.rmtree(tempdir)
+
+        vertex_labels = set(v.annotation['label']
+                            for v in reconstructed.vertices)
+        edge_labels = set(e.annotation['label']
+                          for e in reconstructed.edges)
+
+        self.assertIsInstance(reconstructed, AnnotatedGraph)
+        self.assertEqual(reconstructed.vertices, graph.vertices)
+        self.assertEqual(reconstructed.edges, graph.edges)
+        self.assertEqual(vertex_labels, {'vertex "1"', 'vertex "2"'})
+        self.assertEqual(edge_labels, {'from "1" to "2"'})