Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 91 additions & 0 deletions refcycle/annotated_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from __future__ import unicode_literals

import collections
import gzip
import json
import os
import subprocess
Expand Down Expand Up @@ -222,6 +223,40 @@ def to_json(self):
# Ensure that we always return unicode output on Python 2.
return six.text_type(json.dumps(obj, ensure_ascii=False))

def to_jsonl_file(self, fp):
    """
    Serialise the graph as JSONL: one compact JSON object per line.

    All vertices are written first, each as an object with ``id`` and
    ``annotation`` keys.  All edges follow, each with the additional
    ``head`` and ``tail`` keys.  Every object is followed by a newline.

    ``fp`` is a file-like object whose ``write`` method accepts text.

    """
    def emit(record):
        # One record per line, with compact separators.
        json.dump(
            record,
            fp,
            ensure_ascii=True,
            separators=(",", ":"),
            check_circular=False,
        )
        fp.write("\n")

    for node in self.vertices:
        emit({"id": node.id, "annotation": node.annotation})

    # Edges are distinguished from vertices only by the presence of
    # 'head' and 'tail' keys.
    for link in self._edges:
        emit({
            "id": link.id,
            "annotation": link.annotation,
            "head": link.head,
            "tail": link.tail,
        })

@classmethod
def from_json(cls, json_graph):
"""
Expand Down Expand Up @@ -250,6 +285,30 @@ def from_json(cls, json_graph):

return cls(vertices=vertices, edges=edges)

@classmethod
def from_jsonl_file(cls, fp):
    """
    Reconstruct the graph from a graph exported to JSONL.

    ``fp`` is an iterable of lines (typically an open file), each
    holding one JSON object.  An object with a ``head`` key is read as
    an edge and must also provide ``id``, ``annotation`` and ``tail``.
    Any other object is read as a vertex and must provide ``id`` and
    ``annotation``.  Blank or whitespace-only lines are ignored.

    Returns ``cls(vertices=..., edges=...)`` built from the records, in
    the order they were read.

    A line that is not valid JSON makes ``json.loads`` raise
    (``ValueError`` or a subclass); a record lacking a required key
    raises ``KeyError``.

    """
    vertices = []
    edges = []
    for line in fp:
        # Tolerate empty lines (for example a stray trailing newline)
        # instead of failing inside json.loads.
        if not line.strip():
            continue
        obj = json.loads(line)
        # Only edge records carry a 'head' key.
        if "head" in obj:
            edges.append(AnnotatedEdge(
                id=obj["id"],
                annotation=obj["annotation"],
                head=obj["head"],
                tail=obj["tail"],
            ))
        else:
            vertices.append(AnnotatedVertex(
                id=obj["id"],
                annotation=obj["annotation"],
            ))
    return cls(vertices=vertices, edges=edges)

def export_json(self, filename):
"""
Export graph in JSON form to the given file.
Expand All @@ -270,6 +329,38 @@ def import_json(cls, filename):
json_graph = f.read().decode('utf-8')
return cls.from_json(json_graph)

def export_jsonl(self, filename):
    """
    Export graph in JSONL form to the given file.

    If the file name ends in ``.gz``, then it will be written
    gzip-encoded.  The JSONL text is stored UTF-8 encoded.

    ``filename`` is the path of the file to create or overwrite.

    """
    # Imported here so this method carries its own dependency.
    import codecs

    opener = gzip.GzipFile if filename.endswith('.gz') else open
    with opener(filename, 'wb') as f:
        # The file is opened in binary mode, but to_jsonl_file writes
        # text.  Passing the binary handle directly fails on Python 3
        # with a TypeError, so encode through a UTF-8 stream writer.
        self.to_jsonl_file(codecs.getwriter('utf-8')(f))

@classmethod
def import_jsonl(cls, filename):
    """
    Load a graph from the JSONL file at ``filename``.

    The file is expected to hold UTF-8 encoded JSONL data, optionally
    gzip-compressed.  Compression is detected from the first two bytes
    of the file rather than from the file name.

    Returns whatever ``cls.from_jsonl_file`` builds from the file's
    lines.

    """
    gzip_magic = b'\037\213'
    with open(filename, 'rb') as raw:
        is_gzipped = raw.read(2) == gzip_magic
        # Rewind so the reader sees the file from its first byte.
        raw.seek(0, 0)
        source = gzip.GzipFile(fileobj=raw) if is_gzipped else raw
        return cls.from_jsonl_file(source)

###########################################################################
### Graphviz output.
###########################################################################
Expand Down
13 changes: 11 additions & 2 deletions refcycle/object_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,11 +246,20 @@ def __new__(cls, objects=()):
### Annotations.
###########################################################################

def annotated(self):
def annotated(self, annotated_references=annotated_references,
object_annotation=object_annotation,
graph_class=AnnotatedGraph):
"""
Annotate this graph, returning an AnnotatedGraph object
with the same structure.

annotated_references takes an object and a mapping from
referents of that object to edge annotations for that reference.
object_annotation takes an object and returns the annotation for
that object.
graph_class is the (sub)class of the ``AnnotatedGraph`` to
construct.

"""
# Build up dictionary of edge annotations.
edge_annotations = {}
Expand Down Expand Up @@ -285,7 +294,7 @@ def annotated(self):
for edge in self.edges
]

return AnnotatedGraph(
return graph_class(
vertices=annotated_vertices,
edges=annotated_edges,
)
Expand Down
107 changes: 107 additions & 0 deletions refcycle/test/test_annotated_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,3 +148,110 @@ def test_dot_quoting(self):
self.assertIsInstance(dot, six.text_type)
self.assertIn(r'"vertex \"1\""', dot)
self.assertIn(r'"from \"1\" to \"2\""', dot)

def test_to_from_jsonl(self):
    # Round-trip a small graph through an in-memory JSONL buffer and
    # check that structure and annotations survive.
    first = AnnotatedVertex(id=0, annotation={'label': 'vertex "1"'})
    second = AnnotatedVertex(id=1, annotation={'label': 'vertex "2"'})
    link = AnnotatedEdge(
        id=3,
        annotation={'label': 'from "1" to "2"'},
        head=0,
        tail=1,
    )
    graph = AnnotatedGraph(vertices=[first, second], edges=[link])

    sink = six.StringIO()
    graph.to_jsonl_file(sink)
    serialized = sink.getvalue()
    source = six.StringIO(serialized)
    reconstructed = AnnotatedGraph.from_jsonl_file(source)

    vertex_labels = {
        vertex.annotation['label'] for vertex in reconstructed.vertices
    }
    edge_labels = {
        edge.annotation['label'] for edge in reconstructed.edges
    }

    self.assertIsInstance(reconstructed, AnnotatedGraph)
    self.assertEqual(reconstructed.vertices, graph.vertices)
    self.assertEqual(reconstructed.edges, graph.edges)
    self.assertEqual(vertex_labels, {'vertex "1"', 'vertex "2"'})
    self.assertEqual(edge_labels, {'from "1" to "2"'})

def test_export_import_jsonl(self):
    # Round-trip a small graph through an uncompressed JSONL file on
    # disk and check that structure and annotations survive.
    first = AnnotatedVertex(id=0, annotation={'label': 'vertex "1"'})
    second = AnnotatedVertex(id=1, annotation={'label': 'vertex "2"'})
    link = AnnotatedEdge(
        id=3,
        annotation={'label': 'from "1" to "2"'},
        head=0,
        tail=1,
    )
    graph = AnnotatedGraph(vertices=[first, second], edges=[link])

    scratch = tempfile.mkdtemp()
    try:
        target = os.path.join(scratch, 'output.jsonl')
        graph.export_jsonl(target)
        self.assertTrue(os.path.exists(target))
        reconstructed = AnnotatedGraph.import_jsonl(target)
    finally:
        # Remove the scratch directory whether or not the test passed.
        shutil.rmtree(scratch)

    vertex_labels = {
        vertex.annotation['label'] for vertex in reconstructed.vertices
    }
    edge_labels = {
        edge.annotation['label'] for edge in reconstructed.edges
    }

    self.assertIsInstance(reconstructed, AnnotatedGraph)
    self.assertEqual(reconstructed.vertices, graph.vertices)
    self.assertEqual(reconstructed.edges, graph.edges)
    self.assertEqual(vertex_labels, {'vertex "1"', 'vertex "2"'})
    self.assertEqual(edge_labels, {'from "1" to "2"'})

def test_export_import_jsonl_gz(self):
    # Round-trip a small graph through a gzip-compressed JSONL file,
    # also checking that the file on disk really is gzip data.
    first = AnnotatedVertex(id=0, annotation={'label': 'vertex "1"'})
    second = AnnotatedVertex(id=1, annotation={'label': 'vertex "2"'})
    link = AnnotatedEdge(
        id=3,
        annotation={'label': 'from "1" to "2"'},
        head=0,
        tail=1,
    )
    graph = AnnotatedGraph(vertices=[first, second], edges=[link])

    scratch = tempfile.mkdtemp()
    try:
        target = os.path.join(scratch, 'output.jsonl.gz')
        graph.export_jsonl(target)
        self.assertTrue(os.path.exists(target))
        # The first two bytes of a gzip stream are the magic number.
        with open(target, 'rb') as raw:
            leading_bytes = raw.read(2)
        self.assertEqual(leading_bytes, b'\037\213')
        reconstructed = AnnotatedGraph.import_jsonl(target)
    finally:
        # Remove the scratch directory whether or not the test passed.
        shutil.rmtree(scratch)

    vertex_labels = {
        vertex.annotation['label'] for vertex in reconstructed.vertices
    }
    edge_labels = {
        edge.annotation['label'] for edge in reconstructed.edges
    }

    self.assertIsInstance(reconstructed, AnnotatedGraph)
    self.assertEqual(reconstructed.vertices, graph.vertices)
    self.assertEqual(reconstructed.edges, graph.edges)
    self.assertEqual(vertex_labels, {'vertex "1"', 'vertex "2"'})
    self.assertEqual(edge_labels, {'from "1" to "2"'})