Skip to content
66 changes: 63 additions & 3 deletions src/jsonid/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,21 @@
from datetime import timezone

try:
import pronom
import registry_data
import version
except ModuleNotFoundError:
try:
from src.jsonid import registry_data, version
from src.jsonid import pronom, registry_data, version
except ModuleNotFoundError:
from jsonid import registry_data, version
from jsonid import pronom, registry_data, version

logger = logging.getLogger(__name__)


def exportJSON() -> None: # pylint: disable=C0103
"""Export to JSON."""
logger.debug("exporting registry ad JSON")
logger.debug("exporting registry as JSON")
data = registry_data.registry()
json_obj = []
id_ = {
Expand All @@ -35,3 +36,62 @@ def exportJSON() -> None: # pylint: disable=C0103
for datum in data:
json_obj.append(datum.json())
print(json.dumps(json_obj, indent=2))


def exportPRONOM() -> None:
    """Export a PRONOM compatible set of signatures.

    Every entry returned by ``registry_data.registry()`` is serialised,
    its markers are converted with ``pronom.process_markers`` and the
    result is wrapped in a ``pronom.Format``. Entries whose markers raise
    ``pronom.UnprocessableEntity`` are logged (error for the entry, debug
    for each marker) and skipped. The collected formats are then passed
    to ``pronom.process_formats_and_save`` with the output name
    ``abc.xml``.
    """
    logger.debug("exporting registry as PRONOM")
    formats = []

    for datum in registry_data.registry():
        # Serialise the entry once and reuse it for every field.
        entry = datum.json()
        id_ = entry["identifier"]
        name_ = entry["name"][0]["@en"]
        markers = entry["markers"]

        try:
            # A shallow copy is handed over; presumably so that
            # process_markers can consume its input without affecting
            # the list logged below -- TODO confirm.
            sequences = pronom.process_markers(markers.copy())
        except pronom.UnprocessableEntity as err:
            logger.error(
                "%s %s: cannot handle: %s",
                id_,
                name_,
                err,
            )
            for marker in markers:
                logger.debug("--- START ---")
                logger.debug("marker: %s", marker)
                logger.debug("--- END ---")
            continue

        formats.append(
            pronom.Format(
                id=0,
                name=name_,
                version="",
                puid=id_,
                mime="TODO",
                classification="structured text",  # TODO: magic
                external_signatures=[
                    pronom.ExternalSignature(
                        id=0,
                        signature="JSON",
                        type="TODO",
                    )
                ],
                internal_signatures=sequences,
                priorities=[],
            )
        )

    pronom.process_formats_and_save(formats, "abc.xml")


def exportPRONOMXML() -> None:
    """Export a PRONOM compatible set of signatures.

    NOTE(review): the visible body consists of this docstring only, so
    a call currently performs no work and returns ``None`` -- confirm
    whether an implementation is still to be added.
    """
30 changes: 30 additions & 0 deletions src/jsonid/export_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Helpers for the export functions."""

import datetime
from datetime import timezone
from typing import Final
from xml.dom.minidom import parseString

# strftime pattern used for every timestamp produced by this module.
UTC_TIME_FORMAT: Final[str] = "%Y-%m-%dT%H:%M:%SZ"


def get_utc_timestamp_now():
    """Return the current UTC time rendered with ``UTC_TIME_FORMAT``.

    Useful wherever a formatted 'now' timestamp is required.
    """
    current_utc = datetime.datetime.now(timezone.utc)
    formatted = current_utc.strftime(UTC_TIME_FORMAT)
    return formatted


def new_prettify(c):
    """Pretty-print XML and drop the blank lines minidom leaves behind.

    The input is parsed with ``parseString``, re-serialised with a
    two-space indent, and every line that is empty or whitespace-only
    is discarded before the remaining lines are joined with newlines.

    via: https://stackoverflow.com/a/14493981
    """
    document = parseString(c)
    pretty_text = document.toprettyxml(indent="  ")
    # str.strip yields "" (falsy) for blank lines, so filter removes them.
    kept_lines = filter(str.strip, pretty_text.split("\n"))
    return "\n".join(kept_lines)
25 changes: 17 additions & 8 deletions src/jsonid/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import logging
import time
from typing import Union
from typing import Final, Union

try:
import htm_template
Expand Down Expand Up @@ -75,6 +75,15 @@ def format_marker(marker_text: str, marker: dict) -> str:
return f"{marker_text}{marker_formatted}\n"


TYPE_BOOL: Final[str] = "bool"
TYPE_FLOAT: Final[str] = "float"
TYPE_INTEGER: Final[str] = "integer"
TYPE_LIST: Final[str] = "list"
TYPE_NONE: Final[str] = "NoneType"
TYPE_MAP: Final[str] = "map"
TYPE_STRING: Final[str] = "string"

# Maps a Python type's ``__name__`` to the label used when printing it.
_TYPE_SUBSTITUTIONS: Final[dict] = {
    "dict": TYPE_MAP,
    "int": TYPE_INTEGER,
    "list": TYPE_LIST,
    "str": TYPE_STRING,
    "float": TYPE_FLOAT,
    "bool": TYPE_BOOL,
    "NoneType": TYPE_NONE,
}


def substitute_type_text(replace_me: Union[str, type]):
    """Output a text substitution for a type that will otherwise not
    pretty-print.

    :param replace_me: a type such as ``dict`` or ``float``, or a value
        that is already text.
    :return: the matching ``TYPE_*`` label when the ``__name__`` of
        ``replace_me`` is one of dict, int, list, str, float, bool or
        NoneType; otherwise ``replace_me`` unchanged.
    """
    # Plain strings carry no ``__name__``; look it up defensively so they
    # are returned as-is instead of raising AttributeError.
    type_name = getattr(replace_me, "__name__", None)
    return _TYPE_SUBSTITUTIONS.get(type_name, replace_me)
Expand Down
4 changes: 3 additions & 1 deletion src/jsonid/jsonid.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ def main() -> None:
"--pronom",
help="return a PRONOM-centric view of the results",
required=False,
action="store_true",
)
parser.add_argument(
"--export",
Expand Down Expand Up @@ -220,7 +221,8 @@ def main() -> None:
if args.registry:
raise NotImplementedError("custom registry is not yet available")
if args.pronom:
raise NotImplementedError("pronom view is not yet implemented")
export.exportPRONOM()
sys.exit()
if args.language:
raise NotImplementedError("multiple languages are not yet implemented")
if args.export:
Expand Down
Loading
Loading