14 changes: 14 additions & 0 deletions json2encoding_util.py
@@ -0,0 +1,14 @@
"""json2json will convert JSON compatible objects from one encoding
to UTF-8.
"""

from src.utils import json2encoding


def main():
    """Primary entry point for this script."""
    json2encoding.main()


if __name__ == "__main__":
    main()
14 changes: 14 additions & 0 deletions json2json_util.py
@@ -0,0 +1,14 @@
"""json2json will convert JSON compatible objects from one encoding
to UTF-8.
"""

from src.utils import json2json


def main():
    """Primary entry point for this script."""
    json2json.main()


if __name__ == "__main__":
    main()
108 changes: 108 additions & 0 deletions src/utils/json2encoding.py
@@ -0,0 +1,108 @@
"""Write a UTF-8 file to a different encoding."""

import argparse
import logging
import sys
import time

from typing import Final

# Set up logging.
logging.basicConfig(
    format="%(asctime)-15s %(levelname)s :: %(filename)s:%(lineno)s:%(funcName)s() :: %(message)s",  # noqa: E501
    datefmt="%Y-%m-%d %H:%M:%S",
    level="INFO",
    handlers=[
        logging.StreamHandler(),
    ],
)


# Default to UTC time.
logging.Formatter.converter = time.gmtime

logger = logging.getLogger(__name__)


supported_encodings: Final[list[str]] = [
    "UTF-8",
    "UTF-16",
    "UTF-16BE",
    "UTF-32",
    "UTF-32BE",
    "SHIFT-JIS",
    "BIG5",
]
Review comment from the PR author:
        "UTF-8",
        "UTF-16",
        "UTF-16LE",
        "UTF-16BE",
        "UTF-32",
        "UTF-32LE",
        "UTF-32BE",
        "SHIFT-JIS",
        "BIG5",

I think these actually are all different, and one main difference seems to be the inclusion of the byte order mark (BOM). We should check.
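If it helps that check, here is a quick illustrative snippet (not part of the diff) that encodes a single character under each codec and prints the raw bytes; the output shown assumes CPython on a little-endian machine:

    # Illustrative only: show which codecs prepend a byte order mark (BOM).
    for enc in ("UTF-16", "UTF-16LE", "UTF-16BE", "UTF-32", "UTF-32LE", "UTF-32BE"):
        print(f"{enc:<10} {'A'.encode(enc).hex()}")

    # UTF-16     fffe4100          <- BOM in native (here little-endian) byte order
    # UTF-16LE   4100              <- no BOM
    # UTF-16BE   0041              <- no BOM
    # UTF-32     fffe000041000000  <- BOM in native byte order
    # UTF-32LE   41000000          <- no BOM
    # UTF-32BE   00000041          <- no BOM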



def write_json(path: str, output: str, encoding: str):
    """Write the UTF-8 file at `path` to `output` in the given encoding."""
    if encoding.upper() not in supported_encodings:
        logger.error(
            "encoding: '%s' not supported, must be one of: '%s'",
            encoding,
            ", ".join(supported_encodings),
        )
        sys.exit(1)
    # Read the source as UTF-8, then re-encode it for the binary write below.
    with open(path, "r", encoding="UTF-8") as input_file:
        data = input_file.read()
    with open(output, "wb") as output_file:
        output_file.write(data.encode(encoding))


def main():
    """Primary entry point for this script."""
    parser = argparse.ArgumentParser(
        prog="json2encoding",
        description="read a plaintext file (usually JSON) and output it in a different encoding",
        epilog="for more information visit https://github.com/ffdev-info/json-id",
    )
    parser.add_argument(
        "--debug",
        help="use debug logging",
        required=False,
        action="store_true",
    )
    parser.add_argument(
        "--input",
        "-i",
        help="file path to process",
        required=False,
    )
    parser.add_argument(
        "--output",
        "-o",
        help="output file path",
        required=False,
    )
    parser.add_argument(
        "--encoding",
        "-e",
        help="encoding to output as",
        required=False,
    )
    parser.add_argument(
        "--list",
        "-l",
        help="list encodings",
        required=False,
        action="store_true",
    )
    args = parser.parse_args()
    logging.getLogger(__name__).setLevel(logging.DEBUG if args.debug else logging.INFO)
    logger.debug("debug logging is configured")
    if args.list:
        print("available encodings:", ", ".join(supported_encodings))
        sys.exit()
    # Input, output, and encoding are all needed to do any work.
    if not (args.input and args.output and args.encoding):
        parser.print_help(sys.stderr)
        sys.exit()
    write_json(path=args.input, output=args.output, encoding=args.encoding)


if __name__ == "__main__":
main()
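For reference, a sketch of how the new utility might be invoked via the json2encoding_util.py wrapper (flags as defined by the parser above; the input and output file names here are hypothetical):

    python json2encoding_util.py --list
    python json2encoding_util.py --input data.json --output data_utf16.json --encoding UTF-16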
2 changes: 1 addition & 1 deletion src/utils/json2json.py
@@ -62,7 +62,7 @@ async def identify_plaintext_bytestream(path: str) -> Tuple[bool, str]:
 async def identify_json(paths: list[str]):
     """Identify objects."""
     for idx, path in enumerate(paths):
-        valid, data, _, _ = await identify_plaintext_bytestream(path)
+        valid, data = await identify_plaintext_bytestream(path)
         if not valid:
             continue
         print(json.dumps(data, indent=2))