diff --git a/clams/__init__.py b/clams/__init__.py index 6943815..bd1753f 100644 --- a/clams/__init__.py +++ b/clams/__init__.py @@ -1,10 +1,8 @@ +import argparse import sys -from mmif import __specver__ - +import mmif from clams import develop -from clams.mmif_utils import source -from clams.mmif_utils import rewind from clams.app import * from clams.app import __all__ as app_all from clams.appmetadata import AppMetadata @@ -16,34 +14,41 @@ def prep_argparser(): - import argparse parser = argparse.ArgumentParser() parser.add_argument( '-v', '--version', action='version', - version=version_template.format(__version__, __specver__) + version=version_template.format(__version__, mmif.__specver__) ) subparsers = parser.add_subparsers(title='sub-command', dest='subcmd') - for subcmd_module in [source, rewind, develop]: - subcmd_name = subcmd_module.__name__.rsplit('.')[-1] - subcmd_parser = subcmd_module.prep_argparser(add_help=False) - subparsers.add_parser(subcmd_name, parents=[subcmd_parser], - help=subcmd_module.describe_argparser()[0], - description=subcmd_module.describe_argparser()[1], - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - return parser + return parser, subparsers def cli(): - parser = prep_argparser() + parser, subparsers = prep_argparser() + cli_modules = {} + # thinly wrap all `mmif` subcommands + # this is primarily for backward compatibility for `source` and `rewind` subcmds + to_register = list(mmif.find_all_modules('mmif.utils.cli')) + # then add my own subcommands + to_register.append(develop) + for cli_module in to_register: + cli_module_name = cli_module.__name__.rsplit('.')[-1] + cli_modules[cli_module_name] = cli_module + subcmd_parser = cli_module.prep_argparser(add_help=False) + subparsers.add_parser(cli_module_name, parents=[subcmd_parser], + help=cli_module.describe_argparser()[0], + description=cli_module.describe_argparser()[1], + formatter_class=argparse.RawDescriptionHelpFormatter, + ) if len(sys.argv) == 1: 
parser.print_help(sys.stderr) sys.exit(1) args = parser.parse_args() - if args.subcmd == 'source': - source.main(args) - if args.subcmd == 'rewind': - rewind.main(args) - if args.subcmd == 'develop': - develop.main(args) + if args.subcmd not in cli_modules: + parser.print_help(sys.stderr) + else: + cli_modules[args.subcmd].main(args) + +if __name__ == '__main__': + cli() \ No newline at end of file diff --git a/clams/mmif_utils/__init__.py b/clams/mmif_utils/__init__.py deleted file mode 100644 index 4ece749..0000000 --- a/clams/mmif_utils/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from clams.mmif_utils import rewind -from clams.mmif_utils import source - diff --git a/clams/mmif_utils/rewind.py b/clams/mmif_utils/rewind.py deleted file mode 100644 index c8bb9e1..0000000 --- a/clams/mmif_utils/rewind.py +++ /dev/null @@ -1,116 +0,0 @@ -""" -This module provides a CLI to rewind a MMIF from a CLAMS pipeline. -""" -import argparse -import sys -import textwrap - -import mmif - - -def prompt_user(mmif_obj: mmif.Mmif) -> int: - """ - Function to ask user to choose the rewind range. - """ - - ## Give a user options (#, "app", "timestamp") - time order - n = len(mmif_obj.views) - i = 0 # option number - aname = "" - a = 0 - # header - print("\n" + "{:<8} {:<8} {:<30} {:<100}".format("view-num", "app-num", "timestamp", "app")) - for view in reversed(mmif_obj.views): - if view.metadata.app != aname: - aname = view.metadata.app - a += 1 - i += 1 - print("{:<8} {:<8} {:<30} {:<100}".format(i, a, str(view.metadata.timestamp), str(view.metadata.app))) - - ## User input - return int(input("\nEnter the number to delete from that point by rewinding: ")) - - -def rewind_mmif(mmif_obj: mmif.Mmif, choice: int, choice_is_viewnum: bool = True) -> mmif.Mmif: - """ - Rewind MMIF by deleting the last N views. - The number of views to rewind is given as a number of "views", or number of "producer apps". - By default, the number argument is interpreted as the number of "views". 
- Note that when the same app is repeatedly run in a CLAMS pipeline and produces multiple views in a row, - rewinding in "app" mode will rewind all those views at once. - - :param mmif_obj: mmif object - :param choice: number of views to rewind - :param choice_is_viewnum: if True, choice is the number of views to rewind. If False, choice is the number of producer apps to rewind. - :return: rewound mmif object - - """ - if choice_is_viewnum: - for vid in list(v.id for v in mmif_obj.views)[-1:-choice-1:-1]: - mmif_obj.views._items.pop(vid) - else: - app_count = 0 - cur_app = "" - vid_to_pop = [] - for v in reversed(mmif_obj.views): - vid_to_pop.append(v.id) - if app_count >= choice: - break - if v.metadata.app != cur_app: - app_count += 1 - cur_app = v.metadata.app - for vid in vid_to_pop: - mmif_obj.views._items.pop(vid) - return mmif_obj - - -def describe_argparser(): - """ - returns two strings: one-line description of the argparser, and addition material, - which will be shown in `clams --help` and `clams --help`, respectively. - """ - oneliner = 'provides CLI to rewind a MMIF from a CLAMS pipeline.' - additional = textwrap.dedent(""" - MMIF rewinder rewinds a MMIF by deleting the last N views. - N can be specified as a number of views, or a number of producer apps. 
""") - return oneliner, oneliner + '\n\n' + additional - - -def prep_argparser(**kwargs): - parser = argparse.ArgumentParser(description=describe_argparser()[1], - formatter_class=argparse.RawDescriptionHelpFormatter, **kwargs) - parser.add_argument("IN_MMIF_FILE", - nargs="?", type=argparse.FileType("r"), - default=None if sys.stdin.isatty() else sys.stdin, - help='input MMIF file path, or STDIN if `-` or not provided.') - parser.add_argument("OUT_MMIF_FILE", - nargs="?", type=argparse.FileType("w"), - default=sys.stdout, - help='output MMIF file path, or STDOUT if `-` or not provided.') - parser.add_argument("-p", '--pretty', action='store_true', - help="Pretty-print rewound MMIF") - parser.add_argument("-n", '--number', default="0", type=int, - help="Number of views or apps to rewind, must be a positive integer. " - "If 0, the user will be prompted to choose. (default: 0)") - parser.add_argument("-m", '--mode', choices=['app', 'view'], default='view', - help="Choose to rewind by number of views or number of producer apps. (default: view)") - return parser - - -def main(args): - mmif_obj = mmif.Mmif(args.IN_MMIF_FILE.read()) - - if args.number == 0: # If user doesn't know how many views to rewind, give them choices. - choice = prompt_user(mmif_obj) - else: - choice = args.number - if not isinstance(choice, int) or choice <= 0: - raise ValueError(f"Only can rewind by a positive number of views. Got {choice}.") - - args.OUT_MMIF_FILE.write(rewind_mmif(mmif_obj, choice, args.mode == 'view').serialize(pretty=args.pretty)) - - -if __name__ == "__main__": - parser = prep_argparser() - args = parser.parse_args() - main(args) diff --git a/clams/mmif_utils/source.py b/clams/mmif_utils/source.py deleted file mode 100644 index f07d482..0000000 --- a/clams/mmif_utils/source.py +++ /dev/null @@ -1,296 +0,0 @@ -""" -This module provides a class for creating a "source" MMIF JSON object. 
-""" -import argparse -import itertools -import json -import pathlib -import sys -import textwrap -from typing import Union, Generator, List, Optional, Iterable -from urllib.parse import urlparse - -from mmif import Mmif, Document, DocumentTypes, __specver__ -from mmif.serialize.mmif import MmifMetadata - -__all__ = ['WorkflowSource'] - -DOC_JSON = Union[str, dict] -DOC = Union[DOC_JSON, Document] -METADATA_JSON = Union[str, dict] -METADATA = Union[METADATA_JSON, MmifMetadata] - - -class WorkflowSource: - """ - A WorkflowSource object is used at the beginning of a - CLAMS workflow to populate a new MMIF file with media. - - The same WorkflowSource object can be used repeatedly - to generate multiple MMIF objects. - - :param common_documents_json: - JSON doc_lists for any documents that should be common - to all MMIF objects produced by this workflow. - - :param common_metadata_json: - JSON doc_lists for metadata that should be common to - all MMIF objects produced by this workflow. - """ - mmif: Mmif - - def __init__( - self, - common_documents_json: Optional[List[DOC_JSON]] = None, - common_metadata_json: Optional[METADATA_JSON] = None - ) -> None: - if common_documents_json is None: - common_documents_json = [] - if common_metadata_json is None: - common_metadata_json = dict() - self.mmif_start: dict = {"documents": [json.loads(document) - if isinstance(document, str) - else document - for document in common_documents_json], - "views": [], - "metadata": { - "mmif": f"http://mmif.clams.ai/{__specver__}", - **common_metadata_json - }} - self.prime() - - def add_document(self, document: Union[str, dict, Document]) -> None: - """ - Adds a document to the working source MMIF. - - When you're done, fetch the source MMIF with produce(). 
- - :param document: the medium to add, as a JSON dict - or string or as a MMIF Medium object - """ - if isinstance(document, (str, dict)): - document = Document(document) - self.mmif.add_document(document) - - def change_metadata(self, key: str, value): - """ - Adds or changes a metadata entry in the working source MMIF. - - :param key: the desired key of the metadata property - :param value: the desired value of the metadata property - """ - self.mmif.metadata[key] = value - - def prime(self) -> None: - """ - Primes the WorkflowSource with a fresh MMIF object. - - Call this method if you want to reset the WorkflowSource - without producing a MMIF object with produce(). - """ - self.mmif = Mmif(self.mmif_start) - - def produce(self) -> Mmif: - """ - Returns the source MMIF and resets the WorkflowSource. - - Call this method once you have added all the documents - for your Workflow. - - :return: the current MMIF object that has been prepared - """ - source = self.mmif - self.prime() - return source - - def __call__( - self, - documents: Optional[List[DOC]] = None, - metadata: Optional[METADATA] = None - ) -> Mmif: - """ - Callable API that produces a new MMIF object from - this workflow source given a list of documents and - a metadata object. - - Call with no parameters to produce the default MMIF - object from ``self.mmif_start``. - - :param documents: a list of additional documents to add - :param metadata: additional metadata fields to add - :return: the produced MMIF object - """ - if documents is None: - documents = [] - if metadata is None: - metadata = {} - - if isinstance(documents, str): - documents = json.loads(documents) - if isinstance(metadata, MmifMetadata): - metadata = metadata.serialize() # pytype: disable=attribute-error # bug in pytype? 
(https://github.com/google/pytype/issues/533) - if isinstance(metadata, str): - metadata = json.loads(metadata) - - for document in documents: - self.add_document(document) - for key, value in metadata.items(): - self.change_metadata(key, value) - return self.produce() - - def from_data( - self, - doc_lists: Iterable[List[DOC]], - metadata_objs: Optional[Iterable[Optional[METADATA]]] = None - ) -> Generator[Mmif, None, None]: - """ - Provided with an iterable of document lists and an - optional iterable of metadata objects, generates - MMIF objects produced from that data. - - ``doc_lists`` and ``metadata_objs`` should be matched pairwise, - so that if they are zipped together, each pair defines - a single MMIF object from this workflow source. - - :param doc_lists: an iterable of document lists to generate MMIF from - :param metadata_objs: an iterable of metadata objects paired with the document lists - :return: a generator of produced MMIF files from the data - """ - if metadata_objs is None: - metadata_objs = itertools.repeat(None) - for documents, metadata in zip(doc_lists, metadata_objs): - yield self(documents, metadata) - - def __iter__(self): - """ - Endlessly produces MMIF directly from ``self.mmif_start``. - - If called after adding documents or changing metadata, - these changes are discarded, as the workflow source - gets re-primed. 
- """ - self.prime() - while True: - yield self.produce() - - -def generate_source_mmif_from_file(documents, prefix=None, scheme='file', **ignored): - at_types = { - 'video': DocumentTypes.VideoDocument, - 'audio': DocumentTypes.AudioDocument, - 'text': DocumentTypes.TextDocument, - 'image': DocumentTypes.ImageDocument - } - pl = WorkflowSource() - if prefix: - prefix = pathlib.PurePosixPath(prefix) - if not prefix.is_absolute(): - raise ValueError(f"prefix must be an absolute path; given \"{prefix}\".") - for doc_id, arg in enumerate(documents, start=1): - arg = arg.strip() - if len(arg) < 1: - continue - result = arg.split(':', maxsplit=1) - if len(result) == 2 and result[0].split('/', maxsplit=1)[0] in at_types: - mime, location = result - else: - raise ValueError( - f'Invalid MIME types, or no MIME type and/or path provided, in argument {doc_id-1} to source' - ) - location_uri = urlparse(location, scheme=scheme) - if location_uri.scheme == 'file': - location = pathlib.PurePosixPath(location_uri.path) - if prefix and location.is_absolute(): - raise ValueError(f"when prefix is used, file location must not be an absolute path; given \"{location}\".") - elif not prefix and not location.is_absolute(): - raise ValueError(f'file location must be an absolute path, or --prefix must be used; given \"{location}\".') - elif prefix and not location.is_absolute(): - location = prefix / location - location = str(location) - doc = Document() - doc.at_type = at_types[mime.split('/', maxsplit=1)[0]] - doc.properties.location = f"{location_uri.scheme}://{location if not location.startswith(location_uri.scheme) else location[len(location_uri.scheme)+3:]}" - doc.properties.id = f'd{doc_id}' - doc.properties.mime = mime - pl.add_document(doc) - return pl.produce().serialize(pretty=True) - - -def describe_argparser(): - """ - returns two strings: one-line description of the argparser, and addition material, - which will be shown in `clams --help` and `clams --help`, respectively. 
- """ - oneliner = 'provides CLI to create a "source" MMIF json.' - additional = textwrap.dedent(""" - A source MMIF is a MMIF with a list of source documents but empty views. - It can be used as a starting point for a CLAMS workflow. """) - return oneliner, oneliner + '\n\n' + additional - - -def prep_argparser(**kwargs): - import pkgutil - import re - import importlib - discovered_docloc_plugins = { - name[len('mmif_docloc_'):]: importlib.import_module(name) for _, name, _ in pkgutil.iter_modules() if - re.match(r'mmif[-_]docloc[-_]', name) - } - parser = argparse.ArgumentParser(description=describe_argparser()[1], formatter_class=argparse.RawTextHelpFormatter, **kwargs) - parser.add_argument( - 'documents', - default=None, - action='store', - nargs='+', - help='This list of documents MUST be colon-delimited pairs of document types and file locations. A document ' - 'type can be one of `audio`, `video`, `text`, `image`, or a MIME type string (such as video/mp4). The ' - 'file locations MUST be valid URI strings (e.g. `file:///path/to/file.mp4`, or URI scheme part can be ' - 'omitted, when `--scheme` flag is used). Note that when `file://` scheme is used (default), locations ' - 'MUST BE POSIX forms (Windows forms are not supported). The output will be a MMIF file containing a ' - 'document for each of those file paths, with the appropriate ``@type`` and MIME type (if given).' - ) - parser.add_argument( - '-p', '--prefix', - default=None, - metavar='PATH', - nargs='?', - help='An absolute path to use as prefix for file paths (ONLY WORKS with the default `file://` scheme, ignored ' - 'otherwise. MUST BE a POSIX form, Windows form is not supported). If prefix is set, document file paths ' - 'MUST be relative. Useful when creating source MMIF files from a system that\'s different from the ' - 'environment that actually runs the workflow (e.g. in a container).' 
- ) - parser.add_argument( - '-o', '--output', - default=None, - action='store', - nargs='?', - help='A name of a file to capture a generated MMIF json. When not given, MMIF is printed to stdout.' - ) - scheme_help = 'A scheme to associate with the document location URI. When not given, the default scheme is `file://`.' - if len(discovered_docloc_plugins) > 0: - plugin_help = [f'"{scheme_name}" ({scheme_plugin.help() if "help" in dir(scheme_plugin) else "help msg not provided by developer"})' - for scheme_name, scheme_plugin in discovered_docloc_plugins.items()] - scheme_help += ' (AVAILABLE ADDITIONAL SCHEMES) ' + ', '.join(plugin_help) - parser.add_argument( - '-s', '--scheme', - default='file', - action='store', - nargs='?', - help=scheme_help - ) - return parser - - -def main(args): - if args.output: - out_f = open(args.output, 'w') - else: - out_f = sys.stdout - mmif = generate_source_mmif_from_file(windows_path=False, **vars(args)) - out_f.write(mmif) - return mmif - -if __name__ == '__main__': - parser = prep_argparser() - args = parser.parse_args() - main(args) diff --git a/requirements.txt b/requirements.txt index 69e85a6..9efc988 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -mmif-python==1.0.19 +mmif-python==1.0.20 Flask>=2 Flask-RESTful>=0.3.9 diff --git a/tests/test_clamscli.py b/tests/test_clamscli.py deleted file mode 100644 index 43f4e99..0000000 --- a/tests/test_clamscli.py +++ /dev/null @@ -1,220 +0,0 @@ -import contextlib -import copy -import io -import os -import unittest -import unittest.mock - -from mmif.serialize import Mmif -from mmif.vocabulary import DocumentTypes, AnnotationTypes - -import clams -from clams.mmif_utils import rewind -from clams.mmif_utils import source - - -class TestCli(unittest.TestCase): - def setUp(self) -> None: - self.parser = clams.prep_argparser() - - def test_clams_cli(self): - stdout = io.StringIO() - with self.assertRaises(SystemExit) as e, contextlib.redirect_stdout(stdout): - 
self.parser.parse_args("-v".split()) - self.assertEqual(e.exception.code, 0) - self.assertEqual(stdout.getvalue().strip(), - clams.version_template.format(clams.__version__, clams.__specver__)) - - -class TestSource(unittest.TestCase): - - def setUp(self) -> None: - self.parser = clams.source.prep_argparser() - self.prefix = None - self.scheme = None - self.docs = [] - - def get_params(self): - - params = [] - if self.prefix: - params.extend(f'--prefix {self.prefix}'.split()) - if self.scheme: - params.extend(f'--scheme {self.scheme}'.split()) - params.extend(self.docs) - return params - - def generate_source_mmif(self): - - # to suppress output (otherwise, set to stdout by default - args = self.parser.parse_args(self.get_params()) - args.output = os.devnull - - return source.main(args) - - def test_accept_file_paths(self): - self.docs.append("video:/a/b/c.mp4") - self.docs.append('text:/a/b/c.txt') - source_mmif = Mmif(self.generate_source_mmif()) - self.assertEqual(len(source_mmif.documents), 2) - self.assertTrue(all(map(lambda x: x.location_scheme() == 'file', source_mmif.documents))) - - # relative path - self.docs.append('audio:a/b/c.mp3') - with self.assertRaises(ValueError): - self.generate_source_mmif() - - @unittest.mock.patch('os.name', 'nt') - def test_on_windows(self): - self.test_accept_file_paths() - - def test_accept_prefixed_file_paths(self): - self.prefix = '/a/b' - self.docs.append("video:c.mp4") - self.docs.append("text:c.txt") - source_mmif = Mmif(self.generate_source_mmif()) - self.assertEqual(len(source_mmif.documents), 2) - - # absolute path + prefix flag - self.docs.append("audio:/c.mp3") - with self.assertRaises(ValueError): - self.generate_source_mmif() - - def test_reject_relative_prefix(self): - self.prefix = '/' - self.docs.append("video:c.mp4") - source_mmif = Mmif(self.generate_source_mmif()) - self.assertEqual(len(source_mmif.documents), 1) - - self.prefix = '.' 
- with self.assertRaises(ValueError): - self.generate_source_mmif() - - def test_reject_unknown_mime(self): - self.docs.append("unknown_mime/more_unknown:/c.mp4") - with self.assertRaises(ValueError): - self.generate_source_mmif() - - def test_accept_scheme_files(self): - self.scheme = 'baapb' - self.docs.append("video:cpb-aacip-123-4567890.video") - self.docs.append("audio:cpb-aacip-111-1111111.audio") - source_mmif = Mmif(self.generate_source_mmif()) - self.assertEqual(len(source_mmif.documents), 2) - self.assertTrue(all(map(lambda x: x.location_scheme() == self.scheme, source_mmif.documents))) - - def test_generate_mixed_scheme(self): - self.scheme = 'baapb' - self.docs.append("video:file:///data/cpb-aacip-123-4567890.mp4") - self.docs.append("audio:cpb-aacip-111-1111111.audio") - source_mmif = Mmif(self.generate_source_mmif()) - self.assertEqual(len(source_mmif.documents), 2) - schemes = set(doc.location_scheme() for doc in source_mmif.documents) - self.assertEqual(len(schemes), 2) - self.assertTrue('baapb' in schemes) - self.assertTrue('file' in schemes) - - -class TestRewind(unittest.TestCase): - def setUp(self): - self.dummy_app_one = ExampleApp() - self.dummy_app_one.metadata.identifier = "dummy_app_one" - self.dummy_app_two = ExampleApp() - self.dummy_app_two.metadata.identifier = "dummy_app_two" - - # mmif we add views to - self.mmif_one = Mmif( - { - "metadata": {"mmif": "http://mmif.clams.ai/1.0.0"}, - "documents": [], - "views": [], - } - ) - - # baseline empty mmif for comparison - self.empty_mmif = Mmif( - { - "metadata": {"mmif": "http://mmif.clams.ai/1.0.0"}, - "documents": [], - "views": [], - } - ) - - def test_view_rewind(self): - """ - Tests the use of "view-rewiding" to remove multiple views from a single app. 
- """ - # Regular Case - mmif_added_views = self.dummy_app_one.mmif_add_views(self.mmif_one, 10) - self.assertEqual(len(mmif_added_views.views), 10) - rewound = rewind.rewind_mmif(mmif_added_views, 5) - self.assertEqual(len(rewound.views), 5) - # rewinding is done "in-place" - self.assertEqual(len(rewound.views), len(mmif_added_views.views)) - - def test_app_rewind(self): - # Regular Case - app_one_views = 3 - app_two_views = 2 - app_one_out = self.dummy_app_one.mmif_add_views(self.mmif_one, app_one_views) - app_two_out = self.dummy_app_two.mmif_add_views(app_one_out, app_two_views) - self.assertEqual(len(app_two_out.views), app_one_views + app_two_views) - rewound = rewind.rewind_mmif(app_two_out, 1, choice_is_viewnum=False) - self.assertEqual(len(rewound.views), app_one_views) - -def compare_views(a: Mmif, b: Mmif) -> bool: - perfect_match = True - for view_a, view_b in zip(a.views, b.views): - if view_a != view_b: - perfect_match = False - return perfect_match - - -class ExampleApp(clams.app.ClamsApp): - """This is a barebones implementation of a CLAMS App - used to generate simple Views within a mmif object - for testing purposes. The three methods here all streamline - the mmif annotation process for the purposes of repeated insertion - and removal. 
- """ - - app_version = "lorem_ipsum" - - def _appmetadata(self): - pass - - def _annotate(self, mmif: Mmif, message: str, idx: int, **kwargs): - if type(mmif) is not Mmif: - mmif_obj = Mmif(mmif, validate=False) - else: - mmif_obj = mmif - - new_view = mmif_obj.new_view() - self.sign_view(new_view, runtime_conf=kwargs) - self.gen_annotate(new_view, message, idx) - - d1 = DocumentTypes.VideoDocument - d2 = DocumentTypes.from_str(f"{str(d1)[:-1]}99") - if mmif.get_documents_by_type(d2): - new_view.new_annotation(AnnotationTypes.TimePoint, "tp1") - if "raise_error" in kwargs and kwargs["raise_error"]: - raise ValueError - return mmif - - def gen_annotate(self, mmif_view, message, idx=0): - mmif_view.new_contain( - AnnotationTypes.TimeFrame, **{"producer": "dummy-producer"} - ) - ann = mmif_view.new_annotation( - AnnotationTypes.TimeFrame, "a1", start=10, end=99 - ) - ann.add_property("f1", message) - - def mmif_add_views(self, mmif_obj, idx: int): - """Helper Function to add an arbitrary number of views to a mmif""" - for i in range(idx): - mmif_obj = self._annotate(mmif_obj, message=f"message {i}", idx=idx) - return mmif_obj - -if __name__ == '__main__': - unittest.main()