|
| 1 | +import argparse |
| 2 | +import logging |
| 3 | +import os |
| 4 | +import shutil |
| 5 | +import stat |
| 6 | +import sys |
| 7 | +import tempfile |
| 8 | +from pathlib import Path |
| 9 | +from typing import List, Optional, Tuple, Dict |
| 10 | +from urllib.parse import urlparse, ParseResult |
| 11 | + |
| 12 | +import git |
| 13 | +import nbformat |
| 14 | +import yaml |
| 15 | +from git import Repo |
| 16 | +from repo2docker import Repo2Docker |
| 17 | + |
| 18 | +from .cwltoolextractor import AnnotatedIPython2CWLToolConverter |
| 19 | + |
# Logger shared by every function in this module; setup_logger() attaches its handler.
logger = logging.getLogger(
    'repo2cwl'
)
| 21 | + |
| 22 | + |
def _get_notebook_paths_from_dir(dir_path: str):
    """
    Recursively collect the paths of the Jupyter notebooks stored under a directory.

    :param dir_path: root directory to walk
    :return: list with one entry per file whose name ends with ``.ipynb``; each entry is the
        directory the file was found in joined with the file name
    """
    return [
        os.path.join(current_dir, file_name)
        for current_dir, _subdirs, file_names in os.walk(dir_path)
        for file_name in file_names
        if file_name.endswith('.ipynb')
    ]
| 30 | + |
| 31 | + |
def _store_jn_as_script(notebook_path: str, git_directory_absolute_path: str, bin_absolute_path: str, image_id: str) \
        -> Tuple[Optional[Dict], Optional[str]]:
    """
    Convert an annotated notebook to an executable script stored under ``bin_absolute_path``.

    The script is written to the same relative location the notebook has inside
    ``git_directory_absolute_path``, without the notebook extension, and its owner-execute
    bit is set.

    :param notebook_path: path of the notebook to convert
    :param git_directory_absolute_path: directory the notebook path is made relative to
    :param bin_absolute_path: directory under which the generated script is stored
    :param image_id: value forwarded to ``converter.cwl_command_line_tool``
    :return: ``(None, None)`` when the converter found no variables in the notebook, otherwise
        the tool description returned by the converter and the script path relative to
        ``bin_absolute_path``
    """
    # The notebook format is JSON stored as UTF-8; do not depend on the locale's default encoding.
    with open(notebook_path, encoding='utf-8') as fd:
        notebook = nbformat.read(fd, as_version=4)

    converter = AnnotatedIPython2CWLToolConverter.from_jupyter_notebook_node(notebook)

    if len(converter._variables) == 0:
        logger.info(f"Notebook {notebook_path} does not contains typing annotations. skipping...")
        return None, None

    # Strip the notebook extension while keeping the location relative to the repository root.
    script_relative_path = os.path.splitext(
        os.path.relpath(notebook_path, git_directory_absolute_path)
    )[0]
    script_absolute_name = os.path.join(bin_absolute_path, script_relative_path)
    script_parent_directory = os.path.dirname(script_absolute_name)
    if script_parent_directory:
        os.makedirs(script_parent_directory, exist_ok=True)

    # Join with '\n' and disable newline translation: combining os.linesep with a text-mode
    # write produces '\r\r\n' on Windows, and a shebang line ending in '\r' cannot be resolved
    # by a POSIX kernel.
    script = '\n'.join([
        '#!/usr/bin/env ipython',
        '"""',
        'DO NOT EDIT THIS FILE',
        'THIS FILE IS AUTO-GENERATED BY THE ipython2cwl.',
        'FOR MORE INFORMATION CHECK https://github.com/giannisdoukas/ipython2cwl',
        '"""',
        converter._wrap_script_to_method(converter._tree, converter._variables)
    ])
    with open(script_absolute_name, 'w', encoding='utf-8', newline='\n') as fd:
        fd.write(script)

    tool = converter.cwl_command_line_tool(image_id)

    # Add the owner-execute bit on top of the current mode so the script can be run directly.
    current_mode = os.stat(script_absolute_name).st_mode
    os.chmod(script_absolute_name, current_mode | stat.S_IEXEC)
    return tool, script_relative_path
| 68 | + |
| 69 | + |
def existing_path(path: str):
    """
    Convert a string to a ``Path`` after checking that it points to an existing directory.

    :param path: directory path as a string
    :return: the same path as a ``pathlib.Path``
    :raises ValueError: when the path is not a directory
    """
    directory = Path(path)
    if directory.is_dir():
        return directory
    raise ValueError('Directory does not exists')
| 75 | + |
| 76 | + |
def parser_arguments(argv: List[str]):
    """
    Parse the command line of the repo2cwl tool.

    :param argv: full argument vector; the first element (the program name) is skipped
    :return: namespace with ``repo``, a one-element list holding the repository uri parsed with
        ``urlparse`` (scheme defaults to ``file``), and ``output``, the value produced by
        ``existing_path`` for the required ``-o/--output`` option
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        'repo',
        nargs=1,
        type=lambda uri: urlparse(uri, scheme='file'),
    )
    cli.add_argument(
        '-o', '--output',
        required=True,
        type=existing_path,
        help='Output directory to store the generated cwl files',
    )
    cli_arguments = argv[1:]
    return cli.parse_args(cli_arguments)
| 84 | + |
| 85 | + |
def setup_logger():
    """
    Make the module logger print records of level INFO and above to stdout.

    Calling the function more than once does not attach additional stdout handlers.
    """
    # A logger without an explicit level inherits the root logger's default (WARNING), which
    # would discard every logger.info() call before it reaches the handler. A level that was
    # configured explicitly is left untouched.
    if logger.level == logging.NOTSET:
        logger.setLevel(logging.INFO)

    # Attaching a second stdout handler would print every record twice.
    for attached in logger.handlers:
        if isinstance(attached, logging.StreamHandler) and getattr(attached, 'stream', None) is sys.stdout:
            return

    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
| 92 | + |
| 93 | + |
def repo2cwl(argv: Optional[List[str]] = None):
    """
    Command line entry point: generate one CWL command line tool file per converted notebook.

    The repository is copied (``file`` uris) or cloned (``http``, ``https``, ``ssh`` uris) into a
    temporary directory, handed to ``_repo2cwl`` and each returned tool is dumped as yaml in the
    output directory. The temporary directory is removed afterwards, also when a step fails.

    :param argv: full argument vector including the program name; defaults to ``sys.argv``
    :raises ValueError: when the uri scheme is not supported or a ``file`` uri does not point to
        an existing directory
    """
    setup_logger()
    argv = sys.argv if argv is None else argv
    args = parser_arguments(argv)
    uri: ParseResult = args.repo[0]
    output_directory: Path = args.output
    supported_schemes = {'file', 'http', 'https', 'ssh'}
    if uri.scheme not in supported_schemes:
        raise ValueError(f'Supported schema uris: {supported_schemes}')
    # Validate the source before creating the temporary directory so that a bad path
    # leaves nothing behind.
    if uri.scheme == 'file' and not os.path.isdir(uri.path):
        raise ValueError('Directory does not exists')

    # The repository lives in a child of the temporary directory because copytree needs a
    # destination that does not exist yet; the parent is what has to be removed at the end.
    temp_root = tempfile.mkdtemp(prefix='repo2cwl_')
    local_git_directory = os.path.join(temp_root, 'repo')
    try:
        if uri.scheme == 'file':
            logger.info(f'copy repo to temp directory: {local_git_directory}')
            shutil.copytree(uri.path, local_git_directory)
            local_git = git.Repo(local_git_directory)
        else:
            logger.info(f'cloning repo to temp directory: {local_git_directory}')
            local_git = git.Repo.clone_from(uri.geturl(), local_git_directory)

        image_id, cwl_tools = _repo2cwl(local_git)
        logger.info(f'Generated image id: {image_id}')
        for tool in cwl_tools:
            # File name: script path below /app/cwl/bin/ with directory separators flattened.
            base_command_script_name = f'{tool["baseCommand"][len("/app/cwl/bin/"):].replace("/", "_")}.cwl'
            tool_filename = str(output_directory.joinpath(base_command_script_name))
            with open(tool_filename, 'w') as f:
                logger.info(f'Creating CWL command line tool: {tool_filename}')
                yaml.safe_dump(tool, f)
    finally:
        logger.info(f'Cleaning local temporary directory {temp_root}...')
        shutil.rmtree(temp_root)
| 125 | + |
| 126 | + |
def _repo2cwl(git_directory_path: Repo) -> Tuple[str, List[Dict]]:
    """
    Generate scripts and CWL tool descriptions for the notebooks of a local repository and build
    its image with repo2docker.

    The function writes new files into the repository directory and creates a commit, so it must
    not be used on a repository that is expected to stay unchanged.

    :param git_directory_path: repository mounted to a local directory
    :return: The generated build image id & the cwl description
    """
    builder = Repo2Docker()
    builder.target_repo_dir = os.path.join(os.path.sep, 'app')
    repository_root = git_directory_path.tree().abspath
    builder.repo = repository_root
    bin_path = os.path.join(builder.repo, 'cwl', 'bin')
    os.makedirs(bin_path, exist_ok=True)

    tools = []
    for notebook_path in _get_notebook_paths_from_dir(builder.repo):
        tool, script_name = _store_jn_as_script(
            notebook_path,
            repository_root,
            bin_path,
            builder.output_image_spec,
        )
        # Notebooks without a tool description are skipped.
        if tool is not None:
            tool['baseCommand'] = os.path.join('/app', 'cwl', 'bin', script_name)
            tools.append(tool)
    # NOTE(review): nothing is staged in this function before committing - confirm the commit
    # is expected to contain the generated scripts.
    git_directory_path.index.commit("auto-commit")

    builder.build()
    # fix dockerImageId: overwrite it with the image spec read after the build
    for tool in tools:
        tool['hints']['DockerRequirement']['dockerImageId'] = builder.output_image_spec
    return builder.output_image_spec, tools
| 160 | + |
| 161 | + |
# Run the command line entry point when the module is executed as a script.
if '__main__' == __name__:
    repo2cwl()
0 commit comments