Skip to content

Commit 5a6ce33

Browse files
Merge pull request #2 from giannisdoukas/dev
Dev
2 parents ed17280 + 716cc4d commit 5a6ce33

15 files changed

+629
-13
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,3 +243,5 @@ cython_debug/
243243
/external_examples/
244244
/tests/jn/output/
245245
tmp.py
246+
/html/
247+
cwlbuild

.readthedocs.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
python:
2+
setup_py_install: true

.travis.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
services:
2+
- docker
13
language: python
24
python:
35
- "3.6"
@@ -10,6 +12,7 @@ before_install:
1012
install:
1113
- pip install -r test-requirements.txt
1214
- python setup.py install
15+
- pip freeze
1316
script:
1417
- pycodestyle --max-line-length=119 $(find ipython2cwl -name '*.py')
1518
- coverage run --source ipython2cwl -m unittest discover tests
@@ -27,4 +30,5 @@ matrix:
2730
- virtualenv -p python3 venv
2831
- source venv/bin/activate
2932
- pip3 install -U -r test-requirements.txt
30-
script: coverage run --source ipython2cwl -m unittest discover tests
33+
script: coverage run --source ipython2cwl -m unittest discover tests
34+
env: TRAVIS_IGNORE_DOCKER=true

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@
22

33
[![Build Status](https://travis-ci.com/giannisdoukas/ipython2cwl.svg)](https://travis-ci.com/giannisdoukas/ipython2cwl)
44
[![Coverage Status](https://coveralls.io/repos/github/giannisdoukas/ipython2cwl/badge.svg?branch=dev)](https://coveralls.io/github/giannisdoukas/ipython2cwl?branch=dev)
5+
[![Documentation Status](https://readthedocs.org/projects/ipython2cwl/badge/?version=latest)](https://ipython2cwl.readthedocs.io/en/latest/?badge=latest)
56

docs/conf.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Configuration file for the Sphinx documentation builder.
2+
#
3+
# This file only contains a selection of the most common options. For a full
4+
# list see the documentation:
5+
# http://www.sphinx-doc.org/en/master/config
6+
7+
# -- Path setup --------------------------------------------------------------
8+
9+
# If extensions (or modules to document with autodoc) are in another directory,
10+
# add these directories to sys.path here. If the directory is relative to the
11+
# documentation root, use os.path.abspath to make it absolute, like shown here.
12+
#
13+
# import os
14+
# import sys
15+
# sys.path.insert(0, os.path.abspath('.'))
16+
17+
# -- Project information -----------------------------------------------------
18+
19+
project = 'ipython2cwl'
20+
copyright = '2020, Yannis Doukas'
21+
author = 'Yannis Doukas'
22+
23+
# The full version, including alpha/beta/rc tags
24+
release = "0.1"
25+
26+
# -- General configuration ---------------------------------------------------
27+
28+
# Add any Sphinx extension module names here, as strings. They can be
29+
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
30+
# ones.
31+
extensions = ['sphinx.ext.autodoc']
32+
33+
# Add any paths that contain templates here, relative to this directory.
34+
templates_path = ['_templates']
35+
36+
# List of patterns, relative to source directory, that match files and
37+
# directories to ignore when looking for source files.
38+
# This pattern also affects html_static_path and html_extra_path.
39+
exclude_patterns = []
40+
41+
# -- Options for HTML output -------------------------------------------------
42+
43+
# The theme to use for HTML and HTML Help pages. See the documentation for
44+
# a list of builtin themes.
45+
#
46+
html_theme = 'alabaster'
47+
48+
# Add any paths that contain custom static files (such as style sheets) here,
49+
# relative to this directory. They are copied after the builtin static files,
50+
# so a file named "default.css" will overwrite the builtin "default.css".
51+
html_static_path = ['_static']
52+
53+
master_doc = 'index'

docs/index.rst

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
IPython2CWL: Convert Jupyter Notebook to CWL
2+
================================================================================
3+
4+
.. image:: https://travis-ci.com/giannisdoukas/ipython2cwl.svg?branch=master
5+
:target: https://travis-ci.com/giannisdoukas/ipython2cwl
6+
.. image:: https://coveralls.io/repos/github/giannisdoukas/ipython2cwl/badge.svg?branch=master
7+
:target: https://coveralls.io/github/giannisdoukas/ipython2cwl?branch=master
8+
9+
10+
------------------------------------------------------------------------------------------
11+
12+
IPython2CWL is a tool for converting `IPython <https://ipython.org/>`_ Jupyter Notebooks to
13+
`CWL <https://www.commonwl.org/>`_ Command Line Tools by simply providing typing annotations.
14+
15+
.. code-block:: python
16+
17+
from ipython2cwl.iotypes import CWLFilePathInput, CWLFilePathOutput
18+
import csv
19+
input_filename: 'CWLFilePathInput' = 'data.csv'
20+
with open(input_filename) as f:
21+
csv_reader = csv.reader(f)
22+
data = [line for line in csv_reader]
23+
number_of_lines = len(data)
24+
result_file: 'CWLFilePathOutput' = 'number_of_lines.txt'
25+
with open(result_file, 'w') as f:
26+
f.write(str(number_of_lines))
27+
28+
29+
------------------------------------------------------------------------------------------
30+
31+
IPython2CWL is based on `repo2docker <https://github.com/jupyter/repo2docker>`_, the same tool
32+
used by `mybinder <https://mybinder.org/>`_. Now, by writing Jupyter Notebooks and publishing them, including repo2docker
33+
configuration, the community can not only execute the notebooks remotely but also use them as steps in scientific
34+
workflows.
35+
36+
* Install ipython2cwl
37+
* Ensure that you have docker running
38+
* Create a directory to store the generated cwl files, for example cwlbuild
39+
* Execute :code:`jupyter repo2cwl https://github.com/giannisdoukas/cwl-annotated-jupyter-notebook.git -o cwlbuild`
40+
41+
Indices and tables
42+
==================
43+
44+
* :ref:`genindex`
45+
* :ref:`modindex`
46+
* :ref:`search`

ipython2cwl/cwltool.py renamed to ipython2cwl/cwltoolextractor.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@
99
from typing import Dict, Any
1010

1111
import astor
12+
import nbconvert
1213
import yaml
14+
from nbformat.notebooknode import NotebookNode
1315

1416
from .iotypes import CWLFilePathInput, CWLBooleanInput, CWLIntInput, CWLStringInput, CWLFilePathOutput
1517
from .requirements_manager import RequirementsManager
@@ -51,7 +53,8 @@ def __init__(self, *args, **kwargs):
5153

5254
def visit_AnnAssign(self, node):
5355
try:
54-
if isinstance(node.annotation, ast.Name) and node.annotation.id in self.input_type_mapper:
56+
if (isinstance(node.annotation, ast.Name) and node.annotation.id in self.input_type_mapper) or \
57+
(isinstance(node.annotation, ast.Str) and node.annotation.s in self.input_type_mapper):
5558
mapper = self.input_type_mapper[node.annotation.id]
5659
self.extracted_nodes.append(
5760
(node, mapper[0], mapper[1], True, True, False)
@@ -72,7 +75,8 @@ def visit_AnnAssign(self, node):
7275
(node, mapper[0] + '[]', mapper[1], True, True, False)
7376
)
7477
return None
75-
elif isinstance(node.annotation, ast.Name) and node.annotation.id in self.output_type_mapper:
78+
elif (isinstance(node.annotation, ast.Name) and node.annotation.id in self.output_type_mapper) or \
79+
(isinstance(node.annotation, ast.Str) and node.annotation.s in self.output_type_mapper):
7680
self.extracted_nodes.append(
7781
(node, None, None, None, False, True)
7882
)
@@ -121,6 +125,9 @@ class AnnotatedIPython2CWLToolConverter:
121125
"""The annotated python code to convert."""
122126

123127
def __init__(self, annotated_ipython_code: str):
128+
"""Creates an AnnotatedIPython2CWLToolConverter. If the annotated_ipython_code contains magic commands use the
129+
from_jupyter_notebook_node method"""
130+
124131
self._code = annotated_ipython_code
125132
extractor = AnnotatedVariablesExtractor()
126133
self._tree = ast.fix_missing_locations(extractor.visit(ast.parse(self._code)))
@@ -137,6 +144,12 @@ def __init__(self, annotated_ipython_code: str):
137144
node.value.s)
138145
)
139146

147+
@classmethod
def from_jupyter_notebook_node(cls, node: NotebookNode) -> 'AnnotatedIPython2CWLToolConverter':
    """Alternative constructor: export the given notebook node to Python code and build a converter from it.

    :param node: the notebook to convert
    :return: a converter initialised with the code produced by nbconvert's PythonExporter
    """
    # the exporter returns a tuple whose first item is the exported code
    code, _resources = nbconvert.PythonExporter().from_notebook_node(node)
    return cls(code)
152+
140153
@classmethod
141154
def _wrap_script_to_method(cls, tree, variables) -> str:
142155
main_template_code = os.linesep.join([

ipython2cwl/ipython2cwl.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,19 @@
11
import argparse
2+
import json
3+
from io import StringIO
24
from pathlib import Path
35
from typing import List, Optional
46

7+
import nbconvert
58
import nbformat
6-
from .cwltool import AnnotatedIPython2CWLToolConverter
9+
10+
from .cwltoolextractor import AnnotatedIPython2CWLToolConverter
11+
12+
13+
def jn2code(notebook):
    """Convert a Jupyter notebook to Python source code.

    :param notebook: the notebook to export; it is serialised with ``json.dumps`` and handed to nbconvert's
                     PythonExporter as an in-memory file
    :return: the exported Python script as a string
    """
    exporter = nbconvert.PythonExporter()
    # ``from_file`` returns an ``(output, resources)`` tuple. Only the script text must be returned, because the
    # result is used as source code by the caller.
    script, _resources = exporter.from_file(StringIO(json.dumps(notebook)))
    return script
717

818

919
def main(argv: Optional[List[str]] = None):
@@ -17,10 +27,7 @@ def main(argv: Optional[List[str]] = None):
1727
notebook = nbformat.read(args.jn[0], as_version=4)
1828
output: Path = args.output
1929
args.jn[0].close()
20-
script_code = '\n'.join(
21-
[f"\n\n# --------- cell - {i} ---------\n\n{cell.source}" for i, cell in
22-
enumerate(filter(lambda c: c.cell_type == 'code', notebook.cells), start=1)]
23-
)
30+
script_code = jn2code(notebook)
2431

2532
converter = AnnotatedIPython2CWLToolConverter(script_code)
2633
converter.compile(output)

ipython2cwl/repo2cwl.py

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
import argparse
2+
import logging
3+
import os
4+
import shutil
5+
import stat
6+
import sys
7+
import tempfile
8+
from pathlib import Path
9+
from typing import List, Optional, Tuple, Dict
10+
from urllib.parse import urlparse, ParseResult
11+
12+
import git
13+
import nbformat
14+
import yaml
15+
from git import Repo
16+
from repo2docker import Repo2Docker
17+
18+
from .cwltoolextractor import AnnotatedIPython2CWLToolConverter
19+
20+
logger = logging.getLogger('repo2cwl')
21+
22+
23+
def _get_notebook_paths_from_dir(dir_path: str):
24+
notebooks_paths = []
25+
for path, subdirs, files in os.walk(dir_path):
26+
for name in files:
27+
if name.endswith('.ipynb'):
28+
notebooks_paths.append(os.path.join(path, name))
29+
return notebooks_paths
30+
31+
32+
def _store_jn_as_script(notebook_path: str, git_directory_absolute_path: str, bin_absolute_path: str, image_id: str) \
        -> Tuple[Optional[Dict], Optional[str]]:
    """Convert a notebook to an executable script stored under the bin directory and describe it as a CWL tool.

    :param notebook_path: path of the notebook to convert
    :param git_directory_absolute_path: root of the repository; the script keeps the notebook's path relative to it
    :param bin_absolute_path: directory under which the generated script is written
    :param image_id: value forwarded to the converter's ``cwl_command_line_tool``
    :return: the tool description and the script path relative to the bin directory, or ``(None, None)`` when the
             converter found no annotated variables in the notebook
    """
    with open(notebook_path) as notebook_file:
        notebook = nbformat.read(notebook_file, as_version=4)

    converter = AnnotatedIPython2CWLToolConverter.from_jupyter_notebook_node(notebook)

    if len(converter._variables) == 0:
        logger.info(f"Notebook {notebook_path} does not contains typing annotations. skipping...")
        return None, None

    # the script keeps the notebook's relative path, minus its trailing '.ipynb' (6 characters)
    script_relative_path = os.path.relpath(notebook_path, git_directory_absolute_path)[:-len('.ipynb')]
    *parent_directories, script_basename = script_relative_path.split(os.sep)
    script_directory = bin_absolute_path
    if parent_directories:
        # mirror the notebook's sub-directories inside the bin directory
        script_directory = os.path.join(bin_absolute_path, os.sep.join(parent_directories))
        os.makedirs(script_directory, exist_ok=True)
    script_absolute_name = os.path.join(script_directory, script_basename)

    script_lines = [
        '#!/usr/bin/env ipython',
        '"""',
        'DO NOT EDIT THIS FILE',
        'THIS FILE IS AUTO-GENERATED BY THE ipython2cwl.',
        'FOR MORE INFORMATION CHECK https://github.com/giannisdoukas/ipython2cwl',
        '"""',
        converter._wrap_script_to_method(converter._tree, converter._variables),
    ]
    with open(script_absolute_name, 'w') as script_file:
        script_file.write(os.linesep.join(script_lines))

    tool = converter.cwl_command_line_tool(image_id)

    # add the executable bit for the owner on top of the current permissions
    in_git_dir_script_file = os.path.join(bin_absolute_path, script_relative_path)
    current_mode = os.stat(in_git_dir_script_file).st_mode
    os.chmod(in_git_dir_script_file, current_mode | stat.S_IEXEC)
    return tool, script_relative_path
68+
69+
70+
def existing_path(path: str):
    """Convert ``path`` to a :class:`pathlib.Path`, insisting that it points to an existing directory.

    :param path: the directory path given as a string
    :return: the path as a ``Path`` object
    :raises ValueError: if ``path`` is not a directory
    """
    directory = Path(path)
    if directory.is_dir():
        return directory
    raise ValueError('Directory does not exists')
75+
76+
77+
def parser_arguments(argv: List[str]):
    """Parse the command line arguments of repo2cwl.

    :param argv: the full argument vector; its first item is skipped
    :return: the parsed namespace, where ``repo`` is a one-item list holding the uri parsed with ``urlparse``
             (the scheme defaults to ``file``) and ``output`` is the required output directory
    """
    arguments_parser = argparse.ArgumentParser()
    arguments_parser.add_argument(
        'repo',
        nargs=1,
        type=lambda uri: urlparse(uri, scheme='file'),
    )
    arguments_parser.add_argument(
        '-o', '--output',
        required=True,
        type=existing_path,
        help='Output directory to store the generated cwl files',
    )
    arguments = argv[1:]
    return arguments_parser.parse_args(arguments)
84+
85+
86+
def setup_logger():
    """Configure the module logger so that INFO (and above) records are printed to stdout.

    The function is safe to call more than once: a stdout handler is attached only when the logger does not already
    have one, so repeated calls do not duplicate every log line.
    """
    # Without an explicit level the logger inherits the root logger's level (WARNING by default) and INFO records
    # are dropped before they reach the handler. Only lower the level, never raise it.
    if logger.getEffectiveLevel() > logging.INFO:
        logger.setLevel(logging.INFO)

    for existing_handler in logger.handlers:
        if isinstance(existing_handler, logging.StreamHandler) \
                and getattr(existing_handler, 'stream', None) is sys.stdout:
            return

    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)
92+
93+
94+
def repo2cwl(argv: Optional[List[str]] = None):
    """Entry point: convert the annotated notebooks of a repository to CWL command line tools.

    The repository is copied (``file`` scheme) or cloned (any other supported scheme) into a temporary directory,
    converted with ``_repo2cwl`` and one ``.cwl`` file per generated tool is written to the output directory.
    The temporary directory is always removed, even when the conversion fails.

    :param argv: the argument vector, program name included; defaults to ``sys.argv``
    :raises ValueError: if the uri scheme is not supported or if a ``file`` uri does not point to a directory
    """
    setup_logger()
    argv = sys.argv if argv is None else argv
    args = parser_arguments(argv)
    uri: ParseResult = args.repo[0]
    output_directory: Path = args.output
    supported_schemes = {'file', 'http', 'https', 'ssh'}
    if uri.scheme not in supported_schemes:
        raise ValueError(f'Supported schema uris: {supported_schemes}')
    # validate the input before creating the temporary directory, so nothing is left behind on invalid input
    if uri.scheme == 'file' and not os.path.isdir(uri.path):
        raise ValueError('Directory does not exists')

    temporary_directory = tempfile.mkdtemp(prefix='repo2cwl_')
    local_git_directory = os.path.join(temporary_directory, 'repo')
    try:
        if uri.scheme == 'file':
            logger.info(f'copy repo to temp directory: {local_git_directory}')
            shutil.copytree(uri.path, local_git_directory)
            local_git = git.Repo(local_git_directory)
        else:
            logger.info(f'cloning repo to temp directory: {local_git_directory}')
            local_git = git.Repo.clone_from(uri.geturl(), local_git_directory)

        image_id, cwl_tools = _repo2cwl(local_git)
        logger.info(f'Generated image id: {image_id}')
        for tool in cwl_tools:
            # name the file after the script path relative to the bin directory, with '/' flattened to '_'
            base_command_script_name = f'{tool["baseCommand"][len("/app/cwl/bin/"):].replace("/", "_")}.cwl'
            tool_filename = str(output_directory.joinpath(base_command_script_name))
            with open(tool_filename, 'w') as f:
                logger.info(f'Creating CWL command line tool: {tool_filename}')
                yaml.safe_dump(tool, f)
    finally:
        # remove the whole directory created by mkdtemp, not only its 'repo' child
        logger.info(f'Cleaning local temporary directory {temporary_directory}...')
        shutil.rmtree(temporary_directory)
125+
126+
127+
def _repo2cwl(git_directory_path: Repo) -> Tuple[str, List[Dict]]:
    """
    Generates the scripts and the CWL descriptions for the notebooks of a repository and builds its image.

    The function writes new files in the directory of the given Repo and creates a commit on it, so it must not be
    used for repositories which should stay unchanged.
    :param git_directory_path: a Repo mounted to a local directory
    :return: The generated build image id & the cwl description
    """
    r2d = Repo2Docker()
    r2d.target_repo_dir = os.path.join(os.path.sep, 'app')
    repo_absolute_path = git_directory_path.tree().abspath
    r2d.repo = repo_absolute_path
    bin_path = os.path.join(r2d.repo, 'cwl', 'bin')
    os.makedirs(bin_path, exist_ok=True)

    tools = []
    for notebook_path in _get_notebook_paths_from_dir(r2d.repo):
        tool, script_name = _store_jn_as_script(notebook_path, repo_absolute_path, bin_path, r2d.output_image_spec)
        if tool is not None:
            tool['baseCommand'] = os.path.join('/app', 'cwl', 'bin', script_name)
            tools.append(tool)
    git_directory_path.index.commit("auto-commit")

    r2d.build()
    # fix dockerImageId: the image spec is read again after the build and written to every tool
    for tool in tools:
        tool['hints']['DockerRequirement']['dockerImageId'] = r2d.output_image_spec
    return r2d.output_image_spec, tools
160+
161+
162+
if __name__ == '__main__':
163+
repo2cwl()

0 commit comments

Comments
 (0)