diff --git a/README.md b/README.md index f37d51f9..07e37c1d 100644 --- a/README.md +++ b/README.md @@ -315,6 +315,27 @@ rocrate add test-instance test1 http://example.com -r jobs -i test1_1 rocrate add test-definition test1 test/test1/sort-and-change-case-test.yml -e planemo -v '>=0.70' ``` +Exporting a Galaxy workflow-run export to an RO-Crate. + +```python +from rocrate import rocrate_api + +wf_path = base_path + "example-history-export3.ga" +dataset_path = base_path + "example-history-export3/datasets/" +wfr_metadata_path = base_path + "example-history-export3" +files_list = os.listdir(dataset_path) +files_list = [dataset_path + f for f in files_list] + +# Create base package +wf_crate = rocrate_api.make_workflow_run_rocrate(workflow_path=wf_path, + wfr_metadata_path=wfr_metadata_path, author=None, orcid=None, + wf_type="Galaxy",include_files=files_list, prov_name="test_prov") + +# write crate to disk +out_path = base_path + "example-history-export3-crate-new" +wf_crate.write(out_path) +``` + ## License diff --git a/notebooks/ROcrate-example.ipynb b/notebooks/ROcrate-example.ipynb new file mode 100644 index 00000000..07b07352 --- /dev/null +++ b/notebooks/ROcrate-example.ipynb @@ -0,0 +1,160 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('/home/padge/Elixir/workflow-export/ro-crate-py/')\n", + "import os\n", + "from rocrate.rocrate import ROCrate\n", + "base_path = '/home/padge/Elixir/workflow-export/'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/home/padge/Elixir/workflow-export/ro-crate-py/notebooks'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "os.getcwd()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# adding a Dataset\n", + "crate 
= ROCrate()\n", + "os.makedirs(\"tmp3\", exist_ok=True)\n", + "open('tmp3/empty_file', 'w').close()\n", + "dataset_entity = crate.add_directory(source=\"tmpd3\", dest_path=\"new_tmp\")\n", + "crate.write(\"./new_crate4\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.core.debugger import set_trace;set_trace()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SRR10903401\n", + "FooBar\n" + ] + } + ], + "source": [ + "shutil.rmtree(\"/home/padge/Downloads/Elixir/ro-crate-py/tmp/crate\")\n", + "shutil.copytree(\"/home/padge/Downloads/Elixir/ro-crate-py/test/test-data/read_crate\", \"/home/padge/Downloads/Elixir/ro-crate-py/tmp/crate\")\n", + "crate = ROCrate(\"/home/padge/Downloads/Elixir/ro-crate-py/tmp/crate\")\n", + "readme = crate.dereference(\"test_file_galaxy.txt\")\n", + "with open(readme.source, \"rt\") as f:\n", + " print(f.readline().strip())\n", + "new_source = \"/home/padge/Downloads/Elixir/ro-crate-py/tmp/foobar.txt\"\n", + "with open(new_source, \"wt\") as f:\n", + " f.write(\"FooBar\\n\")\n", + "crate.delete(readme)\n", + "crate.add_file(new_source, \"test_file_galaxy.txt\")\n", + "crate.write(\"/home/padge/Downloads/Elixir/ro-crate-py/tmp/crate\")\n", + "with open(readme.source, \"rt\") as f:\n", + " print(f.readline().strip())\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# import pdb\n", + "from IPython.core.debugger import set_trace\n", + "set_trace()\n", + "\n", + "# adding a Dataset\n", + "crate = ROCrate(\"/home/padge/Downloads/Elixir/ro-crate-py/tmp/crate/\")\n", + "sample_dir = '/home/padge/Downloads/Elixir/ro-crate-py/tmp/examples2/'\n", + "dataset_entity = crate.add_directory(sample_dir, \"new_dir\")\n", + "crate.write(\"/home/padge/Downloads/Elixir/ro-crate-py/tmp/crate2\")\n" + ] 
+ }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "from rocrate import rocrate_api\n", + "\n", + "wf_path = base_path + \"example-history-export3.ga\"\n", + "dataset_path = base_path + \"example-history-export3/datasets/\"\n", + "files_list = os.listdir(dataset_path)\n", + "files_list = [dataset_path + f for f in files_list]\n", + "\n", + "# Create base package\n", + "wf_crate = rocrate_api.make_workflow_rocrate(workflow_path=wf_path,wf_type=\"Galaxy\",include_files=files_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# write crate to disk\n", + "out_path = base_path + \"example-history-export3-crate\"\n", + "wf_crate.write(out_path)" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "bcc799c17010608b26cede4750872a7cf236af662c7ba3d883e539f7db8da28c" + }, + "kernelspec": { + "display_name": "Python 3.9.5 64-bit ('base': conda)", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.10" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/requirements.txt b/requirements.txt index 69b84474..a161f22b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,6 @@ galaxy2cwl jinja2 python-dateutil click +prov +typing-extensions +pydot \ No newline at end of file diff --git a/rocrate/provenance_constants.py b/rocrate/provenance_constants.py new file mode 100644 index 00000000..0e4956c4 --- /dev/null +++ b/rocrate/provenance_constants.py @@ -0,0 +1,55 @@ +import hashlib +import os +import uuid + +from prov.identifier import Namespace + +__citation__ = "https://doi.org/10.5281/zenodo.1208477" + +# NOTE: Semantic versioning of the CWLProv Research Object +# **and** the cwlprov files +# 
+# Rough guide (major.minor.patch): +# 1. Bump major number if removing/"breaking" resources or PROV statements +# 2. Bump minor number if adding resources or PROV statements +# 3. Bump patch number for non-breaking non-adding changes, +# e.g. fixing broken relative paths +CWLPROV_VERSION = "https://w3id.org/cwl/prov/0.6.0" + +# Research Object folders +METADATA = "metadata" +DATA = "data" +WORKFLOW = "workflow" +SNAPSHOT = "snapshot" +# sub-folders +MAIN = os.path.join(WORKFLOW, "main") +PROVENANCE = os.path.join(METADATA, "provenance") +LOGS = os.path.join(METADATA, "logs") +WFDESC = Namespace("wfdesc", "http://purl.org/wf4ever/wfdesc#") +WFPROV = Namespace("wfprov", "http://purl.org/wf4ever/wfprov#") +WF4EVER = Namespace("wf4ever", "http://purl.org/wf4ever/wf4ever#") +RO = Namespace("ro", "http://purl.org/wf4ever/ro#") +ORE = Namespace("ore", "http://www.openarchives.org/ore/terms/") +FOAF = Namespace("foaf", "http://xmlns.com/foaf/0.1/") +SCHEMA = Namespace("schema", "http://schema.org/") +CWLPROV = Namespace("cwlprov", "https://w3id.org/cwl/prov#") +ORCID = Namespace("orcid", "https://orcid.org/") +UUID = Namespace("id", "urn:uuid:") + +# BagIt and YAML always use UTF-8 +ENCODING = "UTF-8" +TEXT_PLAIN = 'text/plain; charset="%s"' % ENCODING + +# sha1, compatible with the File type's "checksum" field +# e.g. 
"checksum" = "sha1$47a013e660d408619d894b20806b1d5086aab03b" +# See ./cwltool/schemas/v1.0/Process.yml +Hasher = hashlib.sha1 +SHA1 = "sha1" +SHA256 = "sha256" +SHA512 = "sha512" + +# TODO: Better identifiers for user, at least +# these should be preserved in ~/.config/cwl for every execution +# on this host +USER_UUID = uuid.uuid4().urn +ACCOUNT_UUID = uuid.uuid4().urn diff --git a/rocrate/provenance_profile.py b/rocrate/provenance_profile.py new file mode 100644 index 00000000..378448aa --- /dev/null +++ b/rocrate/provenance_profile.py @@ -0,0 +1,582 @@ +import datetime + +# from tokenize import String +from io import StringIO +import urllib +import uuid +from io import BytesIO +from hashlib import md5 +from pathlib import PurePosixPath +from typing import ( + Any, + Dict, + List, + MutableSequence, + Optional, + Tuple, + Union, + cast, +) +from xmlrpc.client import Boolean + +from prov.identifier import Identifier +from prov.model import PROV, PROV_LABEL, PROV_TYPE, PROV_VALUE, ProvDocument, ProvEntity + +# import graphviz +from prov.dot import prov_to_dot +from tools.load_ga_export import load_ga_history_export, GalaxyJob, GalaxyDataset +from ast import literal_eval +import os + +# from .errors import WorkflowException +# from .job import CommandLineJob, JobBase +# from .loghandler import #_logger +# from .process import Process, shortname +from rocrate.provenance_constants import ( + ACCOUNT_UUID, + CWLPROV, + METADATA, + ORE, + PROVENANCE, + ENCODING, + # TEXT_PLAIN, + RO, + SCHEMA, + # SHA1, + UUID, + WF4EVER, + WFDESC, + WFPROV, +) + +# from .stdfsaccess import StdFsAccess +# from rocrate.utils_cwl import CWLObjectType, JobsType, get_listing, posix_path, versionstring +# from .workflow_job import WorkflowJob + +# if TYPE_CHECKING: +# from rocrate.provenance import ResearchObject + +from pathlib import Path + + +def posix_path(local_path: str) -> str: + return str(PurePosixPath(Path(local_path))) + + +def remove_escapes(s): + escapes = "".join([chr(char) for 
def remove_escapes(s):
    """Return *s* with the ASCII control characters 0x01-0x1F removed.

    The previous implementation computed the translated string but
    discarded it, so every caller received ``None``.
    """
    escapes = "".join(chr(code) for code in range(1, 32))
    return s.translate(str.maketrans("", "", escapes))


def reassign(d):
    """Replace, in place, values of *d* that are Python dict literals.

    Each value is passed to ``ast.literal_eval``; when the result is a
    ``dict`` it replaces the original value.  Values that are not valid
    literals (free text, numbers, ``None`` ...) are left untouched.
    Returns ``None``; the mapping is modified in place.
    """
    # Only existing keys are re-assigned, so the dict never changes size
    # while it is being iterated.
    for k, v in d.items():
        try:
            evald = literal_eval(v)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # literal_eval raises SyntaxError for ordinary free text such
            # as "hello world", not only ValueError.
            continue
        if isinstance(evald, dict):
            d[k] = evald


class ProvenanceProfile:
    """\
    Provenance profile.

    Populated from a galaxy workflow export.  On construction the
    history export is loaded, a PROV document describing the workflow run
    is created and one ProcessRun activity is declared per exported job.
    """

    # (file extension, keyword arguments for ProvDocument.serialize), in the
    # order in which the serialisations are produced.
    _PROV_FORMATS = (
        (".xml", {"format": "xml", "indent": 4}),  # https://www.w3.org/TR/prov-xml/
        (".provn", {"format": "provn", "indent": 2}),  # https://www.w3.org/TR/prov-n/
        (".json", {"format": "json", "indent": 2}),  # https://www.w3.org/Submission/prov-json/
        # "rdf" aka https://www.w3.org/TR/prov-o/
        (".ttl", {"format": "rdf", "rdf_format": "turtle"}),
        (".nt", {"format": "rdf", "rdf_format": "ntriples"}),
        # TODO: Use a nice JSON-LD context
        (".jsonld", {"format": "rdf", "rdf_format": "json-ld"}),
    )

    def __init__(
        self,
        ga_export_dir: Path,
        full_name: Optional[str] = None,
        orcid: Optional[str] = None,
        run_uuid: Optional[uuid.UUID] = None,
    ) -> None:
        """
        Initialize the provenance profile.
        Keyword arguments:
        ga_export_dir -- directory holding the galaxy history export
        full_name -- author name (optional)
        orcid -- orcid (optional)
        run_uuid -- uuid for the workflow run, used only when the exported
                    jobs carry no ``__workflow_invocation_uuid__`` parameter
        """
        self.orcid = orcid
        self.ga_export_dir = ga_export_dir
        self.ro_uuid = uuid.uuid4()
        # TODO: should be connected to a ro_crate?
        self.base_uri = "arcp://uuid,%s/" % self.ro_uuid
        self.document = ProvDocument()
        # TODO extract engine_uuid from galaxy, type: str
        self.engine_uuid = "urn:uuid:%s" % uuid.uuid4()  # type: str
        self.full_name = full_name
        # import galaxy history metadata
        metadata_export = load_ga_history_export(ga_export_dir)

        # md5 hex digest -> BytesIO with the string content, filled by
        # declare_string()
        self.declared_strings_s = {}

        self.datasets = []
        for dataset in metadata_export["datasets_attrs"]:
            datasets_attrs = GalaxyDataset()
            datasets_attrs.parse_ga_dataset_attrs(dataset)
            self.datasets.append(datasets_attrs.attributes)

        self.workflow_invocation_uuid = set()
        self.jobs = {}
        for job in metadata_export["jobs_attrs"]:
            job_attrs = GalaxyJob()
            job_attrs.parse_ga_jobs_attrs(job)
            self.jobs[job_attrs.attributes["encoded_id"]] = job_attrs.attributes
            try:
                self.workflow_invocation_uuid.add(
                    job_attrs.attributes["parameters"]["__workflow_invocation_uuid__"]
                )
            except KeyError:
                # job was not part of a workflow invocation
                pass

        if self.workflow_invocation_uuid:
            # min() instead of next(iter(...)): identical for a single
            # invocation, deterministic when the export mixes several.
            self.workflow_run_uuid = uuid.UUID(min(self.workflow_invocation_uuid))
        else:
            self.workflow_run_uuid = run_uuid or uuid.uuid4()
        self.workflow_run_uri = self.workflow_run_uuid.urn  # type: str

        self.generate_prov_doc()
        for job_attributes in self.jobs.values():
            self.declare_process(job_attributes)

    def __str__(self) -> str:
        """Represent this Provenance profile as a string."""
        return "ProvenanceProfile <{}>".format(self.workflow_run_uri)

    def generate_prov_doc(self) -> "Tuple[str, ProvDocument]":
        """Add basic namespaces, agents and the workflow-run activity.

        Returns the workflow run URI together with the PROV document.
        """
        # TODO:
        # can we identify a host where the workflow was executed?
        # should OnlineAccount be used to describe a galaxy user?
        # PROV_TYPE: FOAF["OnlineAccount"],
        # TODO: change how we register galaxy version, probably a declare_version func
        # TODO: change notation to already imported namespaces?
        self.document.add_namespace("wfprov", "http://purl.org/wf4ever/wfprov#")
        self.document.add_namespace("wfdesc", "http://purl.org/wf4ever/wfdesc#")
        # TODO: Make this ontology. For now only has cwlprov:image
        self.document.add_namespace("cwlprov", "https://w3id.org/cwl/prov#")
        self.document.add_namespace("foaf", "http://xmlns.com/foaf/0.1/")
        self.document.add_namespace("schema", "http://schema.org/")
        self.document.add_namespace("orcid", "https://orcid.org/")
        self.document.add_namespace("id", "urn:uuid:")
        # NOTE: Internet draft expired 2004-03-04 (!)
        #  https://tools.ietf.org/html/draft-thiemann-hash-urn-01
        # TODO: Change to nih:sha-256; hashes
        #  https://tools.ietf.org/html/rfc6920#section-7
        self.document.add_namespace("data", "urn:hash::sha1:")

        self.provenance_ns = self.document.add_namespace(
            "provenance", self.base_uri + posix_path(PROVENANCE) + "/"
        )
        # TODO: use appropriate refs for ga_export and related inputs
        ro_identifier_workflow = self.base_uri + "ga_export" + "/"
        self.wf_ns = self.document.add_namespace("wf", ro_identifier_workflow)
        ro_identifier_input = self.base_uri + "ga_export/datasets#"
        self.document.add_namespace("input", ro_identifier_input)

        # More info about the account (e.g. username, fullname)
        # TODO: extract this info from galaxy somehow, probably only a username
        account = self.document.agent(ACCOUNT_UUID)
        if self.orcid or self.full_name:
            person = {PROV_TYPE: PROV["Person"], "prov:type": SCHEMA["Person"]}
            if self.full_name:
                person["prov:label"] = self.full_name
                person["foaf:name"] = self.full_name
                person["schema:name"] = self.full_name
            # TODO: otherwise look up the name from the ORCID API?

            agent = self.document.agent(self.orcid or uuid.uuid4().urn, person)
            self.document.actedOnBehalfOf(account, agent)

        # The engine that executed the workflow
        wfengine = self.document.agent(
            self.engine_uuid,
            {
                PROV_TYPE: PROV["SoftwareAgent"],
                "prov:type": WFPROV["WorkflowEngine"],
                # TODO: get galaxy version
                "prov:label": "galaxy_version_placeholder",
            },
        )
        self.document.wasStartedBy(wfengine, None, account, datetime.datetime.now())
        # define workflow run level activity
        self.document.activity(
            self.workflow_run_uri,
            datetime.datetime.now(),
            None,
            {
                PROV_TYPE: WFPROV["WorkflowRun"],
                "prov:label": "Run of galaxy workflow",
            },
        )
        # association between SoftwareAgent and WorkflowRun
        main_workflow = "wf:main"
        self.document.wasAssociatedWith(
            self.workflow_run_uri, self.engine_uuid, main_workflow
        )
        self.document.wasStartedBy(
            self.workflow_run_uri, None, self.engine_uuid, datetime.datetime.now()
        )
        return (self.workflow_run_uri, self.document)

    def declare_process(
        self,
        ga_export_jobs_attrs: dict,
        process_run_id: Optional[str] = None,
    ) -> str:
        """Record one exported job as a ProcessRun activity.

        ga_export_jobs_attrs -- job attributes; ``tool_id``, ``create_time``
                                and ``update_time`` are read here
        process_run_id -- identifier for the activity; a fresh UUID URN is
                          generated when omitted

        Returns the identifier of the declared activity.
        """
        if process_run_id is None:
            process_run_id = uuid.uuid4().urn

        process_name = ga_export_jobs_attrs["tool_id"]
        # TODO: insert workflow id
        prov_label = "Run of " + process_name
        start_time = ga_export_jobs_attrs["create_time"]
        end_time = ga_export_jobs_attrs["update_time"]

        # TODO: Find out how to include commandline as a string
        self.document.activity(
            process_run_id,
            start_time,
            end_time,
            {
                PROV_TYPE: WFPROV["ProcessRun"],
                PROV_LABEL: prov_label,
            },
        )
        self.document.wasAssociatedWith(
            process_run_id, self.engine_uuid, str("wf:main/" + process_name)
        )
        self.document.wasStartedBy(
            process_run_id, None, self.workflow_run_uri, start_time, None, None
        )
        self.used_artefacts(process_run_id, ga_export_jobs_attrs)
        return process_run_id

    def used_artefacts(
        self,
        process_run_id: str,
        process_metadata: dict,
        process_name: Optional[str] = None,
    ) -> None:
        """Add used() for each data artefact.

        Every id listed under the job's ``inputs`` and ``outputs`` is
        matched against the exported datasets (by ``encoded_id`` or by the
        copied-from chain) and each match is declared as used by the
        process.  ``parameters`` must be present but are not recorded yet.
        """
        # FIXME: Use workflow name if available,
        # "main" is wrong for nested workflows
        base = "main"
        if process_name is not None:
            base += "/" + process_name
        base += "/" + process_metadata["tool_id"]

        for item in ("inputs", "outputs", "parameters"):
            entries = process_metadata[item]
            if item == "parameters":
                # TODO: parameters are not recorded as entities yet
                continue
            for key, value in entries.items():
                if not value:
                    # nothing referenced (the former ``pass`` was a no-op and
                    # let a None value crash the iteration below)
                    continue
                if isinstance(key, str):
                    key = key.replace("|", "_")
                if isinstance(value, str):
                    value = value.replace("|", "_")

                prov_role = self.wf_ns[f"{base}/{key}"]

                for v in value:
                    for d in self.datasets:
                        known_ids = [d["encoded_id"]] + d[
                            "copied_from_history_dataset_association_id_chain"
                        ]
                        if v in known_ids:
                            self.declare_entity(process_run_id, d, prov_role)

    def declare_entity(self, process_run_id, value, prov_role) -> None:
        """Declare *value* as an artefact used by *process_run_id* in *prov_role*."""
        try:
            entity = self.declare_artefact(value)
            self.document.used(
                process_run_id,
                entity,
                datetime.datetime.now(),
                None,
                {"prov:role": prov_role},
            )
        except OSError:
            # best effort: an artefact that cannot be read is left out of
            # the provenance instead of aborting the whole export
            pass

    def declare_artefact(self, value: Any) -> "Optional[ProvEntity]":
        """Create data artefact entities for all file objects.

        Handles ``None``, scalars, strings, bytes and dictionaries (File,
        Directory or generic).  Any other type is not represented and
        yields ``None``.
        """
        if value is None:
            # FIXME: If this can happen we'll need a better way to
            # represent this in PROV
            return self.document.entity(CWLPROV["None"], {PROV_LABEL: "None"})

        if isinstance(value, (bool, int, float)):
            # Typically used in job documents for flags

            # FIXME: Make consistent hash URIs for these
            # that somehow include the type
            # (so "1" != 1 != "1.0" != true)
            return self.document.entity(uuid.uuid4().urn, {PROV_VALUE: value})

        if isinstance(value, str):
            (entity, _) = self.declare_string(value)
            return entity

        if isinstance(value, bytes):
            # FIXME: the identifier embeds the repr of the bytes; a content
            # hash would be a better identifier
            data_id = "data:%s" % str(value)
            return self.document.entity(
                data_id,
                {PROV_TYPE: WFPROV["Artifact"], PROV_VALUE: str(value)},
            )

        if isinstance(value, dict):
            if "@id" in value:
                # Already processed this value,
                # but it might not be in this PROV
                entities = self.document.get_record(value["@id"])
                if entities:
                    return entities[0]
                # else, unknown in PROV, re-add below as if it's fresh

            # Base case - we found a File we need to update
            if value.get("class") == "File":
                entity = self.declare_file(value)
                value["@id"] = entity.identifier.uri
                return entity

            if value.get("class") == "Directory":
                entity = self.declare_directory(value)
                value["@id"] = entity.identifier.uri
                return entity

            coll_id = value.setdefault("@id", uuid.uuid4().urn)
            # some other kind of dictionary?
            # TODO: also Save as JSON
            coll = self.document.entity(
                coll_id,
                [
                    (PROV_TYPE, WFPROV["Artifact"]),
                    (PROV_TYPE, PROV["Collection"]),
                    (PROV_TYPE, PROV["Dictionary"]),
                ],
            )

            if value.get("class"):
                # FIXME: The class might be "http://example.com/somethingelse"
                coll.add_asserted_type(CWLPROV[value["class"]])
            # The entity was previously built but never returned.
            return coll

        # NOTE(review): sequences and other types are not represented yet.
        return None

    def declare_file(self, value: Dict) -> "ProvEntity":
        """Declare a ``class: File`` dictionary as a wf4ever File entity.

        The entity id is taken from ``@id`` when present, otherwise it is
        derived from ``dataset_uuid`` and stored back under ``@id``.
        Raises ValueError when *value* is not of class File.
        """
        if value["class"] != "File":
            raise ValueError("Must have class:File: %s" % value)

        # Track filename and extension, this is generally useful only for
        # secondaryFiles. Note that multiple uses of a file might thus record
        # different names for the same entity, so we'll
        # make/track a specialized entity by UUID
        if "@id" not in value:
            # only touch dataset_uuid when it is actually needed
            value["@id"] = uuid.UUID(value["dataset_uuid"]).urn
        file_id = value["@id"]
        # A specialized entity that has just these names
        file_entity = self.document.entity(
            file_id,
            [(PROV_TYPE, WFPROV["Artifact"]), (PROV_TYPE, WF4EVER["File"])],
        )  # type: ProvEntity

        if "name" in value:
            file_entity.add_attributes({CWLPROV["basename"]: value["name"]})
        if "extension" in value:
            file_entity.add_attributes({CWLPROV["nameext"]: value["extension"]})

        return file_entity

    def declare_directory(self, value: Dict) -> "ProvEntity":
        """Declare a ``class: Directory`` dictionary as a folder entity.

        Minimal implementation so that declare_artefact() no longer fails
        with AttributeError: only the folder entity itself is recorded, its
        members are not listed.
        Raises ValueError when *value* is not of class Directory.
        """
        if value["class"] != "Directory":
            raise ValueError("Must have class:Directory: %s" % value)
        dir_id = value.setdefault("@id", uuid.uuid4().urn)
        return self.document.entity(
            dir_id,
            [
                (PROV_TYPE, WFPROV["Artifact"]),
                (PROV_TYPE, PROV["Collection"]),
                (PROV_TYPE, PROV["Dictionary"]),
                (PROV_TYPE, RO["Folder"]),
            ],
        )

    def declare_string(self, value: str) -> "Tuple[ProvEntity, str]":
        """Save as string in UTF-8.

        "|" is replaced by "_"; the encoded content is kept in
        ``declared_strings_s`` under its md5 hex digest.
        Returns the entity and that digest.
        """
        text = str(value).replace("|", "_")
        byte_s = BytesIO(text.encode(ENCODING))
        checksum = md5(byte_s.getbuffer()).hexdigest()
        self.declared_strings_s[checksum] = byte_s

        # FIXME: Don't naively assume add_data_file uses hash in filename!
        data_id = "data:%s" % checksum
        entity = self.document.entity(
            data_id, {PROV_TYPE: WFPROV["Artifact"], PROV_VALUE: text}
        )  # type: ProvEntity
        return entity, checksum

    def generate_output_prov(
        self,
        final_output: Union[Dict, MutableSequence, None],
        process_run_id: Optional[str],
        name: Optional[str],
    ) -> None:
        """Call wasGeneratedBy() for each output,copy the files into the RO.

        A list of output dictionaries is processed entry by entry; ``None``
        is ignored.
        """
        if isinstance(final_output, MutableSequence):
            for entry in final_output:
                self.generate_output_prov(entry, process_run_id, name)
            return
        if final_output is None:
            return

        # local import: ``import urllib`` alone does not load urllib.parse
        from urllib.parse import quote

        # Timestamp should be created at the earliest
        timestamp = datetime.datetime.now()

        # Quote the name once; quoting inside the loop re-encoded "%" for
        # every output after the first ("%20" -> "%2520").
        quoted_name = None if name is None else quote(str(name), safe=":/,#")
        if not process_run_id:
            process_run_id = self.workflow_run_uri

        # For each output, find/register the corresponding
        # entity (UUID) and document it as generated in
        # a role corresponding to the output
        for output, value in final_output.items():
            entity = self.declare_artefact(value)
            if quoted_name is not None:
                # FIXME: Probably not "main" in nested workflows
                role = self.wf_ns[f"main/{quoted_name}/{output}"]
            else:
                role = self.wf_ns["main/%s" % output]

            self.document.wasGeneratedBy(
                entity, process_run_id, timestamp, None, {"prov:role": role}
            )

    def finalize_prov_profile(
        self, out_path: Optional[Path] = None, serialize: bool = False, name=None
    ) -> "Tuple[Dict[str, StringIO], List[Identifier], StringIO]":
        """Transfer the provenance related files to the RO-crate.

        out_path -- directory for the files written when *serialize* is
                    true; created if missing, current directory if None
        serialize -- also write every serialisation to disk
        name -- workflow name used in the file names (None: main workflow)

        Returns ``(serialized_prov_docs, prov_ids, graph_s)``: a mapping of
        file name to a StringIO with the serialised document, the list of
        provenance identifiers of those files, and a StringIO holding the
        DOT graph of the document.
        """
        # local import: ``import urllib`` alone does not load urllib.parse
        from urllib.parse import quote

        # NOTE: Relative posix path
        if name is None:
            # main workflow, fixed filenames
            filename = "ga_export.cwlprov"
        else:
            # ASCII-friendly filename,
            # avoiding % as we don't want %2520 in manifest.json
            wf_name = quote(str(name), safe="").replace("%", "_")
            # Note that the above could cause overlaps for similarly named
            # workflows, but that's OK as we'll also include run uuid
            # which also covers the case of this step being run in
            # multiple places or iterations
            filename = f"{wf_name}.{self.workflow_run_uuid}.cwlprov"

        # serialized prov documents
        serialized_prov_docs = {}
        # list of prov identifiers of provenance files
        prov_ids = []
        for extension, options in self._PROV_FORMATS:
            serialized_prov_docs[filename + extension] = StringIO(
                self.document.serialize(**options)
            )
            prov_ids.append(self.provenance_ns[filename + extension])

        # Constructed from the text so the stream is positioned at its start.
        graph_s = StringIO(prov_to_dot(self.document).to_string())

        if serialize:
            if out_path is not None:
                # exists()/makedirs() on None raised TypeError before
                os.makedirs(out_path, exist_ok=True)
                basename = str(PurePosixPath(out_path) / filename)
            else:
                basename = filename

            for extension, options in self._PROV_FORMATS:
                with open(basename + extension, "w") as provenance_file:
                    self.document.serialize(provenance_file, **options)

        return (serialized_prov_docs, prov_ids, graph_s)
# WIP
def make_workflow_run_rocrate(
    workflow_path,
    wf_type,
    wfr_metadata_path,
    author=None,
    orcid=None,
    include_files=None,
    fetch_remote=False,
    prov_name=None,
    cwl=None,
    diagram=None,
):
    """Build an RO-Crate describing one run of a workflow.

    The crate contains the workflow itself, the given data files under
    ``datasets/``, the string artefacts declared while building the
    provenance under ``artifacts/`` and the serialised provenance documents
    plus a DOT graph under ``provenance/``.

    workflow_path -- path of the workflow definition file
    wf_type -- workflow language, passed to ``add_workflow`` as ``lang``
    wfr_metadata_path -- directory of the galaxy history export
    author, orcid -- optional author name / ORCID for the provenance
    include_files -- paths (``str`` or ``Path``) of the files to add
    fetch_remote -- passed through to ``add_workflow``
    prov_name, diagram -- currently unused
    cwl -- a CWL version of the workflow is generated only when this is None

    Returns the populated ROCrate; nothing is written to disk here.
    """
    wfr_crate = ROCrate()
    workflow_path = Path(workflow_path)
    wf_file = wfr_crate.add_workflow(
        workflow_path,
        workflow_path.name,
        fetch_remote=fetch_remote,
        main=True,
        lang=wf_type,
        gen_cwl=(cwl is None),
    )
    if "url" in wf_file.properties():
        wf_file["codeRepository"] = wf_file["url"]

    # add extra files
    datasets = Path("datasets")
    wfr_crate.add_dataset(datasets)
    for file_entry in include_files or ():
        # Path(...) so that plain string paths work as well as Path objects
        wfr_crate.add_file(file_entry, datasets / Path(file_entry).name)

    prov = ProvenanceProfile(Path(wfr_metadata_path), author, orcid)

    # string artefacts collected while the provenance was generated
    artifacts = Path("artifacts")
    wfr_crate.add_dataset(artifacts)
    for checksum, content in prov.declared_strings_s.items():
        wfr_crate.add_file(content, artifacts / checksum)

    prov_docs, _, graph = prov.finalize_prov_profile()
    # add output files to ro-crate
    provenance = Path("provenance")
    wfr_crate.add_dataset(provenance)
    for doc_name, content in prov_docs.items():
        wfr_crate.add_file(content, provenance / doc_name)

    wfr_crate.add_file(graph, provenance / "graph.dot")
    return wfr_crate
+hello +world diff --git a/test/test-data/test_ga_history_export/history_export/datasets/dataset1.txt_2.txt b/test/test-data/test_ga_history_export/history_export/datasets/dataset1.txt_2.txt new file mode 100644 index 00000000..94954abd --- /dev/null +++ b/test/test-data/test_ga_history_export/history_export/datasets/dataset1.txt_2.txt @@ -0,0 +1,2 @@ +hello +world diff --git a/test/test-data/test_ga_history_export/history_export/datasets/dataset2.txt_1.txt b/test/test-data/test_ga_history_export/history_export/datasets/dataset2.txt_1.txt new file mode 100644 index 00000000..718e715c --- /dev/null +++ b/test/test-data/test_ga_history_export/history_export/datasets/dataset2.txt_1.txt @@ -0,0 +1,2 @@ +hello +universe! diff --git a/test/test-data/test_ga_history_export/history_export/datasets/tutorial_output.txt_4.txt b/test/test-data/test_ga_history_export/history_export/datasets/tutorial_output.txt_4.txt new file mode 100644 index 00000000..f631dd54 --- /dev/null +++ b/test/test-data/test_ga_history_export/history_export/datasets/tutorial_output.txt_4.txt @@ -0,0 +1,3 @@ +hello +universe! +world diff --git a/test/test-data/test_ga_history_export/history_export/datasets_attrs.txt b/test/test-data/test_ga_history_export/history_export/datasets_attrs.txt new file mode 100644 index 00000000..2a1cf10b --- /dev/null +++ b/test/test-data/test_ga_history_export/history_export/datasets_attrs.txt @@ -0,0 +1,101 @@ +[{ + "annotation": null, + "blurb": "2 lines", + "copied_from_history_dataset_association_id_chain": ["2524d3d3b68ad39a"], + "create_time": "2021-06-23 10:46:27.112621", + "dataset_uuid": "18e78952-9447-4215-942c-542c35250d20", + "deleted": false, + "designation": null, + "encoded_id": "7ca3de8f49293d1a", + "extension": "txt", + "file_name": "datasets/dataset2.txt_1.txt", + "hid": 1, + "history_encoded_id": "a9eae7b6d5994701", + "info": "", + "metadata": { + "data_lines": 2, + "dbkey": "?" 
+ }, + "model_class": "HistoryDatasetAssociation", + "name": "dataset2.txt", + "peek": "hello\nuniverse!\n", + "state": "ok", + "tags": [], + "update_time": "2021-06-23 10:46:28.363987", + "visible": true +}, { + "annotation": null, + "blurb": "2 lines", + "copied_from_history_dataset_association_id_chain": ["7c1d8e3101dbcdba"], + "create_time": "2021-06-23 10:46:27.185496", + "dataset_uuid": "6b0af26e-574c-475a-bb6d-1b4df9f137d8", + "deleted": false, + "designation": null, + "encoded_id": "2e789aae2475b7da", + "extension": "txt", + "file_name": "datasets/dataset1.txt_2.txt", + "hid": 2, + "history_encoded_id": "a9eae7b6d5994701", + "info": "", + "metadata": { + "data_lines": 2, + "dbkey": "?" + }, + "model_class": "HistoryDatasetAssociation", + "name": "dataset1.txt", + "peek": "hello\nworld\n", + "state": "ok", + "tags": [], + "update_time": "2021-06-23 10:46:28.363991", + "visible": true +}, { + "annotation": null, + "blurb": "4 lines", + "copied_from_history_dataset_association_id_chain": [], + "create_time": "2021-06-23 10:46:28.365628", + "dataset_uuid": "cf936245-b1d6-4840-b79d-2e20af74b921", + "deleted": false, + "designation": "out_file1", + "encoded_id": "fd7fec473f1e6c6b", + "extension": "txt", + "file_name": "datasets/Concatenate_datasets_on_data_2_and_data_1_3.txt", + "hid": 3, + "history_encoded_id": "a9eae7b6d5994701", + "info": "", + "metadata": { + "data_lines": 4, + "dbkey": "?" 
+ }, + "model_class": "HistoryDatasetAssociation", + "name": "Concatenate datasets on data 2 and data 1", + "peek": "hello\nuniverse!\nhello\nworld\n", + "state": "ok", + "tags": [], + "update_time": "2021-06-23 10:46:44.644667", + "visible": true +}, { + "annotation": null, + "blurb": "3 lines", + "copied_from_history_dataset_association_id_chain": [], + "create_time": "2021-06-23 10:46:28.531348", + "dataset_uuid": "d9b5abdc-2a54-4fb7-b3df-1f05e9f2399a", + "deleted": false, + "designation": "out_file1", + "encoded_id": "775bc9b3c1750167", + "extension": "txt", + "file_name": "datasets/tutorial_output.txt_4.txt", + "hid": 4, + "history_encoded_id": "a9eae7b6d5994701", + "info": "\nKept 3 of 4 total lines.\nUsed random seed of \"0\".", + "metadata": { + "data_lines": 3, + "dbkey": "?" + }, + "model_class": "HistoryDatasetAssociation", + "name": "tutorial_output.txt", + "peek": "hello\nuniverse!\nworld\n", + "state": "ok", + "tags": [], + "update_time": "2021-06-23 10:47:04.750978", + "visible": true +}] \ No newline at end of file diff --git a/test/test-data/test_ga_history_export/history_export/datasets_attrs.txt.provenance b/test/test-data/test_ga_history_export/history_export/datasets_attrs.txt.provenance new file mode 100644 index 00000000..0637a088 --- /dev/null +++ b/test/test-data/test_ga_history_export/history_export/datasets_attrs.txt.provenance @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/test/test-data/test_ga_history_export/history_export/export_attrs.txt b/test/test-data/test_ga_history_export/history_export/export_attrs.txt new file mode 100644 index 00000000..a2100763 --- /dev/null +++ b/test/test-data/test_ga_history_export/history_export/export_attrs.txt @@ -0,0 +1 @@ +{"galaxy_export_version": "2"} \ No newline at end of file diff --git a/test/test-data/test_ga_history_export/history_export/history_attrs.txt b/test/test-data/test_ga_history_export/history_export/history_attrs.txt new file mode 100644 index 00000000..53de783c --- 
/dev/null +++ b/test/test-data/test_ga_history_export/history_export/history_attrs.txt @@ -0,0 +1,11 @@ +{ + "model_class": "History", + "create_time": "2021-06-23 10:46:26.457161", + "update_time": "2022-01-06 17:05:06.816140", + "name": "Minimal-history", + "hid_counter": 5, + "genome_build": "?", + "annotation": null, + "tags": ["planemo-tutorial"], + "encoded_id": "a9eae7b6d5994701" +} \ No newline at end of file diff --git a/test/test-data/test_ga_history_export/history_export/implicit_collection_jobs_attrs.txt b/test/test-data/test_ga_history_export/history_export/implicit_collection_jobs_attrs.txt new file mode 100644 index 00000000..0637a088 --- /dev/null +++ b/test/test-data/test_ga_history_export/history_export/implicit_collection_jobs_attrs.txt @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/test/test-data/test_ga_history_export/history_export/jobs_attrs.txt b/test/test-data/test_ga_history_export/history_export/jobs_attrs.txt new file mode 100644 index 00000000..1e2a84f0 --- /dev/null +++ b/test/test-data/test_ga_history_export/history_export/jobs_attrs.txt @@ -0,0 +1,165 @@ +[{ + "command_line": "python '/srv/galaxy/shared/src/lib/galaxy/tools/data_fetch.py' --galaxy-root '/srv/galaxy/shared/src' --datatypes-registry '/srv/galaxy/shared/etc/tmpdir/jwd/079/79718/registry.xml' --request-version '1' --request '/srv/galaxy/shared/etc/tmpdir/jwd/079/79718/configs/tmpjgi6b5_u'", + "create_time": "2021-06-23T10:20:22.770291", + "encoded_id": "7b69c80a9f428343", + "exit_code": 0, + "galaxy_version": "21.01", + "implicit_output_dataset_collection_mapping": {}, + "info": null, + "input_dataset_collection_element_mapping": {}, + "input_dataset_collection_mapping": {}, + "input_dataset_mapping": {}, + "job_stderr": "", + "job_stdout": "", + "model_class": "Job", + "output_dataset_collection_mapping": {}, + "output_dataset_mapping": { + "output0": ["2524d3d3b68ad39a"] + }, + "params": { + "file_count": "1", + "files": [{ + "__index__": 0, + "file_data": 
"/srv/galaxy/shared/etc/tmpdir/tmp/upload_file_data_85x51jn2" + }], + "paramfile": null, + "request_json": "{\"targets\": [{\"destination\": {\"type\": \"hdas\"}, \"elements\": [{\"ext\": \"auto\", \"dbkey\": \"?\", \"to_posix_lines\": true, \"src\": \"path\", \"name\": \"dataset2.txt\", \"hashes\": [], \"in_place\": false, \"purge_source\": false, \"path\": \"/srv/galaxy/shared/etc/tmpdir/tmp/upload_file_data_85x51jn2\", \"object_id\": 155708}], \"auto_decompress\": false}], \"check_content\": true}", + "request_version": "1" + }, + "state": "ok", + "tool_id": "__DATA_FETCH__", + "tool_stderr": "", + "tool_stdout": "", + "tool_version": "0.1.0", + "traceback": null, + "update_time": "2021-06-23T10:20:42.041319" +}, { + "command_line": "python '/srv/galaxy/shared/src/lib/galaxy/tools/data_fetch.py' --galaxy-root '/srv/galaxy/shared/src' --datatypes-registry '/srv/galaxy/shared/etc/tmpdir/jwd/079/79717/registry.xml' --request-version '1' --request '/srv/galaxy/shared/etc/tmpdir/jwd/079/79717/configs/tmpmod1j0bw'", + "create_time": "2021-06-23T10:20:00.981702", + "encoded_id": "77dd7a937748a715", + "exit_code": 0, + "galaxy_version": "21.01", + "implicit_output_dataset_collection_mapping": {}, + "info": null, + "input_dataset_collection_element_mapping": {}, + "input_dataset_collection_mapping": {}, + "input_dataset_mapping": {}, + "job_stderr": "", + "job_stdout": "", + "model_class": "Job", + "output_dataset_collection_mapping": {}, + "output_dataset_mapping": { + "output0": ["7c1d8e3101dbcdba"] + }, + "params": { + "file_count": "1", + "files": [{ + "__index__": 0, + "file_data": "/srv/galaxy/shared/etc/tmpdir/tmp/upload_file_data_asmrlj6a" + }], + "paramfile": null, + "request_json": "{\"targets\": [{\"destination\": {\"type\": \"hdas\"}, \"elements\": [{\"ext\": \"auto\", \"dbkey\": \"?\", \"to_posix_lines\": true, \"src\": \"path\", \"name\": \"dataset1.txt\", \"hashes\": [], \"in_place\": false, \"purge_source\": false, \"path\": 
\"/srv/galaxy/shared/etc/tmpdir/tmp/upload_file_data_asmrlj6a\", \"object_id\": 155707}], \"auto_decompress\": false}], \"check_content\": true}", + "request_version": "1" + }, + "state": "ok", + "tool_id": "__DATA_FETCH__", + "tool_stderr": "", + "tool_stdout": "", + "tool_version": "0.1.0", + "traceback": null, + "update_time": "2021-06-23T10:20:22.202678" +}, { + "command_line": "python /srv/galaxy/shared/src/tools/filters/catWrapper.py '/srv/galaxy/shared/etc/tmpdir/jwd/079/79726/outputs/galaxy_dataset_cf936245-b1d6-4840-b79d-2e20af74b921.dat' '/srv/galaxy/shared/database/files/000/134/dataset_134410.dat' '/srv/galaxy/shared/database/files/000/134/dataset_134409.dat'", + "create_time": "2021-06-23T10:46:28.351123", + "encoded_id": "77cf44871c57dd95", + "exit_code": 0, + "galaxy_version": "21.01", + "implicit_output_dataset_collection_mapping": {}, + "info": null, + "input_dataset_collection_element_mapping": {}, + "input_dataset_collection_mapping": {}, + "input_dataset_mapping": { + "input1": ["7ca3de8f49293d1a"], + "queries_0|input2": ["2e789aae2475b7da"] + }, + "job_stderr": "", + "job_stdout": "", + "model_class": "Job", + "output_dataset_collection_mapping": {}, + "output_dataset_mapping": { + "out_file1": ["fd7fec473f1e6c6b"] + }, + "params": { + "__input_ext": "txt", + "__workflow_invocation_uuid__": "42f21c0ad41011eb87defa163eba836f", + "chromInfo": "/srv/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len", + "dbkey": "?", + "input1": { + "values": [{ + "id": "7ca3de8f49293d1a", + "src": "hda" + }] + }, + "queries": [{ + "__index__": 0, + "input2": { + "values": [{ + "id": "2e789aae2475b7da", + "src": "hda" + }] + } + }] + }, + "state": "ok", + "tool_id": "cat1", + "tool_stderr": "", + "tool_stdout": "", + "tool_version": "1.0.0", + "traceback": null, + "update_time": "2021-06-23T10:46:44.738703" +}, { + "command_line": "python '/srv/galaxy/shared/src/tools/filters/random_lines_two_pass.py' 
'/srv/galaxy/shared/database/files/000/134/dataset_134423.dat' '/srv/galaxy/shared/etc/tmpdir/jwd/079/79727/outputs/galaxy_dataset_d9b5abdc-2a54-4fb7-b3df-1f05e9f2399a.dat' '3' --seed '0'", + "create_time": "2021-06-23T10:46:28.526901", + "encoded_id": "ea1a7696fd6484de", + "exit_code": 0, + "galaxy_version": "21.01", + "implicit_output_dataset_collection_mapping": {}, + "info": null, + "input_dataset_collection_element_mapping": {}, + "input_dataset_collection_mapping": {}, + "input_dataset_mapping": { + "input": ["fd7fec473f1e6c6b"] + }, + "job_stderr": "", + "job_stdout": "", + "model_class": "Job", + "output_dataset_collection_mapping": {}, + "output_dataset_mapping": { + "out_file1": ["775bc9b3c1750167"] + }, + "params": { + "__input_ext": "txt", + "__workflow_invocation_uuid__": "42f21c0ad41011eb87defa163eba836f", + "chromInfo": "/srv/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len", + "dbkey": "?", + "input": { + "values": [{ + "id": "fd7fec473f1e6c6b", + "src": "hda" + }] + }, + "num_lines": "3", + "seed_source": { + "__current_case__": 1, + "seed": "0", + "seed_source_selector": "set_seed" + } + }, + "state": "ok", + "tool_id": "random_lines1", + "tool_stderr": "", + "tool_stdout": "Kept 3 of 4 total lines.\nUsed random seed of \"0\".\n", + "tool_version": "2.0.2", + "traceback": null, + "update_time": "2021-06-23T10:47:04.837750" +}] \ No newline at end of file diff --git a/test/test-data/test_ga_history_export/history_export/libraries_attrs.txt b/test/test-data/test_ga_history_export/history_export/libraries_attrs.txt new file mode 100644 index 00000000..0637a088 --- /dev/null +++ b/test/test-data/test_ga_history_export/history_export/libraries_attrs.txt @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/test/test-data/test_ga_history_export/wf_definition.ga b/test/test-data/test_ga_history_export/wf_definition.ga new file mode 100644 index 00000000..28194ed4 --- /dev/null +++ b/test/test-data/test_ga_history_export/wf_definition.ga @@ 
-0,0 +1,131 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "", + "format-version": "0.1", + "name": "Workflow constructed from history 'Minimal-history'", + "steps": { + "0": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "dataset2.txt" + } + ], + "label": "dataset2.txt", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 10, + "top": 10 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "8dfb1a60-ac90-4125-b1a4-c674a084bf37", + "workflow_outputs": [] + }, + "1": { + "annotation": "", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "dataset1.txt" + } + ], + "label": "dataset1.txt", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 10, + "top": 130 + }, + "tool_id": null, + "tool_state": "{\"optional\": false}", + "tool_version": null, + "type": "data_input", + "uuid": "104d6198-2f2e-46f1-8006-1c594f1f091c", + "workflow_outputs": [] + }, + "2": { + "annotation": "", + "content_id": "cat1", + "errors": null, + "id": 2, + "input_connections": { + "input1": { + "id": 0, + "output_name": "output" + }, + "queries_0|input2": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "Concatenate datasets", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 230, + "top": 10 + }, + "post_job_actions": {}, + "tool_id": "cat1", + "tool_state": "{\"__input_ext\": \"txt\", \"__workflow_invocation_uuid__\": \"42f21c0ad41011eb87defa163eba836f\", \"chromInfo\": \"/srv/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"input1\": null, \"queries\": [{\"__index__\": 0, \"input2\": null}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.0", + "type": "tool", + "uuid": 
"821567a8-1edd-4f35-9430-596fd6801ca1", + "workflow_outputs": [] + }, + "3": { + "annotation": "", + "content_id": "random_lines1", + "errors": null, + "id": 3, + "input_connections": { + "input": { + "id": 2, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Select random lines", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 450, + "top": 10 + }, + "post_job_actions": {}, + "tool_id": "random_lines1", + "tool_state": "{\"__input_ext\": \"txt\", \"__workflow_invocation_uuid__\": \"42f21c0ad41011eb87defa163eba836f\", \"chromInfo\": \"/srv/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"input\": null, \"num_lines\": \"3\", \"seed_source\": {\"seed_source_selector\": \"set_seed\", \"__current_case__\": 1, \"seed\": \"0\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.0.2", + "type": "tool", + "uuid": "a6b4dc53-9a0f-4a5c-9311-f9f03238f9dc", + "workflow_outputs": [] + } + }, + "tags": [], + "uuid": "de90339e-0497-49da-a1ce-f7e2dd3080c6", + "version": 0 +} \ No newline at end of file diff --git a/test/test_parse_ga_export.py b/test/test_parse_ga_export.py new file mode 100644 index 00000000..0492873e --- /dev/null +++ b/test/test_parse_ga_export.py @@ -0,0 +1,56 @@ +import os +from rocrate.rocrate import ROCrate, make_workflow_run_rocrate +from rocrate.provenance_profile import ProvenanceProfile + +from tools.load_ga_export import load_ga_history_export, GalaxyJob + + +def test_ga_history_loading(test_data_dir, tmpdir, helpers): + export_dir = "test_ga_history_export" + export_path = test_data_dir / export_dir / "history_export" + + metadata_export = load_ga_history_export(export_path) + jobs = [] + for job in metadata_export["jobs_attrs"]: + job_attrs = GalaxyJob() + job_attrs.parse_ga_jobs_attrs(job) + jobs.append(job_attrs.attributes) + + assert isinstance(job_attrs, GalaxyJob) + + assert len(jobs) == 4 + + +def 
test_ga_history_parsing(test_data_dir, tmpdir, helpers): + export_dir = "test_ga_history_export" + export_path = test_data_dir / export_dir / "history_export" + prov_path = tmpdir / "provenance" + prov = ProvenanceProfile(export_path, "PDG", "https://orcid.org/0000-0002-8940-4946") + + assert isinstance(prov, ProvenanceProfile) + + prov.finalize_prov_profile(out_path=prov_path) + + +def test_create_wf_run_ro_crate(test_data_dir, tmpdir, helpers): + + export_dir = "test_ga_history_export" + wfr_metadata_path = test_data_dir / export_dir / "history_export" + dataset_path = wfr_metadata_path / "datasets" + files_list = os.listdir(dataset_path) + files_list = [dataset_path / f for f in files_list] + wf_id = 'wf_definition.ga' + wf_path = test_data_dir / export_dir / wf_id + + wf_crate = make_workflow_run_rocrate( + workflow_path=wf_path, wfr_metadata_path=wfr_metadata_path, author=None, orcid=None, + wf_type="Galaxy", include_files=files_list, prov_name="test_prov" + ) + assert isinstance(wf_crate, ROCrate) + + # wf = wf_crate.dereference(wf_id) + + out_path = test_data_dir / export_dir / "history_export_ro_crate" + if not os.path.exists(out_path): + out_path.mkdir() + wf_crate.write(out_path) diff --git a/test/test_prov.ttl b/test/test_prov.ttl new file mode 100644 index 00000000..c2cdbc40 --- /dev/null +++ b/test/test_prov.ttl @@ -0,0 +1,1081 @@ +@prefix data: . +@prefix foaf: . +@prefix id: . +@prefix orcid: . +@prefix prov: . +@prefix rdfs: . +@prefix schema: . +@prefix wf: . +@prefix wfprov: . +@prefix xsd: . 
+ +id:2d74b09c-b2a3-4db2-9569-0ed8fb27b36a a wfprov:ProcessRun, + prov:Activity ; + rdfs:label "Run of ga_export/jobs_attrs.txt#cat1"^^xsd:string ; + prov:endedAtTime "2021-06-23T10:46:44.738703"^^xsd:dateTime ; + prov:qualifiedAssociation [ a prov:Association ; + prov:hadPlan ] ; + prov:qualifiedStart [ a prov:Start ; + prov:atTime "2021-06-23T10:46:28.351123"^^xsd:dateTime ; + prov:hadActivity id:20e49739-c446-4fa8-b564-c002a6762af8 ] ; + prov:qualifiedUsage [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.036989"^^xsd:dateTime ; + prov:entity data:txt ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.038601"^^xsd:dateTime ; + prov:entity id:cfa5924e-df83-482f-9986-1126f46a08fb ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.036616"^^xsd:dateTime ; + prov:entity id:b2c2277d-6021-4112-850d-e6919b16ffe2 ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.037115"^^xsd:dateTime ; + prov:entity ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.037050"^^xsd:dateTime ; + prov:entity data:42f21c0ad41011eb87defa163eba836f ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.037174"^^xsd:dateTime ; + prov:entity ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.036920"^^xsd:dateTime ; + prov:entity id:d2fd94f5-3b74-4275-b2e6-9a740901bd12 ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.037723"^^xsd:dateTime ; + prov:entity id:4211e9db-3e4c-4c72-b40b-5cd61e509919 ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.036738"^^xsd:dateTime ; + prov:entity id:e00df3b8-b936-45aa-931b-e55aa48d5845 ; + prov:hadRole ] ; + prov:startedAtTime "2021-06-23T10:46:28.351123"^^xsd:dateTime ; + prov:wasAssociatedWith id:e1515807-7299-444b-a8ff-366789285b60 . 
+ +id:799e5218-7301-43c2-851d-fa3da27b83d8 a wfprov:ProcessRun, + prov:Activity ; + rdfs:label "Run of ga_export/jobs_attrs.txt#random_lines1"^^xsd:string ; + prov:endedAtTime "2021-06-23T10:47:04.837750"^^xsd:dateTime ; + prov:qualifiedAssociation [ a prov:Association ; + prov:hadPlan ] ; + prov:qualifiedStart [ a prov:Start ; + prov:atTime "2021-06-23T10:46:28.526901"^^xsd:dateTime ; + prov:hadActivity id:20e49739-c446-4fa8-b564-c002a6762af8 ] ; + prov:qualifiedUsage [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.040067"^^xsd:dateTime ; + prov:entity id:16a9f31d-a9fd-410b-ab7e-4a3c2f6902e0 ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.039459"^^xsd:dateTime ; + prov:entity ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.040589"^^xsd:dateTime ; + prov:entity id:789f87ee-4c9e-410a-8e0d-d56b88ff8894 ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.039328"^^xsd:dateTime ; + prov:entity data:txt ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.039150"^^xsd:dateTime ; + prov:entity id:75614749-0504-46e7-918c-d552b6ef6873 ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.039270"^^xsd:dateTime ; + prov:entity id:858bcf6c-d3e9-4cbb-98db-e3e23a10b56e ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.039398"^^xsd:dateTime ; + prov:entity data:42f21c0ad41011eb87defa163eba836f ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.039514"^^xsd:dateTime ; + prov:entity ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.040182"^^xsd:dateTime ; + prov:entity data:3 ; + prov:hadRole ] ; + prov:startedAtTime "2021-06-23T10:46:28.526901"^^xsd:dateTime ; + prov:wasAssociatedWith id:e1515807-7299-444b-a8ff-366789285b60 . 
+ +id:e6c6a259-caa6-4637-873f-c91a4787cc6d a wfprov:ProcessRun, + prov:Activity ; + rdfs:label "Run of ga_export/jobs_attrs.txt#__DATA_FETCH__"^^xsd:string ; + prov:endedAtTime "2021-06-23T10:20:22.202678"^^xsd:dateTime ; + prov:qualifiedAssociation [ a prov:Association ; + prov:hadPlan ] ; + prov:qualifiedStart [ a prov:Start ; + prov:atTime "2021-06-23T10:20:00.981702"^^xsd:dateTime ; + prov:hadActivity id:20e49739-c446-4fa8-b564-c002a6762af8 ] ; + prov:qualifiedUsage [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.035999"^^xsd:dateTime ; + prov:entity id:7eaadbec-4494-4367-82b6-0cd14b4fffc4 ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.033953"^^xsd:dateTime ; + prov:entity id:e3e5c826-43a9-44c6-905c-a8a4a1105eff ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.034038"^^xsd:dateTime ; + prov:entity data: ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.036060"^^xsd:dateTime ; + prov:entity data:1 ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.033411"^^xsd:dateTime ; + prov:entity data:1 ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.033343"^^xsd:dateTime ; + prov:entity id:4f87d9db-d488-4386-be6e-1d7a65449be4 ; + prov:hadRole ] ; + prov:startedAtTime "2021-06-23T10:20:00.981702"^^xsd:dateTime ; + prov:wasAssociatedWith id:e1515807-7299-444b-a8ff-366789285b60 . 
+ +id:f6db7670-04d7-4035-986b-204226451914 a wfprov:ProcessRun, + prov:Activity ; + rdfs:label "Run of ga_export/jobs_attrs.txt#__DATA_FETCH__"^^xsd:string ; + prov:endedAtTime "2021-06-23T10:20:42.041319"^^xsd:dateTime ; + prov:qualifiedAssociation [ a prov:Association ; + prov:hadPlan ] ; + prov:qualifiedStart [ a prov:Start ; + prov:atTime "2021-06-23T10:20:22.770291"^^xsd:dateTime ; + prov:hadActivity id:20e49739-c446-4fa8-b564-c002a6762af8 ] ; + prov:qualifiedUsage [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.029123"^^xsd:dateTime ; + prov:entity id:0926faad-cc2b-437d-8ba7-5a1b7cbcc1cd ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.029205"^^xsd:dateTime ; + prov:entity data:1 ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.029703"^^xsd:dateTime ; + prov:entity data: ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.032326"^^xsd:dateTime ; + prov:entity id:0d405ad5-5e6f-409f-9ed8-6d6e0f6305e4 ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.032449"^^xsd:dateTime ; + prov:entity data:1 ; + prov:hadRole ], + [ a prov:Usage ; + prov:atTime "2022-03-07T12:43:03.029636"^^xsd:dateTime ; + prov:entity id:bdde3981-0ad0-49f9-b228-60858920e96d ; + prov:hadRole ] ; + prov:startedAtTime "2021-06-23T10:20:22.770291"^^xsd:dateTime ; + prov:wasAssociatedWith id:e1515807-7299-444b-a8ff-366789285b60 . + +orcid:0000-0002-8940-4946 a schema:Person, + prov:Agent, + prov:Person ; + rdfs:label "PDG"^^xsd:string ; + schema:name "PDG"^^xsd:string ; + foaf:name "PDG"^^xsd:string . + +data:2524d3d3b68ad39a a wfprov:Artifact, + prov:Entity ; + prov:value "2524d3d3b68ad39a"^^xsd:string . + +data:3 a wfprov:Artifact, + prov:Entity ; + prov:value "3"^^xsd:string . + +data:775bc9b3c1750167 a wfprov:Artifact, + prov:Entity ; + prov:value "775bc9b3c1750167"^^xsd:string . + +data:7c1d8e3101dbcdba a wfprov:Artifact, + prov:Entity ; + prov:value "7c1d8e3101dbcdba"^^xsd:string . 
+ +id:01747b41-76b6-4ee3-8baa-f2a38ef2588b a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:14e7c198-8f10-4656-94e4-e6afb569d09c ; + prov:pairKey "object_id"^^xsd:string . + +id:06c1a25c-0fb4-4cbf-8058-d088ca1ff260 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity data:7ca3de8f49293d1a ; + prov:pairKey "id"^^xsd:string . + +id:08561888-4226-440a-8346-d02fa58d7124 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "@id"^^xsd:string . + +id:0926faad-cc2b-437d-8ba7-5a1b7cbcc1cd a wfprov:Artifact, + prov:Collection, + prov:Entity ; + prov:hadMember data:2524d3d3b68ad39a . + +id:0d405ad5-5e6f-409f-9ed8-6d6e0f6305e4 a wfprov:Artifact, + prov:Collection, + prov:Dictionary, + prov:Entity ; + prov:hadDictionaryMember id:3e45e45e-5272-4108-b43a-d6847917fe76, + id:471fc2ea-17d6-4c4d-95b5-c33e8afdafc3, + id:4c0932b4-50d9-4463-a555-b97101ccd38d ; + prov:hadMember , + id:c306ccbe-5c38-409f-8e8c-d07b90071064, + id:e539820a-376d-4d6b-9b76-83fd6b45fc06 . + +id:12aeb197-393c-474f-8c02-2a646dd3ff45 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity data:set_seed ; + prov:pairKey "seed_source_selector"^^xsd:string . + +id:135c7d70-dce8-4495-b383-c205f7a09377 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "@id"^^xsd:string . + +id:1616545c-1b7b-431d-a266-d1548fbcf1c7 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:f8b6a64b-c2a7-4379-9332-e56cd20a5205 ; + prov:pairKey "in_place"^^xsd:string . + +id:16a9f31d-a9fd-410b-ab7e-4a3c2f6902e0 a wfprov:Artifact, + prov:Collection, + prov:Dictionary, + prov:Entity ; + prov:hadDictionaryMember id:a42768b7-eb03-4adf-9acf-d3f246ec1180, + id:eae74a98-c429-4feb-b5ec-f4337ecd310a ; + prov:hadMember , + id:5c7939ed-17c6-4258-97a2-cbddf4e53571 . + +id:19c2fef7-4841-43bb-9e84-5578ec8c0fe1 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "@id"^^xsd:string . 
+ +id:1c646195-e527-49d4-add9-57d75b59f28f a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:bdd9651f-565b-4f01-9b44-ecab0f8d03cc ; + prov:pairKey "destination"^^xsd:string . + +id:1dde2384-f621-4430-ad69-7b882879ec12 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "@id"^^xsd:string . + +id:2034fd0b-8758-4122-ac5a-2da3b99d9601 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "dbkey"^^xsd:string . + +id:2a5c2479-5e36-4f56-b5e1-c4bb157d8c1f a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity data:0 ; + prov:pairKey "seed"^^xsd:string . + +id:2d340838-6c11-4b9b-8efb-a3cdd1daf03e a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:56995ae6-a9bf-446a-8ff2-a3f6b6e4c3d9 ; + prov:pairKey "values"^^xsd:string . + +id:35336b14-07be-4324-a3e2-b1ae386ec383 a wfprov:Artifact, + prov:Collection, + prov:Dictionary, + prov:Entity ; + prov:hadDictionaryMember id:06c1a25c-0fb4-4cbf-8058-d088ca1ff260, + id:d78b7d83-0649-4480-88c3-d5031257914a, + id:f718bf7a-9772-4df8-a79b-065ac0f48060 ; + prov:hadMember data:7ca3de8f49293d1a, + data:hda, + . + +id:3cb906bd-a339-4dce-9f10-8830e77064dd a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:aba904a4-c936-440b-8a8e-f0c785e5d6f6 ; + prov:pairKey "auto_decompress"^^xsd:string . + +id:3e45e45e-5272-4108-b43a-d6847917fe76 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:c306ccbe-5c38-409f-8e8c-d07b90071064 ; + prov:pairKey "targets"^^xsd:string . + +id:3e80874d-b085-4be6-b7e6-6752a504df15 a wfprov:Artifact, + prov:Collection, + prov:Dictionary, + prov:Entity ; + prov:hadDictionaryMember id:434fa725-7662-4a38-a6b6-0c5b7a046a6d, + id:4ca631fe-a3c3-40cf-a2f1-c35c216fc4c3, + id:ae22b859-366d-4823-8dc3-8246dde1d038 ; + prov:hadMember data:fd7fec473f1e6c6b, + data:hda, + . + +id:411b4b64-efa0-498b-831b-e1a811b533fc a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "path"^^xsd:string . 
+ +id:4211e9db-3e4c-4c72-b40b-5cd61e509919 a wfprov:Artifact, + prov:Collection, + prov:Dictionary, + prov:Entity ; + prov:hadDictionaryMember id:848b21af-2337-43b4-88d3-556edb65da6d, + id:9ff8d7db-bc2e-4c36-96a9-ba9b0acc5d66 ; + prov:hadMember , + id:fcc931f3-252e-4db0-81ae-44745d02e60b . + +id:434fa725-7662-4a38-a6b6-0c5b7a046a6d a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity data:fd7fec473f1e6c6b ; + prov:pairKey "id"^^xsd:string . + +id:435fe41f-0b33-476d-ac9e-0ae09aa868aa a wfprov:Artifact, + prov:Collection, + prov:Dictionary, + prov:Entity ; + prov:hadDictionaryMember id:c9e82030-9dc0-4ecb-b5e2-1080e851476b, + id:e38a5ee3-57ea-4bd2-98ed-ae29ba42f819, + id:f2a08bca-e609-45bb-a7e2-461ffd56a888 ; + prov:hadMember data:2e789aae2475b7da, + data:hda, + . + +id:458ba136-e386-4887-81d2-0dc58f05d613 a wfprov:Artifact, + prov:Collection, + prov:Dictionary, + prov:Entity ; + prov:hadDictionaryMember id:1616545c-1b7b-431d-a266-d1548fbcf1c7, + id:2034fd0b-8758-4122-ac5a-2da3b99d9601, + id:411b4b64-efa0-498b-831b-e1a811b533fc, + id:4d9d83e8-2b4a-4da1-a733-0bc97c7a4fba, + id:61c482a1-fcb0-4e6c-a464-5ee8e731bbce, + id:7eb26f31-0568-4a19-8bd6-250d198af74f, + id:92ea61f0-9411-4237-afe1-124e9a791e0c, + id:a865a7ee-e00a-4a5f-9e9d-e38d4e9c7ff2, + id:b42d5b13-379e-4dba-8280-37e5e1ad9bfe, + id:d9b124d2-3709-42e8-a9d6-46741deaedcd, + id:f81db514-cf8d-496e-8a20-2124f9ec7106 ; + prov:hadMember , + , + data:auto, + data:dataset2.txt, + data:path, + , + id:20e2d794-4254-4633-aae0-ec5e89a26cf7, + id:331b5070-e37d-4cc2-a1d2-42fa8a860fcc, + id:a0d80900-bfe5-430d-8b2a-d84f1e30819b, + id:d26ae8a8-25ea-44f6-b525-60c07c6d5a26, + id:f8b6a64b-c2a7-4379-9332-e56cd20a5205 . + +id:471fc2ea-17d6-4c4d-95b5-c33e8afdafc3 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "@id"^^xsd:string . + +id:4886ee12-8a4c-4ace-9612-a99c59d60862 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "@id"^^xsd:string . 
+ +id:4c0932b4-50d9-4463-a555-b97101ccd38d a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:e539820a-376d-4d6b-9b76-83fd6b45fc06 ; + prov:pairKey "check_content"^^xsd:string . + +id:4ca631fe-a3c3-40cf-a2f1-c35c216fc4c3 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity data:hda ; + prov:pairKey "src"^^xsd:string . + +id:4d9d83e8-2b4a-4da1-a733-0bc97c7a4fba a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity data:auto ; + prov:pairKey "ext"^^xsd:string . + +id:4e9f2201-16ee-411e-b0b5-6b721a31961b a wfprov:Artifact, + prov:Collection, + prov:Dictionary, + prov:Entity ; + prov:hadDictionaryMember id:1c646195-e527-49d4-add9-57d75b59f28f, + id:7e9d3f8d-26fa-4a75-a7be-8bf19a988a22, + id:98ab820f-380b-4de0-921b-3e5eefa05895, + id:d71b0844-dcb3-4f95-8ef4-a123c1fda9ab ; + prov:hadMember , + id:2699f676-0e8d-45f9-83e4-e18d42c8ea37, + id:47576d8c-0376-4b0a-b92a-62986ff6d530, + id:bdd9651f-565b-4f01-9b44-ecab0f8d03cc . + +id:4f87d9db-d488-4386-be6e-1d7a65449be4 a wfprov:Artifact, + prov:Collection, + prov:Entity ; + prov:hadMember data:7c1d8e3101dbcdba . + +id:5736c82f-9794-4cec-b080-2a584974f965 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:e6c4b36c-d9f7-4cdf-af54-9574a03c7cc4 ; + prov:pairKey "hashes"^^xsd:string . + +id:60645176-f281-4f11-a8ba-19998f73214c a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity data:auto ; + prov:pairKey "ext"^^xsd:string . + +id:61c482a1-fcb0-4e6c-a464-5ee8e731bbce a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:331b5070-e37d-4cc2-a1d2-42fa8a860fcc ; + prov:pairKey "object_id"^^xsd:string . + +id:69f2e9bb-0e88-4b99-bae6-84e0c090bf40 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "@id"^^xsd:string . + +id:6a97f8d9-a34a-450f-9091-70462ef9cab8 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:7035f1f8-c649-48dc-9cec-36026f8fd37b ; + prov:pairKey "__current_case__"^^xsd:string . 
+ +id:72473707-f240-4058-9ccd-e8503f35a8c0 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:3d61a5b6-35cc-4b40-9709-8ddea0a6838c ; + prov:pairKey "__index__"^^xsd:string . + +id:749da440-59c3-4769-824c-a46ba41f6a96 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:3b075671-c68a-4cfa-a1ca-ba3402f7f8e0 ; + prov:pairKey "input2"^^xsd:string . + +id:75614749-0504-46e7-918c-d552b6ef6873 a wfprov:Artifact, + prov:Collection, + prov:Entity ; + prov:hadMember data:fd7fec473f1e6c6b . + +id:7646cc43-0998-44e3-ad50-a4066c301445 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:6277e935-5850-4603-ad47-9eadba7ed319 ; + prov:pairKey "check_content"^^xsd:string . + +id:789f87ee-4c9e-410a-8e0d-d56b88ff8894 a wfprov:Artifact, + prov:Collection, + prov:Dictionary, + prov:Entity ; + prov:hadDictionaryMember id:08561888-4226-440a-8346-d02fa58d7124, + id:12aeb197-393c-474f-8c02-2a646dd3ff45, + id:2a5c2479-5e36-4f56-b5e1-c4bb157d8c1f, + id:6a97f8d9-a34a-450f-9091-70462ef9cab8 ; + prov:hadMember data:0, + data:set_seed, + , + id:7035f1f8-c649-48dc-9cec-36026f8fd37b . + +id:78f2923d-b049-46db-b855-68fe4b39fba7 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "@id"^^xsd:string . + +id:7c47812e-a743-471e-bc60-289ff2412086 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity data:dataset1.txt ; + prov:pairKey "name"^^xsd:string . + +id:7e9d3f8d-26fa-4a75-a7be-8bf19a988a22 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:2699f676-0e8d-45f9-83e4-e18d42c8ea37 ; + prov:pairKey "auto_decompress"^^xsd:string . + +id:7eaadbec-4494-4367-82b6-0cd14b4fffc4 a wfprov:Artifact, + prov:Collection, + prov:Dictionary, + prov:Entity ; + prov:hadDictionaryMember id:7646cc43-0998-44e3-ad50-a4066c301445, + id:c2dd6553-a377-4b35-8f6e-2114447a8164, + id:fffb0f2d-0bc8-470b-ae81-9e6483e51141 ; + prov:hadMember , + id:6277e935-5850-4603-ad47-9eadba7ed319, + id:d193f76c-bf93-47b3-9f2e-adeedb1b9b83 . 
+ +id:7eb26f31-0568-4a19-8bd6-250d198af74f a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:a0d80900-bfe5-430d-8b2a-d84f1e30819b ; + prov:pairKey "to_posix_lines"^^xsd:string . + +id:848b21af-2337-43b4-88d3-556edb65da6d a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "@id"^^xsd:string . + +id:858bcf6c-d3e9-4cbb-98db-e3e23a10b56e a wfprov:Artifact, + prov:Collection, + prov:Entity ; + prov:hadMember data:775bc9b3c1750167 . + +id:8595276a-419f-4efd-9104-bb8af8e7c9ad a prov:Agent ; + prov:actedOnBehalfOf orcid:0000-0002-8940-4946 . + +id:89888457-0be2-482a-a5f4-78df6215c738 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity data:hdas ; + prov:pairKey "type"^^xsd:string . + +id:8a1d73d2-c260-4522-93da-9853539f0fb1 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:181d6f96-e6ff-4cac-acb5-d357537f5d69 ; + prov:pairKey "in_place"^^xsd:string . + +id:8f861bcc-f5e6-48b8-a1c5-df3fecdd0380 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "file_data"^^xsd:string . + +id:92ea61f0-9411-4237-afe1-124e9a791e0c a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity data:path ; + prov:pairKey "src"^^xsd:string . + +id:98ab820f-380b-4de0-921b-3e5eefa05895 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "@id"^^xsd:string . + +id:9ae83fa5-9f2a-495c-b6ef-a67a2c1282af a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:8b9d60fe-9459-4a11-9316-a07b8564b9bf ; + prov:pairKey "__index__"^^xsd:string . + +id:9f8e537f-7734-4846-a1dc-10072021fede a wfprov:Artifact, + prov:Collection, + prov:Dictionary, + prov:Entity ; + prov:hadDictionaryMember id:72473707-f240-4058-9ccd-e8503f35a8c0, + id:749da440-59c3-4769-824c-a46ba41f6a96, + id:ab7ad9b2-5937-477c-a4de-81d4017e0caf ; + prov:hadMember , + id:3b075671-c68a-4cfa-a1ca-ba3402f7f8e0, + id:3d61a5b6-35cc-4b40-9709-8ddea0a6838c . 
+ +id:9ff8d7db-bc2e-4c36-96a9-ba9b0acc5d66 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:fcc931f3-252e-4db0-81ae-44745d02e60b ; + prov:pairKey "values"^^xsd:string . + +id:a37e6a38-7f99-400b-91fb-a7f2eee116a9 a wfprov:Artifact, + prov:Collection, + prov:Dictionary, + prov:Entity ; + prov:hadDictionaryMember id:69f2e9bb-0e88-4b99-bae6-84e0c090bf40, + id:9ae83fa5-9f2a-495c-b6ef-a67a2c1282af, + id:bed207ba-d46d-4b13-861d-726e592bbb18 ; + prov:hadMember , + , + id:8b9d60fe-9459-4a11-9316-a07b8564b9bf . + +id:a3f935f2-2c2d-4e7e-80a0-f7865180ae06 a wfprov:Artifact, + prov:Collection, + prov:Dictionary, + prov:Entity ; + prov:hadDictionaryMember id:01747b41-76b6-4ee3-8baa-f2a38ef2588b, + id:4886ee12-8a4c-4ace-9612-a99c59d60862, + id:5736c82f-9794-4cec-b080-2a584974f965, + id:60645176-f281-4f11-a8ba-19998f73214c, + id:7c47812e-a743-471e-bc60-289ff2412086, + id:8a1d73d2-c260-4522-93da-9853539f0fb1, + id:afbc9621-dc4b-43ba-8ff2-09f77d9dd3e3, + id:b4379925-f8d0-44f5-ab8a-0530fa73fd0f, + id:d9709813-bb67-4546-89ab-827a698ac0fa, + id:df157f5e-cea3-48da-97fd-f819d1a472ae, + id:fbe70690-fffc-4c7b-bbc3-0acc8baa2dbc ; + prov:hadMember , + , + data:auto, + data:dataset1.txt, + data:path, + , + id:14e7c198-8f10-4656-94e4-e6afb569d09c, + id:181d6f96-e6ff-4cac-acb5-d357537f5d69, + id:a665694e-38a7-4443-bc4e-f45cb26d0f6a, + id:e6c4b36c-d9f7-4cdf-af54-9574a03c7cc4, + id:fa3e442b-6fc6-4104-8f98-831e3a228339 . + +id:a42768b7-eb03-4adf-9acf-d3f246ec1180 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "@id"^^xsd:string . + +id:a80d46cc-57ae-40eb-8e57-94c8a9c0d01f a wfprov:Artifact, + prov:Collection, + prov:Dictionary, + prov:Entity ; + prov:hadDictionaryMember id:8f861bcc-f5e6-48b8-a1c5-df3fecdd0380, + id:aa4b73d5-8e85-4263-bc4c-6512c3273d52, + id:ca6e1da9-b31d-47b2-9a07-4720646d02e0 ; + prov:hadMember , + , + id:1c95730f-eb41-432b-add7-8dd293286dbb . 
+ +id:a865a7ee-e00a-4a5f-9e9d-e38d4e9c7ff2 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity data:dataset2.txt ; + prov:pairKey "name"^^xsd:string . + +id:aa4b73d5-8e85-4263-bc4c-6512c3273d52 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "@id"^^xsd:string . + +id:ab7ad9b2-5937-477c-a4de-81d4017e0caf a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "@id"^^xsd:string . + +id:ae22b859-366d-4823-8dc3-8246dde1d038 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "@id"^^xsd:string . + +id:afbc9621-dc4b-43ba-8ff2-09f77d9dd3e3 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:fa3e442b-6fc6-4104-8f98-831e3a228339 ; + prov:pairKey "to_posix_lines"^^xsd:string . + +id:b2c2277d-6021-4112-850d-e6919b16ffe2 a wfprov:Artifact, + prov:Collection, + prov:Entity ; + prov:hadMember data:7ca3de8f49293d1a . + +id:b42d5b13-379e-4dba-8280-37e5e1ad9bfe a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:d26ae8a8-25ea-44f6-b525-60c07c6d5a26 ; + prov:pairKey "hashes"^^xsd:string . + +id:b4379925-f8d0-44f5-ab8a-0530fa73fd0f a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity data:path ; + prov:pairKey "src"^^xsd:string . + +id:b7b35e57-4bea-4854-92b9-9b15830459a1 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity data:hdas ; + prov:pairKey "type"^^xsd:string . + +id:b8857908-272e-4afc-a3d9-8410e083d311 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:e370a05b-53e5-4110-ab4b-ad5aee8255a1 ; + prov:pairKey "destination"^^xsd:string . + +id:bdde3981-0ad0-49f9-b228-60858920e96d a wfprov:Artifact, + prov:Collection, + prov:Entity ; + prov:hadMember id:a37e6a38-7f99-400b-91fb-a7f2eee116a9 . + +id:bed207ba-d46d-4b13-861d-726e592bbb18 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "file_data"^^xsd:string . 
+ +id:c2dd6553-a377-4b35-8f6e-2114447a8164 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:d193f76c-bf93-47b3-9f2e-adeedb1b9b83 ; + prov:pairKey "targets"^^xsd:string . + +id:c9e82030-9dc0-4ecb-b5e2-1080e851476b a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "@id"^^xsd:string . + +id:ca6e1da9-b31d-47b2-9a07-4720646d02e0 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:1c95730f-eb41-432b-add7-8dd293286dbb ; + prov:pairKey "__index__"^^xsd:string . + +id:cfa5924e-df83-482f-9986-1126f46a08fb a wfprov:Artifact, + prov:Collection, + prov:Entity ; + prov:hadMember id:9f8e537f-7734-4846-a1dc-10072021fede . + +id:d2fd94f5-3b74-4275-b2e6-9a740901bd12 a wfprov:Artifact, + prov:Collection, + prov:Entity ; + prov:hadMember data:fd7fec473f1e6c6b . + +id:d321fdc7-b41e-4773-a9d8-fb8a310a77cd a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:f09d70dc-44ec-43ac-afb4-a3994541ff53 ; + prov:pairKey "elements"^^xsd:string . + +id:d71b0844-dcb3-4f95-8ef4-a123c1fda9ab a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:47576d8c-0376-4b0a-b92a-62986ff6d530 ; + prov:pairKey "elements"^^xsd:string . + +id:d78b7d83-0649-4480-88c3-d5031257914a a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity data:hda ; + prov:pairKey "src"^^xsd:string . + +id:d9709813-bb67-4546-89ab-827a698ac0fa a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "path"^^xsd:string . + +id:d9b124d2-3709-42e8-a9d6-46741deaedcd a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "@id"^^xsd:string . + +id:df157f5e-cea3-48da-97fd-f819d1a472ae a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "dbkey"^^xsd:string . + +id:e00df3b8-b936-45aa-931b-e55aa48d5845 a wfprov:Artifact, + prov:Collection, + prov:Entity ; + prov:hadMember data:2e789aae2475b7da . 
+ +id:e246c224-ec3f-4188-82b7-86fdf5391aa5 a wfprov:Artifact, + prov:Collection, + prov:Dictionary, + prov:Entity ; + prov:hadDictionaryMember id:19c2fef7-4841-43bb-9e84-5578ec8c0fe1, + id:3cb906bd-a339-4dce-9f10-8830e77064dd, + id:b8857908-272e-4afc-a3d9-8410e083d311, + id:d321fdc7-b41e-4773-a9d8-fb8a310a77cd ; + prov:hadMember , + id:aba904a4-c936-440b-8a8e-f0c785e5d6f6, + id:e370a05b-53e5-4110-ab4b-ad5aee8255a1, + id:f09d70dc-44ec-43ac-afb4-a3994541ff53 . + +id:e38a5ee3-57ea-4bd2-98ed-ae29ba42f819 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity data:2e789aae2475b7da ; + prov:pairKey "id"^^xsd:string . + +id:e3e5c826-43a9-44c6-905c-a8a4a1105eff a wfprov:Artifact, + prov:Collection, + prov:Entity ; + prov:hadMember id:a80d46cc-57ae-40eb-8e57-94c8a9c0d01f . + +id:eae74a98-c429-4feb-b5ec-f4337ecd310a a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:5c7939ed-17c6-4258-97a2-cbddf4e53571 ; + prov:pairKey "values"^^xsd:string . + +id:f2a08bca-e609-45bb-a7e2-461ffd56a888 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity data:hda ; + prov:pairKey "src"^^xsd:string . + +id:f718bf7a-9772-4df8-a79b-065ac0f48060 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "@id"^^xsd:string . + +id:f81db514-cf8d-496e-8a20-2124f9ec7106 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:20e2d794-4254-4633-aae0-ec5e89a26cf7 ; + prov:pairKey "purge_source"^^xsd:string . + +id:fbe70690-fffc-4c7b-bbc3-0acc8baa2dbc a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity id:a665694e-38a7-4443-bc4e-f45cb26d0f6a ; + prov:pairKey "purge_source"^^xsd:string . + +id:fffb0f2d-0bc8-470b-ae81-9e6483e51141 a prov:Entity, + prov:KeyEntityPair ; + prov:pairEntity ; + prov:pairKey "@id"^^xsd:string . + +data: a wfprov:Artifact, + prov:Entity ; + prov:value ""^^xsd:string . + + a wfprov:Artifact, + prov:Entity ; + prov:value "/srv/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len"^^xsd:string . 
+ +data:0 a wfprov:Artifact, + prov:Entity ; + prov:value "0"^^xsd:string . + +data:42f21c0ad41011eb87defa163eba836f a wfprov:Artifact, + prov:Entity ; + prov:value "42f21c0ad41011eb87defa163eba836f"^^xsd:string . + +data:dataset1.txt a wfprov:Artifact, + prov:Entity ; + prov:value "dataset1.txt"^^xsd:string . + +data:dataset2.txt a wfprov:Artifact, + prov:Entity ; + prov:value "dataset2.txt"^^xsd:string . + +data:set_seed a wfprov:Artifact, + prov:Entity ; + prov:value "set_seed"^^xsd:string . + +data:txt a wfprov:Artifact, + prov:Entity ; + prov:value "txt"^^xsd:string . + + a wfprov:Artifact, + prov:Entity ; + prov:value "urn:uuid:0d405ad5-5e6f-409f-9ed8-6d6e0f6305e4"^^xsd:string . + + a wfprov:Artifact, + prov:Entity ; + prov:value "urn:uuid:16a9f31d-a9fd-410b-ab7e-4a3c2f6902e0"^^xsd:string . + + a wfprov:Artifact, + prov:Entity ; + prov:value "urn:uuid:35336b14-07be-4324-a3e2-b1ae386ec383"^^xsd:string . + + a wfprov:Artifact, + prov:Entity ; + prov:value "urn:uuid:3b075671-c68a-4cfa-a1ca-ba3402f7f8e0"^^xsd:string . + + a wfprov:Artifact, + prov:Entity ; + prov:value "urn:uuid:3e80874d-b085-4be6-b7e6-6752a504df15"^^xsd:string . + + a wfprov:Artifact, + prov:Entity ; + prov:value "urn:uuid:4211e9db-3e4c-4c72-b40b-5cd61e509919"^^xsd:string . + + a wfprov:Artifact, + prov:Entity ; + prov:value "urn:uuid:435fe41f-0b33-476d-ac9e-0ae09aa868aa"^^xsd:string . + + a wfprov:Artifact, + prov:Entity ; + prov:value "urn:uuid:458ba136-e386-4887-81d2-0dc58f05d613"^^xsd:string . + + a wfprov:Artifact, + prov:Entity ; + prov:value "urn:uuid:4e9f2201-16ee-411e-b0b5-6b721a31961b"^^xsd:string . + + a wfprov:Artifact, + prov:Entity ; + prov:value "urn:uuid:789f87ee-4c9e-410a-8e0d-d56b88ff8894"^^xsd:string . + + a wfprov:Artifact, + prov:Entity ; + prov:value "urn:uuid:7eaadbec-4494-4367-82b6-0cd14b4fffc4"^^xsd:string . + + a wfprov:Artifact, + prov:Entity ; + prov:value "urn:uuid:9f8e537f-7734-4846-a1dc-10072021fede"^^xsd:string . 
+ + a wfprov:Artifact, + prov:Entity ; + prov:value "urn:uuid:a37e6a38-7f99-400b-91fb-a7f2eee116a9"^^xsd:string . + + a wfprov:Artifact, + prov:Entity ; + prov:value "urn:uuid:a3f935f2-2c2d-4e7e-80a0-f7865180ae06"^^xsd:string . + + a wfprov:Artifact, + prov:Entity ; + prov:value "urn:uuid:a80d46cc-57ae-40eb-8e57-94c8a9c0d01f"^^xsd:string . + + a wfprov:Artifact, + prov:Entity ; + prov:value "urn:uuid:bdd9651f-565b-4f01-9b44-ecab0f8d03cc"^^xsd:string . + + a wfprov:Artifact, + prov:Entity ; + prov:value "urn:uuid:e246c224-ec3f-4188-82b7-86fdf5391aa5"^^xsd:string . + + a wfprov:Artifact, + prov:Entity ; + prov:value "urn:uuid:e370a05b-53e5-4110-ab4b-ad5aee8255a1"^^xsd:string . + +id:14e7c198-8f10-4656-94e4-e6afb569d09c a prov:Entity ; + prov:value "155707"^^xsd:int . + +id:181d6f96-e6ff-4cac-acb5-d357537f5d69 a prov:Entity ; + prov:value false . + +id:1c95730f-eb41-432b-add7-8dd293286dbb a prov:Entity ; + prov:value "0"^^xsd:int . + +id:20e2d794-4254-4633-aae0-ec5e89a26cf7 a prov:Entity ; + prov:value false . + +id:2699f676-0e8d-45f9-83e4-e18d42c8ea37 a prov:Entity ; + prov:value false . + +id:331b5070-e37d-4cc2-a1d2-42fa8a860fcc a prov:Entity ; + prov:value "155708"^^xsd:int . + +id:3b075671-c68a-4cfa-a1ca-ba3402f7f8e0 a wfprov:Artifact, + prov:Collection, + prov:Dictionary, + prov:Entity ; + prov:hadDictionaryMember id:1dde2384-f621-4430-ad69-7b882879ec12, + id:2d340838-6c11-4b9b-8efb-a3cdd1daf03e ; + prov:hadMember , + id:56995ae6-a9bf-446a-8ff2-a3f6b6e4c3d9 . + +id:3d61a5b6-35cc-4b40-9709-8ddea0a6838c a prov:Entity ; + prov:value "0"^^xsd:int . + +id:47576d8c-0376-4b0a-b92a-62986ff6d530 a wfprov:Artifact, + prov:Collection, + prov:Entity ; + prov:hadMember id:458ba136-e386-4887-81d2-0dc58f05d613 . + +id:56995ae6-a9bf-446a-8ff2-a3f6b6e4c3d9 a wfprov:Artifact, + prov:Collection, + prov:Entity ; + prov:hadMember id:435fe41f-0b33-476d-ac9e-0ae09aa868aa . 
+ +id:5c7939ed-17c6-4258-97a2-cbddf4e53571 a wfprov:Artifact, + prov:Collection, + prov:Entity ; + prov:hadMember id:3e80874d-b085-4be6-b7e6-6752a504df15 . + +id:6277e935-5850-4603-ad47-9eadba7ed319 a prov:Entity ; + prov:value true . + +id:7035f1f8-c649-48dc-9cec-36026f8fd37b a prov:Entity ; + prov:value "1"^^xsd:int . + +id:8b9d60fe-9459-4a11-9316-a07b8564b9bf a prov:Entity ; + prov:value "0"^^xsd:int . + +id:a0d80900-bfe5-430d-8b2a-d84f1e30819b a prov:Entity ; + prov:value true . + +id:a665694e-38a7-4443-bc4e-f45cb26d0f6a a prov:Entity ; + prov:value false . + +id:aba904a4-c936-440b-8a8e-f0c785e5d6f6 a prov:Entity ; + prov:value false . + +id:bdd9651f-565b-4f01-9b44-ecab0f8d03cc a wfprov:Artifact, + prov:Collection, + prov:Dictionary, + prov:Entity ; + prov:hadDictionaryMember id:78f2923d-b049-46db-b855-68fe4b39fba7, + id:89888457-0be2-482a-a5f4-78df6215c738 ; + prov:hadMember data:hdas, + . + +id:c306ccbe-5c38-409f-8e8c-d07b90071064 a wfprov:Artifact, + prov:Collection, + prov:Entity ; + prov:hadMember id:4e9f2201-16ee-411e-b0b5-6b721a31961b . + +id:d193f76c-bf93-47b3-9f2e-adeedb1b9b83 a wfprov:Artifact, + prov:Collection, + prov:Entity ; + prov:hadMember id:e246c224-ec3f-4188-82b7-86fdf5391aa5 . + +id:d26ae8a8-25ea-44f6-b525-60c07c6d5a26 a wfprov:Artifact, + prov:Collection, + prov:EmptyCollection, + prov:Entity . + +id:e370a05b-53e5-4110-ab4b-ad5aee8255a1 a wfprov:Artifact, + prov:Collection, + prov:Dictionary, + prov:Entity ; + prov:hadDictionaryMember id:135c7d70-dce8-4495-b383-c205f7a09377, + id:b7b35e57-4bea-4854-92b9-9b15830459a1 ; + prov:hadMember data:hdas, + . + +id:e539820a-376d-4d6b-9b76-83fd6b45fc06 a prov:Entity ; + prov:value true . + +id:e6c4b36c-d9f7-4cdf-af54-9574a03c7cc4 a wfprov:Artifact, + prov:Collection, + prov:EmptyCollection, + prov:Entity . + +id:f09d70dc-44ec-43ac-afb4-a3994541ff53 a wfprov:Artifact, + prov:Collection, + prov:Entity ; + prov:hadMember id:a3f935f2-2c2d-4e7e-80a0-f7865180ae06 . 
+ +id:f8b6a64b-c2a7-4379-9332-e56cd20a5205 a prov:Entity ; + prov:value false . + +id:fa3e442b-6fc6-4104-8f98-831e3a228339 a prov:Entity ; + prov:value true . + +id:fcc931f3-252e-4db0-81ae-44745d02e60b a wfprov:Artifact, + prov:Collection, + prov:Entity ; + prov:hadMember id:35336b14-07be-4324-a3e2-b1ae386ec383 . + +data:2e789aae2475b7da a wfprov:Artifact, + prov:Entity ; + prov:value "2e789aae2475b7da"^^xsd:string . + +data:7ca3de8f49293d1a a wfprov:Artifact, + prov:Entity ; + prov:value "7ca3de8f49293d1a"^^xsd:string . + + a wfprov:Artifact, + prov:Entity ; + prov:value "/srv/galaxy/shared/etc/tmpdir/tmp/upload_file_data_85x51jn2"^^xsd:string . + + a wfprov:Artifact, + prov:Entity ; + prov:value "/srv/galaxy/shared/etc/tmpdir/tmp/upload_file_data_asmrlj6a"^^xsd:string . + +data:1 a wfprov:Artifact, + prov:Entity ; + prov:value "1"^^xsd:string . + +data:auto a wfprov:Artifact, + prov:Entity ; + prov:value "auto"^^xsd:string . + +data:fd7fec473f1e6c6b a wfprov:Artifact, + prov:Entity ; + prov:value "fd7fec473f1e6c6b"^^xsd:string . + +data:hdas a wfprov:Artifact, + prov:Entity ; + prov:value "hdas"^^xsd:string . + +data:path a wfprov:Artifact, + prov:Entity ; + prov:value "path"^^xsd:string . + +id:20e49739-c446-4fa8-b564-c002a6762af8 a wfprov:WorkflowRun, + prov:Activity ; + rdfs:label "Run of galaxy workflow"^^xsd:string ; + prov:qualifiedAssociation [ a prov:Association ; + prov:hadPlan wf:main ] ; + prov:qualifiedStart [ a prov:Start ; + prov:atTime "2022-03-07T12:43:03.028231"^^xsd:dateTime ; + prov:hadActivity id:e1515807-7299-444b-a8ff-366789285b60 ] ; + prov:startedAtTime "2022-03-07T12:43:03.028137"^^xsd:dateTime ; + prov:wasAssociatedWith id:e1515807-7299-444b-a8ff-366789285b60 . + + a wfprov:Artifact, + prov:Entity ; + prov:value "?"^^xsd:string . + +data:hda a wfprov:Artifact, + prov:Entity ; + prov:value "hda"^^xsd:string . 
import json
import os
import re
from typing import (
    Dict,
)


def load_ga_history_export(export_dir):
    """Load the JSON metadata files found directly inside ``export_dir``.

    Every regular file in the directory is parsed as JSON; sub-directories
    are ignored.  The dictionary key for a file is derived from its name by
    splitting on ``.`` and on the literal ``txt`` and joining the remaining
    non-empty pieces with ``_`` (``jobs_attrs.txt`` -> ``jobs_attrs``,
    ``datasets_attrs.txt.provenance`` -> ``datasets_attrs_provenance``).

    :param export_dir: path of the directory to scan.
    :return: dict mapping the derived key to the decoded JSON content.
    :raises json.JSONDecodeError: if a regular file does not contain JSON.
    """
    export_metadata = {}
    for f in os.listdir(export_dir):
        export_dir_path = os.path.join(export_dir, f)
        if not os.path.isfile(export_dir_path):
            continue
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(export_dir_path, "r", encoding="utf-8") as fh:
            # create keys for metadata files, removes '.' and 'txt' from fn
            key = '_'.join(filter(None, re.split(r'\.|txt', f)))
            export_metadata[key] = json.load(fh)
    return export_metadata


class GalaxyJob(Dict):
    """Attribute container for one job description.

    The parsed data lives in ``self.attributes``, which always contains the
    ``inputs``, ``outputs`` and ``parameters`` sub-dictionaries.
    NOTE(review): presumably fed with one entry of the export's job
    attributes file -- confirm against the caller.
    """

    def __init__(self):
        """
        Initialize the GalaxyJob object.
        """
        super().__init__()
        self.attributes = {}
        self.attributes["inputs"] = {}
        self.attributes["outputs"] = {}
        self.attributes["parameters"] = {}

    @staticmethod
    def _parse_params(params):
        """Return a normalised copy of a job's parameter mapping.

        Falsy values (``None``, ``""``, ``0``, ``False``, empty containers)
        are dropped.  Values that can be read as an integer are converted,
        string values under a key containing ``"json"`` are decoded, and
        decoded or pre-existing dicts/lists are stored as their ``str()``
        representation.
        """
        parsed = {}
        for k, v in params.items():
            # Truthiness alone covers "empty"; calling len() here used to
            # raise TypeError for plain numbers and booleans.
            if not v:
                continue
            # Values that already are numbers are kept untouched so that
            # booleans and floats are not silently turned into ints.
            if not isinstance(v, (bool, int, float)):
                try:
                    v = int(v)
                except (TypeError, ValueError):
                    pass  # it was a string, not an int.

            # Only text can be JSON-decoded; a value that was just coerced
            # to int (e.g. "3") must not be passed to json.loads.
            if "json" in k and isinstance(v, str):
                v = json.loads(v)
            if isinstance(v, (dict, list)):
                v = str(v)
            parsed[k] = v
        return parsed

    def parse_ga_jobs_attrs(self, job_attrs):
        """Populate ``self.attributes`` from a job attribute mapping.

        Non-dict values are copied under their own key.  Non-empty dict
        values are merged into ``inputs`` / ``outputs`` when the key
        contains ``"input"`` / ``"output"``, and into ``parameters`` (after
        normalisation) when the key contains ``"params"``.  Empty dicts and
        dicts under any other key are ignored.

        :param job_attrs: mapping of attribute name to value.
        """
        for key, value in job_attrs.items():
            if not isinstance(value, dict):
                self.attributes[key] = value
                continue
            if not value:
                continue
            if "input" in key:
                self.attributes["inputs"].update(value)
            if "output" in key:
                self.attributes["outputs"].update(value)
            if "params" in key:
                self.attributes["parameters"].update(self._parse_params(value))


class GalaxyDataset(Dict):
    """Attribute container for one dataset description.

    The parsed data lives in ``self.attributes``; it starts with an empty
    ``metadata`` dictionary and ``class`` set to ``"File"``.
    """

    def __init__(self):
        """
        Initialize the GalaxyDataset object.
        """
        super().__init__()
        self.attributes = {}
        self.attributes["metadata"] = {}
        self.attributes["class"] = "File"

    def parse_ga_dataset_attrs(self, dataset_attrs):
        """Populate ``self.attributes`` from a dataset attribute mapping.

        Non-dict values are copied under their own key.  Non-empty dict
        values are merged into ``metadata`` when the key contains
        ``"metadata"``; every other dict value is ignored.

        :param dataset_attrs: mapping of attribute name to value.
        """
        for key, value in dataset_attrs.items():
            if not isinstance(value, dict):
                self.attributes[key] = value
                continue
            if value and "metadata" in key:
                self.attributes["metadata"].update(value)
        # NOTE(review): disabled in the original, kept for reference:
        # self.attributes["used_encoded_id"] = \
        #     next(
        #         iter(self.attributes["copied_from_history_dataset_association_id_chain"]),
        #         self.attributes["encoded_id"]
        #     )