From c1b5a6dbc62c0c8715277141350b3bfba388fede Mon Sep 17 00:00:00 2001 From: jweiser Date: Mon, 9 Feb 2026 23:37:09 -0500 Subject: [PATCH 01/17] updates Species.json to use EnsEMBL ref dbs from gk_central --- src/main/resources/Species.json | 135 ++++---------------------------- 1 file changed, 15 insertions(+), 120 deletions(-) diff --git a/src/main/resources/Species.json b/src/main/resources/Species.json index bfbb9a7..a14585c 100644 --- a/src/main/resources/Species.json +++ b/src/main/resources/Species.json @@ -2,14 +2,7 @@ "ddis":{ "mart_url":"http:\/\/protists.ensembl.org\/biomart\/martservice", "refdb":{ - "url":"http:\/\/protists.ensembl.org\/Dictyostelium_discoideum\/Info\/Index", - "ensg_access":"http:\/\/protists.ensembl.org\/Dictyostelium_discoideum\/geneview?gene=###ID###&db=core", - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_Dictyostelium discoideum_PROTEIN" - ], - "access":"http:\/\/protists.ensembl.org\/Dictyostelium_discoideum\/Transcript\/ProteinSummary?peptide=###ID###" + "use_gk_central_ensembl_ref_db": "protist" }, "alt_refdb":{ "url":"http:\/\/www.dictybase.org\/", @@ -35,14 +28,7 @@ "url":"http:\/\/www.wormbase.org" }, "refdb":{ - "access":"http:\/\/metazoa.ensembl.org\/Caenorhabditis_elegans\/Transcript\/ProteinSummary?peptide=###ID###", - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_C_elegans_PROTEIN" - ], - "url":"http:\/\/metazoa.ensembl.org\/Caenorhabditis_elegans\/Info\/Index", - "ensg_access":"http:\/\/metazoa.ensembl.org\/Caenorhabditis_elegans\/geneview?gene=###ID###&db=core" + "use_gk_central_ensembl_ref_db": "main" }, "name":[ "Caenorhabditis elegans" @@ -58,14 +44,7 @@ "Saccharomyces cerevisiae" ], "refdb":{ - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_S_cerevisiae_PROTEIN" - ], - "access":"http:\/\/fungi.ensembl.org\/Saccharomyces_cerevisiae\/Transcript\/ProteinSummary?peptide=###ID###", - "url":"http:\/\/fungi.ensembl.org\/Saccharomyces_cerevisiae\/Info\/Index", - "ensg_access":"http:\/\/fungi.ensembl.org\/Saccharomyces_cerevisiae\/geneview?gene=###ID###&db=core" + "use_gk_central_ensembl_ref_db": "fungi" }, "alt_refdb":{ "access":"https:\/\/www.yeastgenome.org\/search?q=###ID###&category=locus", @@ -87,14 +66,7 @@ ], "mart_group":"sscrofa_gene_ensembl", "refdb":{ - "ensg_access":"http:\/\/www.ensembl.org\/Sus_scrofa\/geneview?gene=###ID###&db=core", - "url":"http:\/\/www.ensembl.org\/Sus_scrofa\/Info\/Index\/", - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_Sus_scrofa_PROTEIN" - ], - "access":"http:\/\/www.ensembl.org\/Sus_scrofa\/Transcript\/ProteinSummary?peptide=###ID###" + "use_gk_central_ensembl_ref_db": "main" }, "abbreviation": "SSC" }, @@ -105,27 +77,13 @@ "Homo sapiens" ], "refdb":{ - "url":"http:\/\/www.ensembl.org\/Homo_sapiens\/Info\/Index\/", - "ensg_access":"http:\/\/www.ensembl.org\/Homo_sapiens\/geneview?gene=###ID###&db=core", - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_Homo_sapiens_PROTEIN" - ], - "access":"http:\/\/www.ensembl.org\/Homo_sapiens\/Transcript\/ProteinSummary?peptide=###ID###" + "use_gk_central_ensembl_ref_db": "main" }, "abbreviation": "HSA" }, "ggal":{ "refdb":{ - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_Gallus_gallus_PROTEIN" - ], - "access":"http:\/\/www.ensembl.org\/Gallus_gallus\/Transcript\/ProteinSummary?peptide=###ID###", - "ensg_access":"http:\/\/www.ensembl.org\/Gallus_gallus\/geneview?gene=###ID###&db=core", - "url":"http:\/\/www.ensembl.org\/Gallus_gallus\/Info\/Index\/" + "use_gk_central_ensembl_ref_db": "main" }, "mart_group":"ggallus_gene_ensembl", "name":[ @@ -141,14 +99,7 @@ "Xenopus tropicalis" ], "refdb":{ - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_Xenopus_tropicalis_PROTEIN" - ], - "access":"http:\/\/www.ensembl.org\/Xenopus_tropicalis\/Transcript\/ProteinSummary?peptide=###ID###", - "url":"http:\/\/www.ensembl.org\/Xenopus_tropicalis\/Info\/Index\/", - "ensg_access":"http:\/\/www.ensembl.org\/Xenopus_tropicalis\/geneview?gene=###ID###&db=core" + "use_gk_central_ensembl_ref_db": "main" }, "group":"Vertebrate", "compara":"core", @@ -170,27 +121,13 @@ }, "mart_url":"http:\/\/fungi.ensembl.org\/biomart\/martservice", "refdb":{ - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_S_pombe_PROTEIN" - ], - "access":"http:\/\/fungi.ensembl.org\/Schizosaccharomyces_pombe\/Transcript\/ProteinSummary?peptide=###ID###", - "ensg_access":"http:\/\/fungi.ensembl.org\/Schizosaccharomyces_pombe\/geneview?gene=###ID###&db=core", - "url":"http:\/\/fungi.ensembl.org\/Schizosaccharomyces_pombe\/Info\/Index" + "use_gk_central_ensembl_ref_db": "fungi" }, "abbreviation": "SPO" }, "btau":{ "refdb":{ - "url":"http:\/\/www.ensembl.org\/Bos_taurus\/Info\/Index\/", - "ensg_access":"http:\/\/www.ensembl.org\/Bos_taurus\/geneview?gene=###ID###&db=core", - "access":"http:\/\/www.ensembl.org\/Bos_taurus\/Transcript\/ProteinSummary?peptide=###ID###", - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_Bos_taurus_PROTEIN" - ] + "use_gk_central_ensembl_ref_db": "main" }, "mart_group":"btaurus_gene_ensembl", "name":[ @@ -202,14 +139,7 @@ }, "mmus":{ "refdb":{ - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_Mus_musculus_PROTEIN" - ], - "access":"http:\/\/www.ensembl.org\/Mus_musculus\/Transcript\/ProteinSummary?peptide=###ID###", - "url":"http:\/\/www.ensembl.org\/Mus_musculus\/Info\/Index\/", - "ensg_access":"http:\/\/www.ensembl.org\/Mus_musculus\/geneview?gene=###ID###&db=core" + "use_gk_central_ensembl_ref_db": "main" }, "mart_group":"mmusculus_gene_ensembl", "name":[ @@ -223,14 +153,7 @@ "compara":"core", "group":"Vertebrate", "refdb":{ - "access":"http:\/\/www.ensembl.org\/Canis_familiaris\/Transcript\/ProteinSummary?peptide=###ID###", - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_Canis_PROTEIN" - ], - "ensg_access":"http:\/\/www.ensembl.org\/Canis_familiaris\/geneview?gene=###ID###&db=core", - "url":"http:\/\/www.ensembl.org\/Canis_familiaris\/Info\/Index\/" + "use_gk_central_ensembl_ref_db": "main" }, "name":[ "Canis familiaris" @@ -244,14 +167,7 @@ "Plasmodium falciparum" ], "refdb":{ - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_P_falciparum_PROTEIN" - ], - "access":"http:\/\/protists.ensembl.org\/Plasmodium_falciparum\/Transcript\/ProteinSummary?peptide=###ID###", - "ensg_access":"http:\/\/protists.ensembl.org\/Plasmodium_falciparum\/geneview?gene=###ID###&db=core", - "url":"http:\/\/protists.ensembl.org\/Plasmodium_falciparum\/Info\/Index" + "use_gk_central_ensembl_ref_db": "protist" }, "mart_url":"http:\/\/protists.ensembl.org\/biomart\/martservice", "alt_refdb":{ @@ -269,14 +185,7 @@ "compara":"core", "group":"Metazoan", "refdb":{ - "ensg_access":"http:\/\/metazoa.ensembl.org\/Drosophila_melanogaster\/geneview?gene=###ID###&db=core", - "url":"http:\/\/metazoa.ensembl.org\/Drosophila_melanogaster", - "access":"http:\/\/metazoa.ensembl.org\/Drosophila_melanogaster\/Transcript\/ProteinSummary?peptide=###ID###", - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_D_melanogaster_PROTEIN" - ] + "use_gk_central_ensembl_ref_db": "main" }, "alt_refdb":{ "access":"https:\/\/flybase.org\/reports\/###ID###.html", @@ -297,14 +206,7 @@ "Danio rerio" ], "refdb":{ - "access":"http:\/\/www.ensembl.org\/Danio_rerio\/Transcript\/ProteinSummary?peptide=###ID###", - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_Danio_rerio_PROTEIN" - ], - "ensg_access":"http:\/\/www.ensembl.org\/Danio_rerio\/geneview?gene=###ID###&db=core", - "url":"http:\/\/www.ensembl.org\/Danio_rerio\/Info\/Index\/" + "use_gk_central_ensembl_ref_db": "main" }, "group":"Vertebrate", "compara":"core", @@ -314,14 +216,7 @@ "compara":"core", "group":"Vertebrate", "refdb":{ - "ensg_access":"http:\/\/www.ensembl.org\/Rattus_norvegicus\/geneview?gene=###ID###&db=core", - "url":"http:\/\/www.ensembl.org\/Rattus_norvegicus\/Info\/Index\/", - "access":"http:\/\/www.ensembl.org\/Rattus_norvegicus\/Transcript\/ProteinSummary?peptide=###ID###", - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_Rattus_norvegicus_PROTEIN" - ] + "use_gk_central_ensembl_ref_db": "main" }, "mart_group":"rnorvegicus_gene_ensembl", "name":[ From 66395ffb81f925aa956564100f959411ebd65a64 Mon Sep 17 00:00:00 2001 From: jweiser Date: Mon, 9 Feb 2026 23:38:31 -0500 Subject: [PATCH 02/17] updates inferrer classes to use EnsEMBL ref dbs from gk_central --- .../reactome/orthoinference/EWASInferrer.java | 85 +++++++++++-------- .../orthoinference/EventsInferrer.java | 7 +- 2 files changed, 50 insertions(+), 42 deletions(-) diff --git a/src/main/java/org/reactome/orthoinference/EWASInferrer.java b/src/main/java/org/reactome/orthoinference/EWASInferrer.java index 685a2b9..17f0a0a 100644 --- a/src/main/java/org/reactome/orthoinference/EWASInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EWASInferrer.java @@ -339,53 +339,39 @@ public static void readENSGMappingFile(String toSpecies, String pathToOrthopairs // Fetches Uniprot DB instance @SuppressWarnings("unchecked") - public static void fetchAndSetUniprotDbInstance() throws Exception - { + public static void fetchAndSetUniprotDbInstance() throws Exception { Collection uniprotDbInstances = (Collection) dba.fetchInstanceByAttribute(ReferenceDatabase, name, "=", "UniProt"); uniprotDbInst = uniprotDbInstances.iterator().next(); } - // Creates instance pertaining to the species Ensembl Protein DB - public static void createEnsemblProteinDbInstance(String toSpeciesLong, String toSpeciesReferenceDbUrl, String toSpeciesEnspAccessUrl) throws Exception - { - String enspSpeciesDb = "ENSEMBL_" + toSpeciesLong + "_PROTEIN"; - enspDbInst = new GKInstance(dba.getSchema().getClassByName(ReferenceDatabase)); - enspDbInst.setDbAdaptor(dba); - enspDbInst.addAttributeValue(created, instanceEditInst); - enspDbInst.addAttributeValue(name, "Ensembl"); - enspDbInst.addAttributeValue(name, enspSpeciesDb); - enspDbInst.addAttributeValue(url, toSpeciesReferenceDbUrl); - enspDbInst.addAttributeValue(accessUrl, toSpeciesEnspAccessUrl); - enspDbInst.setAttributeValue(_displayName, "Ensembl"); - dba.storeInstance(enspDbInst); - } + public static void fetchAndSetEnsemblDbInstance(String ensemblDatabaseType) throws Exception { + GKInstance ensemblDbInst = fetchEnsemblDbInstance(ensemblDatabaseType); + if (ensemblDbInst == null) { + throw new IllegalStateException( + "Unable to fetch EnsEMBL Reference Database for type: " + ensemblDatabaseType + ); + } - // Creates instance pertaining to the species Ensembl Gene DB - public static void createEnsemblGeneDBInstance(String toSpeciesLong, String toSpeciesReferenceDbUrl, String toSpeciesEnsgAccessUrl) throws Exception - { - String ensgSpeciesDb = "ENSEMBL_" + toSpeciesLong + "_GENE"; - ensgDbInst = new GKInstance(dba.getSchema().getClassByName(ReferenceDatabase)); - ensgDbInst.setDbAdaptor(dba); - ensgDbInst.addAttributeValue(created, instanceEditInst); - ensgDbInst.addAttributeValue(name, "ENSEMBL"); - ensgDbInst.addAttributeValue(name, ensgSpeciesDb); - ensgDbInst.addAttributeValue(url, toSpeciesReferenceDbUrl); - ensgDbInst.addAttributeValue(accessUrl, toSpeciesEnsgAccessUrl); - ensgDbInst.setAttributeValue(_displayName, "ENSEMBL"); - dba.storeInstance(ensgDbInst); + ensgDbInst = ensemblDbInst; + enspDbInst = ensemblDbInst; } // Create instance pertaining to any alternative reference DB for the species public static void createAlternateReferenceDBInstance(JSONObject altRefDbJSON) throws Exception { - alternateDbInst = new GKInstance(dba.getSchema().getClassByName(ReferenceDatabase)); - alternateDbInst.setDbAdaptor(dba); - alternateDbInst.addAttributeValue(created, instanceEditInst); - alternateDbInst.addAttributeValue(name, ((JSONArray) altRefDbJSON.get("dbname")).get(0)); - alternateDbInst.addAttributeValue(url, altRefDbJSON.get("url")); - alternateDbInst.addAttributeValue(accessUrl, altRefDbJSON.get("access")); - alternateDbInst.setAttributeValue(_displayName, ((JSONArray) altRefDbJSON.get("dbname")).get(0)); - alternateDbInst = InstanceUtilities.checkForIdenticalInstances(alternateDbInst, null); + String altRefDbDisplayName = (String) ((JSONArray) altRefDbJSON.get("dbName")).get(0); + if (refDbExistsInDb(altRefDbDisplayName)) { + alternateDbInst = getRefDbFromDb(altRefDbDisplayName); + } else { + alternateDbInst = new GKInstance(dba.getSchema().getClassByName(ReferenceDatabase)); + alternateDbInst.setDbAdaptor(dba); + alternateDbInst.addAttributeValue(created, instanceEditInst); + alternateDbInst.addAttributeValue(name, altRefDbDisplayName); + alternateDbInst.addAttributeValue(url, altRefDbJSON.get("url")); + alternateDbInst.addAttributeValue(accessUrl, altRefDbJSON.get("access")); + alternateDbInst.setAttributeValue(_displayName, altRefDbDisplayName); + alternateDbInst = InstanceUtilities.checkForIdenticalInstances(alternateDbInst, null); + } if (altRefDbJSON.get("alt_id") != null) { altRefDbId = (String) altRefDbJSON.get("alt_id"); @@ -412,4 +398,29 @@ public static void setWormbaseMappings(Map> wormbaseMapping public static void setGeneNameMappingFile(Map geneNameMappingsCopy) { geneNameMappings = geneNameMappingsCopy; } + + private static boolean refDbExistsInDb(String refDbDisplayName) throws Exception { + Collection refDbInstances = dba.fetchInstanceByAttribute(ReferenceDatabase, _displayName, "=", refDbDisplayName); + return !refDbInstances.isEmpty(); + } + + private static GKInstance getRefDbFromDb(String refDbDisplayName) throws Exception { + return (GKInstance) dba.fetchInstanceByAttribute( + ReferenceDatabase, _displayName, "=", refDbDisplayName + ).iterator().next(); + } + + private static GKInstance fetchEnsemblDbInstance(String ensemblDatabaseType) throws Exception { + GKInstance ensemblDbInst; + if (ensemblDatabaseType.equals("main")) { + ensemblDbInst = getRefDbFromDb("ENSEMBL"); + } else if (ensemblDatabaseType.equals("fungi")) { + ensemblDbInst = getRefDbFromDb("ENSEMBL Fungi"); + } else if (ensemblDatabaseType.equals("protist")) { + ensemblDbInst = getRefDbFromDb("ENSEMBL Protist"); + } else { + throw new IllegalStateException(ensemblDatabaseType + " is not a valid EnsEMBL database type"); + } + return ensemblDbInst; + } } diff --git a/src/main/java/org/reactome/orthoinference/EventsInferrer.java b/src/main/java/org/reactome/orthoinference/EventsInferrer.java index 40943d5..bcd3fe0 100644 --- a/src/main/java/org/reactome/orthoinference/EventsInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EventsInferrer.java @@ -95,9 +95,7 @@ public static void inferEvents(Properties props, String species) throws Exceptio logger.info("Beginning orthoinference of " + speciesName); JSONObject refDb = (JSONObject) speciesObject.get("refdb"); - String refDbUrl = (String) refDb.get("url"); - String refDbProteinUrl = (String) refDb.get("access"); - String refDbGeneUrl = (String) refDb.get("ensg_access"); + String ensemblDatabaseType = (String) refDb.get("use_gk_central_ensembl_ref_db"); // Creates two files that a) list reactions that are eligible for inference and b) those that are successfully inferred String eligibleFilename = "eligible_" + species + "_75.txt"; @@ -120,8 +118,7 @@ public static void inferEvents(Properties props, String species) throws Exceptio } EWASInferrer.readENSGMappingFile(species, pathToOrthopairs); EWASInferrer.fetchAndSetUniprotDbInstance(); - EWASInferrer.createEnsemblProteinDbInstance(speciesName, refDbUrl, refDbProteinUrl); - EWASInferrer.createEnsemblGeneDBInstance(speciesName, refDbUrl, refDbGeneUrl); + EWASInferrer.fetchAndSetEnsemblDbInstance(ensemblDatabaseType); JSONObject altRefDbJSON = (JSONObject) speciesObject.get("alt_refdb"); if (altRefDbJSON != null) From 9177479b174890dfe1077c39c42d8a152004eeaa Mon Sep 17 00:00:00 2001 From: jweiser Date: Tue, 10 Feb 2026 21:29:52 -0500 Subject: [PATCH 03/17] bug fix - corrects typo in dbname JSON key --- src/main/java/org/reactome/orthoinference/EWASInferrer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/reactome/orthoinference/EWASInferrer.java b/src/main/java/org/reactome/orthoinference/EWASInferrer.java index 17f0a0a..b2e9189 100644 --- a/src/main/java/org/reactome/orthoinference/EWASInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EWASInferrer.java @@ -359,7 +359,7 @@ public static void fetchAndSetEnsemblDbInstance(String ensemblDatabaseType) thro // Create instance pertaining to any alternative reference DB for the species public static void createAlternateReferenceDBInstance(JSONObject altRefDbJSON) throws Exception { - String altRefDbDisplayName = (String) ((JSONArray) altRefDbJSON.get("dbName")).get(0); + String altRefDbDisplayName = (String) ((JSONArray) altRefDbJSON.get("dbname")).get(0); if (refDbExistsInDb(altRefDbDisplayName)) { alternateDbInst = getRefDbFromDb(altRefDbDisplayName); } else { From 291ad8fa03cec7f6d15ec1e92b6ae4810e1e3a6c Mon Sep 17 00:00:00 2001 From: jweiser Date: Tue, 10 Feb 2026 21:51:09 -0500 Subject: [PATCH 04/17] adds null check for fetched reference databases --- src/main/java/org/reactome/orthoinference/EWASInferrer.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/reactome/orthoinference/EWASInferrer.java b/src/main/java/org/reactome/orthoinference/EWASInferrer.java index b2e9189..23dc4c1 100644 --- a/src/main/java/org/reactome/orthoinference/EWASInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EWASInferrer.java @@ -400,8 +400,9 @@ public static void setGeneNameMappingFile(Map geneNameMappingsCo } private static boolean refDbExistsInDb(String refDbDisplayName) throws Exception { - Collection refDbInstances = dba.fetchInstanceByAttribute(ReferenceDatabase, _displayName, "=", refDbDisplayName); - return !refDbInstances.isEmpty(); + Collection refDbInstances = + dba.fetchInstanceByAttribute(ReferenceDatabase, _displayName, "=", refDbDisplayName); + return refDbInstances != null && !refDbInstances.isEmpty(); } private static GKInstance getRefDbFromDb(String refDbDisplayName) throws Exception { From 274d6f6ec9bfcdd6717be98ce8fed22ff35268d6 Mon Sep 17 00:00:00 2001 From: jweiser Date: Fri, 13 Mar 2026 14:42:23 -0400 Subject: [PATCH 05/17] creates EnsEMBL ref dbs if they don't exist in db --- .../reactome/orthoinference/EWASInferrer.java | 63 +++++++++++++++---- .../orthoinference/EventsInferrer.java | 3 +- src/main/resources/refdb.json | 21 +++++++ 3 files changed, 75 insertions(+), 12 deletions(-) create mode 100644 src/main/resources/refdb.json diff --git a/src/main/java/org/reactome/orthoinference/EWASInferrer.java b/src/main/java/org/reactome/orthoinference/EWASInferrer.java index 23dc4c1..e313586 100644 --- a/src/main/java/org/reactome/orthoinference/EWASInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EWASInferrer.java @@ -1,6 +1,7 @@ package org.reactome.orthoinference; import java.io.BufferedReader; +import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.nio.file.Paths; @@ -12,12 +13,13 @@ import static org.gk.model.ReactomeJavaConstants.*; import org.gk.model.InstanceDisplayNameGenerator; +import org.gk.model.ReactomeJavaConstants; import org.gk.persistence.MySQLAdaptor; -import org.gk.schema.GKSchemaClass; -import org.gk.schema.InvalidAttributeException; -import org.gk.schema.SchemaClass; +import org.gk.schema.*; import org.json.simple.JSONArray; import org.json.simple.JSONObject; +import org.json.simple.parser.JSONParser; +import org.json.simple.parser.ParseException; public class EWASInferrer { @@ -344,8 +346,10 @@ public static void fetchAndSetUniprotDbInstance() throws Exception { uniprotDbInst = uniprotDbInstances.iterator().next(); } - public static void fetchAndSetEnsemblDbInstance(String ensemblDatabaseType) throws Exception { - GKInstance ensemblDbInst = fetchEnsemblDbInstance(ensemblDatabaseType); + public static void fetchAndSetEnsemblDbInstance(String ensemblDatabaseType, String pathToRefDbConfig) + throws Exception { + + GKInstance ensemblDbInst = fetchOrCreateEnsemblDbInstance(ensemblDatabaseType, pathToRefDbConfig); if (ensemblDbInst == null) { throw new IllegalStateException( "Unable to fetch EnsEMBL Reference Database for type: " + ensemblDatabaseType @@ -411,17 +415,54 @@ private static GKInstance getRefDbFromDb(String refDbDisplayName) throws Excepti ).iterator().next(); } - private static GKInstance fetchEnsemblDbInstance(String ensemblDatabaseType) throws Exception { - GKInstance ensemblDbInst; + private static GKInstance fetchOrCreateEnsemblDbInstance(String ensemblDatabaseType, String pathToRefDbConfig) + throws Exception { + + String ensemblRefDBDisplayName; if (ensemblDatabaseType.equals("main")) { - ensemblDbInst = getRefDbFromDb("ENSEMBL"); + ensemblRefDBDisplayName = "ENSEMBL"; } else if (ensemblDatabaseType.equals("fungi")) { - ensemblDbInst = getRefDbFromDb("ENSEMBL Fungi"); + ensemblRefDBDisplayName = "ENSEMBL Fungi"; } else if (ensemblDatabaseType.equals("protist")) { - ensemblDbInst = getRefDbFromDb("ENSEMBL Protist"); + ensemblRefDBDisplayName = "ENSEMBL Protist"; } else { throw new IllegalStateException(ensemblDatabaseType + " is not a valid EnsEMBL database type"); } - return ensemblDbInst; + + return refDbExistsInDb(ensemblRefDBDisplayName) ? + getRefDbFromDb(ensemblRefDBDisplayName) : + createRefDb(ensemblRefDBDisplayName, pathToRefDbConfig); + } + + private static GKInstance createRefDb(String refDbDisplayName, String pathToRefDbConfig) + throws Exception { + + JSONParser parser = new JSONParser(); + JSONObject refDbsJsonObject = (JSONObject) parser.parse(new FileReader(pathToRefDbConfig)); + JSONObject refDbJsonObject = (JSONObject) refDbsJsonObject.get(refDbDisplayName); + + String accessUrl = (String) refDbJsonObject.get("accessUrl"); + String identifiersPrefix = (String) refDbJsonObject.get("identifiersPrefix"); + String resourceIdentifier = (String) refDbJsonObject.get("resourceIdentifier"); + String url = (String) refDbJsonObject.get("url"); + + GKInstance refDbInstance = new GKInstance(fetchSchema().getClassByName(ReferenceDatabase)); + refDbInstance.setDbAdaptor(dba); + refDbInstance.setAttributeValue(ReactomeJavaConstants.created, instanceEditInst); + refDbInstance.setAttributeValue(ReactomeJavaConstants.accessUrl, accessUrl); + refDbInstance.setAttributeValue("identifiersPrefix", identifiersPrefix); + refDbInstance.setAttributeValue(ReactomeJavaConstants.resourceIdentifier, resourceIdentifier); + refDbInstance.setAttributeValue(ReactomeJavaConstants.url, url); + refDbInstance.setAttributeValue(ReactomeJavaConstants.name, Collections.singletonList(refDbDisplayName)); + InstanceDisplayNameGenerator.setDisplayName(refDbInstance); + + return refDbInstance; + } + + private static Schema fetchSchema() throws Exception { + if (dba.getSchema() == null) { + return dba.fetchSchema(); + } + return dba.getSchema(); } } diff --git a/src/main/java/org/reactome/orthoinference/EventsInferrer.java b/src/main/java/org/reactome/orthoinference/EventsInferrer.java index bcd3fe0..4af1c63 100644 --- a/src/main/java/org/reactome/orthoinference/EventsInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EventsInferrer.java @@ -77,6 +77,7 @@ public static void inferEvents(Properties props, String species) throws Exceptio releaseVersion = props.getProperty("releaseNumber"); String pathToOrthopairs = props.getProperty("pathToOrthopairs", "orthopairs"); String pathToSpeciesConfig = props.getProperty("pathToSpeciesConfig", "src/main/resources/Species.json"); + String pathToRefDbConfig = props.getProperty("pathToRefDbConfig", "src/main/resources/refdb.json"); String dateOfRelease = props.getProperty("dateOfRelease"); int personId = Integer.valueOf(props.getProperty("personId")); setReleaseDates(dateOfRelease); @@ -118,7 +119,7 @@ public static void inferEvents(Properties props, String species) throws Exceptio } EWASInferrer.readENSGMappingFile(species, pathToOrthopairs); EWASInferrer.fetchAndSetUniprotDbInstance(); - EWASInferrer.fetchAndSetEnsemblDbInstance(ensemblDatabaseType); + EWASInferrer.fetchAndSetEnsemblDbInstance(ensemblDatabaseType, pathToRefDbConfig); JSONObject altRefDbJSON = (JSONObject) speciesObject.get("alt_refdb"); if (altRefDbJSON != null) diff --git a/src/main/resources/refdb.json b/src/main/resources/refdb.json new file mode 100644 index 0000000..df92429 --- /dev/null +++ b/src/main/resources/refdb.json @@ -0,0 +1,21 @@ +{ + "ENSEMBL" : { + "accessUrl": "https://www.ensembl.org/id/###ID###", + "identifiersPrefix": "ensembl", + "resourceIdentifier": "MIR:00100011", + "url": "https://www.ensembl.org/" + }, + "ENSEMBL Fungi" : { + "accessUrl": "https://fungi.ensembl.org/id/###ID###", + "identifiersPrefix": "ensembl.fungi", + "resourceIdentifier": "MIR:00100265", + "url": "https://fungi.ensembl.org/" + + }, + "ENSEMBL Protist" : { + "accessUrl": "https://protists.ensembl.org/id/###ID###", + "identifiersPrefix": "ensembl.protist", + "resourceIdentifier": "MIR:00100262", + "url": "https://protists.ensembl.org/" + } +} \ No newline at end of file From 307c7c1e26e041c968eb97baad9f5308d56ca2c6 Mon Sep 17 00:00:00 2001 From: jweiser Date: Sun, 15 Mar 2026 17:25:25 -0400 Subject: [PATCH 06/17] adds line breaks for long lines --- .../java/org/reactome/orthoinference/EventsInferrer.java | 9 ++++++++- .../orthoinference/OrthologousEntityGenerator.java | 4 +++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/reactome/orthoinference/EventsInferrer.java b/src/main/java/org/reactome/orthoinference/EventsInferrer.java index 4af1c63..fdade21 100644 --- a/src/main/java/org/reactome/orthoinference/EventsInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EventsInferrer.java @@ -346,7 +346,14 @@ private static void setSummationInstance() throws Exception GKInstance summationInst = new GKInstance(dbAdaptor.getSchema().getClassByName(Summation)); summationInst.setDbAdaptor(dbAdaptor); summationInst.addAttributeValue(created, instanceEditInst); - String summationText = "This event has been computationally inferred from an event that has been demonstrated in another species.

The inference is based on the homology mapping from PANTHER. Briefly, reactions for which all involved PhysicalEntities (in input, output and catalyst) have a mapped orthologue/paralogue (for complexes at least 75% of components must have a mapping) are inferred to the other species. High level events are also inferred for these events to allow for easier navigation.

More details and caveats of the event inference in Reactome. For details on PANTHER see also: http://www.pantherdb.org/about.jsp"; + String summationText = "This event has been computationally inferred from an event that has been " + + "demonstrated in another species.

The inference is based on the homology mapping from PANTHER. " + + "Briefly, reactions for which all involved PhysicalEntities (in input, output and catalyst) have a " + + "mapped orthologue/paralogue (for complexes at least 75% of components must have a mapping) are " + + "inferred to the other species. High level events are also inferred for these events to allow for " + + "easier navigation.

More details and " + + "caveats of the event inference in Reactome. For details on PANTHER see also: " + + "http://www.pantherdb.org/about.jsp"; summationInst.addAttributeValue(text, summationText); summationInst.addAttributeValue(_displayName, summationText); summationInst = InstanceUtilities.checkForIdenticalInstances(summationInst, null); diff --git a/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java b/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java index 9253399..db0449c 100644 --- a/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java +++ b/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java @@ -313,7 +313,9 @@ private static GKInstance createInfEntitySet(GKInstance entitySetInst, boolean o for (GKInstance candidateInst : candidateInstances) { GKInstance infCandidateInst = createOrthoEntity(candidateInst, false); - if (infCandidateInst != null && !existingMemberInstances.contains(infCandidateInst.getAttributeValue(name).toString()) && !existingCandidateInstances.contains(infCandidateInst.getAttributeValue(name).toString())) + if (infCandidateInst != null && + !existingMemberInstances.contains(infCandidateInst.getAttributeValue(name).toString()) && + !existingCandidateInstances.contains(infCandidateInst.getAttributeValue(name).toString())) { existingCandidateInstances.add(infCandidateInst.getAttributeValue(name).toString()); infCandidatesList.add(infCandidateInst); From 2bec6510ada4c337a08faec51638ae16a1f5fca9 Mon Sep 17 00:00:00 2001 From: Joel Weiser Date: Sun, 15 Mar 2026 17:25:55 -0400 Subject: [PATCH 07/17] update checkstyle.xml line length max to 250 characters --- checkstyle.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/checkstyle.xml b/checkstyle.xml index 8506a80..c9d5dc6 100644 --- a/checkstyle.xml +++ b/checkstyle.xml @@ -3,7 +3,7 @@ "https://checkstyle.org/dtds/configuration_1_3.dtd"> - + From 6926eada361e5f040053373175054e0f9a892b52 Mon Sep 17 00:00:00 2001 From: jweiser Date: Sun, 15 Mar 2026 17:27:52 -0400 Subject: [PATCH 08/17] adds line breaks for long line --- src/main/java/org/reactome/orthoinference/EWASInferrer.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/reactome/orthoinference/EWASInferrer.java b/src/main/java/org/reactome/orthoinference/EWASInferrer.java index e313586..5f7d02d 100644 --- a/src/main/java/org/reactome/orthoinference/EWASInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EWASInferrer.java @@ -100,7 +100,11 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr for (int endCoord : (Collection) ewasInst.getAttributeValuesList(endCoordinate)) { infEWASInst.addAttributeValue(endCoordinate, endCoord); } - if (infEWASInst.getAttributeValue(startCoordinate) != null && (int) infEWASInst.getAttributeValue(startCoordinate) > 1 || infEWASInst.getAttributeValue(endCoordinate) != null && (int) infEWASInst.getAttributeValue(endCoordinate) > 1) { + if (infEWASInst.getAttributeValue(startCoordinate) != null && + (int) infEWASInst.getAttributeValue(startCoordinate) > 1 || + infEWASInst.getAttributeValue(endCoordinate) != null && + (int) infEWASInst.getAttributeValue(endCoordinate) > 1) { + List infEWASInstNames = (ArrayList) (ewasInst).getAttributeValuesList(name); infEWASInst.addAttributeValue(name, infEWASInstNames.get(0)); infEWASInst.addAttributeValue(name, homologueId); From a941b7ac3cd35e25621358827a41e3424e925a05 Mon Sep 17 00:00:00 2001 From: Joel Weiser Date: Sun, 15 Mar 2026 17:32:19 -0400 Subject: [PATCH 09/17] updates pom.xml for reactome-base version to 2.0.0 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 04d5e26..f08e00c 100644 --- a/pom.xml +++ b/pom.xml @@ -70,7 +70,7 @@ org.reactome.base reactome-base - 2.2.4-SNAPSHOT + 2.0.0 From 9200d8e699743b8c77f16aa152d753ce728e4f52 Mon Sep 17 00:00:00 2001 From: Joel Weiser Date: Sun, 15 Mar 2026 17:41:02 -0400 Subject: [PATCH 10/17] updates ci.yml to temporarily allow push for any branch --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 41348f3..03e6949 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,7 +55,7 @@ jobs: path: /tmp/image.tar docker-push: - if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} + if: ${{ github.event_name == 'push' }} needs: docker-build runs-on: ubuntu-latest steps: From c545ea6bee94943cdc7832041ff8166b9dbcc715 Mon Sep 17 00:00:00 2001 From: Joel Weiser Date: Sun, 15 Mar 2026 18:04:05 -0400 Subject: [PATCH 11/17] updates ci.yml to docker-push on workflow dispatch --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 03e6949..b43e770 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,7 +55,7 @@ jobs: path: /tmp/image.tar docker-push: - if: ${{ github.event_name == 'push' }} + if: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' }} needs: docker-build runs-on: ubuntu-latest steps: From 0fe46885561e7d436e4e9cfe629676cc020f3031 Mon Sep 17 00:00:00 2001 From: Joel Weiser Date: Sun, 15 Mar 2026 18:07:35 -0400 Subject: [PATCH 12/17] update ci.yml to temporarily allow push for any branch --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b43e770..9e3e513 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,7 +8,7 @@ on: - synchronize push: branches: - - main + - '*' permissions: id-token: write From 8af9634503ab519eeb0e18772df54fa9763794f0 Mon Sep 17 00:00:00 2001 From: Joel Weiser Date: Sun, 15 Mar 2026 18:42:29 -0400 Subject: [PATCH 13/17] Update ci.yml --- .github/workflows/ci.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9e3e513..cb8c646 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,7 +8,7 @@ on: - synchronize push: branches: - - '*' + - '**' permissions: id-token: write @@ -55,8 +55,7 @@ jobs: path: /tmp/image.tar docker-push: - if: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' }} - needs: docker-build + if: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request' }} runs-on: ubuntu-latest steps: - uses: actions/download-artifact@v4 From 7a9c08b537eabe18ff369bf45cc482bf7754b729 Mon Sep 17 00:00:00 2001 From: Joel Weiser Date: Sun, 15 Mar 2026 18:48:01 -0400 Subject: [PATCH 14/17] Update ci.yml --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cb8c646..cd46e8b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,7 +55,7 @@ jobs: path: /tmp/image.tar docker-push: - if: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request' }} + if: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' }} runs-on: ubuntu-latest steps: - uses: actions/download-artifact@v4 From 70c1c6473503cce66d99f0c112018b2cd2b812df Mon Sep 17 00:00:00 2001 From: Joel Weiser Date: Sun, 15 Mar 2026 18:51:04 -0400 Subject: [PATCH 15/17] Update ci.yml --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cd46e8b..77dc7aa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -56,6 +56,7 @@ jobs: docker-push: if: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' }} + needs: docker-build runs-on: ubuntu-latest steps: - uses: actions/download-artifact@v4 From faf49908748ac96b608be921558038a51b7a342b Mon Sep 17 00:00:00 2001 From: Joel Weiser Date: Mon, 16 Mar 2026 21:54:08 -0400 Subject: [PATCH 16/17] reverts ci.yml to push main branch only --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 77dc7aa..41348f3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,7 +8,7 @@ on: - synchronize push: branches: - - '**' + - main permissions: id-token: write @@ -55,7 +55,7 @@ jobs: path: /tmp/image.tar docker-push: - if: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' }} + if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} needs: docker-build runs-on: ubuntu-latest steps: From a38eff888037110e8da7464f130d89f89fa80c8e Mon Sep 17 00:00:00 2001 From: jweiser Date: Tue, 17 Mar 2026 13:59:53 -0400 Subject: [PATCH 17/17] refactors and adds error messages --- .../reactome/orthoinference/EWASInferrer.java | 38 +++++++++++-------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/reactome/orthoinference/EWASInferrer.java b/src/main/java/org/reactome/orthoinference/EWASInferrer.java index 5f7d02d..ebad6ed 100644 --- a/src/main/java/org/reactome/orthoinference/EWASInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EWASInferrer.java @@ -1,7 +1,6 @@ package org.reactome.orthoinference; import java.io.BufferedReader; -import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.nio.file.Paths; @@ -441,31 +440,40 @@ private static GKInstance fetchOrCreateEnsemblDbInstance(String ensemblDatabaseT private static GKInstance createRefDb(String refDbDisplayName, String pathToRefDbConfig) throws Exception { - JSONParser parser = new JSONParser(); - JSONObject refDbsJsonObject = (JSONObject) parser.parse(new FileReader(pathToRefDbConfig)); - JSONObject refDbJsonObject = (JSONObject) refDbsJsonObject.get(refDbDisplayName); - - String accessUrl = (String) refDbJsonObject.get("accessUrl"); - String identifiersPrefix = (String) refDbJsonObject.get("identifiersPrefix"); - String resourceIdentifier = (String) refDbJsonObject.get("resourceIdentifier"); - String url = (String) refDbJsonObject.get("url"); + JSONObject refDbJsonObject = getRefDBJSONObject(refDbDisplayName, pathToRefDbConfig); GKInstance refDbInstance = new GKInstance(fetchSchema().getClassByName(ReferenceDatabase)); refDbInstance.setDbAdaptor(dba); refDbInstance.setAttributeValue(ReactomeJavaConstants.created, instanceEditInst); - refDbInstance.setAttributeValue(ReactomeJavaConstants.accessUrl, accessUrl); - refDbInstance.setAttributeValue("identifiersPrefix", identifiersPrefix); - refDbInstance.setAttributeValue(ReactomeJavaConstants.resourceIdentifier, resourceIdentifier); - refDbInstance.setAttributeValue(ReactomeJavaConstants.url, url); + refDbInstance.setAttributeValue(ReactomeJavaConstants.accessUrl, refDbJsonObject.get("accessUrl")); + refDbInstance.setAttributeValue("identifiersPrefix", refDbJsonObject.get("identifiersPrefix")); + refDbInstance.setAttributeValue(ReactomeJavaConstants.resourceIdentifier, + refDbJsonObject.get("resourceIdentifier")); + refDbInstance.setAttributeValue(ReactomeJavaConstants.url, refDbJsonObject.get("url")); refDbInstance.setAttributeValue(ReactomeJavaConstants.name, Collections.singletonList(refDbDisplayName)); InstanceDisplayNameGenerator.setDisplayName(refDbInstance); return refDbInstance; } - private static Schema fetchSchema() throws Exception { + private static JSONObject getRefDBJSONObject(String refDbDisplayName, String pathToRefDbConfig) { + JSONParser parser = new JSONParser(); + JSONObject refDbsJsonObject; + try { + refDbsJsonObject = (JSONObject) parser.parse(new FileReader(pathToRefDbConfig)); + } catch (IOException | ParseException e) { + throw new RuntimeException("Unable to read/parse JSON from " + pathToRefDbConfig, e); + } + return (JSONObject) refDbsJsonObject.get(refDbDisplayName); + } + + private static Schema fetchSchema() { if (dba.getSchema() == null) { - return dba.fetchSchema(); + try { + return dba.fetchSchema(); + } catch (Exception e) { + throw new RuntimeException("Unable to fetch schema from " + dba, e); + } } return dba.getSchema(); }