diff --git a/pom.xml b/pom.xml
index cdff170..acb56fd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -83,10 +83,16 @@
${powermock.version}
test
+
uk.ac.ebi.uniprot
japi
- 1.0.31
+ 1.0.38
org.hamcrest
@@ -94,7 +100,6 @@
2.1
test
-
diff --git a/src/main/java/org/reactome/release/orthopairs/Main.java b/src/main/java/org/reactome/release/orthopairs/Main.java
index ef6cacc..ae10868 100644
--- a/src/main/java/org/reactome/release/orthopairs/Main.java
+++ b/src/main/java/org/reactome/release/orthopairs/Main.java
@@ -47,7 +47,7 @@ public static void main( String[] args ) throws IOException, ParseException, Ser
String pathToSpeciesConfig = props.getProperty("pathToSpeciesConfig", "src/main/resources/Species.json");
String pantherQfOFilename = props.getProperty("pantherQfOFilename", "QfO_Genome_Orthologs.tar.gz");
String pantherHCOPFilename = props.getProperty("pantherHCOPFilename", "Orthologs_HCOP.tar.gz");
-
+// TODO: make the UniProt batch size configurable, e.g. String uniprotQueryBatchSize = props.getProperty("uniprotQueryBatchSize", "100");
if (releaseNumber.isEmpty()) {
logger.fatal("Please populate config.properties file with releaseNumber");
throw new IllegalStateException("No releaseNumber attribute in config.properties");
diff --git a/src/main/java/org/reactome/release/orthopairs/UniProtGeneNamesRetriever.java b/src/main/java/org/reactome/release/orthopairs/UniProtGeneNamesRetriever.java
index 225512a..e9bb613 100644
--- a/src/main/java/org/reactome/release/orthopairs/UniProtGeneNamesRetriever.java
+++ b/src/main/java/org/reactome/release/orthopairs/UniProtGeneNamesRetriever.java
@@ -12,17 +12,22 @@
import uk.ac.ebi.uniprot.dataservice.client.uniprot.UniProtService;
import uk.ac.ebi.uniprot.dataservice.query.Query;
+import java.net.SocketTimeoutException;
+
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
+import java.time.Duration;
import java.util.*;
public class UniProtGeneNamesRetriever {
+ private static final int MAX_NUM_ATTEMPTS = 10;
private static final Logger logger = LogManager.getLogger();
- private static final int MAX_UNIPROT_BATCH_QUERY_SIZE = 250;
+ // TODO: Make this value configurable.
+ private static final int MAX_UNIPROT_BATCH_QUERY_SIZE = 100;
/**
* Queries the UniProt mapping service through their Java API library. All Uniprot accession IDs are taken from the Panther
@@ -129,25 +134,58 @@ public static Set retrieveGeneNamesFromUniProt(List> partiti
int count = 0;
Set uniprotAccessionsToGeneNames = new HashSet<>();
for (Set uniprotIdentifierPartition : partitionedUniProtIds) {
+ int currentAttemptNum = 0;
// Build UniProt API query from Set of 250 UniProt identifiers.
Query query = UniProtQueryBuilder.accessions(uniprotIdentifierPartition);
// Perform UniProt API query to retrieve gene names associated with identifiers.
- QueryResult> uniprotEntries = uniprotService.getGenes(query);
-
- while (uniprotEntries.hasNext()) {
- count++;
- // Get Gene object returned from UniProt.
- UniProtComponent geneObject = uniprotEntries.next();
- if (!geneObject.getComponent().isEmpty()) {
- // Iterate through all Gene components in the response.
- for (Gene geneComponent : geneObject.getComponent()) {
- // Tab-separate UniProt accession ID and its associated gene name, and then store these in the Set that will be returned.
- uniprotAccessionsToGeneNames.add(geneObject.getAccession().toString() + "\t" + geneComponent.getGeneName().toString() + "\n");
+ QueryResult> uniprotEntries = null;
+ while (currentAttemptNum < MAX_NUM_ATTEMPTS && uniprotEntries == null)
+ {
+ try
+ {
+ currentAttemptNum++;
+ uniprotEntries = uniprotService.getGenes(query);
+ while (uniprotEntries.hasNext()) {
+ count++;
+ // Get Gene object returned from UniProt.
+ UniProtComponent geneObject = uniprotEntries.next();
+ if (!geneObject.getComponent().isEmpty()) {
+ // Iterate through all Gene components in the response.
+ for (Gene geneComponent : geneObject.getComponent()) {
+ // Tab-separate UniProt accession ID and its associated gene name, and then store these in the Set that will be returned.
+ uniprotAccessionsToGeneNames.add(geneObject.getAccession().toString() + "\t" + geneComponent.getGeneName().toString() + "\n");
+ }
+ }
+
+ if (count % 1000 == 0) {
+ logger.info(count + " UniProt identifiers have been queried for gene names");
+ }
}
}
-
- if (count % 1000 == 0) {
- logger.info(count + " UniProt identifiers have been queried for gene names");
+ catch (ServiceException e)
+ {
+ // Log the exception right away, just in case any code in the exception handler fails and we don't get a chance to log it later.
+ logger.error(e);
+ // If the exception was caused by a Timeout, then we want to retry.
+ boolean timeoutFound = false;
+ int i = 0;
+ while (!timeoutFound && i < e.getStackTrace().length)
+ {
+ // Don't be too specific - there are other classes for timeouts. ANY type of timeout should trigger a wait-retry.
+ timeoutFound = e.getStackTrace()[i].getClassName().toUpperCase().contains("TIMEOUT");
+ i++;
+ }
+ // If a timeout was found, sleep for a bit and then retry.
+ if (timeoutFound)
+ {
+ long sleepAmt = Duration.ofSeconds(currentAttemptNum * 2L).toMillis();
+ logger.warn("A timeout exception was caught while trying to connect to the UniProt service after {} attempts. A retry will be performed after {} milliseconds", currentAttemptNum, sleepAmt);
+ Thread.sleep(sleepAmt);
+ }
+ else
+ {
+ logger.error("ServiceException caught while trying to communicate with UniProt: " + e.getMessage(), e);
+ }
}
}
}