Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -83,18 +83,23 @@
<version>${powermock.version}</version>
<scope>test</scope>
</dependency>
<!-- Be aware that the UniProt japi dependency
will include Google's Guice 4.0,
which does not play nicely with Java 9+.
I have not found a way to resolve this yet
other than running this code in a Java 8 Docker
container. -->
<dependency>
<groupId>uk.ac.ebi.uniprot</groupId>
<artifactId>japi</artifactId>
<version>1.0.31</version>
<version>1.0.38</version>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest</artifactId>
<version>2.1</version>
<scope>test</scope>
</dependency>

</dependencies>

<repositories>
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/reactome/release/orthopairs/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public static void main( String[] args ) throws IOException, ParseException, Ser
String pathToSpeciesConfig = props.getProperty("pathToSpeciesConfig", "src/main/resources/Species.json");
String pantherQfOFilename = props.getProperty("pantherQfOFilename", "QfO_Genome_Orthologs.tar.gz");
String pantherHCOPFilename = props.getProperty("pantherHCOPFilename", "Orthologs_HCOP.tar.gz");

// String uniprotQueryBatchSize = props.getProperty("uniprotQueryBatchSize", "100");
if (releaseNumber.isEmpty()) {
logger.fatal("Please populate config.properties file with releaseNumber");
throw new IllegalStateException("No releaseNumber attribute in config.properties");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,22 @@
import uk.ac.ebi.uniprot.dataservice.client.uniprot.UniProtService;
import uk.ac.ebi.uniprot.dataservice.query.Query;

import java.net.SocketTimeoutException;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.time.Duration;
import java.util.*;

public class UniProtGeneNamesRetriever {

private static final int MAX_NUM_ATTEMPTS = 10;
private static final Logger logger = LogManager.getLogger();
private static final int MAX_UNIPROT_BATCH_QUERY_SIZE = 250;
// TODO: Make this value configurable.
private static final int MAX_UNIPROT_BATCH_QUERY_SIZE = 100;

/**
* Queries the UniProt mapping service through their Java API library. All Uniprot accession IDs are taken from the Panther
Expand Down Expand Up @@ -129,25 +134,58 @@ public static Set<String> retrieveGeneNamesFromUniProt(List<Set<String>> partiti
int count = 0;
Set<String> uniprotAccessionsToGeneNames = new HashSet<>();
for (Set<String> uniprotIdentifierPartition : partitionedUniProtIds) {
int currentAttemptNum = 0;
// Build UniProt API query from the current partition (Set) of UniProt identifiers.
Query query = UniProtQueryBuilder.accessions(uniprotIdentifierPartition);
// Perform UniProt API query to retrieve gene names associated with identifiers.
QueryResult<UniProtComponent<Gene>> uniprotEntries = uniprotService.getGenes(query);

while (uniprotEntries.hasNext()) {
count++;
// Get Gene object returned from UniProt.
UniProtComponent<Gene> geneObject = uniprotEntries.next();
if (!geneObject.getComponent().isEmpty()) {
// Iterate through all Gene components in the response.
for (Gene geneComponent : geneObject.getComponent()) {
// Tab-separate UniProt accession ID and its associated gene name, and then store these in the Set that will be returned.
uniprotAccessionsToGeneNames.add(geneObject.getAccession().toString() + "\t" + geneComponent.getGeneName().toString() + "\n");
QueryResult<UniProtComponent<Gene>> uniprotEntries = null;
while (currentAttemptNum < MAX_NUM_ATTEMPTS && uniprotEntries == null)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps after this loop, if uniprotEntries is still null (because the maximum number of attempts has been exceeded), log an error indicating that the maximum number of attempts was exceeded and no entries were retrieved (in addition to the log for an error during retrieval).

{
try
{
currentAttemptNum++;
uniprotEntries = uniprotService.getGenes(query);
while (uniprotEntries.hasNext()) {
count++;
// Get Gene object returned from UniProt.
UniProtComponent<Gene> geneObject = uniprotEntries.next();
if (!geneObject.getComponent().isEmpty()) {
// Iterate through all Gene components in the response.
for (Gene geneComponent : geneObject.getComponent()) {
// Tab-separate UniProt accession ID and its associated gene name, and then store these in the Set that will be returned.
uniprotAccessionsToGeneNames.add(geneObject.getAccession().toString() + "\t" + geneComponent.getGeneName().toString() + "\n");
}
}

if (count % 1000 == 0) {
logger.info(count + " UniProt identifiers have been queried for gene names");
}
}
}

if (count % 1000 == 0) {
logger.info(count + " UniProt identifiers have been queried for gene names");
catch (ServiceException e)
{
// Log the exception right away, just in case any code in the exception handler fails and we don't get a chance to log it later.
logger.error(e);
// If the exception was caused by a Timeout, then we want to retry.
boolean timeoutFound = false;
int i = 0;
while (!timeoutFound && i < e.getStackTrace().length)
{
// Don't be too specific - there are other classes for timeouts. ANY type of timeout should trigger a wait-retry.
timeoutFound = e.getStackTrace()[i].getClassName().toUpperCase().contains("TIMEOUT");
i++;
}
// If a timeout was found, sleep for a bit and then retry.
if (timeoutFound)
{
long sleepAmt = Duration.ofSeconds(currentAttemptNum * 2L).toMillis();
logger.warn("A timeout exception was caught while trying to connect to the UniProt service after {} attempts. A retry will be performed after {} milliseconds", currentAttemptNum, sleepAmt);
Thread.sleep(sleepAmt);
}
else
{
logger.error("ServiceException caught while trying to communicate with UniProt: " + e.getMessage(), e);
}
}
}
}
Expand Down