Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 4 additions & 9 deletions ot_tractability_pipeline_v2/bin/run_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

import time
import re
#import zipfile
import zlib
import sys
import argparse
Expand All @@ -19,7 +18,6 @@
import numpy as np
import pandas as pd
import mygene
import pkg_resources
from sqlalchemy import create_engine
import json
import requests
Expand All @@ -33,13 +31,11 @@
import psutil
import gc

PY3 = sys.version > '3'
# sys.version is a free-form string (e.g. "3.11.4 (main, ...) [GCC ...]") and
# cannot be parsed with float(); compare the structured major version instead.
PY3 = sys.version_info[0] >= 3
if PY3:
import urllib.request as urllib2
import urllib.parse as urllib
pass
else:
import urllib2
import urllib
import urllib2 as urllib_request

from ot_tractability_pipeline_v2.queries_sm import *
from ot_tractability_pipeline_v2.queries_ab import *
Expand All @@ -51,8 +47,7 @@
from ot_tractability_pipeline_v2.buckets_protac import *
from ot_tractability_pipeline_v2.buckets_othercl import *


DATA_PATH = pkg_resources.resource_filename('ot_tractability_pipeline_v2', 'data/')
# Import through the package: a bare "from settings import ..." is an absolute
# import under Python 3 and does not resolve to the package's settings.py.
from ot_tractability_pipeline_v2.settings import DATA_PATH

POLLING_INTERVAL = 3
API_URL = "https://rest.uniprot.org"
Expand Down
35 changes: 16 additions & 19 deletions ot_tractability_pipeline_v2/buckets_ab.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,20 @@
import sys
import zipfile
import os

# import mygene
import numpy as np
import pandas as pd
import pkg_resources
from sqlalchemy import text

PY3 = sys.version > '3'
# sys.version is a free-form string (e.g. "3.11.4 (main, ...) [GCC ...]") and
# cannot be parsed with float(); compare the structured major version instead.
PY3 = sys.version_info[0] >= 3
if PY3:
import urllib.request as urllib2
import urllib.request as urllib_request
else:
import urllib2

import urllib2 as urllib_request

from ot_tractability_pipeline_v2.queries_ab import *
from ot_tractability_pipeline_v2 import queries_ab, settings
CHEMBL_VERSION = settings.chembl_version_str()

DATA_PATH = pkg_resources.resource_filename('ot_tractability_pipeline_v2', 'data/')

class Antibody_buckets(object):
'''
Expand Down Expand Up @@ -167,14 +164,14 @@ def _assign_buckets_1_to_3(self):

print("\t- Assessing clinical buckets 1-3...")

self.all_chembl_targets = pd.read_sql_query(text(chembl_clinical_ab_targets), self.engine)
self.all_chembl_targets = pd.read_sql_query(text(queries_ab.chembl_clinical_ab_targets), self.engine)
self.all_chembl_targets.loc[self.all_chembl_targets['ref_type'] == 'Expert', ['ref_id', 'ref_url']] = 'NA'

#

self._process_protein_complexes()

ab_info = pd.read_sql_query(text(chembl_clinical_ab), self.engine)
ab_info = pd.read_sql_query(text(queries_ab.chembl_clinical_ab), self.engine)
self.all_chembl_targets = self.all_chembl_targets.merge(ab_info, how='left', on='parent_molregno')

if self.store_fetched:
Expand Down Expand Up @@ -260,11 +257,11 @@ def set_strings2(x):
##############################################################################################################

# def make_request(self, url, data):
# request = urllib2.Request(url)
# request = urllib_request.Request(url)

# try:
# url_file = urllib2.urlopen(request)
# except urllib2.HTTPError as e:
# url_file = urllib_request.urlopen(request)
# except urllib_request.HTTPError as e:
# if e.code == 404:
# print("[NOTFOUND %d] %s" % (e.code, url))
# else:
Expand All @@ -277,10 +274,10 @@ def set_strings2(x):
# Method is used in several workflows
@staticmethod
def make_request(url, data):
request = urllib2.Request(url)
request = urllib_request.Request(url)
try:
url_file = urllib2.urlopen(request, data)
except urllib2.HTTPError as e:
url_file = urllib_request.urlopen(request, data)
except urllib_request.HTTPError as e:
if e.code == 404:
print("[NOTFOUND %d] %s" % (e.code, url))
else:
Expand All @@ -292,7 +289,7 @@ def make_request(url, data):
def post_request_uniprot(url, data):
#base = 'http://legacy.uniprot.org'
#base = 'https://www.uniprot.org'
base = API_URL
base = settings.API_URL
full_url = "%s/%s" % (base, url)

if isinstance(data, (list, tuple)):
Expand Down Expand Up @@ -593,8 +590,8 @@ def _assign_bucket_9(self):
print("\t- Assessing Human Protein Atlas main location bucket 9...")

# Download latest file
#zip_file = urllib2.urlopen('https://www.proteinatlas.org/download/subcellular_location.tsv.zip')
zip_file = urllib2.urlopen('https://www.proteinatlas.org/download/tsv/subcellular_location.tsv.zip')
#zip_file = urllib_request.urlopen('https://www.proteinatlas.org/download/subcellular_location.tsv.zip')
zip_file = urllib_request.urlopen('https://www.proteinatlas.org/download/tsv/subcellular_location.tsv.zip')
with zipfile.ZipFile(io.BytesIO(zip_file.read()), 'r') as pa_file:
with pa_file.open('subcellular_location.tsv') as subcell_loc:
df = pd.read_csv(subcell_loc, sep='\t', header=0)
Expand Down
4 changes: 2 additions & 2 deletions ot_tractability_pipeline_v2/buckets_othercl.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
# import mygene
import numpy as np
import pandas as pd
import pkg_resources
from sqlalchemy import create_engine, text

PY3 = sys.version > '3'
Expand All @@ -32,7 +31,8 @@

from ot_tractability_pipeline_v2.queries_othercl import *

DATA_PATH = pkg_resources.resource_filename('ot_tractability_pipeline_v2', 'data/')
# Not used in this file.
# Imported through the package: a bare "from settings import ..." is an absolute
# import under Python 3 and does not resolve to the package's settings.py.
from ot_tractability_pipeline_v2.settings import DATA_PATH

class Othercl_buckets(object):
'''
Expand Down
17 changes: 8 additions & 9 deletions ot_tractability_pipeline_v2/buckets_protac.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,24 +20,23 @@
# import mygene
import numpy as np
import pandas as pd
import pkg_resources
import ast
# from sqlalchemy import create_engine

import requests
import psutil
import gc

PY3 = sys.version > '3'
# sys.version is a free-form string (e.g. "3.11.4 (main, ...) [GCC ...]") and
# cannot be parsed with float(); compare the structured major version instead.
PY3 = sys.version_info[0] >= 3
if PY3:
import urllib.request as urllib2
import urllib.request as urllib_request
else:
import urllib2
import urllib2 as urllib_request

from ot_tractability_pipeline_v2 import queries_protac, settings

from ot_tractability_pipeline_v2.queries_protac import *
CHEMBL_VERSION = settings.chembl_version_str()

DATA_PATH = pkg_resources.resource_filename('ot_tractability_pipeline_v2', 'data/')

class Protac_buckets(object):
'''
Expand Down Expand Up @@ -470,7 +469,7 @@ def _search_papers():
#url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search?resultType=core&query=ABSTRACT%3A%28%28ABSTRACT%3A%22proteolysis%20targeting%20chimera%22%20OR%20ABSTRACT%3A%22proteolysis%20targeting%20chimeric%22%29%20OR%20%28PROTAC%20AND%20%28degradation%20OR%20degrade%20OR%20ubiquitin%20OR%20proteolysis%29%29%29&resultType=lite&cursorMark=*&pageSize=1000&format=json"

# urllib not working returning 404 page error -> replaced by requests
#url = urllib2.urlopen("https://www.ebi.ac.uk/europepmc/webservices/rest/search?resultType=core&query=ABSTRACT%3A%28%28ABSTRACT%3A%22proteolysis%20targeting%20chimera%22%20OR%20ABSTRACT%3A%22proteolysis%20targeting%20chimeric%22%29%20OR%20%28%28PROTAC%20OR%20SNIPER%29%20AND%20%28degradation%20OR%20degrade%20OR%20proteolysis%29%29%20OR%20%28degrader%20AND%20%28proteasome%20OR%20ubiquitin%29%29%29&resultType=lite&cursorMark=*&pageSize=1000&format=json")
#url = urllib_request.urlopen("https://www.ebi.ac.uk/europepmc/webservices/rest/search?resultType=core&query=ABSTRACT%3A%28%28ABSTRACT%3A%22proteolysis%20targeting%20chimera%22%20OR%20ABSTRACT%3A%22proteolysis%20targeting%20chimeric%22%29%20OR%20%28%28PROTAC%20OR%20SNIPER%29%20AND%20%28degradation%20OR%20degrade%20OR%20proteolysis%29%29%20OR%20%28degrader%20AND%20%28proteasome%20OR%20ubiquitin%29%29%29&resultType=lite&cursorMark=*&pageSize=1000&format=json")
#data = url.read()
#try: data = json.loads(data.decode())
#except UnicodeDecodeError: data = json.loads(data)
Expand Down Expand Up @@ -509,7 +508,7 @@ def _get_tagged_targets(self):
tags_list = []
for chunk in chunks:
url_s = 'https://www.ebi.ac.uk/europepmc/annotations_api/annotationsByArticleIds?{}&type=Gene_Proteins&format=JSON'.format(chunk)
url = urllib2.urlopen(url_s)
url = urllib_request.urlopen(url_s)
data = url.read()
try: data = json.loads(data.decode())
except UnicodeDecodeError: data = json.loads(data)
Expand Down Expand Up @@ -1050,7 +1049,7 @@ def _search_chembl_smallmol_active(self):
self.engine = create_engine(database_url)

# small_mol_info = pd.read_sql_query(text(chembl_small_mol), self.engine)
self.all_chembl_targets = pd.read_sql_query(text(chembl_small_mol_active_targets), self.engine)
self.all_chembl_targets = pd.read_sql_query(text(queries_protac.chembl_small_mol_active_targets), self.engine)
# self.all_chembl_targets = self.all_chembl_targets.merge(small_mol_info, on='parent_molregno')

if self.store_fetched:
Expand Down
33 changes: 17 additions & 16 deletions ot_tractability_pipeline_v2/buckets_sm.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,19 @@
# import mygene
import numpy as np
import pandas as pd
import pkg_resources
import itertools
from sqlalchemy import text

PY3 = sys.version > '3'
# sys.version is a free-form string (e.g. "3.11.4 (main, ...) [GCC ...]") and
# cannot be parsed with float(); compare the structured major version instead.
PY3 = sys.version_info[0] >= 3
if PY3:
import urllib.request as urllib2
import urllib.request as urllib_request
else:
import urllib2
import urllib2 as urllib_request


from ot_tractability_pipeline_v2.queries_sm import *
from ot_tractability_pipeline_v2 import queries_sm, settings

DATA_PATH = pkg_resources.resource_filename('ot_tractability_pipeline_v2', 'data/')
CHEMBL_VERSION = settings.chembl_version_str()


class Small_molecule_buckets(object):
Expand Down Expand Up @@ -128,10 +127,10 @@ def _search_chembl_clinical(self):
'''

# print("\t- Querying ChEMBL...")
self.all_chembl_targets = pd.read_sql_query(text(chembl_clinical_targets), self.engine)
self.all_chembl_targets = pd.read_sql_query(text(queries_sm.chembl_clinical_targets), self.engine)
if self.store_fetched:
self.all_chembl_targets.to_csv("{}/sm_all_chembl_clinical_targets.csv".format(self.store_fetched))
small_mol_info = pd.read_sql_query(text(chembl_clinical_small_mol), self.engine)
small_mol_info = pd.read_sql_query(text(queries_sm.chembl_clinical_small_mol), self.engine)
if self.store_fetched:
small_mol_info.to_csv("{}/sm_all_chembl_clinical_small_mol.csv".format(self.store_fetched))
self.all_chembl_targets = self.all_chembl_targets.merge(small_mol_info, how='left', on='parent_molregno')
Expand Down Expand Up @@ -294,11 +293,11 @@ def set_strings2(x):
##############################################################################################################

# def make_request(self, url, data):
# request = urllib2.Request(url)
# request = urllib_request.Request(url)

# try:
# url_file = urllib2.urlopen(request, data)
# except urllib2.HTTPError as e:
# url_file = urllib_request.urlopen(request, data)
# except urllib_request.HTTPError as e:
# if e.code == 404:
# print("[NOTFOUND %d] %s" % (e.code, url))
# else:
Expand All @@ -311,11 +310,11 @@ def set_strings2(x):
# Method is used in several workflows
@staticmethod
def make_request(url, data):
request = urllib2.Request(url)
request = urllib_request.Request(url)

try:
url_file = urllib2.urlopen(request, data)
except urllib2.HTTPError as e:
url_file = urllib_request.urlopen(request, data)
except urllib_request.HTTPError as e:
if e.code == 404:
print("[NOTFOUND %d] %s" % (e.code, url))
else:
Expand All @@ -342,8 +341,10 @@ def _pdb_list(self, s):
if not isinstance(pdb, list): pdb = [pdb]

# Python 2/3 compatability
try: pdb = [p.lower() for p in pdb if isinstance(p,(str,unicode))] #Python 2
except: pdb = [p.lower() for p in pdb if isinstance(p,str)] #Python 3
if not PY3:
pdb = [p.lower() for p in pdb if isinstance(p,(str,unicode))] #Python 2
else:
pdb = [p.lower() for p in pdb if isinstance(p,str)] #Python 3

self.pdb_list += pdb
for p in pdb:
Expand Down
11 changes: 2 additions & 9 deletions ot_tractability_pipeline_v2/queries_sm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,8 @@

import os

try:
chembl_number = int(os.getenv('CHEMBL_VERSION'))
CHEMBL_VERSION = "chembl_{}".format(chembl_number)
print("\t ChEMBL version in use is "+CHEMBL_VERSION)
except TypeError:
print('''
Please set the CHEMBL version '''
)
raise
# Resolve the ChEMBL version string (e.g. "chembl_33") from the CHEMBL_VERSION
# environment variable. A module-level "global" statement after the assignment
# is a SyntaxError and is unnecessary here: the name is already module-global.
_chembl_number = os.getenv('CHEMBL_VERSION')
if _chembl_number is None:
    # TypeError matches what int(None) raised in the earlier implementation,
    # so existing handlers keep working.
    raise TypeError("Please set the CHEMBL version (CHEMBL_VERSION environment variable)")
CHEMBL_VERSION = "chembl_{}".format(int(_chembl_number))
print("\t ChEMBL version in use is " + CHEMBL_VERSION)


chembl_clinical_small_mol = """
Expand Down
5 changes: 5 additions & 0 deletions ot_tractability_pipeline_v2/settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import os
import inspect

# Path (with trailing separator) of the "data" directory next to this module.
# Derived from __file__ rather than inspect.currentframe(), which may return
# None on interpreters without stack-frame support; abspath keeps the value
# valid even if the working directory changes after import.
DATA_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "")

# Base URL of the UniProt REST API; other modules read it as settings.API_URL.
API_URL = "https://rest.uniprot.org"


def chembl_version_str():
    """Return the ChEMBL version string selected via the environment.

    Reads the ``CHEMBL_VERSION`` environment variable, which must hold an
    integer release number, and formats it as ``"chembl_<number>"``.

    Returns:
        str: for example ``"chembl_33"`` when ``CHEMBL_VERSION=33``.

    Raises:
        TypeError: if ``CHEMBL_VERSION`` is not set.
        ValueError: if ``CHEMBL_VERSION`` is not an integer.
    """
    raw = os.getenv('CHEMBL_VERSION')
    if raw is None:
        raise TypeError("Please set the CHEMBL version (CHEMBL_VERSION environment variable)")
    return "chembl_{}".format(int(raw))