From 8604c4bfd714e2947229bf728780e94cd6d85d41 Mon Sep 17 00:00:00 2001 From: Matthew Manning Date: Thu, 19 Jun 2025 14:31:27 -0400 Subject: [PATCH 1/3] Replaces deprecated pkg_resources dependency. --- ot_tractability_pipeline_v2/bin/run_pipeline.py | 5 +---- ot_tractability_pipeline_v2/buckets_ab.py | 7 ++----- ot_tractability_pipeline_v2/buckets_othercl.py | 4 ++-- ot_tractability_pipeline_v2/buckets_protac.py | 3 +-- ot_tractability_pipeline_v2/buckets_sm.py | 5 +---- ot_tractability_pipeline_v2/settings.py | 3 +++ 6 files changed, 10 insertions(+), 17 deletions(-) create mode 100644 ot_tractability_pipeline_v2/settings.py diff --git a/ot_tractability_pipeline_v2/bin/run_pipeline.py b/ot_tractability_pipeline_v2/bin/run_pipeline.py index 606c111..36d6528 100644 --- a/ot_tractability_pipeline_v2/bin/run_pipeline.py +++ b/ot_tractability_pipeline_v2/bin/run_pipeline.py @@ -10,7 +10,6 @@ import time import re -#import zipfile import zlib import sys import argparse @@ -19,7 +18,6 @@ import numpy as np import pandas as pd import mygene -import pkg_resources from sqlalchemy import create_engine import json import requests @@ -51,8 +49,7 @@ from ot_tractability_pipeline_v2.buckets_protac import * from ot_tractability_pipeline_v2.buckets_othercl import * - -DATA_PATH = pkg_resources.resource_filename('ot_tractability_pipeline_v2', 'data/') +from settings import DATA_PATH POLLING_INTERVAL = 3 API_URL = "https://rest.uniprot.org" diff --git a/ot_tractability_pipeline_v2/buckets_ab.py b/ot_tractability_pipeline_v2/buckets_ab.py index 1e1be2b..1cbb967 100755 --- a/ot_tractability_pipeline_v2/buckets_ab.py +++ b/ot_tractability_pipeline_v2/buckets_ab.py @@ -16,11 +16,9 @@ import sys import zipfile import os - # import mygene import numpy as np import pandas as pd -import pkg_resources from sqlalchemy import text PY3 = sys.version > '3' @@ -30,9 +28,8 @@ import urllib2 -from ot_tractability_pipeline_v2.queries_ab import * - -DATA_PATH = 
pkg_resources.resource_filename('ot_tractability_pipeline_v2', 'data/') +from ot_tractability_pipeline_v2 import queries_ab +from settings import DATA_PATH class Antibody_buckets(object): ''' diff --git a/ot_tractability_pipeline_v2/buckets_othercl.py b/ot_tractability_pipeline_v2/buckets_othercl.py index c1c4604..1ef36f3 100755 --- a/ot_tractability_pipeline_v2/buckets_othercl.py +++ b/ot_tractability_pipeline_v2/buckets_othercl.py @@ -20,7 +20,6 @@ # import mygene import numpy as np import pandas as pd -import pkg_resources from sqlalchemy import create_engine, text PY3 = sys.version > '3' @@ -32,7 +31,8 @@ from ot_tractability_pipeline_v2.queries_othercl import * -DATA_PATH = pkg_resources.resource_filename('ot_tractability_pipeline_v2', 'data/') +# Not used in this file. +from settings import DATA_PATH class Othercl_buckets(object): ''' diff --git a/ot_tractability_pipeline_v2/buckets_protac.py b/ot_tractability_pipeline_v2/buckets_protac.py index 5808730..c1ca7da 100755 --- a/ot_tractability_pipeline_v2/buckets_protac.py +++ b/ot_tractability_pipeline_v2/buckets_protac.py @@ -36,8 +36,7 @@ from ot_tractability_pipeline_v2.queries_protac import * - -DATA_PATH = pkg_resources.resource_filename('ot_tractability_pipeline_v2', 'data/') +from settings import DATA_PATH class Protac_buckets(object): ''' diff --git a/ot_tractability_pipeline_v2/buckets_sm.py b/ot_tractability_pipeline_v2/buckets_sm.py index b22b292..33bd1c3 100755 --- a/ot_tractability_pipeline_v2/buckets_sm.py +++ b/ot_tractability_pipeline_v2/buckets_sm.py @@ -20,7 +20,6 @@ # import mygene import numpy as np import pandas as pd -import pkg_resources import itertools from sqlalchemy import text @@ -32,9 +31,7 @@ from ot_tractability_pipeline_v2.queries_sm import * - -DATA_PATH = pkg_resources.resource_filename('ot_tractability_pipeline_v2', 'data/') - +from settings import DATA_PATH class Small_molecule_buckets(object): ''' diff --git a/ot_tractability_pipeline_v2/settings.py 
b/ot_tractability_pipeline_v2/settings.py new file mode 100644 index 0000000..e0dcb41 --- /dev/null +++ b/ot_tractability_pipeline_v2/settings.py @@ -0,0 +1,3 @@ +import os + +DATA_PATH = os.path.join(os.path.dirname(os.path.abspath("ot_tractability_pipeline_v2")), "data/") From ac995b58daf34af3f17211f22cb6deb9e719acfd Mon Sep 17 00:00:00 2001 From: Matthew Manning Date: Thu, 19 Jun 2025 15:05:09 -0400 Subject: [PATCH 2/3] Replaces deprecated pkg_resources to find module directory. --- ot_tractability_pipeline_v2/bin/run_pipeline.py | 4 +--- ot_tractability_pipeline_v2/buckets_ab.py | 4 +--- ot_tractability_pipeline_v2/buckets_othercl.py | 1 - ot_tractability_pipeline_v2/buckets_protac.py | 4 +--- ot_tractability_pipeline_v2/buckets_sm.py | 5 +---- ot_tractability_pipeline_v2/settings.py | 5 +++++ 6 files changed, 9 insertions(+), 14 deletions(-) create mode 100644 ot_tractability_pipeline_v2/settings.py diff --git a/ot_tractability_pipeline_v2/bin/run_pipeline.py b/ot_tractability_pipeline_v2/bin/run_pipeline.py index 606c111..b8a917f 100644 --- a/ot_tractability_pipeline_v2/bin/run_pipeline.py +++ b/ot_tractability_pipeline_v2/bin/run_pipeline.py @@ -19,7 +19,6 @@ import numpy as np import pandas as pd import mygene -import pkg_resources from sqlalchemy import create_engine import json import requests @@ -51,8 +50,7 @@ from ot_tractability_pipeline_v2.buckets_protac import * from ot_tractability_pipeline_v2.buckets_othercl import * - -DATA_PATH = pkg_resources.resource_filename('ot_tractability_pipeline_v2', 'data/') +from settings import DATA_PATH POLLING_INTERVAL = 3 API_URL = "https://rest.uniprot.org" diff --git a/ot_tractability_pipeline_v2/buckets_ab.py b/ot_tractability_pipeline_v2/buckets_ab.py index 1e1be2b..91ea543 100755 --- a/ot_tractability_pipeline_v2/buckets_ab.py +++ b/ot_tractability_pipeline_v2/buckets_ab.py @@ -20,7 +20,6 @@ # import mygene import numpy as np import pandas as pd -import pkg_resources from sqlalchemy import text PY3 = 
sys.version > '3' @@ -31,8 +30,7 @@ from ot_tractability_pipeline_v2.queries_ab import * - -DATA_PATH = pkg_resources.resource_filename('ot_tractability_pipeline_v2', 'data/') +from settings import DATA_PATH class Antibody_buckets(object): ''' diff --git a/ot_tractability_pipeline_v2/buckets_othercl.py b/ot_tractability_pipeline_v2/buckets_othercl.py index c1c4604..cb99e24 100755 --- a/ot_tractability_pipeline_v2/buckets_othercl.py +++ b/ot_tractability_pipeline_v2/buckets_othercl.py @@ -20,7 +20,6 @@ # import mygene import numpy as np import pandas as pd -import pkg_resources from sqlalchemy import create_engine, text PY3 = sys.version > '3' diff --git a/ot_tractability_pipeline_v2/buckets_protac.py b/ot_tractability_pipeline_v2/buckets_protac.py index 5808730..1d8f26d 100755 --- a/ot_tractability_pipeline_v2/buckets_protac.py +++ b/ot_tractability_pipeline_v2/buckets_protac.py @@ -20,7 +20,6 @@ # import mygene import numpy as np import pandas as pd -import pkg_resources import ast # from sqlalchemy import create_engine @@ -36,8 +35,7 @@ from ot_tractability_pipeline_v2.queries_protac import * - -DATA_PATH = pkg_resources.resource_filename('ot_tractability_pipeline_v2', 'data/') +from settings import DATA_PATH class Protac_buckets(object): ''' diff --git a/ot_tractability_pipeline_v2/buckets_sm.py b/ot_tractability_pipeline_v2/buckets_sm.py index b22b292..33bd1c3 100755 --- a/ot_tractability_pipeline_v2/buckets_sm.py +++ b/ot_tractability_pipeline_v2/buckets_sm.py @@ -20,7 +20,6 @@ # import mygene import numpy as np import pandas as pd -import pkg_resources import itertools from sqlalchemy import text @@ -32,9 +31,7 @@ from ot_tractability_pipeline_v2.queries_sm import * - -DATA_PATH = pkg_resources.resource_filename('ot_tractability_pipeline_v2', 'data/') - +from settings import DATA_PATH class Small_molecule_buckets(object): ''' diff --git a/ot_tractability_pipeline_v2/settings.py b/ot_tractability_pipeline_v2/settings.py new file mode 100644 index 
0000000..044a62c --- /dev/null +++ b/ot_tractability_pipeline_v2/settings.py @@ -0,0 +1,5 @@ +import os +import inspect + +DATA_PATH = os.path.join(os.path.dirname(inspect.getfile(inspect.currentframe())), "data{}".format(os.sep)) +# print(DATA_PATH) \ No newline at end of file From 11ecabb52bea6c358bc956368500dfdc683c0140 Mon Sep 17 00:00:00 2001 From: Matthew Manning Date: Fri, 20 Jun 2025 19:42:57 -0400 Subject: [PATCH 3/3] Moves reused constants to settings.py. Imports modules instead of *. Avoid namespace clashes. --- .../bin/run_pipeline.py | 8 ++--- ot_tractability_pipeline_v2/buckets_ab.py | 35 +++++++++---------- ot_tractability_pipeline_v2/buckets_protac.py | 17 ++++----- ot_tractability_pipeline_v2/buckets_sm.py | 34 ++++++++++-------- ot_tractability_pipeline_v2/queries_sm.py | 11 ++---- ot_tractability_pipeline_v2/settings.py | 1 + 6 files changed, 50 insertions(+), 56 deletions(-) diff --git a/ot_tractability_pipeline_v2/bin/run_pipeline.py b/ot_tractability_pipeline_v2/bin/run_pipeline.py index 36d6528..10eed47 100644 --- a/ot_tractability_pipeline_v2/bin/run_pipeline.py +++ b/ot_tractability_pipeline_v2/bin/run_pipeline.py @@ -31,13 +31,11 @@ import psutil import gc -PY3 = sys.version > '3' +PY3 = sys.version_info[0] >= 3 if PY3: - import urllib.request as urllib2 - import urllib.parse as urllib + pass else: - import urllib2 - import urllib + import urllib2 as urllib_request from ot_tractability_pipeline_v2.queries_sm import * from ot_tractability_pipeline_v2.queries_ab import * diff --git a/ot_tractability_pipeline_v2/buckets_ab.py b/ot_tractability_pipeline_v2/buckets_ab.py index 1e1be2b..7004309 100755 --- a/ot_tractability_pipeline_v2/buckets_ab.py +++ b/ot_tractability_pipeline_v2/buckets_ab.py @@ -16,23 +16,20 @@ import sys import zipfile import os - # import mygene import numpy as np import pandas as pd -import pkg_resources from sqlalchemy import text -PY3 = sys.version > '3' +PY3 = sys.version_info[0] >= 3 if PY3: - import urllib.request
as urllib2 + import urllib.request as urllib_request else: - import urllib2 - + import urllib2 as urllib_request -from ot_tractability_pipeline_v2.queries_ab import * +from ot_tractability_pipeline_v2 import queries_ab, settings +CHEMBL_VERSION = settings.chembl_version_str() -DATA_PATH = pkg_resources.resource_filename('ot_tractability_pipeline_v2', 'data/') class Antibody_buckets(object): ''' @@ -167,14 +164,14 @@ def _assign_buckets_1_to_3(self): print("\t- Assessing clinical buckets 1-3...") - self.all_chembl_targets = pd.read_sql_query(text(chembl_clinical_ab_targets), self.engine) + self.all_chembl_targets = pd.read_sql_query(text(queries_ab.chembl_clinical_ab_targets), self.engine) self.all_chembl_targets.loc[self.all_chembl_targets['ref_type'] == 'Expert', ['ref_id', 'ref_url']] = 'NA' # self._process_protein_complexes() - ab_info = pd.read_sql_query(text(chembl_clinical_ab), self.engine) + ab_info = pd.read_sql_query(text(queries_ab.chembl_clinical_ab), self.engine) self.all_chembl_targets = self.all_chembl_targets.merge(ab_info, how='left', on='parent_molregno') if self.store_fetched: @@ -260,11 +257,11 @@ def set_strings2(x): ############################################################################################################## # def make_request(self, url, data): - # request = urllib2.Request(url) + # request = urllib_request.Request(url) # try: - # url_file = urllib2.urlopen(request) - # except urllib2.HTTPError as e: + # url_file = urllib_request.urlopen(request) + # except urllib_request.HTTPError as e: # if e.code == 404: # print("[NOTFOUND %d] %s" % (e.code, url)) # else: @@ -277,10 +274,10 @@ def set_strings2(x): # Method is used in several workflows @staticmethod def make_request(url, data): - request = urllib2.Request(url) + request = urllib_request.Request(url) try: - url_file = urllib2.urlopen(request, data) - except urllib2.HTTPError as e: + url_file = urllib_request.urlopen(request, data) + except urllib_request.HTTPError as e: if 
e.code == 404: print("[NOTFOUND %d] %s" % (e.code, url)) else: @@ -292,7 +289,7 @@ def make_request(url, data): def post_request_uniprot(url, data): #base = 'http://legacy.uniprot.org' #base = 'https://www.uniprot.org' - base = API_URL + base = settings.API_URL full_url = "%s/%s" % (base, url) if isinstance(data, (list, tuple)): @@ -593,8 +590,8 @@ def _assign_bucket_9(self): print("\t- Assessing Human Protein Atlas main location bucket 9...") # Download latest file - #zip_file = urllib2.urlopen('https://www.proteinatlas.org/download/subcellular_location.tsv.zip') - zip_file = urllib2.urlopen('https://www.proteinatlas.org/download/tsv/subcellular_location.tsv.zip') + #zip_file = urllib_request.urlopen('https://www.proteinatlas.org/download/subcellular_location.tsv.zip') + zip_file = urllib_request.urlopen('https://www.proteinatlas.org/download/tsv/subcellular_location.tsv.zip') with zipfile.ZipFile(io.BytesIO(zip_file.read()), 'r') as pa_file: with pa_file.open('subcellular_location.tsv') as subcell_loc: df = pd.read_csv(subcell_loc, sep='\t', header=0) diff --git a/ot_tractability_pipeline_v2/buckets_protac.py b/ot_tractability_pipeline_v2/buckets_protac.py index 1d8f26d..cd2cfb0 100755 --- a/ot_tractability_pipeline_v2/buckets_protac.py +++ b/ot_tractability_pipeline_v2/buckets_protac.py @@ -27,15 +27,16 @@ import psutil import gc -PY3 = sys.version > '3' +PY3 = sys.version_info[0] >= 3 if PY3: - import urllib.request as urllib2 + import urllib.request as urllib_request else: - import urllib2 + import urllib2 as urllib_request +from ot_tractability_pipeline_v2 import queries_protac, settings + +CHEMBL_VERSION = settings.chembl_version_str() -from ot_tractability_pipeline_v2.queries_protac import * -from settings import DATA_PATH class Protac_buckets(object): ''' @@ -468,7 +469,7 @@ def _search_papers(): #url =
"https://www.ebi.ac.uk/europepmc/webservices/rest/search?resultType=core&query=ABSTRACT%3A%28%28ABSTRACT%3A%22proteolysis%20targeting%20chimera%22%20OR%20ABSTRACT%3A%22proteolysis%20targeting%20chimeric%22%29%20OR%20%28PROTAC%20AND%20%28degradation%20OR%20degrade%20OR%20ubiquitin%20OR%20proteolysis%29%29%29&resultType=lite&cursorMark=*&pageSize=1000&format=json" # urllib not working returning 404 page error -> replaced by requests - #url = urllib2.urlopen("https://www.ebi.ac.uk/europepmc/webservices/rest/search?resultType=core&query=ABSTRACT%3A%28%28ABSTRACT%3A%22proteolysis%20targeting%20chimera%22%20OR%20ABSTRACT%3A%22proteolysis%20targeting%20chimeric%22%29%20OR%20%28%28PROTAC%20OR%20SNIPER%29%20AND%20%28degradation%20OR%20degrade%20OR%20proteolysis%29%29%20OR%20%28degrader%20AND%20%28proteasome%20OR%20ubiquitin%29%29%29&resultType=lite&cursorMark=*&pageSize=1000&format=json") + #url = urllib_request.urlopen("https://www.ebi.ac.uk/europepmc/webservices/rest/search?resultType=core&query=ABSTRACT%3A%28%28ABSTRACT%3A%22proteolysis%20targeting%20chimera%22%20OR%20ABSTRACT%3A%22proteolysis%20targeting%20chimeric%22%29%20OR%20%28%28PROTAC%20OR%20SNIPER%29%20AND%20%28degradation%20OR%20degrade%20OR%20proteolysis%29%29%20OR%20%28degrader%20AND%20%28proteasome%20OR%20ubiquitin%29%29%29&resultType=lite&cursorMark=*&pageSize=1000&format=json") #data = url.read() #try: data = json.loads(data.decode()) #except UnicodeDecodeError: data = json.loads(data) @@ -507,7 +508,7 @@ def _get_tagged_targets(self): tags_list = [] for chunk in chunks: url_s = 'https://www.ebi.ac.uk/europepmc/annotations_api/annotationsByArticleIds?{}&type=Gene_Proteins&format=JSON'.format(chunk) - url = urllib2.urlopen(url_s) + url = urllib_request.urlopen(url_s) data = url.read() try: data = json.loads(data.decode()) except UnicodeDecodeError: data = json.loads(data) @@ -1048,7 +1049,7 @@ def _search_chembl_smallmol_active(self): self.engine = create_engine(database_url) # small_mol_info = 
pd.read_sql_query(text(chembl_small_mol), self.engine) - self.all_chembl_targets = pd.read_sql_query(text(chembl_small_mol_active_targets), self.engine) + self.all_chembl_targets = pd.read_sql_query(text(queries_protac.chembl_small_mol_active_targets), self.engine) # self.all_chembl_targets = self.all_chembl_targets.merge(small_mol_info, on='parent_molregno') if self.store_fetched: diff --git a/ot_tractability_pipeline_v2/buckets_sm.py b/ot_tractability_pipeline_v2/buckets_sm.py index 33bd1c3..043e025 100755 --- a/ot_tractability_pipeline_v2/buckets_sm.py +++ b/ot_tractability_pipeline_v2/buckets_sm.py @@ -23,15 +23,17 @@ import itertools from sqlalchemy import text -PY3 = sys.version > '3' +PY3 = sys.version_info[0] >= 3 if PY3: - import urllib.request as urllib2 + import urllib.request as urllib_request else: - import urllib2 + import urllib2 as urllib_request -from ot_tractability_pipeline_v2.queries_sm import * -from settings import DATA_PATH +from ot_tractability_pipeline_v2 import queries_sm, settings + +CHEMBL_VERSION = settings.chembl_version_str() + class Small_molecule_buckets(object): ''' @@ -125,10 +127,10 @@ def _search_chembl_clinical(self): ''' # print("\t- Querying ChEMBL...") - self.all_chembl_targets = pd.read_sql_query(text(chembl_clinical_targets), self.engine) + self.all_chembl_targets = pd.read_sql_query(text(queries_sm.chembl_clinical_targets), self.engine) if self.store_fetched: self.all_chembl_targets.to_csv("{}/sm_all_chembl_clinical_targets.csv".format(self.store_fetched)) - small_mol_info = pd.read_sql_query(text(chembl_clinical_small_mol), self.engine) + small_mol_info = pd.read_sql_query(text(queries_sm.chembl_clinical_small_mol), self.engine) if self.store_fetched: small_mol_info.to_csv("{}/sm_all_chembl_clinical_small_mol.csv".format(self.store_fetched)) self.all_chembl_targets = self.all_chembl_targets.merge(small_mol_info, how='left', on='parent_molregno') @@ -291,11 +293,11 @@ def set_strings2(x):
############################################################################################################## # def make_request(self, url, data): - # request = urllib2.Request(url) + # request = urllib_request.Request(url) # try: - # url_file = urllib2.urlopen(request, data) - # except urllib2.HTTPError as e: + # url_file = urllib_request.urlopen(request, data) + # except urllib_request.HTTPError as e: # if e.code == 404: # print("[NOTFOUND %d] %s" % (e.code, url)) # else: @@ -308,11 +310,11 @@ def set_strings2(x): # Method is used in several workflows @staticmethod def make_request(url, data): - request = urllib2.Request(url) + request = urllib_request.Request(url) try: - url_file = urllib2.urlopen(request, data) - except urllib2.HTTPError as e: + url_file = urllib_request.urlopen(request, data) + except urllib_request.HTTPError as e: if e.code == 404: print("[NOTFOUND %d] %s" % (e.code, url)) else: @@ -339,8 +341,10 @@ def _pdb_list(self, s): if not isinstance(pdb, list): pdb = [pdb] # Python 2/3 compatability - try: pdb = [p.lower() for p in pdb if isinstance(p,(str,unicode))] #Python 2 - except: pdb = [p.lower() for p in pdb if isinstance(p,str)] #Python 3 + if not PY3: + pdb = [p.lower() for p in pdb if isinstance(p,(str,unicode))] #Python 2 + else: + pdb = [p.lower() for p in pdb if isinstance(p,str)] #Python 3 self.pdb_list += pdb for p in pdb: diff --git a/ot_tractability_pipeline_v2/queries_sm.py b/ot_tractability_pipeline_v2/queries_sm.py index 410ac17..bbfd3f3 100644 --- a/ot_tractability_pipeline_v2/queries_sm.py +++ b/ot_tractability_pipeline_v2/queries_sm.py @@ -3,15 +3,8 @@ import os -try: - chembl_number = int(os.getenv('CHEMBL_VERSION')) - CHEMBL_VERSION = "chembl_{}".format(chembl_number) - print("\t ChEMBL version in use is "+CHEMBL_VERSION) -except TypeError: - print(''' - Please set the CHEMBL version ''' - ) - raise +from ot_tractability_pipeline_v2.settings import chembl_version_str +CHEMBL_VERSION = chembl_version_str() chembl_clinical_small_mol = """
diff --git a/ot_tractability_pipeline_v2/settings.py b/ot_tractability_pipeline_v2/settings.py
index 5efe80a..044a62c 100644
--- a/ot_tractability_pipeline_v2/settings.py
+++ b/ot_tractability_pipeline_v2/settings.py
@@ -1,4 +1,25 @@
 import os
+import inspect
 
 DATA_PATH = os.path.join(os.path.dirname(inspect.getfile(inspect.currentframe())), "data{}".format(os.sep))
-# print(DATA_PATH)
\ No newline at end of file
+# print(DATA_PATH)
+
+# Base URL of the UniProt REST API; read by buckets_ab as settings.API_URL.
+API_URL = "https://rest.uniprot.org"
+
+
+def chembl_version_str():
+    """Return the ChEMBL schema name, e.g. ``chembl_34``.
+
+    The release number is read from the CHEMBL_VERSION environment variable.
+
+    Raises:
+        TypeError: if CHEMBL_VERSION is not set.
+        ValueError: if CHEMBL_VERSION is not an integer.
+    """
+    try:
+        chembl_number = int(os.getenv('CHEMBL_VERSION'))
+    except TypeError:
+        print("Please set the CHEMBL version")
+        raise
+    return "chembl_{}".format(chembl_number)