Skip to content

Commit 5bfc5a6

Browse files
author
=
committed
update post-processing to better match the database structure
fix dangling edge issues; build on dev
1 parent 1ada3bd commit 5bfc5a6

File tree

6 files changed

+36
-23
lines changed

6 files changed

+36
-23
lines changed

src/input_adapters/go/go_gaf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ def get_all(self) -> Generator[List[ProteinGoTermRelationship], None, None]:
6565
))
6666
if count >= self.batch_size:
6767
yield pro_go_edges
68+
count = 0
6869
pro_go_edges: List[ProteinGoTermRelationship] = []
6970

7071
yield pro_go_edges

src/input_adapters/pharos_arango/set_ligand_activity_flag.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,12 @@ def get_all(self) -> Generator[List[ProteinLigandRelationship], None, None]:
2828

2929
passing_activities_query = """FOR pro IN `biolink:Protein`
3030
FOR chem, rel IN OUTBOUND pro `biolink:interacts_with`
31-
FILTER (
32-
(pro.idg_family IN ["GPCR", "Nuclear Receptor"] AND LENGTH(rel.act_value[* FILTER CURRENT >= 7]) > 0) OR
33-
(pro.idg_family == "Kinase" AND LENGTH(rel.act_value[* FILTER CURRENT >= 7.52288]) > 0) OR
34-
(pro.idg_family == "Ion Channel" AND LENGTH(rel.act_value[* FILTER CURRENT >= 5]) > 0) OR
35-
(
36-
(pro.idg_family == null OR pro.idg_family NOT IN ["Ion Channel", "Kinase", "GPCR", "Nuclear Receptor"])
37-
AND LENGTH(rel.act_value[* FILTER CURRENT >= 6]) > 0
38-
)
39-
)
31+
LET act_values = rel.details[* FILTER CURRENT.act_value >= (
32+
pro.idg_family == "Kinase" ? 7.52288 :
33+
pro.idg_family == "Ion Channel" ? 5 :
34+
pro.idg_family IN ["GPCR", "Nuclear Receptor"] ? 7 : 6
35+
)]
36+
FILTER LENGTH(act_values) > 0
4037
RETURN DISTINCT {
4138
protein_id: pro.id,
4239
chemical_entity_id: chem.id

src/input_adapters/pharos_arango/tdl_input_adapter.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,19 +83,21 @@ def make_set(list_query_result: list):
8383
"""
8484

8585
proteins_with_moa_drugs = """
86-
for pro in `biolink:Protein`
87-
for lig, act in outbound pro `biolink:interacts_with`
86+
FOR pro IN `biolink:Protein`
87+
FOR lig, act IN OUTBOUND pro `biolink:interacts_with`
8888
FILTER lig.isDrug == TRUE
89-
FILTER LENGTH(act.has_moa[* FILTER CURRENT == TRUE]) > 0
90-
RETURN distinct pro.id
89+
LET has_moa_flags = act.details[* FILTER CURRENT.has_moa == TRUE RETURN CURRENT.has_moa]
90+
FILTER LENGTH(has_moa_flags) > 0
91+
RETURN DISTINCT pro.id
9192
"""
9293

9394
proteins_with_experimental_f_or_p_go_terms = """
9495
FOR p IN `biolink:Protein`
9596
FOR g, r IN OUTBOUND p `ProteinGoTermRelationship`
9697
FILTER g.is_leaf == true
9798
AND g.type != 'C'
98-
AND 'Experimental evidence code' IN r.category
99+
LET evidence_categories = r.evidence[* RETURN CURRENT.category]
100+
FILTER 'Experimental evidence code' IN evidence_categories
99101
RETURN DISTINCT p.id
100102
"""
101103

src/output_adapters/arango_output_adapter.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,18 @@ def generate_edge_key(from_node, to_node, edge_type):
4848
edges.append(edge)
4949

5050
edge_collection.insert_many(edges, overwrite=True)
51-
else:
52-
if not db.has_collection(label):
53-
collection = db.create_collection(label)
54-
else:
55-
collection = db.collection(label)
5651

57-
keys = [self.safe_key(obj['id']) for obj in obj_list]
58-
existing_nodes = collection.get_many(keys)
52+
cursor = db.aql.execute(f"""
53+
FOR e IN `{edge_collection.name}`
54+
FILTER !DOCUMENT(e._from) || !DOCUMENT(e._to)
55+
REMOVE e IN `{edge_collection.name}`
56+
""")
57+
result = cursor.statistics()
58+
deleted_count = result.get('modified', 0)
59+
if deleted_count > 0:
60+
print(f"Deleted {deleted_count} dangling edges.")
61+
else:
62+
collection, existing_nodes = self.get_existing_nodes(db, label, obj_list)
5963
existing_record_map = {
6064
record['id']: record for record in existing_nodes
6165
}
@@ -69,6 +73,15 @@ def generate_edge_key(from_node, to_node, edge_type):
6973

7074
return True
7175

76+
def get_existing_nodes(self, db, label, obj_list):
77+
if not db.has_collection(label):
78+
collection = db.create_collection(label)
79+
else:
80+
collection = db.collection(label)
81+
keys = [self.safe_key(obj['id']) for obj in obj_list]
82+
existing_nodes = collection.get_many(keys)
83+
return collection, existing_nodes
84+
7285
def create_or_truncate_datastore(self) -> bool:
7386
sys_db = self.client.db('_system', username=self.credentials.user,
7487
password=self.credentials.password)

src/use_cases/target_graph.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
source_credentials: &source_credentials ./src/use_cases/secrets/ifxdev_pharos_prod.yaml
1+
source_credentials: &source_credentials ./src/use_cases/secrets/ifxdev_pharos_dev.yaml
22

33
pchembl_cutoff: &pchembl_cutoff 5
44

src/use_cases/target_graph_aql_post.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
source_credentials: &source_credentials ./src/use_cases/secrets/ifxdev_pharos_prod.yaml
1+
source_credentials: &source_credentials ./src/use_cases/secrets/ifxdev_pharos_dev.yaml
22
database_name: &database_name pharos
33

44
labeler:

0 commit comments

Comments
 (0)