11from typing import Dict , List , Optional
22
3- import pubchempy as pcp
4- import requests
53from pydantic import root_validator , validator
64from pydantic_yaml import YamlModel , YamlStrEnum
75
@@ -21,8 +19,6 @@ class Identifier(YamlModel):
2119 """Identifier information."""
2220
2321 id : str
24-
25- """A description of the field"""
2622 description : Optional [str ]
2723 type : IdentifierEnum
2824 names : Optional [List [str ]]
@@ -47,80 +43,17 @@ class ColumnTypes(YamlStrEnum):
4743 continuous = "continuous"
4844 categorical = "categorical"
4945 ordinal = "ordinal"
50- boolean = "boolean"
5146
5247
5348class Target (YamlModel ):
5449 """Target information."""
5550
5651 id : str
57-
58- """An English description of the field"""
5952 description : str
60-
61- """The units of the field. None if unitless."""
62- units : Optional [str ]
63-
64- """The type of the field. Can be one of `continuous`, `categorical`, `ordinal`, `boolean`."""
53+ units : str
6554 type : ColumnTypes
66-
67- """A list of names describing the field.
68-
69- Note that this will be used in building the prompts. Some examples of prompts:
70-
71- - Boolean variables
72-
73- - `Is <name> <identifier>?`
74- - ```
75- What molecules in the list are <name>?
76- - <identifier_1>
77- - <identifier_2>
78- - <identifier_3>
79- ```
80-
81-
82- - Continuous variables
83-
84- - `What is <name> of <identifier>?`
85- - ```
86- What is the molecule with largest <name> in the following list?
87- - <identifier_1>
88- - <identifier_2>
89- - <identifier_3>
90- ```
91- """
9255 names : List [str ]
9356
94- """A URI or multiple (consistent) URIs for the field.
95-
96- Ideally this would be a link to an entry in an ontology or controlled
97- vocabulary that can also provide a canonical description for the field.
98- """
99- uris : Optional [List [str ]]
100-
101- """A PubChem assay ID or multiple (consistent) PubChem assay IDs.
102-
103- Make sure that the first assay ID is the primary assay ID.
104- """
105- pubchem_aids : Optional [List [int ]]
106-
107- @validator ("pubchem_aids" )
108- def uris_resolves (cls , values ):
109- if values is not None :
110- for uri in values .get ("uris" ):
111- # perform a request to the URI and check if it resolves
112- response = requests .get (uri )
113- if response .status_code != 200 :
114- raise ValueError (f"URI { uri } does not resolve" )
115-
116- @validator ("pubchem_aids" )
117- def pubchem_assay_ids_resolve (cls , values ):
118- if values is not None :
119- for aid in values :
120- assays = pcp .get_assays (aid )
121- if len (assays ) == 0 :
122- raise ValueError (f"PubChem assay ID { aid } does not resolve" )
123-
12457
12558class Template (YamlModel ):
12659 prompt : str
@@ -162,12 +95,3 @@ class Dataset(YamlModel):
16295 def num_points_must_be_positive (cls , v ):
16396 if v < 0 :
16497 raise ValueError ("num_points must be positive" )
165-
166- @validator ("links" )
167- def links_must_resolve (cls , v ):
168- if v is not None :
169- for link in v :
170- response = requests .get (link .url )
171- if response .status_code != 200 :
172- if not (("acs" in response .text ) or ("sage" in response .text )):
173- raise ValueError (f"Link { link .url } does not resolve" )
0 commit comments