Skip to content

Commit af51931

Browse files
committed
Revert "feat: Add uris field for identifiers (#103)"
This reverts commit 0f766e5.
1 parent 4bd28ea commit af51931

File tree

2 files changed

+1
-78
lines changed

2 files changed

+1
-78
lines changed

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ dev = [
3030
"pre-commit",
3131
"pydantic_yaml",
3232
"pytest",
33-
"pubchempy"
3433
]
3534

3635
# [project.optional-dependencies]

src/chemnlp/data_val/model.py

Lines changed: 1 addition & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
from typing import Dict, List, Optional
22

3-
import pubchempy as pcp
4-
import requests
53
from pydantic import root_validator, validator
64
from pydantic_yaml import YamlModel, YamlStrEnum
75

@@ -21,8 +19,6 @@ class Identifier(YamlModel):
2119
"""Identifier information."""
2220

2321
id: str
24-
25-
"""A description of the field"""
2622
description: Optional[str]
2723
type: IdentifierEnum
2824
names: Optional[List[str]]
@@ -47,80 +43,17 @@ class ColumnTypes(YamlStrEnum):
4743
continuous = "continuous"
4844
categorical = "categorical"
4945
ordinal = "ordinal"
50-
boolean = "boolean"
5146

5247

5348
class Target(YamlModel):
5449
"""Target information."""
5550

5651
id: str
57-
58-
"""An English description of the field"""
5952
description: str
60-
61-
"""The units of the field. None if unitless."""
62-
units: Optional[str]
63-
64-
"""The type of the field. Can be one of `continuous`, `categorical`, `ordinal`, `boolean`."""
53+
units: str
6554
type: ColumnTypes
66-
67-
"""A list of names describing the field.
68-
69-
Note that this will be used in building the prompts. Some examples of prompts:
70-
71-
- Boolean variables
72-
73-
- `Is <name> <identifier>?`
74-
- ```
75-
What molecules in the list are <name>?
76-
- <identifier_1>
77-
- <identifier_2>
78-
- <identifier_3>
79-
```
80-
81-
82-
- Continuous variables
83-
84-
- `What is <name> of <identifier>?`
85-
- ```
86-
What is the molecule with largest <name> in the following list?
87-
- <identifier_1>
88-
- <identifier_2>
89-
- <identifier_3>
90-
```
91-
"""
9255
names: List[str]
9356

94-
"""A URI or multiple (consistent) URIs for the field.
95-
96-
Ideally this would be a link to an entry in an ontology or controlled
97-
vocabulary that can also provide a canonical description for the field.
98-
"""
99-
uris: Optional[List[str]]
100-
101-
"""A PubChem assay ID or multiple (consistent) PubChem assay IDs.
102-
103-
Make sure that the first assay ID is the primary assay ID.
104-
"""
105-
pubchem_aids: Optional[List[int]]
106-
107-
@validator("pubchem_aids")
108-
def uris_resolves(cls, values):
109-
if values is not None:
110-
for uri in values.get("uris"):
111-
# perform a request to the URI and check if it resolves
112-
response = requests.get(uri)
113-
if response.status_code != 200:
114-
raise ValueError(f"URI {uri} does not resolve")
115-
116-
@validator("pubchem_aids")
117-
def pubchem_assay_ids_resolve(cls, values):
118-
if values is not None:
119-
for aid in values:
120-
assays = pcp.get_assays(aid)
121-
if len(assays) == 0:
122-
raise ValueError(f"PubChem assay ID {aid} does not resolve")
123-
12457

12558
class Template(YamlModel):
12659
prompt: str
@@ -162,12 +95,3 @@ class Dataset(YamlModel):
16295
def num_points_must_be_positive(cls, v):
16396
if v < 0:
16497
raise ValueError("num_points must be positive")
165-
166-
@validator("links")
167-
def links_must_resolve(cls, v):
168-
if v is not None:
169-
for link in v:
170-
response = requests.get(link.url)
171-
if response.status_code != 200:
172-
if not (("acs" in response.text) or ("sage" in response.text)):
173-
raise ValueError(f"Link {link.url} does not resolve")

0 commit comments

Comments
 (0)