From 587c1665015f92e5be7cc4656e9874e1b4c7583c Mon Sep 17 00:00:00 2001
From: Hojin Jung <howziin@gmail.com>
Date: Mon, 23 Feb 2026 22:48:51 -0700
Subject: [PATCH 1/2] Adding installation options: Conda/pip

---
 README.md        | 20 +++++++++++++++++++-
 environment.yml  | 18 ++++++++++++++++++
 requirements.txt | 10 ++++++++++
 3 files changed, 47 insertions(+), 1 deletion(-)
 create mode 100644 environment.yml
 create mode 100644 requirements.txt

diff --git a/README.md b/README.md
index c41b0b0..0ec26bd 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,25 @@
 # HoVpred
 
+## Installation
 
-usage:
+### Option A: Conda (recommended)
+
+```bash
+conda env create -f environment.yml
+conda activate hovpred
+```
+
+### Option B: pip
+
+```bash
+pip install -r requirements.txt
+```
+
+**Note:** `rdkit` is best installed via conda. The pip fallback uses `rdkit-pypi`, which
+may not be available for all platforms. If using pip and `rdkit-pypi` fails, install
+rdkit through conda instead: `conda install -c conda-forge rdkit`.
+
+## Usage:
 ```python
 python main.py [-h] [-predict] [-watsoneq] [-K_fold] [-maxatoms MAXATOMS]
                [-lr LR] [-epoch EPOCH] [-batchsize BATCHSIZE] [-layers LAYERS]
diff --git a/environment.yml b/environment.yml
new file mode 100644
index 0000000..a5d74b1
--- /dev/null
+++ b/environment.yml
@@ -0,0 +1,18 @@
+name: hovpred
+channels:
+  - conda-forge
+  - defaults
+dependencies:
+  - python>=3.8,<3.11
+  - rdkit
+  - numpy
+  - pandas
+  - scikit-learn
+  - scipy
+  - matplotlib
+  - seaborn
+  - tqdm
+  - pip
+  - pip:
+      - tensorflow>=2.4,<2.13
+      - dgl
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..e6814f7
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,10 @@
+tensorflow>=2.4,<2.13
+dgl
+rdkit-pypi
+numpy
+pandas
+scikit-learn
+scipy
+matplotlib
+seaborn
+tqdm

From d9e5d296f8ba4b1cf6295f96eca976d73eefad8c Mon Sep 17 00:00:00 2001
From: Hojin Jung <howziin@gmail.com>
Date: Tue, 24 Feb 2026 12:11:47 -0700
Subject: [PATCH 2/2] Pin conda environment, drop pip option, document redacted data

---
 README.md        |  35 ++++---
 data/README.md   |   7 +-
 environment.yml  | 234 ++++++++++++++++++++++++++++++++++++++++++++---
 requirements.txt |  10 --
 4 files changed, 243 insertions(+), 43 deletions(-)
 delete mode 100644 requirements.txt

diff --git a/README.md b/README.md
index 0ec26bd..e97f4ae 100644
--- a/README.md
+++ b/README.md
@@ -2,25 +2,20 @@
 
 ## Installation
 
-### Option A: Conda (recommended)
+The environment requires **Linux with CUDA 11.0**.
 
 ```bash
 conda env create -f environment.yml
 conda activate hovpred
 ```
 
-### Option B: pip
+## Data
 
-```bash
-pip install -r requirements.txt
-```
+The training data originates from the NIST and DIPPR databases, which are not freely available. The CSV files in `data/` contain only the molecular identifiers (SMILES) and temperatures, with the enthalpy of vaporization values redacted. As a result, **only the prediction workflow can be reproduced** using the provided pre-trained model weights; model training and cross-validation require access to the original databases.
 
-**Note:** `rdkit` is best installed via conda. The pip fallback uses `rdkit-pypi`, which
-may not be available for all platforms. If using pip and `rdkit-pypi` fails, install
-rdkit through conda instead: `conda install -c conda-forge rdkit`.
+## Usage
 
-## Usage:
-```python
+```
 python main.py [-h] [-predict] [-watsoneq] [-K_fold] [-maxatoms MAXATOMS]
                [-lr LR] [-epoch EPOCH] [-batchsize BATCHSIZE] [-layers LAYERS]
                [-heads HEADS] [-residcon] [-explicitH] [-dropout DROPOUT]
@@ -28,8 +23,9 @@ python main.py [-h] [-predict] [-watsoneq] [-K_fold] [-maxatoms MAXATOMS]
                [-loss LOSS] [-sw_thr SW_THR] [-sw_decay SW_DECAY]
 ```
 
-optional arguments:
-```python
+Optional arguments:
+
+```
   -h, --help            show this help message and exit
   -predict              If specified, prediction is carried out
                         (default=False)
@@ -53,14 +49,17 @@ optional arguments:
                         kl_div_normal
 ```
 
+### Prediction
+
+Prepare a `molecules_to_predict.csv` file with two columns: `smiles` and `temperature`, then run:
 
-example commands:
-```python
-# HoV prediction with uncertainty quantification - 'molecules_to_predict.csv' file is needed as an input which contains two columns: 'smiles' and 'temperature'
+```bash
 python main.py -predict -modelname best_211007 -loss kl_div_normal
+```
 
-# model training
-python main.py -modelname test_model -loss kl_div_normal
-# non-default hyperparameters can also be tested by adding more arguments
+### Training (requires NIST/DIPPR data)
 
+```bash
+python main.py -modelname test_model -loss kl_div_normal
 ```
+Non-default hyperparameters can also be tested by adding more arguments.
\ No newline at end of file
diff --git a/data/README.md b/data/README.md
index c61a3e3..0eea2c7 100644
--- a/data/README.md
+++ b/data/README.md
@@ -1,5 +1,6 @@
 # HoV databases
 
-- 'Data.csv': The dataframe with predefined training/validation/test set splits (to do data splits consistent with those in literature and compare the accuracy with literature)
-- 'Data_for_kfold.csv' for 10-fold cross-validation: The 'main.py' performs random data split into 10 training/validation folds + one held-out test set
-- Values from NIST-WTT were redacted
+- `Data.csv`: The dataframe with predefined training/validation/test set splits, kept consistent with the splits used in the literature so that accuracy can be compared with published results
+- `Data_for_kfold.csv` for 10-fold cross-validation: `main.py` performs a random data split into 10 training/validation folds + one held-out test set
+
+**Note:** The enthalpy of vaporization values originating from NIST and DIPPR have been redacted because these databases are not freely redistributable. The CSV files retain SMILES and temperature columns so that the file structure is preserved, but training and cross-validation cannot be run without populating the missing values from the original sources.
diff --git a/environment.yml b/environment.yml
index a5d74b1..be7bbf3 100644
--- a/environment.yml
+++ b/environment.yml
@@ -1,18 +1,228 @@
 name: hovpred
 channels:
+  - dglteam
   - conda-forge
   - defaults
 dependencies:
-  - python>=3.8,<3.11
-  - rdkit
-  - numpy
-  - pandas
-  - scikit-learn
-  - scipy
-  - matplotlib
-  - seaborn
-  - tqdm
-  - pip
+  - _libgcc_mutex=0.1=main
+  - _openmp_mutex=4.5=1_gnu
+  - anyio=3.6.1=py38h578d9bd_0
+  - argon2-cffi=21.3.0=pyhd8ed1ab_0
+  - argon2-cffi-bindings=21.2.0=py38h7f8727e_0
+  - asttokens=2.0.5=pyhd8ed1ab_0
+  - attrs=21.4.0=pyhd8ed1ab_0
+  - babel=2.10.1=pyhd8ed1ab_0
+  - backcall=0.2.0=pyh9f0ad1d_0
+  - backports=1.0=py_2
+  - backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0
+  - beautifulsoup4=4.11.1=pyha770c72_0
+  - blas=1.0=mkl
+  - bleach=5.0.0=pyhd8ed1ab_0
+  - boost=1.74.0=py38hc10631b_3
+  - boost-cpp=1.74.0=h312852a_4
+  - brotlipy=0.7.0=py38h497a2fe_1001
+  - bzip2=1.0.8=h7f98852_4
+  - ca-certificates=2022.5.18.1=ha878542_0
+  - cairo=1.16.0=h6cf1ce9_1008
+  - certifi=2022.5.18.1=py38h578d9bd_0
+  - cffi=1.15.0=py38hd667e15_1
+  - cryptography=37.0.1=py38h9ce1e76_0
+  - cycler=0.10.0=py_2
+  - decorator=5.1.1=pyhd8ed1ab_0
+  - defusedxml=0.7.1=pyhd8ed1ab_0
+  - dgl-cuda11.0=0.7.0=py38_0
+  - entrypoints=0.4=pyhd8ed1ab_0
+  - executing=0.8.3=pyhd8ed1ab_0
+  - flit-core=3.7.1=pyhd8ed1ab_0
+  - fontconfig=2.13.1=hba837de_1005
+  - freetype=2.10.4=h0708190_1
+  - gettext=0.19.8.1=h0b5b191_1005
+  - icu=68.1=h58526e2_0
+  - importlib-metadata=4.11.4=py38h578d9bd_0
+  - importlib_metadata=4.11.4=hd8ed1ab_0
+  - importlib_resources=5.7.1=pyhd8ed1ab_1
+  - intel-openmp=2021.3.0=h06a4308_3350
+  - ipykernel=5.5.5=py38hd0cf306_0
+  - ipython=8.4.0=py38h578d9bd_0
+  - ipython_genutils=0.2.0=py_1
+  - jbig=2.1=h7f98852_2003
+  - jedi=0.18.1=py38h578d9bd_1
+  - jinja2=3.1.2=pyhd8ed1ab_1
+  - joblib=1.0.1=pyhd8ed1ab_0
+  - jpeg=9d=h36c2ea0_0
+  - json5=0.9.5=pyh9f0ad1d_0
+  - jsonschema=4.6.0=pyhd8ed1ab_0
+  - jupyter_client=7.0.6=pyhd8ed1ab_0
+  - jupyter_core=4.10.0=py38h578d9bd_0
+  - jupyter_server=1.17.1=pyhd8ed1ab_0
+  - jupyterlab=3.4.3=pyhd8ed1ab_0
+  - jupyterlab_pygments=0.2.2=pyhd8ed1ab_0
+  - jupyterlab_server=2.14.0=pyhd8ed1ab_0
+  - kiwisolver=1.3.1=py38h1fd1430_1
+  - lcms2=2.12=hddcbb42_0
+  - ld_impl_linux-64=2.35.1=h7274673_9
+  - lerc=2.2.1=h9c3ff4c_0
+  - libblas=3.9.0=11_linux64_mkl
+  - libcblas=3.9.0=11_linux64_mkl
+  - libdeflate=1.7=h7f98852_5
+  - libffi=3.3=he6710b0_2
+  - libgcc-ng=9.3.0=h5101ec6_17
+  - libgfortran-ng=7.5.0=ha8ba4b0_17
+  - libgfortran4=7.5.0=ha8ba4b0_17
+  - libglib=2.68.3=h3e27bee_0
+  - libgomp=9.3.0=h5101ec6_17
+  - libiconv=1.16=h516909a_0
+  - liblapack=3.9.0=11_linux64_mkl
+  - libpng=1.6.37=h21135ba_2
+  - libsodium=1.0.18=h36c2ea0_1
+  - libstdcxx-ng=9.3.0=hd4cf53a_17
+  - libtiff=4.3.0=hf544144_1
+  - libuuid=2.32.1=h7f98852_1000
+  - libwebp-base=1.2.0=h7f98852_2
+  - libxcb=1.13=h7f98852_1003
+  - libxml2=2.9.12=h72842e0_0
+  - lz4-c=1.9.3=h9c3ff4c_1
+  - markupsafe=2.0.1=py38h497a2fe_0
+  - matplotlib-base=3.3.4=py38h0efea84_0
+  - matplotlib-inline=0.1.3=pyhd8ed1ab_0
+  - mistune=0.8.4=py38h497a2fe_1004
+  - mkl=2021.3.0=h06a4308_520
+  - mkl-service=2.4.0=py38h7f8727e_0
+  - mkl_fft=1.3.0=py38h42c9631_2
+  - mkl_random=1.2.2=py38h51133e4_0
+  - nbclassic=0.3.7=pyhd8ed1ab_0
+  - nbclient=0.6.4=pyhd8ed1ab_1
+  - nbconvert=6.5.0=pyhd8ed1ab_0
+  - nbconvert-core=6.5.0=pyhd8ed1ab_0
+  - nbconvert-pandoc=6.5.0=pyhd8ed1ab_0
+  - nbformat=5.4.0=pyhd8ed1ab_0
+  - ncurses=6.2=he6710b0_1
+  - nest-asyncio=1.5.5=pyhd8ed1ab_0
+  - networkx=2.6.2=pyhd3eb1b0_0
+  - notebook=6.4.12=pyha770c72_0
+  - notebook-shim=0.1.0=pyhd8ed1ab_0
+  - olefile=0.46=pyh9f0ad1d_1
+  - openjpeg=2.4.0=hb52868f_1
+  - openssl=1.1.1k=h7f98852_0
+  - packaging=21.3=pyhd8ed1ab_0
+  - pandas=1.3.1=py38h1abd341_0
+  - pandoc=2.18=ha770c72_0
+  - pandocfilters=1.5.0=pyhd8ed1ab_0
+  - parso=0.8.3=pyhd8ed1ab_0
+  - pcre=8.45=h9c3ff4c_0
+  - pexpect=4.8.0=pyh9f0ad1d_2
+  - pickleshare=0.7.5=py_1003
+  - pillow=8.3.1=py38h8e6f84c_0
+  - pip=21.0.1=py38h06a4308_0
+  - pixman=0.40.0=h36c2ea0_0
+  - prometheus_client=0.14.1=pyhd8ed1ab_0
+  - prompt-toolkit=3.0.29=pyha770c72_0
+  - pthread-stubs=0.4=h36c2ea0_1001
+  - ptyprocess=0.7.0=pyhd3deb0d_0
+  - pure_eval=0.2.2=pyhd8ed1ab_0
+  - pycairo=1.20.1=py38hf61ee4a_0
+  - pycparser=2.21=pyhd8ed1ab_0
+  - pygments=2.12.0=pyhd8ed1ab_0
+  - pyopenssl=22.0.0=pyhd8ed1ab_0
+  - pyparsing=2.4.7=pyh9f0ad1d_0
+  - pyrsistent=0.18.0=py38heee7806_0
+  - pysocks=1.7.1=py38h578d9bd_5
+  - python=3.8.11=h12debd9_0_cpython
+  - python-dateutil=2.8.2=pyhd8ed1ab_0
+  - python-fastjsonschema=2.15.3=pyhd8ed1ab_0
+  - python_abi=3.8=2_cp38
+  - pytz=2021.1=pyhd8ed1ab_0
+  - pyzmq=19.0.2=py38ha71036d_2
+  - rdkit=2021.03.4=py38hf8acc3d_0
+  - readline=8.1=h27cfd23_0
+  - reportlab=3.5.68=py38hadf75a6_0
+  - scikit-learn=0.24.2=py38hdc147b9_0
+  - scipy=1.6.2=py38had2a1c9_1
+  - send2trash=1.8.0=pyhd8ed1ab_0
+  - setuptools=52.0.0=py38h06a4308_0
+  - sniffio=1.2.0=py38h578d9bd_3
+  - soupsieve=2.3.1=pyhd8ed1ab_0
+  - sqlalchemy=1.3.23=py38h497a2fe_0
+  - sqlite=3.36.0=hc218d9a_0
+  - stack_data=0.2.0=pyhd8ed1ab_0
+  - terminado=0.15.0=py38h578d9bd_0
+  - threadpoolctl=2.2.0=pyh8a188c0_0
+  - tinycss2=1.1.1=pyhd8ed1ab_0
+  - tk=8.6.10=hbc83047_0
+  - tornado=6.1=py38h497a2fe_1
+  - traitlets=5.2.2.post1=pyhd8ed1ab_0
+  - wcwidth=0.2.5=pyh9f0ad1d_2
+  - webencodings=0.5.1=py_1
+  - websocket-client=1.3.2=pyhd8ed1ab_0
+  - wheel=0.37.0=pyhd3eb1b0_0
+  - xorg-kbproto=1.0.7=h7f98852_1002
+  - xorg-libice=1.0.10=h7f98852_0
+  - xorg-libsm=1.2.3=hd9c2040_1000
+  - xorg-libx11=1.7.2=h7f98852_0
+  - xorg-libxau=1.0.9=h7f98852_0
+  - xorg-libxdmcp=1.1.3=h7f98852_0
+  - xorg-libxext=1.3.4=h7f98852_1
+  - xorg-libxrender=0.9.10=h7f98852_1003
+  - xorg-renderproto=0.11.1=h7f98852_1002
+  - xorg-xextproto=7.3.0=h7f98852_1002
+  - xorg-xproto=7.0.31=h7f98852_1007
+  - xz=5.2.5=h7b6447c_0
+  - zeromq=4.3.4=h9c3ff4c_0
+  - zipp=3.8.0=pyhd8ed1ab_0
+  - zlib=1.2.11=h7b6447c_3
+  - zstd=1.5.0=ha95c52a_0
   - pip:
-      - tensorflow>=2.4,<2.13
-      - dgl
+    - absl-py==0.15.0
+    - aqme==1.3.0
+    - ase==3.22.1
+    - astunparse==1.6.3
+    - cachetools==4.2.2
+    - cclib==1.7.2
+    - charset-normalizer==2.0.4
+    - cloudpickle==2.1.0
+    - flatbuffers==1.12
+    - gast==0.3.3
+    - google-auth==1.34.0
+    - google-auth-oauthlib==0.4.5
+    - google-pasta==0.2.0
+    - grpcio==1.32.0
+    - h5py==2.10.0
+    - idna==3.2
+    - keras==2.9.0
+    - keras-preprocessing==1.1.2
+    - libclang==14.0.6
+    - llvmlite==0.38.1
+    - markdown==3.3.4
+    - nfp==0.3.0
+    - numba==0.55.2
+    - numpy==1.19.5
+    - oauthlib==3.1.1
+    - opt-einsum==3.3.0
+    - periodictable==1.6.1
+    - progress==1.6
+    - protobuf==3.17.3
+    - pyasn1==0.4.8
+    - pyasn1-modules==0.2.8
+    - pyyaml==6.0
+    - requests==2.26.0
+    - requests-oauthlib==1.3.0
+    - rsa==4.7.2
+    - seaborn==0.11.2
+    - shap==0.40.0
+    - six==1.15.0
+    - slicer==0.0.7
+    - tensorboard==2.9.1
+    - tensorboard-data-server==0.6.1
+    - tensorboard-plugin-wit==1.8.0
+    - tensorflow==2.4.0
+    - tensorflow-addons==0.14.0
+    - tensorflow-estimator==2.4.0
+    - tensorflow-gpu==2.4.0
+    - tensorflow-io-gcs-filesystem==0.26.0
+    - termcolor==1.1.0
+    - tqdm==4.62.3
+    - typeguard==2.13.3
+    - typing-extensions==3.7.4.3
+    - urllib3==1.26.6
+    - werkzeug==2.0.1
+    - wrapt==1.12.1
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index e6814f7..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-tensorflow>=2.4,<2.13
-dgl
-rdkit-pypi
-numpy
-pandas
-scikit-learn
-scipy
-matplotlib
-seaborn
-tqdm