From a7e6d3b3c127550926cde2b621b98c14145d8fd0 Mon Sep 17 00:00:00 2001 From: vumna Date: Thu, 20 Jul 2023 14:24:02 +0000 Subject: [PATCH 1/7] add git ignore --- .gitignore | 162 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5af2f0c --- /dev/null +++ b/.gitignore @@ -0,0 +1,162 @@ +test/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ \ No newline at end of file From 4355e5311aa438928ad64cb657713352c59aab75 Mon Sep 17 00:00:00 2001 From: vumna Date: Thu, 20 Jul 2023 14:28:26 +0000 Subject: [PATCH 2/7] update tf2 --- DeepDILI_mold2_simple_version/mold2_DeepDILI.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/DeepDILI_mold2_simple_version/mold2_DeepDILI.py b/DeepDILI_mold2_simple_version/mold2_DeepDILI.py index 3727691..5d5a98e 100644 --- a/DeepDILI_mold2_simple_version/mold2_DeepDILI.py +++ b/DeepDILI_mold2_simple_version/mold2_DeepDILI.py @@ -37,8 +37,6 @@ from numpy.random import seed seed(1) -from tensorflow import set_random_seed -set_random_seed(6) he_normal = initializers.he_normal() From b7cfe93d28d3995b0b73dfaa23c24254acb9474d Mon Sep 17 00:00:00 2001 From: vumna Date: Thu, 20 Jul 2023 14:54:46 +0000 Subject: [PATCH 3/7] add parameter for sh script --- DeepDILI_mold2_simple_version/creat_dir.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DeepDILI_mold2_simple_version/creat_dir.sh b/DeepDILI_mold2_simple_version/creat_dir.sh index a92d83b..ed69d1e 100644 --- a/DeepDILI_mold2_simple_version/creat_dir.sh +++ b/DeepDILI_mold2_simple_version/creat_dir.sh @@ -1,12 +1,12 @@ #!/bin/bash - +# Usage: ./creat_dir.sh [dir path] echo "[start]" echo `date` ###build separate directory -base_path0='/account/tli/CDER/results/check' +base_path0=$1 echo "make base classifiers directory" mkdir -p $base_path0 From 5fc2285ae10aaf099df7efd257d93e6d2af510eb Mon Sep 17 00:00:00 2001 From: vumna Date: Thu, 20 Jul 2023 15:23:44 +0000 Subject: [PATCH 4/7] update for tf2 --- DeepDILI_mold2_simple_version/mold2_DeepDILI.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DeepDILI_mold2_simple_version/mold2_DeepDILI.py b/DeepDILI_mold2_simple_version/mold2_DeepDILI.py index 5d5a98e..84a3cef 100644 --- a/DeepDILI_mold2_simple_version/mold2_DeepDILI.py +++ b/DeepDILI_mold2_simple_version/mold2_DeepDILI.py @@ -153,6 +153,6 @@ def dili_prediction(probability_path, var, model_path, result_path): reform_result(train_results).to_csv(path3+'/validation_'+col_name2+'.csv') K.clear_session() - tf.reset_default_graph() + tf.compat.v1.reset_default_graph() print("--- %s seconds ---" % (time.time() - start_time)) From b5c73fa618e218b2c80a6483e19f8b102f55259d Mon Sep 17 00:00:00 2001 From: vumna Date: Thu, 20 Jul 2023 15:24:09 +0000 Subject: [PATCH 5/7] refactor code for arguments --- DeepDILI_mold2_simple_version/creat_dir.sh | 0 DeepDILI_mold2_simple_version/main.py | 89 ++++++++++++++-------- 2 files changed, 58 insertions(+), 31 deletions(-) mode change 100644 => 100755 DeepDILI_mold2_simple_version/creat_dir.sh diff --git a/DeepDILI_mold2_simple_version/creat_dir.sh b/DeepDILI_mold2_simple_version/creat_dir.sh old mode 100644 new mode 100755 diff --git a/DeepDILI_mold2_simple_version/main.py b/DeepDILI_mold2_simple_version/main.py index e882bd9..87bef28 100644 --- a/DeepDILI_mold2_simple_version/main.py +++ b/DeepDILI_mold2_simple_version/main.py @@ -7,6 +7,8 @@ import warnings warnings.filterwarnings('ignore') +import os +import argparse import pandas as pd ### import scripts @@ -21,34 +23,59 @@ import mold2_DeepDILI -### please update the following path -features = pd.read_csv('/account/tli/CDER/script/train_validation_test/mold2/mold2_download_github_two/important_features_order.csv').feature.unique() # path for important_features_order.csv -data = pd.read_csv('/account/tli/CDER/script/train_validation_test/mold2/mold2_download_github_two/QSAR_year_338_pearson_0.9.csv',low_memory=False)# path for QSAR_year_338_pearson_0.9.csv -test_data = data[data.final_year>=1997] -#test_data = pd.read_csv('/account/tli/CDER/script/train_validation_test/mold2/mold2_download_github_three/external_mold2.csv')# path for external_mold2.csv (This is the external validation set) - -data_split = pd.read_csv('/account/tli/CDER/script/train_validation_test/mold2/mold2_download_github_two/data_split.csv')# path for data_split.csv -mcc = pd.read_csv('/account/tli/CDER/script/train_validation_test/mold2/mold2_download_github_two/combined_score.csv') # path for combined_score.csv - -base_path = '/account/tli/CDER/results/check' # path for base classifiers -probability_path = '/account/tli/CDER/results/check/probabilities_output' # path for the combined probabilities (model-level representations) -name = 'test' # can be any name - -model_path = '/account/tli/CDER/script/train_validation_test/mold2/mold2_download_github_two/mold2_best_model.h5' # path for mold2_best_model.h5 -result_path = '/account/tli/CDER/results/check/result' # path for the final DeepDILI predictions - -### run the scripts -mold2_knn.generate_baseClassifiers(features, data, test_data, data_split, name, base_path) -mold2_lr.generate_baseClassifiers(features, data, test_data, data_split, name, base_path) -mold2_svm.generate_baseClassifiers(features, data, test_data, data_split, name, base_path) -mold2_rf.generate_baseClassifiers(features, data, test_data, data_split, name, base_path) -mold2_xgboost.generate_baseClassifiers(features, data, test_data, data_split, name, base_path) - -mold2_validation_predictions_combine.combine_validation_probabilities(base_path, mcc, probability_path, name) -mold2_test_predictions_combine.combine_test_probabilities(base_path, mcc, probability_path, name) - -mold2_DeepDILI.dili_prediction(probability_path, name, model_path, result_path) - - -print("--- %s seconds ---" % (time.time() - start_time)) - +def mkdir_if_missing(dir): + if not os.path.exists(dir): + os.mkdir(dir) + +def main(data_path: str, base_path: str, name: str): + features = pd.read_csv(os.path.join(data_path,'important_features_order.csv')).feature.unique() # path for important_features_order.csv + data = pd.read_csv(os.path.join(data_path,'QSAR_year_338_pearson_0.9.csv'),low_memory=False)# path for QSAR_year_338_pearson_0.9.csv + test_data = data[data.final_year>=1997] + #test_data = pd.read_csv(os.path.join(data_path,'data_split.csv')# path for data_split.csv + + data_split = pd.read_csv(os.path.join(data_path,'data_split.csv'))# path for data_split.csv + mcc = pd.read_csv(os.path.join(data_path,'combined_score.csv')) # path for combined_score.csv + + model_path = os.path.join(data_path,'mold2_best_model.h5') # path for mold2_best_model.h5 + + #base_path = '/account/tli/CDER/results/check' # path for base classifiers + probability_path = os.path.join(base_path, 'probabilities_output') # path for the combined probabilities (model-level representations) + # mkdir_if_missing(probability_path) + + result_path = os.path.join(base_path,'result') # path for the final DeepDILI predictions + # mkdir_if_missing(result_path) + + ### run the scripts + mold2_knn.generate_baseClassifiers(features, data, test_data, data_split, name, base_path) + mold2_lr.generate_baseClassifiers(features, data, test_data, data_split, name, base_path) + mold2_svm.generate_baseClassifiers(features, data, test_data, data_split, name, base_path) + mold2_rf.generate_baseClassifiers(features, data, test_data, data_split, name, base_path) + mold2_xgboost.generate_baseClassifiers(features, data, test_data, data_split, name, base_path) + + mold2_validation_predictions_combine.combine_validation_probabilities(base_path, mcc, probability_path, name) + mold2_test_predictions_combine.combine_test_probabilities(base_path, mcc, probability_path, name) + + mold2_DeepDILI.dili_prediction(probability_path, name, model_path, result_path) + + + print("--- %s seconds ---" % (time.time() - start_time)) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Deep DILI') + parser.add_argument( + '--data_path', + default='.', + type=str, help='Data directory') + parser.add_argument( + '--base_path', + default='./test', + type=str, help='base path') + parser.add_argument( + '--name', + default='test', + type=str, help='Any text') + args = parser.parse_args() + # mkdir_if_missing(args.base_path) + os.system("chmod +x {}".format("creat_dir.sh")) + os.system("./{} {}".format("creat_dir.sh", args.base_path)) + main(args.data_path, args.base_path, args.name) \ No newline at end of file From 29ef36de7b1adf37f6c3a75cf0a9ad84309b9aa7 Mon Sep 17 00:00:00 2001 From: vumna Date: Thu, 20 Jul 2023 15:26:45 +0000 Subject: [PATCH 6/7] add installation instruction --- DeepDILI_mold2_simple_version/environment.yml | 74 +++++++++++++++++++ DeepDILI_mold2_simple_version/install.sh | 6 ++ .../requirements.txt | 49 ++++++++++++ 3 files changed, 129 insertions(+) create mode 100644 DeepDILI_mold2_simple_version/environment.yml create mode 100644 DeepDILI_mold2_simple_version/install.sh create mode 100644 DeepDILI_mold2_simple_version/requirements.txt diff --git a/DeepDILI_mold2_simple_version/environment.yml b/DeepDILI_mold2_simple_version/environment.yml new file mode 100644 index 0000000..9edc5df --- /dev/null +++ b/DeepDILI_mold2_simple_version/environment.yml @@ -0,0 +1,74 @@ +name: deepdili +channels: + - defaults +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - ca-certificates=2023.05.30=h06a4308_0 + - certifi=2021.5.30=py36h06a4308_0 + - ld_impl_linux-64=2.38=h1181459_1 + - libffi=3.3=he6710b0_2 + - libgcc-ng=11.2.0=h1234567_1 + - libgomp=11.2.0=h1234567_1 + - libstdcxx-ng=11.2.0=h1234567_1 + - ncurses=6.4=h6a678d5_0 + - openssl=1.1.1u=h7f8727e_0 + - pip=21.2.2=py36h06a4308_0 + - python=3.6.13=h12debd9_1 + - readline=8.2=h5eee18b_0 + - setuptools=58.0.4=py36h06a4308_0 + - sqlite=3.41.2=h5eee18b_0 + - tk=8.6.12=h1ccaba5_0 + - wheel=0.37.1=pyhd3eb1b0_0 + - xz=5.4.2=h5eee18b_0 + - zlib=1.2.13=h5eee18b_0 + - pip: + - absl-py==0.15.0 + - astunparse==1.6.3 + - cached-property==1.5.2 + - cachetools==4.2.4 + - charset-normalizer==2.0.12 + - clang==5.0 + - dataclasses==0.8 + - flatbuffers==1.12 + - gast==0.4.0 + - google-auth==1.35.0 + - google-auth-oauthlib==0.4.6 + - google-pasta==0.2.0 + - grpcio==1.48.2 + - h5py==3.1.0 + - idna==3.4 + - importlib-metadata==4.8.3 + - joblib==1.1.1 + - keras==2.6.0 + - keras-preprocessing==1.1.2 + - markdown==3.3.7 + - numpy==1.19.5 + - oauthlib==3.2.2 + - opt-einsum==3.3.0 + - pandas==1.1.5 + - protobuf==3.19.6 + - pyasn1==0.5.0 + - pyasn1-modules==0.3.0 + - python-dateutil==2.8.2 + - pytz==2023.3 + - requests==2.27.1 + - requests-oauthlib==1.3.1 + - rsa==4.9 + - scikit-learn==0.24.2 + - scipy==1.5.4 + - six==1.15.0 + - tensorboard==2.6.0 + - tensorboard-data-server==0.6.1 + - tensorboard-plugin-wit==1.8.1 + - tensorflow==2.6.2 + - tensorflow-estimator==2.6.0 + - termcolor==1.1.0 + - threadpoolctl==3.1.0 + - typing-extensions==3.7.4.3 + - urllib3==1.26.16 + - werkzeug==2.0.3 + - wrapt==1.12.1 + - xgboost==1.5.2 + - zipp==3.6.0 +prefix: /home/ubuntu/anaconda3/envs/deepdili diff --git a/DeepDILI_mold2_simple_version/install.sh b/DeepDILI_mold2_simple_version/install.sh new file mode 100644 index 0000000..a35ec4c --- /dev/null +++ b/DeepDILI_mold2_simple_version/install.sh @@ -0,0 +1,6 @@ +conda create -n deepdili tensorflow-gpu python=3.6 +conda activate deepdili +pip install pandas +pip install scikit-learn +pip install xgboost +pip install tensorflow \ No newline at end of file diff --git a/DeepDILI_mold2_simple_version/requirements.txt b/DeepDILI_mold2_simple_version/requirements.txt new file mode 100644 index 0000000..65f3c1f --- /dev/null +++ b/DeepDILI_mold2_simple_version/requirements.txt @@ -0,0 +1,49 @@ +absl-py==0.15.0 +astunparse==1.6.3 +cached-property==1.5.2 +cachetools==4.2.4 +certifi==2021.5.30 +charset-normalizer==2.0.12 +clang==5.0 +dataclasses==0.8 +flatbuffers==1.12 +gast==0.4.0 +google-auth==1.35.0 +google-auth-oauthlib==0.4.6 +google-pasta==0.2.0 +grpcio==1.48.2 +h5py==3.1.0 +idna==3.4 +importlib-metadata==4.8.3 +joblib==1.1.1 +keras==2.6.0 +Keras-Preprocessing==1.1.2 +Markdown==3.3.7 +numpy==1.19.5 +oauthlib==3.2.2 +opt-einsum==3.3.0 +pandas==1.1.5 +protobuf==3.19.6 +pyasn1==0.5.0 +pyasn1-modules==0.3.0 +python-dateutil==2.8.2 +pytz==2023.3 +requests==2.27.1 +requests-oauthlib==1.3.1 +rsa==4.9 +scikit-learn==0.24.2 +scipy==1.5.4 +six==1.15.0 +tensorboard==2.6.0 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorflow==2.6.2 +tensorflow-estimator==2.6.0 +termcolor==1.1.0 +threadpoolctl==3.1.0 +typing-extensions==3.7.4.3 +urllib3==1.26.16 +Werkzeug==2.0.3 +wrapt==1.12.1 +xgboost==1.5.2 +zipp==3.6.0 From b27b8be811614520621253e02da14dba8c22d874 Mon Sep 17 00:00:00 2001 From: vumna Date: Thu, 20 Jul 2023 15:27:22 +0000 Subject: [PATCH 7/7] update git ignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 5af2f0c..05bb301 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ test/ +settings.json + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod]