164 changes: 164 additions & 0 deletions .gitignore
@@ -0,0 +1,164 @@
test/

settings.json

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
4 changes: 2 additions & 2 deletions DeepDILI_mold2_simple_version/creat_dir.sh
100644 → 100755
@@ -1,12 +1,12 @@
#!/bin/bash

# Usage: ./creat_dir.sh [dir path]
echo "[start]"
echo `date`

###build separate directory


base_path0='/account/tli/CDER/results/check'
base_path0=$1

echo "make base classifiers directory"
mkdir -p $base_path0
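For reference, a minimal sketch of exercising the updated script by hand (main.py below now invokes it automatically with the --base_path argument); the output directory name here is only an example:

chmod +x DeepDILI_mold2_simple_version/creat_dir.sh
./DeepDILI_mold2_simple_version/creat_dir.sh ./test   # creates the base-classifier directories under ./test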
74 changes: 74 additions & 0 deletions DeepDILI_mold2_simple_version/environment.yml
@@ -0,0 +1,74 @@
name: deepdili
channels:
  - defaults
dependencies:
  - _libgcc_mutex=0.1=main
  - _openmp_mutex=5.1=1_gnu
  - ca-certificates=2023.05.30=h06a4308_0
  - certifi=2021.5.30=py36h06a4308_0
  - ld_impl_linux-64=2.38=h1181459_1
  - libffi=3.3=he6710b0_2
  - libgcc-ng=11.2.0=h1234567_1
  - libgomp=11.2.0=h1234567_1
  - libstdcxx-ng=11.2.0=h1234567_1
  - ncurses=6.4=h6a678d5_0
  - openssl=1.1.1u=h7f8727e_0
  - pip=21.2.2=py36h06a4308_0
  - python=3.6.13=h12debd9_1
  - readline=8.2=h5eee18b_0
  - setuptools=58.0.4=py36h06a4308_0
  - sqlite=3.41.2=h5eee18b_0
  - tk=8.6.12=h1ccaba5_0
  - wheel=0.37.1=pyhd3eb1b0_0
  - xz=5.4.2=h5eee18b_0
  - zlib=1.2.13=h5eee18b_0
  - pip:
      - absl-py==0.15.0
      - astunparse==1.6.3
      - cached-property==1.5.2
      - cachetools==4.2.4
      - charset-normalizer==2.0.12
      - clang==5.0
      - dataclasses==0.8
      - flatbuffers==1.12
      - gast==0.4.0
      - google-auth==1.35.0
      - google-auth-oauthlib==0.4.6
      - google-pasta==0.2.0
      - grpcio==1.48.2
      - h5py==3.1.0
      - idna==3.4
      - importlib-metadata==4.8.3
      - joblib==1.1.1
      - keras==2.6.0
      - keras-preprocessing==1.1.2
      - markdown==3.3.7
      - numpy==1.19.5
      - oauthlib==3.2.2
      - opt-einsum==3.3.0
      - pandas==1.1.5
      - protobuf==3.19.6
      - pyasn1==0.5.0
      - pyasn1-modules==0.3.0
      - python-dateutil==2.8.2
      - pytz==2023.3
      - requests==2.27.1
      - requests-oauthlib==1.3.1
      - rsa==4.9
      - scikit-learn==0.24.2
      - scipy==1.5.4
      - six==1.15.0
      - tensorboard==2.6.0
      - tensorboard-data-server==0.6.1
      - tensorboard-plugin-wit==1.8.1
      - tensorflow==2.6.2
      - tensorflow-estimator==2.6.0
      - termcolor==1.1.0
      - threadpoolctl==3.1.0
      - typing-extensions==3.7.4.3
      - urllib3==1.26.16
      - werkzeug==2.0.3
      - wrapt==1.12.1
      - xgboost==1.5.2
      - zipp==3.6.0
prefix: /home/ubuntu/anaconda3/envs/deepdili
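The pinned environment above can be recreated directly from this file, which is simpler than the manual steps in install.sh below; a sketch, assuming it is run from the repository root:

conda env create -f DeepDILI_mold2_simple_version/environment.yml
conda activate deepdili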
6 changes: 6 additions & 0 deletions DeepDILI_mold2_simple_version/install.sh
@@ -0,0 +1,6 @@
conda create -n deepdili tensorflow-gpu python=3.6
conda activate deepdili
pip install pandas
pip install scikit-learn
pip install xgboost
pip install tensorflow
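Note that conda activate only works inside a non-interactive script after the conda shell hook has been sourced; a hedged variant of the same steps (the hook path is the standard Anaconda/Miniconda location and may differ on other installs):

source "$(conda info --base)/etc/profile.d/conda.sh"
conda create -y -n deepdili tensorflow-gpu python=3.6
conda activate deepdili
pip install pandas scikit-learn xgboost tensorflow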
89 changes: 58 additions & 31 deletions DeepDILI_mold2_simple_version/main.py
@@ -7,6 +7,8 @@
import warnings
warnings.filterwarnings('ignore')

import os
import argparse
import pandas as pd

### import scripts
@@ -21,34 +23,59 @@

import mold2_DeepDILI

### please update the following path
features = pd.read_csv('/account/tli/CDER/script/train_validation_test/mold2/mold2_download_github_two/important_features_order.csv').feature.unique() # path for important_features_order.csv
data = pd.read_csv('/account/tli/CDER/script/train_validation_test/mold2/mold2_download_github_two/QSAR_year_338_pearson_0.9.csv',low_memory=False)# path for QSAR_year_338_pearson_0.9.csv
test_data = data[data.final_year>=1997]
#test_data = pd.read_csv('/account/tli/CDER/script/train_validation_test/mold2/mold2_download_github_three/external_mold2.csv')# path for external_mold2.csv (This is the external validation set)

data_split = pd.read_csv('/account/tli/CDER/script/train_validation_test/mold2/mold2_download_github_two/data_split.csv')# path for data_split.csv
mcc = pd.read_csv('/account/tli/CDER/script/train_validation_test/mold2/mold2_download_github_two/combined_score.csv') # path for combined_score.csv

base_path = '/account/tli/CDER/results/check' # path for base classifiers
probability_path = '/account/tli/CDER/results/check/probabilities_output' # path for the combined probabilities (model-level representations)
name = 'test' # can be any name

model_path = '/account/tli/CDER/script/train_validation_test/mold2/mold2_download_github_two/mold2_best_model.h5' # path for mold2_best_model.h5
result_path = '/account/tli/CDER/results/check/result' # path for the final DeepDILI predictions

### run the scripts
mold2_knn.generate_baseClassifiers(features, data, test_data, data_split, name, base_path)
mold2_lr.generate_baseClassifiers(features, data, test_data, data_split, name, base_path)
mold2_svm.generate_baseClassifiers(features, data, test_data, data_split, name, base_path)
mold2_rf.generate_baseClassifiers(features, data, test_data, data_split, name, base_path)
mold2_xgboost.generate_baseClassifiers(features, data, test_data, data_split, name, base_path)

mold2_validation_predictions_combine.combine_validation_probabilities(base_path, mcc, probability_path, name)
mold2_test_predictions_combine.combine_test_probabilities(base_path, mcc, probability_path, name)

mold2_DeepDILI.dili_prediction(probability_path, name, model_path, result_path)


print("--- %s seconds ---" % (time.time() - start_time))

def mkdir_if_missing(dir):
    if not os.path.exists(dir):
        os.mkdir(dir)

def main(data_path: str, base_path: str, name: str):
    features = pd.read_csv(os.path.join(data_path,'important_features_order.csv')).feature.unique() # path for important_features_order.csv
    data = pd.read_csv(os.path.join(data_path,'QSAR_year_338_pearson_0.9.csv'),low_memory=False)# path for QSAR_year_338_pearson_0.9.csv
    test_data = data[data.final_year>=1997]
    #test_data = pd.read_csv(os.path.join(data_path,'data_split.csv'))# path for data_split.csv

    data_split = pd.read_csv(os.path.join(data_path,'data_split.csv'))# path for data_split.csv
    mcc = pd.read_csv(os.path.join(data_path,'combined_score.csv')) # path for combined_score.csv

    model_path = os.path.join(data_path,'mold2_best_model.h5') # path for mold2_best_model.h5

    #base_path = '/account/tli/CDER/results/check' # path for base classifiers
    probability_path = os.path.join(base_path, 'probabilities_output') # path for the combined probabilities (model-level representations)
    # mkdir_if_missing(probability_path)

    result_path = os.path.join(base_path,'result') # path for the final DeepDILI predictions
    # mkdir_if_missing(result_path)

    ### run the scripts
    mold2_knn.generate_baseClassifiers(features, data, test_data, data_split, name, base_path)
    mold2_lr.generate_baseClassifiers(features, data, test_data, data_split, name, base_path)
    mold2_svm.generate_baseClassifiers(features, data, test_data, data_split, name, base_path)
    mold2_rf.generate_baseClassifiers(features, data, test_data, data_split, name, base_path)
    mold2_xgboost.generate_baseClassifiers(features, data, test_data, data_split, name, base_path)

    mold2_validation_predictions_combine.combine_validation_probabilities(base_path, mcc, probability_path, name)
    mold2_test_predictions_combine.combine_test_probabilities(base_path, mcc, probability_path, name)

    mold2_DeepDILI.dili_prediction(probability_path, name, model_path, result_path)


    print("--- %s seconds ---" % (time.time() - start_time))

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Deep DILI')
    parser.add_argument(
        '--data_path',
        default='.',
        type=str, help='Data directory')
    parser.add_argument(
        '--base_path',
        default='./test',
        type=str, help='base path')
    parser.add_argument(
        '--name',
        default='test',
        type=str, help='Any text')
    args = parser.parse_args()
    # mkdir_if_missing(args.base_path)
    os.system("chmod +x {}".format("creat_dir.sh"))
    os.system("./{} {}".format("creat_dir.sh", args.base_path))
    main(args.data_path, args.base_path, args.name)
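With the defaults above, a typical invocation looks like the sketch below; the data directory is a placeholder and must contain the inputs read in main() (important_features_order.csv, QSAR_year_338_pearson_0.9.csv, data_split.csv, combined_score.csv, mold2_best_model.h5), while the --base_path directory is created by creat_dir.sh:

cd DeepDILI_mold2_simple_version
python main.py --data_path /path/to/mold2_inputs --base_path ./test --name test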
4 changes: 1 addition & 3 deletions DeepDILI_mold2_simple_version/mold2_DeepDILI.py
@@ -37,8 +37,6 @@

from numpy.random import seed
seed(1)
from tensorflow import set_random_seed
set_random_seed(6)

he_normal = initializers.he_normal()

@@ -155,6 +153,6 @@ def dili_prediction(probability_path, var, model_path, result_path):
reform_result(train_results).to_csv(path3+'/validation_'+col_name2+'.csv')

K.clear_session()
tf.reset_default_graph()
tf.compat.v1.reset_default_graph()

print("--- %s seconds ---" % (time.time() - start_time))