diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index ad21f07..f0b34cd 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -1,14 +1,14 @@ -# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.245.0/containers/python-3/.devcontainer/base.Dockerfile - # [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3.9, 3.8, 3.7, 3.6, 3-bullseye, 3.10-bullseye, 3.9-bullseye, 3.8-bullseye, 3.7-bullseye, 3.6-bullseye, 3-buster, 3.10-buster, 3.9-buster, 3.8-buster, 3.7-buster, 3.6-buster -ARG VARIANT="3.10-bullseye" +ARG VARIANT=3-bullseye FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT} +ENV PYTHONUNBUFFERED 1 + # [Choice] Node.js version: none, lts/*, 16, 14, 12, 10 ARG NODE_VERSION="none" RUN if [ "${NODE_VERSION}" != "none" ]; then su vscode -c "umask 0002 && . /usr/local/share/nvm/nvm.sh && nvm install ${NODE_VERSION} 2>&1"; fi -# [Optional] If your pip requirements rarely change, uncomment this section to add them to the image. +# [Optional] If your requirements rarely change, uncomment this section to add them to the image. # COPY requirements.txt /tmp/pip-tmp/ # RUN pip3 --disable-pip-version-check --no-cache-dir install -r /tmp/pip-tmp/requirements.txt \ # && rm -rf /tmp/pip-tmp @@ -17,5 +17,5 @@ RUN if [ "${NODE_VERSION}" != "none" ]; then su vscode -c "umask 0002 && . /usr/ # RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ # && apt-get -y install --no-install-recommends -# [Optional] Uncomment this line to install global node packages. -# RUN su vscode -c "source /usr/local/share/nvm/nvm.sh && npm install -g " 2>&1 \ No newline at end of file + + diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 34929e3..be1db70 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,19 +1,11 @@ // For format details, see https://aka.ms/devcontainer.json. 
For config options, see the README at: -// https://github.com/microsoft/vscode-dev-containers/tree/v0.245.0/containers/python-3 +// https://github.com/microsoft/vscode-dev-containers/tree/v0.245.0/containers/python-3-postgres +// Update the VARIANT arg in docker-compose.yml to pick a Python version { - "name": "Python 3", - "build": { - "dockerfile": "Dockerfile", - "context": "..", - "args": { - // Update 'VARIANT' to pick a Python version: 3, 3.10, 3.9, 3.8, 3.7, 3.6 - // Append -bullseye or -buster to pin to an OS version. - // Use -bullseye variants on local on arm64/Apple Silicon. - "VARIANT": "3.8", - // Options - "NODE_VERSION": "lts/*" - } - }, + "name": "Python 3 & PostgreSQL", + "dockerComposeFile": "docker-compose.yml", + "service": "app", + "workspaceFolder": "/workspace", // Configure tool-specific properties. "customizations": { @@ -32,7 +24,8 @@ "python.linting.mypyPath": "/usr/local/py-utils/bin/mypy", "python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle", "python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle", - "python.linting.pylintPath": "/usr/local/py-utils/bin/pylint" + "python.linting.pylintPath": "/usr/local/py-utils/bin/pylint", + "python.testing.pytestPath": "/usr/local/py-utils/bin/pytest" }, // Add the IDs of extensions you want installed when the container is created. @@ -44,10 +37,11 @@ }, // Use 'forwardPorts' to make a list of ports inside the container available locally. - // "forwardPorts": [], + // This can be used to network with other containers or the host. + // "forwardPorts": [5000, 5432], // Use 'postCreateCommand' to run commands after the container is created. - // "postCreateCommand": "pip3 install --user -r requirements.txt", + // "postCreateCommand": "pip install --user -r requirements.txt", // Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. 
"remoteUser": "vscode", diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml new file mode 100644 index 0000000..7485e64 --- /dev/null +++ b/.devcontainer/docker-compose.yml @@ -0,0 +1,46 @@ +version: '3.8' + +services: + app: + container_name: dev-env + build: + context: .. + dockerfile: .devcontainer/Dockerfile + args: + # Update 'VARIANT' to pick a version of Python: 3, 3.10, 3.9, 3.8, 3.7, 3.6 + # Append -bullseye or -buster to pin to an OS version. + # Use -bullseye variants on local arm64/Apple Silicon. + VARIANT: "3.8" + # Optional Node.js version to install + NODE_VERSION: "lts/*" + + volumes: + - ..:/workspace:cached + + # Overrides default command so things don't shut down after the process ends. + command: sleep infinity + + # Runs app on the same network as the database container, allows "forwardPorts" in devcontainer.json function. + network_mode: service:db + # Uncomment the next line to use a non-root user for all processes. + # user: vscode + + env_file: + - ../.dev.env + + # Use "forwardPorts" in **devcontainer.json** to forward an app port locally. + # (Adding the "ports" property to this file will not forward from a Codespace.) + + db: + image: postgres:latest + container_name: dev-db + restart: unless-stopped + volumes: + - postgresql:/var/lib/postgresql/data + env_file: + - ../.dev-db.env + # Add "forwardPorts": ["5432"] to **devcontainer.json** to forward PostgreSQL locally. + # (Adding the "ports" property to this file will not forward from a Codespace. 
+ +volumes: + postgresql: diff --git a/.env.dev b/.env.dev deleted file mode 100644 index 57ac5c5..0000000 --- a/.env.dev +++ /dev/null @@ -1,9 +0,0 @@ -DEBUG=1 -SECRET_KEY=foo -DJANGO_ALLOWED_HOSTS=localhost 127.0.0.1 [::1] 129.170.70.76 0.0.0.0 -SQL_ENGINE=django.db.backends.postgresql -SQL_DATABASE=chp_api_dev -SQL_USER=chp_api_user -SQL_PASSWORD=chp_api_user -SQL_HOST=db -SQL_PORT=5432 \ No newline at end of file diff --git a/.env.prod b/.env.prod deleted file mode 100644 index 7eb93ee..0000000 --- a/.env.prod +++ /dev/null @@ -1,11 +0,0 @@ -DEBUG=0 -SECRET_KEY=BkbsAreTheBest2020 -DJANGO_ALLOWED_HOSTS=chp.thayer.dartmouth.edu -SQL_ENGINE=django.db.backends.postgresql -SQL_DATABASE=chp_api_prod -SQL_USER=chp_api_user -SQL_PASSWORD=chp_api_user -SQL_HOST=db -SQL_PORT=5432 -DATABASE=postgres -DJANGO_SETTINGS_MODULE=chp_api.settings.production \ No newline at end of file diff --git a/.env.prod.db b/.env.prod.db deleted file mode 100644 index a05584e..0000000 --- a/.env.prod.db +++ /dev/null @@ -1,3 +0,0 @@ -POSTGRES_USER=chp_api_user -POSTGRES_PASSWORD=chp_api_user -POSTGRES_DB=chp_api_prod diff --git a/.env.stage b/.env.stage deleted file mode 100644 index 284f3ad..0000000 --- a/.env.stage +++ /dev/null @@ -1,11 +0,0 @@ -DEBUG=0 -SECRET_KEY=BkbsAreTheBest2020 -DJANGO_ALLOWED_HOSTS=chp-dev.thayer.dartmouth.edu breast.chp-dev.thayer.dartmouth.edu brain.chp-dev.thayer.dartmouth.edu lung.chp-dev.thayer.dartmouth.edu 127.0.0.1 -SQL_ENGINE=django.db.backends.postgresql -SQL_DATABASE=chp_api_prod -SQL_USER=chp_api_user -SQL_PASSWORD=chp_api_user -SQL_HOST=db -SQL_PORT=5432 -DATABASE=postgres -DJANGO_SETTINGS_MODULE=chp_api.settings.staging diff --git a/.env.stage.db b/.env.stage.db deleted file mode 100644 index a05584e..0000000 --- a/.env.stage.db +++ /dev/null @@ -1,3 +0,0 @@ -POSTGRES_USER=chp_api_user -POSTGRES_PASSWORD=chp_api_user -POSTGRES_DB=chp_api_prod diff --git a/.gitignore b/.gitignore index 60e8e5c..77654d9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 
+1,10 @@ +# env files for development +.dev-db.env +.dev.env + +# data files for development +chp.sql chp_db_fixture.json.gz -#deployment-script -deployment-script #SSH Keys id_rsa* @@ -134,3 +138,4 @@ dmypy.json # Pyre type checker .pyre/ +/Dockerfile.dev-db \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index e69de29..7f8ab1d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "gennifer"] + path = gennifer + url = git@github.com:di2ag/gennifer.git diff --git a/.vscode/settings.json b/.vscode/settings.json index fe654df..c49a2b3 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,4 +1,5 @@ { "python.analysis.typeCheckingMode": "off", "workbench.editor.enablePreview": false, + "python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python" } \ No newline at end of file diff --git a/Dockerfile.dev b/Dockerfile.dev deleted file mode 100644 index c4df0b6..0000000 --- a/Dockerfile.dev +++ /dev/null @@ -1,104 +0,0 @@ -########### -# BUILDER # -########### - -# first stage of build to pull repos -FROM ubuntu:20.04 as intermediate - -# set work directory -WORKDIR /usr/src/chp_api - -# install git -RUN apt-get update \ - && apt-get install -y git python3-pip python3-dev dos2unix - -RUN git clone --single-branch --branch production https://github.com/di2ag/trapi_model.git -RUN git clone --single-branch --branch production https://github.com/di2ag/reasoner-validator.git -RUN git clone --single-branch --branch production https://github.com/di2ag/chp_utils.git -RUN git clone --single-branch --branch production https://github.com/di2ag/chp_look_up.git -RUN git clone --single-branch --branch production https://github.com/di2ag/chp_learn.git -RUN git clone --single-branch --branch production https://github.com/di2ag/gene-specificity.git - -# lint -RUN pip3 install --upgrade pip -RUN pip3 install flake8 wheel -COPY . . - -# install dependencies -COPY ./requirements.txt . 
-RUN pip3 wheel --no-cache-dir --no-deps --wheel-dir /usr/src/chp_api/wheels -r requirements.txt - -# gather trapi model wheel -RUN cd trapi_model && python3 setup.py bdist_wheel && cd dist && cp trapi_model-*-py3-none-any.whl /usr/src/chp_api/wheels - -# gather reasoner-validator wheel -RUN cd reasoner-validator && python3 setup.py bdist_wheel && cd dist && cp reasoner_validator-*-py3-none-any.whl /usr/src/chp_api/wheels - -# gather chp-utils wheel -RUN cd chp_utils && python3 setup.py bdist_wheel && cd dist && cp chp_utils-*-py3-none-any.whl /usr/src/chp_api/wheels - -#gather chp_look_up wheel -RUN cd chp_look_up && python3 setup.py bdist_wheel && cd dist && cp chp_look_up-*-py3-none-any.whl /usr/src/chp_api/wheels - -#gather chp_learn wheel -RUN cd chp_learn && python3 setup.py bdist_wheel && cd dist && cp chp_learn-*-py3-none-any.whl /usr/src/chp_api/wheels - -#gather gene specificity wheel -RUN cd gene-specificity && python3 setup.py bdist_wheel && cd dist && cp gene_specificity-*-py3-none-any.whl /usr/src/chp_api/wheels - -######### -# FINAL # -######### - -#pull official base image -FROM ubuntu:20.04 - -# add app user -RUN groupadd chp_api && useradd -ms /bin/bash -g chp_api chp_api - -# create the appropriate directories -ENV HOME=/home/chp_api -ENV APP_HOME=/home/chp_api/web -RUN mkdir $APP_HOME -RUN mkdir $APP_HOME/staticfiles -WORKDIR $APP_HOME - -# set environment variables -ENV PYTHONDONTWRITEBYTECODE 1 -ENV PYTHONUNBUFFERED 1 -ENV TZ=America/New_York - -# set ARGs -ARG DEBIAN_FRONTEND=noninterative - -# install dependencies -RUN apt-get update \ - && apt-get install -y python3-pip graphviz openmpi-bin libopenmpi-dev build-essential libssl-dev libffi-dev python3-dev -RUN apt-get install -y libgraphviz-dev python3-pygraphviz -RUN apt-get install -y libpq-dev -RUN apt-get install -y netcat - -# copy repo to new image -COPY --from=intermediate /usr/src/chp_api/wheels /wheels -COPY --from=intermediate /usr/src/chp_api/requirements.txt . 
-RUN pip3 install --upgrade pip -RUN python3 -m pip install --upgrade pip -RUN pip3 install --no-cache /wheels/* - -# copy entry point -COPY ./entrypoint.prod.sh $APP_HOME - -# copy project -COPY ./chp_api $APP_HOME/chp_api -COPY ./manage.py $APP_HOME -COPY ./dispatcher $APP_HOME/dispatcher -COPY ./gunicorn.config-prod.py $APP_HOME - -# chown all the files to the app user -RUN chown -R chp_api:chp_api $APP_HOME - -# change to the app user -USER chp_api - -# run entrypoint.sh -ENTRYPOINT ["/home/chp_api/web/entrypoint.prod.sh"] \ No newline at end of file diff --git a/Dockerfile.prod b/Dockerfile.prod deleted file mode 100644 index e33236c..0000000 --- a/Dockerfile.prod +++ /dev/null @@ -1,105 +0,0 @@ -########### -# BUILDER # -########### - -# first stage of build to pull repos -FROM ubuntu:20.04 as intermediate - -# set work directory -WORKDIR /usr/src/chp_api - -# install git -RUN apt-get update \ - && apt-get install -y git python3-pip python3-dev dos2unix - -RUN git clone --single-branch --branch production https://github.com/di2ag/trapi_model.git -RUN git clone --single-branch --branch production https://github.com/di2ag/reasoner-validator.git -RUN git clone --single-branch --branch production https://github.com/di2ag/chp_utils.git -RUN git clone --single-branch --branch production https://github.com/di2ag/chp_look_up.git -RUN git clone --single-branch --branch production https://github.com/di2ag/chp_learn.git -RUN git clone --single-branch --branch production https://github.com/di2ag/gene-specificity.git - -# lint -RUN pip3 install --upgrade pip -RUN pip3 install flake8 wheel -COPY . . - -# install dependencies -COPY ./requirements.txt . 
-RUN pip3 wheel --no-cache-dir --no-deps --wheel-dir /usr/src/chp_api/wheels -r requirements.txt - -# gather trapi model wheel -RUN cd trapi_model && python3 setup.py bdist_wheel && cd dist && cp trapi_model-*-py3-none-any.whl /usr/src/chp_api/wheels - -# gather reasoner-validator wheel -RUN cd reasoner-validator && python3 setup.py bdist_wheel && cd dist && cp reasoner_validator-*-py3-none-any.whl /usr/src/chp_api/wheels - -# gather chp-utils wheel -RUN cd chp_utils && python3 setup.py bdist_wheel && cd dist && cp chp_utils-*-py3-none-any.whl /usr/src/chp_api/wheels - -#gather chp_look_up wheel -RUN cd chp_look_up && python3 setup.py bdist_wheel && cd dist && cp chp_look_up-*-py3-none-any.whl /usr/src/chp_api/wheels - -#gather chp_learn wheel -RUN cd chp_learn && python3 setup.py bdist_wheel && cd dist && cp chp_learn-*-py3-none-any.whl /usr/src/chp_api/wheels - -#gather gene specificity wheel -RUN cd gene-specificity && python3 setup.py bdist_wheel && cd dist && cp gene_specificity-*-py3-none-any.whl /usr/src/chp_api/wheels - -######### -# FINAL # -######### - -#pull official base image -FROM ubuntu:20.04 - -# add app user -RUN groupadd chp_api && useradd -ms /bin/bash -g chp_api chp_api - -# create the appropriate directories -ENV HOME=/home/chp_api -ENV APP_HOME=/home/chp_api/web -RUN mkdir $APP_HOME -RUN mkdir $APP_HOME/staticfiles -WORKDIR $APP_HOME - -# set environment variables -ENV PYTHONDONTWRITEBYTECODE 1 -ENV PYTHONUNBUFFERED 1 -ENV TZ=America/New_York - -# set ARGs -ARG DEBIAN_FRONTEND=noninterative - -# install dependencies -RUN apt-get update \ - && apt-get install -y python3-pip graphviz openmpi-bin libopenmpi-dev build-essential libssl-dev libffi-dev python3-dev -RUN apt-get install -y libgraphviz-dev python3-pygraphviz -RUN apt-get install -y libpq-dev -RUN apt-get install -y netcat - -# copy repo to new image -COPY --from=intermediate /usr/src/chp_api/wheels /wheels -COPY --from=intermediate /usr/src/chp_api/requirements.txt . 
-RUN pip3 install --upgrade pip -RUN python3 -m pip install --upgrade pip -RUN pip3 install --no-cache /wheels/* - -# copy entry point -COPY ./entrypoint.prod.sh $APP_HOME - -# copy project -COPY ./chp_api $APP_HOME/chp_api -COPY ./manage.py $APP_HOME -COPY ./dispatcher $APP_HOME/dispatcher -COPY ./gunicorn.config-prod.py $APP_HOME -COPY ./chp_db_fixture.json.gz $APP_HOME - -# chown all the files to the app user -RUN chown -R chp_api:chp_api $APP_HOME - -# change to the app user -USER chp_api - -# run entrypoint.sh -ENTRYPOINT ["/home/chp_api/web/entrypoint.prod.sh"] \ No newline at end of file diff --git a/Dockerfile.stage b/Dockerfile.stage deleted file mode 100644 index 8d3c4e1..0000000 --- a/Dockerfile.stage +++ /dev/null @@ -1,104 +0,0 @@ -########### -# BUILDER # -########### - -# first stage of build to pull repos -FROM ubuntu:20.04 as intermediate - -# set work directory -WORKDIR /usr/src/chp_api - -# install git -RUN apt-get update \ - && apt-get install -y git python3-pip python3-dev dos2unix - -RUN git clone --single-branch --branch staging https://github.com/di2ag/trapi_model.git -RUN git clone --single-branch --branch staging https://github.com/di2ag/reasoner-validator.git -RUN git clone --single-branch --branch staging https://github.com/di2ag/chp_utils.git -RUN git clone --single-branch --branch staging https://github.com/di2ag/chp_look_up.git -RUN git clone --single-branch --branch staging https://github.com/di2ag/chp_learn.git -RUN git clone --single-branch --branch staging https://github.com/di2ag/gene-specificity.git - -# lint -RUN pip3 install --upgrade pip -RUN pip3 install flake8 wheel -COPY . . - -# install dependencies -COPY ./requirements.txt . 
-RUN pip3 wheel --no-cache-dir --no-deps --wheel-dir /usr/src/chp_api/wheels -r requirements.txt - -# gather trapi model wheel -RUN cd trapi_model && python3 setup.py bdist_wheel && cd dist && cp trapi_model-*-py3-none-any.whl /usr/src/chp_api/wheels - -# gather reasoner-validator wheel -RUN cd reasoner-validator && python3 setup.py bdist_wheel && cd dist && cp reasoner_validator-*-py3-none-any.whl /usr/src/chp_api/wheels - -# gather chp_utils wheel -RUN cd chp_utils && python3 setup.py bdist_wheel && cd dist && cp chp_utils-*-py3-none-any.whl /usr/src/chp_api/wheels - -#gather chp_look_up wheel -RUN cd chp_look_up && python3 setup.py bdist_wheel && cd dist && cp chp_look_up-*-py3-none-any.whl /usr/src/chp_api/wheels - -#gather chp_learn wheel -RUN cd chp_learn && python3 setup.py bdist_wheel && cd dist && cp chp_learn-*-py3-none-any.whl /usr/src/chp_api/wheels - -#grather gene-specificity wheel -RUN cd gene-specificity && python3 setup.py bdist_wheel && cd dist && cp gene_specificity-*-py3-none-any.whl /usr/src/chp_api/wheels - -######### -# FINAL # -######### - -#pull official base image -FROM ubuntu:20.04 - -# add app user -RUN groupadd chp_api && useradd -ms /bin/bash -g chp_api chp_api - -# create the appropriate directories -ENV HOME=/home/chp_api -ENV APP_HOME=/home/chp_api/web -RUN mkdir $APP_HOME -RUN mkdir $APP_HOME/staticfiles -WORKDIR $APP_HOME - -# set environment variables -ENV PYTHONDONTWRITEBYTECODE 1 -ENV PYTHONUNBUFFERED 1 -ENV TZ=America/New_York - -# set ARGs -ARG DEBIAN_FRONTEND=noninterative - -# install dependencies -RUN apt-get update \ - && apt-get install -y python3-pip graphviz openmpi-bin libopenmpi-dev build-essential libssl-dev libffi-dev python3-dev -RUN apt-get install -y libgraphviz-dev python3-pygraphviz -RUN apt-get install -y libpq-dev -RUN apt-get install -y netcat - -# copy repo to new image -COPY --from=intermediate /usr/src/chp_api/wheels /wheels -COPY --from=intermediate /usr/src/chp_api/requirements.txt . 
-RUN pip3 install --upgrade pip -RUN pip3 install --no-cache /wheels/* - -# copy entry point -COPY ./entrypoint.prod.sh $APP_HOME - -# copy project -COPY ./chp_api $APP_HOME/chp_api -COPY ./manage.py $APP_HOME -COPY ./dispatcher $APP_HOME/dispatcher -COPY ./gunicorn.config-stage.py $APP_HOME -COPY ./chp_db_fixture.json.gz $APP_HOME - -# chown all the files to the app user -RUN chown -R chp_api:chp_api $APP_HOME - -# change to the app user -USER chp_api - -# run entrypoint.sh -ENTRYPOINT ["/home/chp_api/web/entrypoint.stage.sh"] \ No newline at end of file diff --git a/README.md b/README.md index a0a2ed5..071a970 100644 --- a/README.md +++ b/README.md @@ -1,48 +1,99 @@ # Connections Hypothesis Provider API Documentation -## Description -Connections Hypothesis Provider (CHP) is a service built by Dartmouth College (PI – Dr. Eugene Santos) and Tufts University (Co-PI – Joseph Gormley) in collaboration with the National Center for Advancing Translational Sciences (NCATS). CHP aims to leverage clinical data along with structured biochemical knowledge to derive a computational representation of pathway structures and molecular components to support human and machine-driven interpretation, enable pathway-based biomarker discovery, and aid in the drug development process. -In its current version, CHP supports queries relating to genetic, therapeutic, and patient clinical features (e.g. tumor staging) contribution toward patient survival, as computed within the context of our test pilot: a robust breast cancer dataset from The Cancer Genome Atlas (TCGA). We are using this as a proving ground for our system’s basic operations as we work to incorporate structured pathway knowledge from Reactome and pathway analysis methods into the tool. - ## Introduction -Our system utilizes the Bayesian Knowledge Base (BKB), which is a directed probabilistic graphical model capable of modeling incomplete, cyclic and heterogenous knowledge. 
We currently expose a portion of our computational framework as a proving ground to reason over patterns that exist within the TCGA dataset, determine sensitivity of features to the underlying conditional probabilities, and guide hypothesis generation. Querying semantics are of the form P(Target=t | Evidence=e). To this end we’ve explored setting survival time targets specifying mutational profiles and drug treatments as evidence, though this process is generalizable to any TCGA feature type (e.g., tumor staging, gene copy number, age of diagnosis, etc.). -However, as we incorporate pathway knowledge from domain-standard resources like Reactome and overlay biochemical pathway traversal logic, we will extend these querying semantics to derive inferences relating to biochemical mechanisms. The short-term benefits of tacking on this difficult task are to provide mechanism-of-action level analysis over cellular behavior and clinical outcomes. The long-term benefits of this work is to characterize three categories of information and discovery critical to pathway science, pathway components, pathway topology, and pathway dynamics. -Queries are governed by the Translator Reasoner API (TRAPI) which support the Biolink Model ontology. We’ve constructed a TRAPI compliant schema that represents our probabilistic queries and is digestible by our service. Upon receiving TRAPI compliant queries we return a conditional probability pertaining to the query as well as the auditable features our system captured and their overall sensitivity to the conditional probability. These features can be used to guide future exploration of the dataset and be used to lead to novel conclusions over the data. - -## Terms and Definitions -The greater NCATS consortium uses a series of terms (that we have adopted) to convey meaning quickly. 
A link to those terms and their definitions are available here: https://docs.google.com/spreadsheets/d/1C8hKXacxtQC5UzXI4opQs1r4pBJ_5hqgXrZH_raYQ4w/edit#gid=1581951609 -We extend this list local to our KP (Look, here is an NCATS term right here!) with the following terms: -• Connections Hypothesis Provider – CHP -• The Cancer Genome Atlas – TCGA -• Bayesian Knowledge Base – BKB +Connections Hypothesis Provider (CHP) is a collaborative service developed by Dartmouth College and Tufts University, in partnership with the National Center for Advancing Translational Sciences (NCATS). CHP's mission is to utilize clinical data and structured biochemical knowledge to create computational representations of pathway structures and molecular components. This effort supports both human and machine-driven analysis, enabling pathway-based biomarker discovery and contributing to the drug development process. -## Smart API -CHP is registered with Smart API: https://smart-api.info/ui/855adaa128ce5aa58a091d99e520d396 +Currently, CHP serves as a platform for Gene Regulatory Network (GRN) discovery, allowing researchers to upload their own RNASeq data, or work with pre-existing datasets. Users can analyze, refine, and explore novel gene-to-gene regulatory relationships through our core discovery tool, GenNIFER, a web-based portal featuring state-of-the-art GRN inferencing algorithms. Additionally, the platform integrates with the Translator ecosystem, allowing users to contextualize their findings using existing knowledge sources. -## How To Use -We encourage anyone looking for tooling/instructions, to interface with our API, to the following repository, CHP Client, https://github.com/di2ag/chp_client. CHP Client is a lightweight Python client that interfaces CHP. It is meant to be an easy-to-use wrapper utility to both run and build TRAPI queries that the CHP web service will understand. 
+Through its integration with the [Knowledge Collaboratory](https://github.com/MaastrichtU-IDS/knowledge-collaboratory) team, GenNIFER also enables researchers to publish their findings back into the Translator ecosystem, facilitating further collaboration and discovery. -Our API is in active developement and is currently following [Translator Reasoner API standards 1.2.0](https://github.com/NCATSTranslator/ReasonerAPI) +This Docker repository contains the necessary build instructions to launch our tooling in support of the CHP API. Specifically, CHP API powers the following: +* [GenNIFER](https://github.com/di2ag/gennifer): our tool for GRN discovery. +* [Tissue-Gene Specificity Tool](https://github.com/di2ag/gene-specificity): our tool for assessing a gene’s expression specificity to a tissue. + +For more specifics about either application, see their respective repository READMEs. -Our API is currently live at: [chp.thayer.dartmouth.edu](http://chp.thayer.dartmouth.edu/) +## Interacting with CHP API +The CHP API provides supporting data as a Knowledge Provider (KP) for the Translator consortium and can be interacted with from our build servers. For a list of knowledge that we support, see our meta knowledge graph. Further details about CHP API can be found in its [SmartAPI](http://smart-api.info/registry?q=412af63e15b73e5a30778aac84ce313f) registration. We also provide examples for how to interact with the individual tools in their own relevant repository. 
+### Build servers +* Production: https://chp-api.transltr.io +* Testing: https://chp-api.test.transltr.io +* Staging: https://chp-api.ci.transltr.io -## Open Endpoints +### Endpoints * [query](query.md) : `POST /query/` -* [predicates](predicates.md) : `GET /predicates/` -* [curies](curies.md) : `GET /curies/` - -## Other Notable Links -Our roadmap outlining or KP’s milestones and the progression of those milestones: https://github.com/di2ag/Connections-Hypothesis-Provider-Roadmap - -Our NCATS Wiki Page: https://github.com/NCATSTranslator/Translator-All/wiki/Connections-Hypothesis-Provider - -Our CHP Client repository: https://github.com/di2ag/chp_client - -A repository for our reasoning code: https://github.com/di2ag/chp - - -## Contacts -Dr. Eugene Santos (PI): Eugene.Santos.Jr@dartmouth.edu - -Joseph Gormley (Co-PI): jgormley@tuftsmedicalcenter.org - +* [predicates](predicates.md) : `GET /meta_knowledge_graph/` + +### Meta Knowledge Graph +
+ Click to view json example + + ```json + { + "nodes": { + "biolink:Gene": { + "id_prefixes": [ + "ENSEMBL" + ], + "attributes": null + }, + "biolink:GrossAnatomicalStructure": { + "id_prefixes": [ + "UBERON", + "EFO" + ], + "attributes": null + } + }, + "edges": [ + { + "subject": "biolink:Gene", + "predicate": "biolink:expressed_in", + "object": "biolink:GrossAnatomicalStructure", + "qualifiers": null, + "attributes": null, + "knowledge_types": null, + "association": null + }, + { + "subject": "biolink:GrossAnatomicalStructure", + "predicate": "biolink:expresses", + "object": "biolink:Gene", + "qualifiers": null, + "attributes": null, + "knowledge_types": null, + "association": null + }, + { + "subject": "biolink:Gene", + "predicate": "biolink:regulates", + "object": "biolink:Gene", + "qualifiers": null, + "attributes": null, + "knowledge_types": null, + "association": null + }, + { + "subject": "biolink:Gene", + "predicate": "biolink:regulated_by", + "object": "biolink:Gene", + "qualifiers": null, + "attributes": null, + "knowledge_types": null, + "association": null + } + ] +} +``` +
+ +### SmartAPI +CHP is registered with [SmartAPI](http://smart-api.info/registry?q=412af63e15b73e5a30778aac84ce313f). + +## Contact for this code +Gregory Hyde (gregory.m.hyde.th@dartmouth.edu) + +## TRAPI and Biolink +Trapi = 1.5.0 + +Biolink = 4.2.0 diff --git a/chp_api/Dockerfile b/chp_api/Dockerfile new file mode 100644 index 0000000..5c61142 --- /dev/null +++ b/chp_api/Dockerfile @@ -0,0 +1,77 @@ +########### +# BUILDER # +########### + +# first stage of build to pull repos +FROM python:3.9 as intermediate + +# set work directory +WORKDIR /usr/src/chp_api + +RUN git clone --single-branch --branch master https://github.com/di2ag/gene-specificity.git + +# Upgrade pip +RUN pip3 install --upgrade pip + +# Upgrade pip +RUN pip3 install --upgrade pip + +# install dependencies +COPY ./requirements.txt . +RUN pip3 wheel --no-cache-dir --no-deps --wheel-dir /usr/src/chp_api/wheels -r requirements.txt + +#gather gene specificity wheel +RUN cd gene-specificity && python3 setup.py bdist_wheel && cd dist && cp gene_specificity-*-py3-none-any.whl /usr/src/chp_api/wheels + +######### +# FINAL # +######### + +#pull official base image +FROM python:3.9 + +# add app user +RUN groupadd chp_api && useradd -ms /bin/bash -g chp_api chp_api + +# create the appropriate directories +ENV HOME=/home/chp_api +ENV APP_HOME=/home/chp_api/web +RUN mkdir $APP_HOME +RUN mkdir $APP_HOME/staticfiles +WORKDIR $APP_HOME + +# set environment variables +ENV PYTHONDONTWRITEBYTECODE 1 +ENV PYTHONUNBUFFERED 1 +ENV TZ=America/New_York + +# set ARGs +ARG DEBIAN_FRONTEND=noninterative + +# copy repo to new image +COPY --from=intermediate /usr/src/chp_api/wheels /wheels +COPY --from=intermediate /usr/src/chp_api/requirements.txt . +# Running this command to ensure version 1 of pydantic is being used until reasoner-pydantic is updated. +RUN pip3 install pydantic==1.10.12 +RUN pip3 install --no-cache /wheels/* + +# copy project +COPY . . 
+#COPY ./chp_api $APP_HOME/chp_api +#COPY ./manage.py $APP_HOME +#COPY ./dispatcher $APP_HOME/dispatcher +#COPY ./gennifer $APP_HOME/gennifer +#COPY ./chp_db_fixture.json.gz $APP_HOME +#COPY ./gunicorn.config.py $APP_HOME + +# set DJANGO_SETTINGS_MODULE environment variable +#ENV DJANGO_SETTINGS_MODULE=chp_api.settings + +# chown all the files to the app user +RUN chown -R chp_api:chp_api $APP_HOME \ + && chmod 700 $APP_HOME/staticfiles + +# change to the app user +USER chp_api + +CMD ["sh", "python manage.py migrate"] diff --git a/chp_api/chp_api/__init__.py b/chp_api/chp_api/__init__.py index e69de29..53f4ccb 100644 --- a/chp_api/chp_api/__init__.py +++ b/chp_api/chp_api/__init__.py @@ -0,0 +1,3 @@ +from .celery import app as celery_app + +__all__ = ("celery_app",) diff --git a/chp_api/chp_api/celery.py b/chp_api/chp_api/celery.py new file mode 100644 index 0000000..79b8df3 --- /dev/null +++ b/chp_api/chp_api/celery.py @@ -0,0 +1,15 @@ +import os +from celery import Celery + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "chp_api.settings") +app = Celery( + "chp_api", + include=['gennifer.tasks'], + ) +app.config_from_object("django.conf:settings", namespace="CELERY") +app.autodiscover_tasks() +app.conf.update({ + "task_routes": { + "create_gennifer_task": {"queue": 'chp_api'} + } + }) diff --git a/chp_api/chp_api/serializers.py b/chp_api/chp_api/serializers.py new file mode 100644 index 0000000..fb8942d --- /dev/null +++ b/chp_api/chp_api/serializers.py @@ -0,0 +1,12 @@ +from rest_framework_simplejwt.serializers import TokenObtainPairSerializer + +class ChpTokenObtainPairSerializer(TokenObtainPairSerializer): + @classmethod + def get_token(cls, user): + token = super().get_token(user) + + # Add custom claims + token['email'] = user.email + token['username'] = user.username + + return token diff --git a/chp_api/chp_api/settings.py b/chp_api/chp_api/settings.py new file mode 100644 index 0000000..3ae79b7 --- /dev/null +++ b/chp_api/chp_api/settings.py @@ -0,0 
+1,253 @@ +""" +Base Django settings for chp_api project. + +Generated by 'django-admin startproject' using Django 3.0.7. + +For more information on this file, see +https://docs.djangoproject.com/en/3.0/topics/settings/ + +For the full list of settings and their values, see +https://docs.djangoproject.com/en/3.0/ref/settings/ +""" +import os +import environ as environ # type: ignore + +from importlib import import_module + + +# Initialise environment variables +env = environ.Env() +environ.Env.read_env() + +# SECURITY WARNING: don't run with debug turned on in production! +DEBUG = int(env("DEBUG", default=0)) + +# Build paths inside the project like this: os.path.join(BASE_DIR, ...) +BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +DATA_UPLOAD_MAX_MEMORY_SIZE = None +REST_FRAMEWORK = { + 'DEFAULT_PARSER_CLASSES': [ + 'rest_framework.parsers.JSONParser', + ], + 'DEFAULT_AUTHENTICATION_CLASSES': ( + #'rest_framework_simplejwt.authentication.JWTAuthentication', + 'oauth2_provider.contrib.rest_framework.OAuth2Authentication', + ), + 'DEFAULT_PERMISSION_CLASSES': ( + 'rest_framework.permissions.IsAuthenticatedOrReadOnly', + ) +} + +AUTHENTICATION_BACKENDS = [ + 'oauth2_provider.backends.OAuth2Backend', + # Uncomment following if you want to access the admin + 'django.contrib.auth.backends.ModelBackend', +] + +OAUTH2_PROVIDER = { + # this is the list of available scopes + 'SCOPES': {'read': 'Read scope', 'write': 'Write scope', 'groups': 'Access to your groups'} +} + +# Cors stuff (must go before installed apps) +CORS_ALLOWED_ORIGINS = [ + 'http://localhost', + 'http://localhost:3000', + ] + +# Application definition +INSTALLED_BASE_APPS = [ + 'django.contrib.admin', + 'django.contrib.auth', + 'django.contrib.contenttypes', + 'django.contrib.sessions', + 'django.contrib.messages', + 'django.contrib.staticfiles', + 'corsheaders', + 'rest_framework', + 'rest_framework_simplejwt', + 'django_filters', + 
'dispatcher.apps.DispatcherConfig', + 'django_extensions', + 'users', + 'oauth2_provider', + #'gennifer', # Need to make into CHP app +] + +INSTALLED_CHP_APPS = [ + 'gene_specificity', + 'gennifer', + ] + +# CHP Versions +VERSIONS = {app_name: app.__version__ for app_name, app in [(app_name, import_module(app_name)) for app_name in INSTALLED_CHP_APPS]} + +# Sets up installed apps relevent to django +INSTALLED_APPS = INSTALLED_BASE_APPS + INSTALLED_CHP_APPS + +MIDDLEWARE = [ + 'corsheaders.middleware.CorsMiddleware', + 'django.middleware.security.SecurityMiddleware', + 'django.contrib.sessions.middleware.SessionMiddleware', + 'django.middleware.common.CommonMiddleware', + 'django.middleware.csrf.CsrfViewMiddleware', + 'django.contrib.auth.middleware.AuthenticationMiddleware', + 'django.contrib.messages.middleware.MessageMiddleware', + 'django.middleware.clickjacking.XFrameOptionsMiddleware', + 'django.contrib.auth.middleware.AuthenticationMiddleware', + 'oauth2_provider.middleware.OAuth2TokenMiddleware', +] + +ROOT_URLCONF = 'chp_api.urls' + +TEMPLATES = [ + { + 'BACKEND': 'django.template.backends.django.DjangoTemplates', + 'DIRS': [], + 'APP_DIRS': True, + 'OPTIONS': { + 'context_processors': [ + 'django.template.context_processors.debug', + 'django.template.context_processors.request', + 'django.contrib.auth.context_processors.auth', + 'django.contrib.messages.context_processors.messages', + ], + }, + }, +] + +# Logging +LOGGING = { + 'version': 1, + 'disable_existing_loggers': False, + 'handlers': { + 'console': { + 'class': 'logging.StreamHandler', + }, + }, + 'root': { + 'handlers': ['console'], + 'level': 'WARNING', + }, +} + +WSGI_APPLICATION = 'chp_api.wsgi.application' + +# Password validation +# https://docs.djangoproject.com/en/3.0/ref/settings/#auth-password-validators + +AUTH_PASSWORD_VALIDATORS = [ + { + 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', + }, + { + 'NAME': 
'django.contrib.auth.password_validation.MinimumLengthValidator', + }, + { + 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', + }, + { + 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', + }, +] + +# Authorization +AUTH_USER_MODEL='users.User' +LOGIN_URL='/admin/login/' + +# Internationalization +# https://docs.djangoproject.com/en/3.0/topics/i18n/ +LANGUAGE_CODE = 'en-us' + +TIME_ZONE = 'UTC' + +USE_I18N = True + +USE_L10N = True + +USE_TZ = True + +# Static files (CSS, JavaScript, Images) +# https://docs.djangoproject.com/en/3.0/howto/static-files/ + +STATIC_URL = '/static/' +STATIC_ROOT = os.path.join(BASE_DIR, "staticfiles") + +# Hosts Configuration +#ROOT_HOSTCONF = 'chp_api.hosts' + +DB_PASSWORD = env("POSTGRES_PASSWORD", default=None) + +if not DB_PASSWORD: + with open(env("POSTGRES_PASSWORD_FILE"), 'r') as db_pwd: + DB_PASSWORD = db_pwd.readline().strip() + +# Database +# https://docs.djangoproject.com/en/3.0/ref/settings/#databases +DATABASES = { + 'default': { + "ENGINE": "django.db.backends.postgresql_psycopg2", + "NAME": env("POSTGRES_DB"), + "USER": env("POSTGRES_USER"), + "PASSWORD": DB_PASSWORD, + "HOST": env("POSTGRES_HOST"), + "PORT": env("POSTGRES_PORT"), + } +} + + +ALLOWED_HOSTS = env("DJANGO_ALLOWED_HOSTS", default=None) +if not ALLOWED_HOSTS: + with open(env("DJANGO_ALLOWED_HOSTS_FILE"), 'r') as ah_file: + ALLOWED_HOSTS = ah_file.readline().strip().split(" ") +else: + ALLOWED_HOSTS = ALLOWED_HOSTS.split(',') + +# SECURITY WARNING: keep the secret key used in production secret! 
+ # Read the secret key from file +SECRET_KEY = env("SECRET_KEY", default=None) +if not SECRET_KEY: + with open(env("SECRET_KEY_FILE"), 'r') as sk_file: + SECRET_KEY = sk_file.readline().strip() + +CSRF_TRUSTED_ORIGINS = env("CSRF_TRUSTED_ORIGINS", default=None) +if not CSRF_TRUSTED_ORIGINS: + with open(env("CSRF_TRUSTED_ORIGINS_FILE"), 'r') as csrf_file: + CSRF_TRUSTED_ORIGINS = csrf_file.readline().strip().split(" ") +else: + CSRF_TRUSTED_ORIGINS = CSRF_TRUSTED_ORIGINS.split(',') + +# Set UN, Email and Password for superuser +DJANGO_SUPERUSER_USERNAME = env("DJANGO_SUPERUSER_USERNAME", default=None) +if not DJANGO_SUPERUSER_USERNAME: + with open(env("DJANGO_SUPERUSER_USERNAME_FILE"), 'r') as dsu_file: + os.environ["DJANGO_SUPERUSER_USERNAME"] = dsu_file.readline().strip() + +DJANGO_SUPERUSER_EMAIL = env("DJANGO_SUPERUSER_EMAIL", default=None) +if not DJANGO_SUPERUSER_EMAIL: + with open(env("DJANGO_SUPERUSER_EMAIL_FILE"), 'r') as dse_file: + os.environ["DJANGO_SUPERUSER_EMAIL"] = dse_file.readline().strip() + +DJANGO_SUPERUSER_PASSWORD = env("DJANGO_SUPERUSER_PASSWORD", default=None) +if not DJANGO_SUPERUSER_PASSWORD: + with open(env("DJANGO_SUPERUSER_PASSWORD_FILE"), 'r') as dsp_file: + os.environ["DJANGO_SUPERUSER_PASSWORD"] = dsp_file.readline().strip() + +# Simple JWT Settings +SIMPLE_JWT = { + "TOKEN_OBTAIN_SERIALIZER": "chp_api.serializers.ChpTokenObtainPairSerializer", + } + +# Celery Settings +CELERY_BROKER_URL = os.environ.get("CELERY_BROKER_URL", "redis://localhost:6379") +CELERY_RESULT_BACKEND = os.environ.get("CELERY_RESULT_BACKEND", "redis://localhost:6379") + +# Gennifer settings +GENNIFER_ALGORITHM_URLS = [ + "http://pidc:5000", + "http://grisli:5000", + "http://genie3:5000", + "http://grnboost2:5000", + "http://bkb-grn:5000", + ] diff --git a/chp_api/chp_api/settings/base.py b/chp_api/chp_api/settings/base.py deleted file mode 100644 index 9500cfd..0000000 --- a/chp_api/chp_api/settings/base.py +++ /dev/null @@ -1,145 +0,0 @@ -""" -Base Django 
settings for chp_api project. - -Generated by 'django-admin startproject' using Django 3.0.7. - -For more information on this file, see -https://docs.djangoproject.com/en/3.0/topics/settings/ - -For the full list of settings and their values, see -https://docs.djangoproject.com/en/3.0/ref/settings/ -""" - -import os -from importlib import import_module - -# Build paths inside the project like this: os.path.join(BASE_DIR, ...) -BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -DATA_UPLOAD_MAX_MEMORY_SIZE = None - -REST_FRAMEWORK = { - 'DEFAULT_PARSER_CLASSES': [ - 'rest_framework.parsers.JSONParser', - ] -} - - -# Application definition -INSTALLED_BASE_APPS = [ - 'django.contrib.admin', - 'django.contrib.auth', - 'django.contrib.contenttypes', - 'django.contrib.sessions', - 'django.contrib.messages', - 'django.contrib.staticfiles', - 'rest_framework', - 'dispatcher.apps.DispatcherConfig', - 'chp_utils', - 'django_extensions', -] - -INSTALLED_CHP_APPS = [ - 'chp_look_up', - 'chp_learn', - 'gene_specificity', - ] - -OTHER_APPS = [ - 'chp_utils' - ] - -# CHP Versions -VERSIONS = {app_name: app.__version__ for app_name, app in [(app_name, import_module(app_name)) for app_name in INSTALLED_CHP_APPS + OTHER_APPS]} - -# Sets up installed apps relevent to django -INSTALLED_APPS = INSTALLED_BASE_APPS + INSTALLED_CHP_APPS - -MIDDLEWARE = [ - #'django_hosts.middleware.HostsRequestMiddleware', - #'django_hosts.middleware.HostsResponseMiddleware', - 'django.middleware.security.SecurityMiddleware', - 'django.contrib.sessions.middleware.SessionMiddleware', - 'django.middleware.common.CommonMiddleware', - 'django.middleware.csrf.CsrfViewMiddleware', - 'django.contrib.auth.middleware.AuthenticationMiddleware', - 'django.contrib.messages.middleware.MessageMiddleware', - 'django.middleware.clickjacking.XFrameOptionsMiddleware', -] - -ROOT_URLCONF = 'dispatcher.urls' - -TEMPLATES = [ - { - 'BACKEND': 
'django.template.backends.django.DjangoTemplates', - 'DIRS': [], - 'APP_DIRS': True, - 'OPTIONS': { - 'context_processors': [ - 'django.template.context_processors.debug', - 'django.template.context_processors.request', - 'django.contrib.auth.context_processors.auth', - 'django.contrib.messages.context_processors.messages', - ], - }, - }, -] - -# Logging -LOGGING = { - 'version': 1, - 'disable_existing_loggers': False, - 'handlers': { - 'console': { - 'class': 'logging.StreamHandler', - }, - }, - 'root': { - 'handlers': ['console'], - 'level': 'WARNING', - }, -} - -WSGI_APPLICATION = 'chp_api.wsgi.application' - - -# Password validation -# https://docs.djangoproject.com/en/3.0/ref/settings/#auth-password-validators - -AUTH_PASSWORD_VALIDATORS = [ - { - 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', - }, - { - 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', - }, - { - 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', - }, - { - 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', - }, -] - - -# Internationalization -# https://docs.djangoproject.com/en/3.0/topics/i18n/ -LANGUAGE_CODE = 'en-us' - -TIME_ZONE = 'UTC' - -USE_I18N = True - -USE_L10N = True - -USE_TZ = True - - -# Static files (CSS, JavaScript, Images) -# https://docs.djangoproject.com/en/3.0/howto/static-files/ - -STATIC_URL = '/staticfiles/' -STATIC_ROOT = os.path.join(BASE_DIR, "staticfiles") - -# Hosts Configuration -#ROOT_HOSTCONF = 'chp_api.hosts' diff --git a/chp_api/chp_api/settings/dev.py b/chp_api/chp_api/settings/dev.py deleted file mode 100644 index 9e33ec2..0000000 --- a/chp_api/chp_api/settings/dev.py +++ /dev/null @@ -1,31 +0,0 @@ -""" -Local Development Django settings for chp_api project. 
-""" - -from .base import * -import environ - -# Initialise environment variables -env = environ.Env() -environ.Env.read_env() - -# SECURITY WARNING: keep the secret key used in production secret! -SECRET_KEY = env("SECRET_KEY") - -# SECURITY WARNING: don't run with debug turned on in production! -DEBUG = True - -ALLOWED_HOSTS = ['localhost', '127.0.0.1'] - -# Database -# https://docs.djangoproject.com/en/3.0/ref/settings/#databases -DATABASES = { - 'default': { - "ENGINE": "django.db.backends.postgresql_psycopg2", - "NAME": env("DATABASE_NAME"), - "USER": env("DATABASE_USER"), - "PASSWORD": env("SQL_PASSWORD"), - "HOST": env("SQL_HOST"), - "PORT": env("SQL_PORT"), - } -} \ No newline at end of file diff --git a/chp_api/chp_api/settings/production.py b/chp_api/chp_api/settings/production.py deleted file mode 100644 index b3b62c0..0000000 --- a/chp_api/chp_api/settings/production.py +++ /dev/null @@ -1,31 +0,0 @@ -""" -Production Django settings for chp_api project. -""" - -from .base import * -import environ - -# Initialise environment variables -env = environ.Env() -environ.Env.read_env() - -# SECURITY WARNING: keep the secret key used in production secret! -SECRET_KEY = env("SECRET_KEY") - -# SECURITY WARNING: don't run with debug turned on in production! 
-DEBUG = int(os.environ.get("DEBUG", default=0)) - -ALLOWED_HOSTS = os.environ.get("DJANGO_ALLOWED_HOSTS").split(" ") - -# Database -# https://docs.djangoproject.com/en/3.0/ref/settings/#databases -DATABASES = { - 'default': { - "ENGINE": "django.db.backends.postgresql_psycopg2", - "NAME": env("DATABASE_NAME"), - "USER": env("DATABASE_USER"), - "PASSWORD": env("SQL_PASSWORD"), - "HOST": env("SQL_HOST"), - "PORT": env("SQL_PORT"), - } -} \ No newline at end of file diff --git a/chp_api/chp_api/settings/staging.py b/chp_api/chp_api/settings/staging.py deleted file mode 100644 index 91e4485..0000000 --- a/chp_api/chp_api/settings/staging.py +++ /dev/null @@ -1,33 +0,0 @@ -""" -Staging Django settings for chp_api project. -""" - -from .base import * -import environ - -# Initialise environment variables -env = environ.Env() -environ.Env.read_env() - -# SECURITY WARNING: keep the secret key used in production secret! -SECRET_KEY = env("SECRET_KEY") - -# SECURITY WARNING: don't run with debug turned on in production! -DEBUG = int(os.environ.get("DEBUG", default=0)) - -ALLOWED_HOSTS = os.environ.get("DJANGO_ALLOWED_HOSTS").split(" ") - -DATA_UPLOAD_MAX_MEMORY_SIZE = None - -# Database -# https://docs.djangoproject.com/en/3.0/ref/settings/#databases -DATABASES = { - 'default': { - "ENGINE": "django.db.backends.postgresql_psycopg2", - "NAME": env("DATABASE_NAME"), - "USER": env("DATABASE_USER"), - "PASSWORD": env("SQL_PASSWORD"), - "HOST": env("SQL_HOST"), - "PORT": env("SQL_PORT"), - } -} \ No newline at end of file diff --git a/chp_api/chp_api/urls.py b/chp_api/chp_api/urls.py index 148c33e..5a71ddc 100644 --- a/chp_api/chp_api/urls.py +++ b/chp_api/chp_api/urls.py @@ -14,11 +14,23 @@ 2. 
Add a URL to urlpatterns: path('blog/', include('blog.urls')) """ from django.contrib import admin -from django.urls import path +from django.urls import path, include from rest_framework.urlpatterns import format_suffix_patterns -from dispatcher import views +from rest_framework_simplejwt.views import ( + TokenObtainPairView, + TokenRefreshView, +) + urlpatterns = [ + path('admin/', admin.site.urls), path('', include('dispatcher.urls')), + path('gennifer/api/', include('gennifer.urls')), + path('api/token/', TokenObtainPairView.as_view(), name='token_obtain_pair'), + path('api/token/refresh/', TokenRefreshView.as_view(), name='token_refresh'), + path('o/', include('oauth2_provider.urls', namespace='oauth2_provider')), + path('users/', include('users.urls')), ] + + diff --git a/chp_api/dispatcher/admin.py b/chp_api/dispatcher/admin.py index 8c38f3f..06e6480 100644 --- a/chp_api/dispatcher/admin.py +++ b/chp_api/dispatcher/admin.py @@ -1,3 +1,7 @@ from django.contrib import admin -# Register your models here. 
+from .models import App, ZenodoFile, DispatcherSetting + +admin.site.register(App) +admin.site.register(ZenodoFile) +admin.site.register(DispatcherSetting) diff --git a/chp_api/dispatcher/apps.py b/chp_api/dispatcher/apps.py index 4fe2ce2..a4df4e0 100644 --- a/chp_api/dispatcher/apps.py +++ b/chp_api/dispatcher/apps.py @@ -1,5 +1,12 @@ +import requests_cache + from django.apps import AppConfig + class DispatcherConfig(AppConfig): default_auto_field = 'django.db.models.BigAutoField' name = 'dispatcher' + + # Install a requests cache + requests_cache.install_cache('dispatcher_cache') + diff --git a/chp_api/dispatcher/base.py b/chp_api/dispatcher/base.py index ce85926..92e7108 100644 --- a/chp_api/dispatcher/base.py +++ b/chp_api/dispatcher/base.py @@ -1,20 +1,20 @@ -import logging +import uuid import time +import logging +from .logger import Logger +import itertools from copy import deepcopy from re import A +#from reasoner_validator import TRAPISchemaValidator from django.http import JsonResponse from django.apps import apps from django.conf import settings from importlib import import_module from collections import defaultdict -from chp_utils.trapi_query_processor import BaseQueryProcessor -from chp_utils.curie_database import merge_curies_databases -from trapi_model.meta_knowledge_graph import merge_meta_knowledge_graphs -from trapi_model.query import Query -from trapi_model.biolink import TOOLKIT - -from .models import Transaction +from .models import Transaction, App, DispatcherSetting, Template, TemplateMatch +from reasoner_pydantic import MetaKnowledgeGraph, Message, MetaEdge, MetaNode +from reasoner_pydantic.qgraph import QNode, QEdge # Setup logging logging.addLevelName(25, "NOTE") @@ -24,17 +24,11 @@ def note(self, message, *args, **kwargs): logging.Logger.note = note logger = logging.getLogger(__name__) -# Installed CHP Apps -#CHP_APPS = [ -# "chp.app", -# "chp_look_up.app", -# ] - # Import CHP Apps APPS = [import_module(app+'.app_interface') for app in 
settings.INSTALLED_CHP_APPS] -class Dispatcher(BaseQueryProcessor): - def __init__(self, request, trapi_version): +class Dispatcher(): + def __init__(self, request, trapi_version, biolink_version): """ Base API Query Processor class used to abstract the processing infrastructure from the views. Inherits from the CHP Utilities Trapi Query Processor which handles node normalization, curie ontology expansion, and semantic operations. @@ -43,59 +37,71 @@ def __init__(self, request, trapi_version): :type request: requests.request """ self.request_data = deepcopy(request.data) - - #self.chp_config, self.passed_subdomain = self.get_app_config(request) + self.biolink_version = biolink_version self.trapi_version = trapi_version - super().__init__(None) - - def get_curies(self): - curies_dbs = [] - for app in APPS: - get_app_curies_fn = getattr(app, 'get_curies') - curies_dbs.append(get_app_curies_fn()) - return merge_curies_databases(curies_dbs) + #self.validator = TRAPISchemaValidator(self.trapi_version) + self.logger = Logger() + + def merge_meta_kg(self, metakg1, metakg2): + new_metakg = MetaKnowledgeGraph.parse_obj({"nodes": [], "edges": []}) + # Merge nodes + new_metakg.nodes = metakg1.nodes + for n, v in metakg2.nodes.items(): + if n in new_metakg.nodes: + id_prefixes = list(set.union(*[set(list(metakg1.nodes[n].id_prefixes)), set(list(metakg2.nodes[n].id_prefixes))])) + new_node = MetaNode.parse_obj({"id_prefixes": id_prefixes}) + new_metakg.nodes[n] = new_node + else: + new_metakg.nodes[n] = MetaNode.parse_obj(v) + # Merge edges + for e in metakg1.edges: + new_metakg.edges.append(e) + for e in metakg2.edges: + new_metakg.edges.append(e) + return new_metakg def get_meta_knowledge_graph(self): - meta_kgs = [] - for app in APPS: - get_app_meta_kg_fn = getattr(app, 'get_meta_knowledge_graph') - meta_kgs.append(get_app_meta_kg_fn()) - return merge_meta_knowledge_graphs(meta_kgs) + # Get current trapi and biolink versions + dispatcher_settings = DispatcherSetting.load() + 
merged_meta_kg = None + for app, app_name in zip(APPS, settings.INSTALLED_CHP_APPS): + app_db_obj = App.objects.get(name=app_name) + # Load location from uploaded Zenodo files + if app_db_obj.meta_knowledge_graph_zenodo_file: + meta_kg = app_db_obj.meta_knowledge_graph_zenodo_file.load_file(base_url="https://sandbox.zenodo.org/api/records") + # Load default location + else: + get_app_meta_kg_fn = getattr(app, 'get_meta_knowledge_graph') + meta_kg = get_app_meta_kg_fn().to_dict() + meta_kg = MetaKnowledgeGraph.parse_obj(meta_kg) + if merged_meta_kg is None: + merged_meta_kg = meta_kg + else: + merged_meta_kg = self.merge_meta_kg(merged_meta_kg, meta_kg) + return merged_meta_kg def process_invalid_trapi(self, request): invalid_query_json = request.data - invalid_query_json['status'] = 'Bad TRAPI.' - return JsonResponse(invalid_query_json, status=400) - - def process_invalid_workflow(self, request, status_msg): - invalid_query_json = request.data - invalid_query_json['status'] = status_msg + invalid_query_json['status'] = 'Malformed Query' return JsonResponse(invalid_query_json, status=400) def process_request(self, request, trapi_version): - """ Helper function that extracts the query from the message. + """ Helper function that extracts the message from the request data. """ - logger.info('Starting query.') - query = Query.load( - self.trapi_version, - biolink_version=None, - query=request.data - ) - - # Setup query in Base Processor - self.setup_query(query) - - logger.info('Query loaded') - - return query - - def get_app_configs(self, query): + logger.info('Starting query') + message = Message.parse_obj(request.data['message']) + #logger.info('Validating query') + #self.validator.validate(message.to_dict(), 'Message') + logger.info('Message loaded') + return message + + def get_app_configs(self, message): """ Should get a base app configuration for your app or nothing. 
""" app_configs = [] for app in APPS: get_app_config_fn = getattr(app, 'get_app_config') - app_configs.append(get_app_config_fn(query)) + app_configs.append(get_app_config_fn(message)) return app_configs def get_trapi_interfaces(self, app_configs): @@ -109,166 +115,100 @@ def get_trapi_interfaces(self, app_configs): base_interfaces.append(get_trapi_interface_fn(app_config)) return base_interfaces - def collect_app_queries(self, queries_list_of_lists): - all_queries = [] - for queries in queries_list_of_lists: - if type(queries) == list: - all_queries.extend(queries) - else: - all_queries.append(queries) - return all_queries - - def get_response(self, query): + def extract_message_templates(self, message): + assert len(message.query_graph.edges) == 1, 'CHP apps do not support multihop queries' + subject = None + predicates = [] + for edge_id, q_edge in message.query_graph.edges.items(): + subject = q_edge.subject + if q_edge.predicates is None: + q_edge = QEdge(subject = q_edge.subject, predicates=['biolink:related_to'], object = q_edge.object) + for predicate in q_edge.predicates: + predicates.append(predicate) + subject_categories = [] + object_categories = [] + for node_id, q_node in message.query_graph.nodes.items(): + if q_node.categories is None: + q_node = QNode(categories=['biolink:Entity']) + for category in q_node.categories: + if node_id == subject: + subject_categories.append(category) + else: + object_categories.append(category) + templates = [] + for edge in itertools.product(*[subject_categories, predicates, object_categories]): + meta_edge = MetaEdge(subject=edge[0], predicate=edge[1], object=edge[2]) + templates.append(meta_edge) + return templates + + def get_app_template_matches(self, app_name, templates): + template_matches = [] + for template in templates: + matches = TemplateMatch.objects.filter(template__app_name=app_name, + template__subject = template.subject, + template__object = template.object, + template__predicate = template.predicate) 
+ template_matches.extend(matches) + return template_matches + + def apply_templates_to_message(self, message, matching_templates): + consistent_queries = [] + for template in matching_templates: + consistent_query = message.copy(deep=True) + for edge_id, edge in consistent_query.query_graph.edges.items(): + edge.predicates = [template.predicate] + subject_id = edge.subject + object_id = edge.object + consistent_query.query_graph.nodes[subject_id].categories = [template.subject] + consistent_query.query_graph.nodes[object_id].categories = [template.object] + consistent_queries.append(consistent_query) + return consistent_queries + + def get_response(self, message): """ Main function of the processor that handles primary logic for obtaining a cached or calculated query response. """ - query_copy = query.get_copy() + self.logger.info('Running message.') start_time = time.time() - logger.info('Running query.') - - base_app_configs = self.get_app_configs(query_copy) - base_interfaces = self.get_trapi_interfaces(base_app_configs) - - # Expand - expand_queries = self.expand_batch_query(query) - - # For each app run the normalization and semops pipline - - # Make a copy of the expanded queries for each app - app_queries = [[q.get_copy() for q in expand_queries] for _ in range(len(base_interfaces))] - consistent_app_queries = [] - inconsistent_app_queries = [] - app_normalization_maps = [] - for interface, _expand_queries in zip(base_interfaces, app_queries): - _ex_copy = [] - # Normalize to Preferred Curies - normalization_time = time.time() - normalize_queries, normalization_map = self.normalize_to_preferred( - _expand_queries, - meta_knowledge_graph=interface.get_meta_knowledge_graph(), - with_normalization_map=True, - ) - app_normalization_maps.append(normalization_map) - logger.info('Normalizaion time: {} seconds.'.format(time.time() - normalization_time)) - # Conflate - conflation_time = time.time() - - conflate_queries = self.conflate_categories( - 
normalize_queries, - conflation_map=interface.get_conflation_map(), - ) - logger.info('Conflation time: {} seconds.'.format(time.time() - conflation_time)) - # Onto Expand - onto_time = time.time() - onto_queries = self.expand_supported_ontological_descendants( - conflate_queries, - curies_database=interface.get_curies(), - ) - logger.info('Ontological expansion time: {} seconds.'.format(time.time() - onto_time)) - # Semantic Ops Expand - semops_time = time.time() - semops_queries = self.expand_with_semantic_ops( - onto_queries, - meta_knowledge_graph=interface.get_meta_knowledge_graph(), - ) - logger.info('Sem ops time: {} seconds.'.format(time.time() - semops_time)) - # Filter out inconsistent queries - filter_time = time.time() - consistent_queries, inconsistent_queries = self.filter_queries_inconsistent_with_meta_knowledge_graph( - semops_queries, - meta_knowledge_graph=interface.get_meta_knowledge_graph(), - with_inconsistent_queries=True - ) - logger.info('Consistency filter time: {} seconds.'.format(time.time() - filter_time)) - - logger.info('Number of consistent queries derived from passed query: {}.'.format(len(consistent_queries))) - consistent_app_queries.append(consistent_queries) - inconsistent_app_queries.append(inconsistent_queries) - # Ensure that there are actually consistent queries that have been extracted - if sum([len(_qs) for _qs in consistent_app_queries]) == 0: - # Add all logs from inconsistent queries of all apps - all_inconsistent_queries = self.collect_app_queries(inconsistent_queries) - query_copy = self.add_logs_from_query_list(query_copy, all_inconsistent_queries) - query_copy.set_status('Bad request. 
See description.') - query_copy.set_description('Could not extract any supported queries from query graph.') - self.add_transaction(query_copy) - return JsonResponse(query_copy.to_dict()) - # Collect responses from each CHP app - app_responses = [] - app_logs = [] - app_status = [] - app_descriptions = [] - for app, consistent_queries in zip(APPS, consistent_app_queries): - get_app_response_fn = getattr(app, 'get_response') - responses, logs, status, description = get_app_response_fn(consistent_queries) - app_responses.extend(responses) - app_logs.extend(logs) - app_status.append(status) - app_descriptions.append(description) - # Check if any responses came back from any apps - if len(app_responses) == 0: - # Add logs from consistent queries of all apps - all_consistent_queries = self.collect_app_queries(consistent_app_queries) - query_copy = self.add_logs_from_query_list(query_copy, all_consistent_queries) - # Add app level logs - query_copy.logger.add_logs(app_logs) - query_copy.set_status('No results.') - self.add_transaction(query_copy) - return JsonResponse(query_copy.to_dict()) - - # Add responses into database - self.add_transactions(app_responses, app_names=[interface.get_name() for interface in base_interfaces]) - - # Construct merged response - response = self.merge_responses(query_copy, app_responses) - - # Now merge all app level log messages from each app - response.logger.add_logs(app_logs) - - # Log any error messages for apps - for app_name, status, description in zip(APPS, app_status, app_descriptions): - if status != 'Success': - response.warning('CHP App: {} reported a unsuccessful status: {} with description: {}'.format( - app_name, status, description) - ) - - # Unnormalize with each apps normalization map - unnormalized_response = response - for normalization_map in app_normalization_maps: - unnormalized_response = self.undo_normalization(unnormalized_response, normalization_map) - - logger.info('Constructed TRAPI response.') - - 
logger.info('Responded in {} seconds'.format(time.time() - start_time)) - unnormalized_response.set_status('Success') - - # Add workflow - unnormalized_response.add_workflow("lookup") - - # Set the used biolink version - unnormalized_response.biolink_version = TOOLKIT.get_model_version() - - # Add response to database - self.add_transaction(unnormalized_response) - - return JsonResponse(unnormalized_response.to_dict()) - - def add_logs_from_query_list(self, target_query, query_list): - for query in query_list: - target_query.logger.add_logs(query.logger.to_dict()) - return target_query - - def add_transaction(self, response, chp_app='dispatcher'): + self.logger.info('Getting message templates.') + message_templates = self.extract_message_templates(message) + + for app, app_name in zip(APPS, settings.INSTALLED_CHP_APPS): + self.logger.info('Checking template matches for {}'.format(app_name)) + matching_templates = self.get_app_template_matches(app_name, message_templates) + self.logger.info('Detected {} matches for {}'.format(len(matching_templates), app_name)) + if len(matching_templates) > 0: + self.logger.info('Constructing queries on matching templates') + consistent_app_queries = self.apply_templates_to_message(message, matching_templates) + self.logger.info('Sending {} consistent queries'.format(len(consistent_app_queries))) + get_app_response_fn = getattr(app, 'get_response') + responses = get_app_response_fn(consistent_app_queries, self.logger) + self.logger.info('Received responses from {}'.format(app_name)) + for response in responses: + response.query_graph = message.query_graph + self.add_transaction({'message':response.to_dict()}, str(uuid.uuid4()), 'Success', app_name) + message.update(response) + + message = message.to_dict() + message = {'message':message} + message['logs'] = self.logger.to_dict() + message['trapi_version'] = self.trapi_version + message['biolink_version'] = self.biolink_version + message['status'] = 'Success' + message['id'] = 
str(uuid.uuid4()) + message['workflow'] = [{"id": "lookup"}] + self.add_transaction(message, message['id'], 'Success', 'dispatcher') + + return JsonResponse(message) + + def add_transaction(self, response, _id, status, app_name): + app_db_obj = App.objects.get(name=app_name) # Save the transaction transaction = Transaction( - id = response.id, - status = response.status, - query = response.to_dict(), + id = _id, + status = status, + query = response, versions = settings.VERSIONS, - chp_app = chp_app, + chp_app = app_db_obj, ) transaction.save() - - def add_transactions(self, responses, app_names): - for response, chp_app in zip(responses, app_names): - self.add_transaction(response, chp_app) diff --git a/chp_api/dispatcher/logger.py b/chp_api/dispatcher/logger.py new file mode 100644 index 0000000..fdd7369 --- /dev/null +++ b/chp_api/dispatcher/logger.py @@ -0,0 +1,66 @@ +import datetime + +class LogEntry(): + def __init__(self, level, message, code=None, timestamp=None): + if timestamp is None: + self.timestamp = datetime.datetime.utcnow().isoformat() + else: + self.timestamp = timestamp + self.level = level + self.message = message + self.code = code + + def to_dict(self): + return { + "timestamp": self.timestamp, + "level": self.level, + "message": self.message, + "code": self.code, + } + + @staticmethod + def load_log(log_dict): + timestamp = log_dict.pop("timestamp") + level = log_dict.pop("level") + message = log_dict.pop("message") + code = log_dict.pop("code") + return LogEntry( + level, + message, + code=code, + timestamp=timestamp, + ) + + +class Logger(): + def __init__(self): + self.logs = [] + + def add_log(self, level, message, code=None): + self.logs.append( + LogEntry( + level, + message, + code, + ) + ) + + def add_logs(self, logs): + for log in logs: + self.logs.append(LogEntry.load_log(log)) + + def info(self, message, code=None): + self.add_log('INFO', message, code) + + def debug(self, message, code=None): + self.add_log('DEBUG', message, code) 
+ + def warning(self, message, code=None): + self.add_log('WARNING', message, code) + + def error(self, message, code=None): + self.add_log('ERROR', message, code) + + def to_dict(self): + logs = [log.to_dict() for log in self.logs] + return logs diff --git a/chp_api/dispatcher/migrations/0004_app_alter_transaction_chp_app.py b/chp_api/dispatcher/migrations/0004_app_alter_transaction_chp_app.py new file mode 100755 index 0000000..c54df21 --- /dev/null +++ b/chp_api/dispatcher/migrations/0004_app_alter_transaction_chp_app.py @@ -0,0 +1,28 @@ +# Generated by Django 4.2 on 2023-04-18 04:28 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('dispatcher', '0003_transaction_chp_app'), + ] + + operations = [ + migrations.CreateModel( + name='App', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=128)), + ('curies_file', models.FileField(blank=True, null=True, upload_to='curies_files/')), + ('meta_knowledge_graph_file', models.FileField(blank=True, null=True, upload_to='meta_knowledge_graph_files')), + ], + ), + migrations.AlterField( + model_name='transaction', + name='chp_app', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='dispatcher.app'), + ), + ] diff --git a/chp_api/dispatcher/migrations/0005_zenodofile_remove_app_curies_file_and_more.py b/chp_api/dispatcher/migrations/0005_zenodofile_remove_app_curies_file_and_more.py new file mode 100755 index 0000000..41fd227 --- /dev/null +++ b/chp_api/dispatcher/migrations/0005_zenodofile_remove_app_curies_file_and_more.py @@ -0,0 +1,40 @@ +# Generated by Django 4.2 on 2023-04-18 19:28 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('dispatcher', '0004_app_alter_transaction_chp_app'), 
+ ] + + operations = [ + migrations.CreateModel( + name='ZenodoFile', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('zenodo_id', models.CharField(max_length=128)), + ('file_key', models.CharField(max_length=128)), + ], + ), + migrations.RemoveField( + model_name='app', + name='curies_file', + ), + migrations.RemoveField( + model_name='app', + name='meta_knowledge_graph_file', + ), + migrations.AddField( + model_name='app', + name='curies_zenodo_file', + field=models.OneToOneField(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='curies_zenodo_file', to='dispatcher.zenodofile'), + ), + migrations.AddField( + model_name='app', + name='meta_knowledge_graph_zenodo_file', + field=models.OneToOneField(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='meta_knowledge_graph_zenodo_file', to='dispatcher.zenodofile'), + ), + ] diff --git a/chp_api/dispatcher/migrations/0006_dispatchersettings.py b/chp_api/dispatcher/migrations/0006_dispatchersettings.py new file mode 100755 index 0000000..7b57b18 --- /dev/null +++ b/chp_api/dispatcher/migrations/0006_dispatchersettings.py @@ -0,0 +1,23 @@ +# Generated by Django 4.2 on 2023-05-01 16:47 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('dispatcher', '0005_zenodofile_remove_app_curies_file_and_more'), + ] + + operations = [ + migrations.CreateModel( + name='DispatcherSettings', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('trapi_version', models.CharField(default='1.4', max_length=28)), + ], + options={ + 'abstract': False, + }, + ), + ] diff --git a/chp_api/dispatcher/migrations/0007_template_templatematch.py b/chp_api/dispatcher/migrations/0007_template_templatematch.py new file mode 100755 index 0000000..f67e43f --- /dev/null +++ 
b/chp_api/dispatcher/migrations/0007_template_templatematch.py @@ -0,0 +1,34 @@ +# Generated by Django 4.2.1 on 2023-05-25 19:53 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('dispatcher', '0006_dispatchersettings'), + ] + + operations = [ + migrations.CreateModel( + name='Template', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('app_name', models.CharField(max_length=128)), + ('subject', models.CharField(max_length=128)), + ('object', models.CharField(max_length=128)), + ('predicate', models.CharField(max_length=128)), + ], + ), + migrations.CreateModel( + name='TemplateMatch', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('subject', models.CharField(max_length=128)), + ('object', models.CharField(max_length=128)), + ('predicate', models.CharField(max_length=128)), + ('template', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='dispatcher.template')), + ], + ), + ] diff --git a/chp_api/dispatcher/migrations/0008_dispatchersettings_sri_node_normalizer_baseurl.py b/chp_api/dispatcher/migrations/0008_dispatchersettings_sri_node_normalizer_baseurl.py new file mode 100755 index 0000000..9a69512 --- /dev/null +++ b/chp_api/dispatcher/migrations/0008_dispatchersettings_sri_node_normalizer_baseurl.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.1 on 2023-05-29 22:26 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('dispatcher', '0007_template_templatematch'), + ] + + operations = [ + migrations.AddField( + model_name='dispatchersettings', + name='sri_node_normalizer_baseurl', + field=models.URLField(default='https://nodenormalization-sri.renci.org', max_length=128), + ), + ] diff --git 
a/chp_api/dispatcher/migrations/0009_rename_dispatchersettings_dispatchersetting.py b/chp_api/dispatcher/migrations/0009_rename_dispatchersettings_dispatchersetting.py new file mode 100755 index 0000000..8f5d55f --- /dev/null +++ b/chp_api/dispatcher/migrations/0009_rename_dispatchersettings_dispatchersetting.py @@ -0,0 +1,17 @@ +# Generated by Django 4.2.1 on 2023-05-29 23:32 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('dispatcher', '0008_dispatchersettings_sri_node_normalizer_baseurl'), + ] + + operations = [ + migrations.RenameModel( + old_name='DispatcherSettings', + new_name='DispatcherSetting', + ), + ] diff --git a/chp_api/dispatcher/migrations/0010_alter_dispatchersetting_trapi_version.py b/chp_api/dispatcher/migrations/0010_alter_dispatchersetting_trapi_version.py new file mode 100644 index 0000000..1952c00 --- /dev/null +++ b/chp_api/dispatcher/migrations/0010_alter_dispatchersetting_trapi_version.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.11 on 2024-05-08 22:06 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('dispatcher', '0009_rename_dispatchersettings_dispatchersetting'), + ] + + operations = [ + migrations.AlterField( + model_name='dispatchersetting', + name='trapi_version', + field=models.CharField(default='1.5', max_length=28), + ), + ] diff --git a/chp_api/dispatcher/models.py b/chp_api/dispatcher/models.py index 28e69ac..9b62d73 100644 --- a/chp_api/dispatcher/models.py +++ b/chp_api/dispatcher/models.py @@ -1,10 +1,78 @@ +import os +import json +import requests from django.db import models + +class ZenodoFile(models.Model): + zenodo_id = models.CharField(max_length=128) + file_key = models.CharField(max_length=128) + + def __str__(self): + return f'{self.zenodo_id}/{self.file_key}' + + def get_record(self): + return requests.get(f"https://zenodo.org/api/records/{self.zenodo_id}").json() + + def load_file(self, 
base_url="https://zenodo.org/api/records"): + r = requests.get(f"{base_url}/{self.zenodo_id}").json() + files = {f["key"]: f for f in r["files"]} + f = files[self.file_key] + download_link = f["links"]["self"] + file_type = f["type"] + if file_type == 'json': + return requests.get(download_link).json() + raise NotImplementedError(f'File type of: {file_type} is not implemented.') + +class App(models.Model): + name = models.CharField(max_length=128) + curies_zenodo_file = models.OneToOneField(ZenodoFile, on_delete=models.CASCADE, null=True, blank=True, related_name='curies_zenodo_file') + meta_knowledge_graph_zenodo_file = models.OneToOneField(ZenodoFile, on_delete=models.CASCADE, null=True, blank=True, related_name='meta_knowledge_graph_zenodo_file') + + def __str__(self): + return self.name + +class Template(models.Model): + app_name = models.CharField(max_length=128) + subject = models.CharField(max_length=128) + object = models.CharField(max_length=128) + predicate = models.CharField(max_length=128) + +class TemplateMatch(models.Model): + template = models.ForeignKey(Template, on_delete=models.CASCADE) + subject = models.CharField(max_length=128) + object = models.CharField(max_length=128) + predicate = models.CharField(max_length=128) + class Transaction(models.Model): id = models.CharField(max_length=100, primary_key=True) date_time = models.DateTimeField(auto_now=True) query = models.JSONField(default=dict) status = models.CharField(max_length=100, default="", null=True) versions = models.JSONField(default=dict) - chp_app = models.CharField(max_length=128, null=True) + chp_app = models.ForeignKey(App, on_delete=models.CASCADE, null=True, blank=True) + + +class Singleton(models.Model): + + class Meta: + abstract = True + + def save(self, *args, **kwargs): + self.pk = 1 + super(Singleton, self).save(*args, **kwargs) + + def delete(self, *args, **kwargs): + pass + + @classmethod + def load(cls): + obj, _ = cls.objects.get_or_create(pk=1) + return obj + +class 
DispatcherSetting(Singleton): + trapi_version = models.CharField(max_length=28, default='1.5') + sri_node_normalizer_baseurl = models.URLField(max_length=128, default='https://nodenormalization-sri.renci.org') + def __str__(self): + return 'settings' diff --git a/chp_api/dispatcher/permissions.py b/chp_api/dispatcher/permissions.py new file mode 100644 index 0000000..7ffebf9 --- /dev/null +++ b/chp_api/dispatcher/permissions.py @@ -0,0 +1,10 @@ +from rest_framework import permissions + + +class CustomQueryPostPermission(permissions.BasePermission): + """ + Allows the query POST endpoint to work without any permissions. + """ + + def has_permission(self, request, view): + return True diff --git a/chp_api/chp_api/settings/__init__.py b/chp_api/dispatcher/scripts/__init__.py similarity index 100% rename from chp_api/chp_api/settings/__init__.py rename to chp_api/dispatcher/scripts/__init__.py diff --git a/chp_api/dispatcher/scripts/gene_spec_curie_templater.py b/chp_api/dispatcher/scripts/gene_spec_curie_templater.py new file mode 100644 index 0000000..ddeea57 --- /dev/null +++ b/chp_api/dispatcher/scripts/gene_spec_curie_templater.py @@ -0,0 +1,78 @@ +import tqdm +import json +import requests +from collections import defaultdict +from gene_specificity.models import CurieTemplate, CurieTemplateMatch, GeneToTissue, TissueToGene + +CHUNK_SIZE = 500 + +def _get_ascendants(curies, category): + mapping = defaultdict(set) + # map curie with curie + for curie in curies: + mapping[curie].add(curie) + if category == 'biolink:Gene': + return dict(mapping) + for i in tqdm.tqdm(range(0, len(curies), CHUNK_SIZE), desc='Getting ancestors in chunks of size {}'.format(CHUNK_SIZE)): + curie_subset = curies[i:i+CHUNK_SIZE] + query_graph = { + "nodes": { + "n0": { + "categories":[category] + }, + "n1": { + "ids": curie_subset + }, + }, + "edges": { + "e0": { + "subject": "n1", + "object": "n0", + "predicates": ["biolink:part_of", "biolink:subclass_of"], + } + } + } + query = { + 
"message": { + "query_graph": query_graph, + } + } + url = 'https://ontology-kp.apps.renci.org/query' + r = requests.post(url, json=query, timeout=1000) + answer = json.loads(r.content) + for edge_id, edge in answer['message']['knowledge_graph']['edges'].items(): + subject = edge['subject'] + object = edge['object'] + mapping[object].add(subject) + return dict(mapping) + + +def run(): + gene_objects = GeneToTissue.objects.all() + tissue_objects = TissueToGene.objects.all() + gene_curies = set() + for gene_object in gene_objects: + gene_curies.add(gene_object.gene_id) + tissue_curies = set() + for tissue_object in tissue_objects: + tissue_curies.add(tissue_object.tissue_id) + gene_ascendants = _get_ascendants(list(gene_curies), 'biolink:Gene') + tissue_ascendants = _get_ascendants(list(tissue_curies), 'biolink:GrossAnatomicalStructure') + + CurieTemplate.objects.all().delete() + CurieTemplateMatch.objects.all().delete() + + for gene_ancestor, handled_gene_descendants in gene_ascendants.items(): + curie_template = CurieTemplate(curie=gene_ancestor) + curie_template.save() + for handled_gene_descendant in handled_gene_descendants: + curie_template_match = CurieTemplateMatch(curie_template=curie_template, + curie=handled_gene_descendant) + curie_template_match.save() + for tissue_ancestor, handled_tissue_descendants in tissue_ascendants.items(): + curie_template = CurieTemplate(curie=tissue_ancestor) + curie_template.save() + for handled_tissue_descendant in handled_tissue_descendants: + curie_template_match = CurieTemplateMatch(curie_template=curie_template, + curie=handled_tissue_descendant) + curie_template_match.save() diff --git a/chp_api/dispatcher/scripts/load_db_apps.py b/chp_api/dispatcher/scripts/load_db_apps.py new file mode 100644 index 0000000..a340d3e --- /dev/null +++ b/chp_api/dispatcher/scripts/load_db_apps.py @@ -0,0 +1,15 @@ +from django.conf import settings + +from ..models import App + + +def run(): + for app_name in settings.INSTALLED_CHP_APPS: + 
app_db_obj, created = App.objects.get_or_create(name=app_name) + if created: + app_db_obj.save() + + # Create a dummy app for the dispatcher + app_db_obj, created = App.objects.get_or_create(name='dispatcher') + if created: + app_db_obj.save() diff --git a/chp_api/dispatcher/scripts/populate_gene_spec.py b/chp_api/dispatcher/scripts/populate_gene_spec.py new file mode 100644 index 0000000..dfe8a48 --- /dev/null +++ b/chp_api/dispatcher/scripts/populate_gene_spec.py @@ -0,0 +1,41 @@ +import json +from gene_specificity.models import GeneToTissue, TissueToGene + + +def run(): + max_count = 20 + p_val_thresh = 0.05 + + with open('gene_to_tissue.json', 'r') as f: + gene_to_tissue = json.load(f) + + for gene_id, tissue_dict in gene_to_tissue.items(): + i = 0 + for tissue_id, data_obj in tissue_dict.items(): + if i == max_count: + break + spec_val = data_obj['spec'] + norm_spec_val = data_obj['norm_spec'] + p_val = data_obj['p_val'] + if p_val > p_val_thresh: + break + gtt = GeneToTissue(gene_id = gene_id, tissue_id = tissue_id, spec = spec_val, norm_spec = norm_spec_val, p_val = p_val) + gtt.save() + i += 1 + + with open('tissue_to_gene.json', 'r') as f: + tissue_to_gene = json.load(f) + + for tissue_id, gene_dict in tissue_to_gene.items(): + i = 0 + for gene_id, data_obj in gene_dict.items(): + if i == max_count: + break + spec_val = data_obj['spec'] + norm_spec_val = data_obj['norm_spec'] + p_val = data_obj['p_val'] + if p_val > p_val_thresh: + break + ttg = TissueToGene(tissue_id = tissue_id, gene_id = gene_id, spec = spec_val, norm_spec = norm_spec_val, p_val = p_val) + ttg.save() + i += 1 diff --git a/chp_api/dispatcher/scripts/templater.py b/chp_api/dispatcher/scripts/templater.py new file mode 100644 index 0000000..3c96f42 --- /dev/null +++ b/chp_api/dispatcher/scripts/templater.py @@ -0,0 +1,67 @@ +import itertools +from bmt import Toolkit +from importlib import import_module +from collections import defaultdict +from django.core.management.base import 
BaseCommand +from reasoner_pydantic import MetaKnowledgeGraph, MetaEdge +from django.conf import settings +from ..models import App, Template, TemplateMatch + +APPS = [import_module(app+'.app_interface') for app in settings.INSTALLED_CHP_APPS] +TK = Toolkit() + + +def _collect_metakgs_by_app(): + # Collect each app's meta kg + app_to_meta_kg = dict() + for app, app_name in zip(APPS, settings.INSTALLED_CHP_APPS): + app_db_obj = App.objects.get(name=app_name) + if app_db_obj.meta_knowledge_graph_zenodo_file: + meta_kg = app_db_obj.meta_knowledge_graph_zenodo_file.load_file(base_url="https://sandbox.zenodo.org/api/records") + # Load default location + else: + get_app_meta_kg_fn = getattr(app, 'get_meta_knowledge_graph') + meta_kg = get_app_meta_kg_fn() + meta_kg = MetaKnowledgeGraph.parse_obj(meta_kg.to_dict()) + app_to_meta_kg[app_name] = meta_kg + return app_to_meta_kg + +def _build_app_templates(meta_kg): + matcher = defaultdict(set) + for meta_edge in meta_kg.edges: + subject_ancestors = TK.get_ancestors(meta_edge.subject, reflexive=True, mixin=False, formatted=True) + predicate_ancestors = TK.get_ancestors(meta_edge.predicate, reflexive=True, mixin=False, formatted=True) + object_ancestors = TK.get_ancestors(meta_edge.object, reflexive=True, mixin=False, formatted=True) + for edge in itertools.product(*[subject_ancestors, predicate_ancestors, object_ancestors]): + template_meta_edge = MetaEdge(subject=edge[0], predicate=edge[1], object=edge[2]) + matcher[template_meta_edge].add(meta_edge) + return dict(matcher) + +def run(): + app_to_meta_kg = _collect_metakgs_by_app() + app_to_templates = dict() + for app_name, meta_kg in app_to_meta_kg.items(): + app_to_templates[app_name] = _build_app_templates(meta_kg) + Template.objects.all().delete() + TemplateMatch.objects.all().delete() + + # Populate Templater + for app_name, app_templates in app_to_templates.items(): + for app_template, app_template_matches in app_templates.items(): + template = Template(app_name = 
app_name, + subject = app_template.subject, + object = app_template.object, + predicate = app_template.predicate) + template.save() + for app_template_match in app_template_matches: + template_match = TemplateMatch(template=template, + subject = app_template_match.subject, + object = app_template_match.object, + predicate = app_template_match.predicate) + template_match.save() + + + + + + diff --git a/chp_api/dispatcher/templates.py.save b/chp_api/dispatcher/templates.py.save new file mode 100644 index 0000000..f4de3a8 --- /dev/null +++ b/chp_api/dispatcher/templates.py.save @@ -0,0 +1,2 @@ +8 85.4% - 396072s +H 0 0 83.0000000 12.25038 diff --git a/chp_api/dispatcher/urls.py b/chp_api/dispatcher/urls.py index f339597..24e0751 100644 --- a/chp_api/dispatcher/urls.py +++ b/chp_api/dispatcher/urls.py @@ -23,16 +23,8 @@ path('query/', views.query.as_view()), path('query', views.query.as_view()), path('meta_knowledge_graph/', views.meta_knowledge_graph.as_view()), - path('curies/', views.curies.as_view()), + path('meta_knowledge_graph', views.meta_knowledge_graph.as_view()), path('versions/', views.versions.as_view()), - path('v1.1/query/', views.query.as_view(trapi_version='1.1')), - path('v1.1/meta_knowledge_graph/', views.meta_knowledge_graph.as_view()), - path('v1.1/curies/', views.curies.as_view(trapi_version='1.1')), - path('v1.1/versions/', views.versions.as_view(trapi_version='1.1')), - path('v1.2/query/', views.query.as_view(trapi_version='1.2')), - path('v1.2/meta_knowledge_graph/', views.meta_knowledge_graph.as_view(trapi_version='1.2')), - path('v1.2/curies/', views.curies.as_view(trapi_version='1.2')), - path('v1.2/versions/', views.versions.as_view(trapi_version='1.2')), path('transactions/', views.TransactionList.as_view(), name='transaction-list'), path('recent/', views.RecentTransactionList.as_view(), name='recent-transaction-list'), path('transactions//', views.TransactionDetail.as_view(), name='transactions-detail') diff --git 
a/chp_api/dispatcher/views.py b/chp_api/dispatcher/views.py index 3a26e38..413f6c2 100644 --- a/chp_api/dispatcher/views.py +++ b/chp_api/dispatcher/views.py @@ -1,12 +1,13 @@ """ CHP Core API Views """ -from jsonschema import ValidationError from copy import deepcopy from datetime import datetime, timedelta +from bmt import Toolkit from .base import Dispatcher -from .models import Transaction +from .models import Transaction, DispatcherSetting from .serializers import TransactionListSerializer, TransactionDetailSerializer +from .permissions import CustomQueryPostPermission from django.http import HttpResponse, JsonResponse from django.shortcuts import get_object_or_404 @@ -16,69 +17,66 @@ from rest_framework import mixins from rest_framework import generics +TOOLKIT = Toolkit() class query(APIView): - trapi_version = '1.2' - def __init__(self, trapi_version='1.2', **kwargs): - self.trapi_version = trapi_version - super(query, self).__init__(**kwargs) - + permission_classes = [CustomQueryPostPermission] + def post(self, request): + # Get current trapi and biolink versions + dispatcher_settings = DispatcherSetting.load() + if request.method == 'POST': # Initialize Dispatcher - dispatcher = Dispatcher(request, self.trapi_version) + dispatcher = Dispatcher( + request, + dispatcher_settings.trapi_version, + TOOLKIT.get_model_version() + ) # Process Query - query = None try: - query = dispatcher.process_request(request, trapi_version=self.trapi_version) + message = dispatcher.process_request( + request, + trapi_version=dispatcher_settings.trapi_version, + ) except Exception as e: if 'Workflow Error' in str(e): return dispatcher.process_invalid_workflow(request, str(e)) else: return dispatcher.process_invalid_trapi(request) - # Return responses - return dispatcher.get_response(query) - -class curies(APIView): - trapi_version = '1.2' - def __init__(self, trapi_version='1.2', **kwargs): - self.trapi_version = trapi_version - super(curies, self).__init__(**kwargs) - - def 
get(self, request): - if request.method == 'GET': - # Initialize dispatcher - dispatcher = Dispatcher(request, self.trapi_version) - - # Get all chp app curies - curies_db = dispatcher.get_curies() - return JsonResponse(curies_db) + return dispatcher.get_response(message) class meta_knowledge_graph(APIView): - trapi_version = '1.2' - def __init__(self, trapi_version='1.2', **kwargs): - self.trapi_version = trapi_version - super(meta_knowledge_graph, self).__init__(**kwargs) - + def get(self, request): + # Get current trapi and biolink versions + dispatcher_settings = DispatcherSetting.load() + if request.method == 'GET': # Initialize Dispatcher - dispatcher = Dispatcher(request, self.trapi_version) + dispatcher = Dispatcher( + request, + dispatcher_settings.trapi_version, + TOOLKIT.get_model_version() + ) # Get merged meta KG meta_knowledge_graph = dispatcher.get_meta_knowledge_graph() return JsonResponse(meta_knowledge_graph.to_dict()) class versions(APIView): - trapi_version = '1.2' - def __init__(self, trapi_version='1.2', **kwargs): - self.trapi_version = trapi_version - super(version, self).__init__(**kwargs) def get(self, request): + # Get current trapi and biolink versions + dispatcher_settings = DispatcherSetting.load() + if request.method == 'GET': # Initialize Dispatcher - dispatcher = Dispatcher(request, self.trapi_version) + dispatcher = Dispatcher( + request, + dispatcher_settings.trapi_version, + TOOLKIT.get_model_version() + ) return JsonResponse(dispatcher.get_versions()) class TransactionList(mixins.ListModelMixin, generics.GenericAPIView): diff --git a/chp_api/dispatcher/zenodo.py b/chp_api/dispatcher/zenodo.py new file mode 100644 index 0000000..705d7f6 --- /dev/null +++ b/chp_api/dispatcher/zenodo.py @@ -0,0 +1,29 @@ +import os +import json +import requests + +def zenodo_get(zenodo_id, file_key, file_type='infer'): + """ This function will download the requested Zenodo file into memory. 
+ + Args: + :param zenodo_id: The string id for the Zenodo file. For example, if the Zenodo url is: https://zenodo.org/record/1184524#.ZD7aF_bML-g, + then the zenodo_id is: 1184524. + :type zenodo_id: str + :param file_key: This is a string to the file_key in the Zenodo bucket or in the zenodo record. + :type file_key: string + + Kwargs: + :param file_type: The file type of the hosted Zenodo file. If inferred, will try to infer type from file extension. + """ + r = requests.get(f"https://zenodo.org/api/records/{zenodo_id}").json() + files = {f["key"]: f for f in r["files"]} + f = files[file_key] + download_link = f["links"]["self"] + if file_type == 'infer': + file_type = f["type"] + if file_type == 'json': + return requests.get(download_link).json() + raise NotImplementedError(f'File type of: {file_type} is not implemented.') + + + diff --git a/chp_api/gennifer/__init__.py b/chp_api/gennifer/__init__.py new file mode 100644 index 0000000..b8023d8 --- /dev/null +++ b/chp_api/gennifer/__init__.py @@ -0,0 +1 @@ +__version__ = '0.0.1' diff --git a/chp_api/gennifer/_version.py b/chp_api/gennifer/_version.py new file mode 100644 index 0000000..1f356cc --- /dev/null +++ b/chp_api/gennifer/_version.py @@ -0,0 +1 @@ +__version__ = '1.0.0' diff --git a/chp_api/gennifer/admin.py b/chp_api/gennifer/admin.py new file mode 100644 index 0000000..c50b8c4 --- /dev/null +++ b/chp_api/gennifer/admin.py @@ -0,0 +1,15 @@ +from django.contrib import admin + +from .models import Algorithm, Dataset, Study, Task, Result, Gene, UserAnalysisSession, AlgorithmInstance, Hyperparameter, Annotation, Annotated + +admin.site.register(Algorithm) +admin.site.register(AlgorithmInstance) +admin.site.register(Hyperparameter) +admin.site.register(Dataset) +admin.site.register(Study) +admin.site.register(Task) +admin.site.register(Result) +admin.site.register(Gene) +admin.site.register(UserAnalysisSession) +admin.site.register(Annotation) +admin.site.register(Annotated) \ No newline at end of file diff --git 
a/docker-compose.local.yml b/chp_api/gennifer/algorithm-loader.py similarity index 100% rename from docker-compose.local.yml rename to chp_api/gennifer/algorithm-loader.py diff --git a/chp_api/gennifer/app_interface.py b/chp_api/gennifer/app_interface.py new file mode 100644 index 0000000..49ab30f --- /dev/null +++ b/chp_api/gennifer/app_interface.py @@ -0,0 +1,27 @@ +from asyncio import constants +from .trapi_interface import TrapiInterface +from .apps import GenniferConfig +from reasoner_pydantic import MetaKnowledgeGraph, Message +from typing import TYPE_CHECKING, Union, List + +def get_app_config(message: Union[Message, None]) -> GenniferConfig: + return GenniferConfig + + +def get_trapi_interface(get_app_config: GenniferConfig = get_app_config(None)): + return TrapiInterface(trapi_version='1.5') + + +def get_meta_knowledge_graph() -> MetaKnowledgeGraph: + interface: TrapiInterface = get_trapi_interface() + return interface.get_meta_knowledge_graph() + + +def get_response(consistent_queries: List[Message], logger): + """ Should return app responses plus app_logs, status, and description information.""" + responses = [] + interface = get_trapi_interface() + for consistent_query in consistent_queries: + response = interface.get_response(consistent_query, logger) + responses.append(response) + return responses diff --git a/chp_api/gennifer/app_meta_data/conflation_map.json b/chp_api/gennifer/app_meta_data/conflation_map.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/chp_api/gennifer/app_meta_data/conflation_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/chp_api/gennifer/app_meta_data/epc.json b/chp_api/gennifer/app_meta_data/epc.json new file mode 100644 index 0000000..63a13b9 --- /dev/null +++ b/chp_api/gennifer/app_meta_data/epc.json @@ -0,0 +1,11 @@ +[ + { + "attribute_type_id": "biolink:primary_knowledge_source", + "original_attribute_name": null, + "value": "infores:connections-hypothesis", + "value_type_id": 
"biolink:InformationResource", + "attribute_source": "infores:connections-hypothesis", + "value_url": "http://chp.thayer.dartmouth.edu", + "description": "The Connections Hypothesis Provider from NCATS Translator." + } +] diff --git a/chp_api/gennifer/app_meta_data/meta_knowledge_graph.json b/chp_api/gennifer/app_meta_data/meta_knowledge_graph.json new file mode 100644 index 0000000..41ffac5 --- /dev/null +++ b/chp_api/gennifer/app_meta_data/meta_knowledge_graph.json @@ -0,0 +1,21 @@ +{ + "nodes": { + "biolink:Gene": { + "id_prefixes": [ + "ENSEMBL" + ] + } + }, + "edges": [ + { + "subject": "biolink:Gene", + "object": "biolink:Gene", + "predicate": "biolink:regulates" + }, + { + "subject": "biolink:Gene", + "object": "biolink:Gene", + "predicate": "biolink:regulated_by" + } + ] +} diff --git a/chp_api/gennifer/apps.py b/chp_api/gennifer/apps.py new file mode 100644 index 0000000..2967f54 --- /dev/null +++ b/chp_api/gennifer/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + + +class GenniferConfig(AppConfig): + default_auto_field = 'django.db.models.BigAutoField' + name = 'gennifer' diff --git a/chp_api/gennifer/migrations/0001_initial.py b/chp_api/gennifer/migrations/0001_initial.py new file mode 100755 index 0000000..b48d24a --- /dev/null +++ b/chp_api/gennifer/migrations/0001_initial.py @@ -0,0 +1,72 @@ +# Generated by Django 4.2.1 on 2023-05-29 22:28 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name='Algorithm', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=128)), + ('run_url', models.URLField(max_length=128)), + ], + ), + migrations.CreateModel( + name='Dataset', + fields=[ + ('title', 
models.CharField(max_length=128)), + ('zenodo_id', models.CharField(max_length=128, primary_key=True, serialize=False)), + ('doi', models.CharField(max_length=128)), + ('description', models.TextField(blank=True, null=True)), + ('upload_user', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), + ], + ), + migrations.CreateModel( + name='Gene', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=128)), + ('curie', models.CharField(max_length=128)), + ('variant', models.TextField(blank=True, null=True)), + ], + ), + migrations.CreateModel( + name='InferenceStudy', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('timestamp', models.DateTimeField(auto_now_add=True)), + ('max_study_edge_weight', models.FloatField(null=True)), + ('min_study_edge_weight', models.FloatField(null=True)), + ('avg_study_edge_weight', models.FloatField(null=True)), + ('std_study_edge_weight', models.FloatField(null=True)), + ('is_public', models.BooleanField(default=False)), + ('status', models.CharField(max_length=10)), + ('error_message', models.TextField(blank=True, null=True)), + ('algorithm', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='studies', to='gennifer.algorithm')), + ('dataset', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='studies', to='gennifer.dataset')), + ('user', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='studies', to=settings.AUTH_USER_MODEL)), + ], + ), + migrations.CreateModel( + name='InferenceResult', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('edge_weight', models.FloatField()), + ('is_public', models.BooleanField(default=False)), + ('study', 
models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='results', to='gennifer.inferencestudy')), + ('target', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='inference_result_target', to='gennifer.gene')), + ('tf', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='inference_result_tf', to='gennifer.gene')), + ], + ), + ] diff --git a/chp_api/gennifer/migrations/0002_inferenceresult_user.py b/chp_api/gennifer/migrations/0002_inferenceresult_user.py new file mode 100755 index 0000000..4fa44e3 --- /dev/null +++ b/chp_api/gennifer/migrations/0002_inferenceresult_user.py @@ -0,0 +1,21 @@ +# Generated by Django 4.2.1 on 2023-05-29 23:15 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ('gennifer', '0001_initial'), + ] + + operations = [ + migrations.AddField( + model_name='inferenceresult', + name='user', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='results', to=settings.AUTH_USER_MODEL), + ), + ] diff --git a/chp_api/gennifer/migrations/0003_remove_algorithm_run_url_algorithm_url.py b/chp_api/gennifer/migrations/0003_remove_algorithm_run_url_algorithm_url.py new file mode 100755 index 0000000..c4184e4 --- /dev/null +++ b/chp_api/gennifer/migrations/0003_remove_algorithm_run_url_algorithm_url.py @@ -0,0 +1,23 @@ +# Generated by Django 4.2.1 on 2023-05-30 00:32 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('gennifer', '0002_inferenceresult_user'), + ] + + operations = [ + migrations.RemoveField( + model_name='algorithm', + name='run_url', + ), + migrations.AddField( + model_name='algorithm', + name='url', + field=models.CharField(default='localhost', max_length=128), + 
preserve_default=False, + ), + ] diff --git a/chp_api/gennifer/migrations/0004_remove_inferencestudy_algorithm_and_more.py b/chp_api/gennifer/migrations/0004_remove_inferencestudy_algorithm_and_more.py new file mode 100755 index 0000000..f15b3db --- /dev/null +++ b/chp_api/gennifer/migrations/0004_remove_inferencestudy_algorithm_and_more.py @@ -0,0 +1,47 @@ +# Generated by Django 4.2.1 on 2023-05-30 22:18 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('gennifer', '0003_remove_algorithm_run_url_algorithm_url'), + ] + + operations = [ + migrations.RemoveField( + model_name='inferencestudy', + name='algorithm', + ), + migrations.AddField( + model_name='algorithm', + name='description', + field=models.TextField(blank=True, null=True), + ), + migrations.AddField( + model_name='algorithm', + name='edge_weight_description', + field=models.TextField(blank=True, null=True), + ), + migrations.AddField( + model_name='algorithm', + name='edge_weight_type', + field=models.CharField(blank=True, max_length=128, null=True), + ), + migrations.CreateModel( + name='AlgorithmInstance', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('hyperparameters', models.JSONField(null=True)), + ('algorithm', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='instances', to='gennifer.algorithm')), + ], + ), + migrations.AddField( + model_name='inferencestudy', + name='algorithm_instance', + field=models.ForeignKey(default=1, on_delete=django.db.models.deletion.CASCADE, related_name='studies', to='gennifer.algorithminstance'), + preserve_default=False, + ), + ] diff --git a/chp_api/gennifer/migrations/0005_gene_chp_preferred_curie.py b/chp_api/gennifer/migrations/0005_gene_chp_preferred_curie.py new file mode 100755 index 0000000..3b1955b --- /dev/null +++ 
b/chp_api/gennifer/migrations/0005_gene_chp_preferred_curie.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.1 on 2023-05-31 01:29 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('gennifer', '0004_remove_inferencestudy_algorithm_and_more'), + ] + + operations = [ + migrations.AddField( + model_name='gene', + name='chp_preferred_curie', + field=models.CharField(blank=True, max_length=128, null=True), + ), + ] diff --git a/chp_api/gennifer/migrations/0006_algorithm_directed.py b/chp_api/gennifer/migrations/0006_algorithm_directed.py new file mode 100755 index 0000000..a7382ec --- /dev/null +++ b/chp_api/gennifer/migrations/0006_algorithm_directed.py @@ -0,0 +1,19 @@ +# Generated by Django 4.2.1 on 2023-06-04 02:08 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('gennifer', '0005_gene_chp_preferred_curie'), + ] + + operations = [ + migrations.AddField( + model_name='algorithm', + name='directed', + field=models.BooleanField(default=False), + preserve_default=False, + ), + ] diff --git a/chp_api/gennifer/migrations/0007_useranalysissession.py b/chp_api/gennifer/migrations/0007_useranalysissession.py new file mode 100755 index 0000000..76be5da --- /dev/null +++ b/chp_api/gennifer/migrations/0007_useranalysissession.py @@ -0,0 +1,27 @@ +# Generated by Django 4.2.2 on 2023-06-18 22:33 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion +import uuid + + +class Migration(migrations.Migration): + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ('gennifer', '0006_algorithm_directed'), + ] + + operations = [ + migrations.CreateModel( + name='UserAnalysisSession', + fields=[ + ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)), + ('name', models.CharField(max_length=128)), + ('session_data', models.JSONField()), + 
('is_saved', models.BooleanField(default=False)), + ('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), + ], + ), + ] diff --git a/chp_api/gennifer/migrations/0008_result_study_task_and_more.py b/chp_api/gennifer/migrations/0008_result_study_task_and_more.py new file mode 100755 index 0000000..a50eeb5 --- /dev/null +++ b/chp_api/gennifer/migrations/0008_result_study_task_and_more.py @@ -0,0 +1,83 @@ +# Generated by Django 4.2.2 on 2023-06-25 23:19 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('gennifer', '0007_useranalysissession'), + ] + + operations = [ + migrations.CreateModel( + name='Result', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('edge_weight', models.FloatField()), + ('is_public', models.BooleanField(default=False)), + ], + ), + migrations.CreateModel( + name='Study', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=128)), + ('description', models.TextField(blank=True, null=True)), + ('status', models.CharField(max_length=10)), + ('timestamp', models.DateTimeField(auto_now_add=True)), + ], + options={ + 'verbose_name_plural': 'studies', + }, + ), + migrations.CreateModel( + name='Task', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('timestamp', models.DateTimeField(auto_now_add=True)), + ('max_study_edge_weight', models.FloatField(null=True)), + ('min_study_edge_weight', models.FloatField(null=True)), + ('avg_study_edge_weight', models.FloatField(null=True)), + ('std_study_edge_weight', models.FloatField(null=True)), + ('is_public', models.BooleanField(default=False)), + ('status', models.CharField(max_length=10)), + ('error_message', 
models.TextField(blank=True, null=True)), + ('algorithm_instance', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='tasks', to='gennifer.algorithminstance')), + ], + ), + migrations.RemoveField( + model_name='inferencestudy', + name='algorithm_instance', + ), + migrations.RemoveField( + model_name='inferencestudy', + name='dataset', + ), + migrations.RemoveField( + model_name='inferencestudy', + name='user', + ), + migrations.RenameField( + model_name='dataset', + old_name='upload_user', + new_name='user', + ), + migrations.DeleteModel( + name='InferenceResult', + ), + migrations.DeleteModel( + name='InferenceStudy', + ), + migrations.AddField( + model_name='task', + name='dataset', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='tasks', to='gennifer.dataset'), + ), + migrations.AddField( + model_name='task', + name='study', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='tasks', to='gennifer.study'), + ), + ] diff --git a/chp_api/gennifer/migrations/0009_task_user_study_user_result_target_result_task_and_more.py b/chp_api/gennifer/migrations/0009_task_user_study_user_result_target_result_task_and_more.py new file mode 100755 index 0000000..83374e4 --- /dev/null +++ b/chp_api/gennifer/migrations/0009_task_user_study_user_result_target_result_task_and_more.py @@ -0,0 +1,46 @@ +# Generated by Django 4.2.2 on 2023-06-25 23:19 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('gennifer', '0008_result_study_task_and_more'), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.AddField( + model_name='task', + name='user', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='tasks', to=settings.AUTH_USER_MODEL), + ), + migrations.AddField( + 
model_name='study', + name='user', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='studies', to=settings.AUTH_USER_MODEL), + ), + migrations.AddField( + model_name='result', + name='target', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='inference_result_target', to='gennifer.gene'), + ), + migrations.AddField( + model_name='result', + name='task', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='results', to='gennifer.task'), + ), + migrations.AddField( + model_name='result', + name='tf', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='inference_result_tf', to='gennifer.gene'), + ), + migrations.AddField( + model_name='result', + name='user', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='results', to=settings.AUTH_USER_MODEL), + ), + ] diff --git a/chp_api/gennifer/migrations/0010_hyperparameter_and_more.py b/chp_api/gennifer/migrations/0010_hyperparameter_and_more.py new file mode 100755 index 0000000..1f0d671 --- /dev/null +++ b/chp_api/gennifer/migrations/0010_hyperparameter_and_more.py @@ -0,0 +1,57 @@ +# Generated by Django 4.2.3 on 2023-07-10 19:16 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('gennifer', '0009_task_user_study_user_result_target_result_task_and_more'), + ] + + operations = [ + migrations.CreateModel( + name='Hyperparameter', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=128)), + ('type', models.CharField(choices=[('int', 'Integer'), ('bool', 'Boolean'), ('str', 'String'), ('float', 'Float')], default='float', max_length=5)), + ('info', models.TextField(blank=True, null=True)), + ('algorithm', 
models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='hyperparameters', to='gennifer.algorithm')), + ], + ), + migrations.RenameField( + model_name='task', + old_name='avg_study_edge_weight', + new_name='avg_task_edge_weight', + ), + migrations.RenameField( + model_name='task', + old_name='max_study_edge_weight', + new_name='max_task_edge_weight', + ), + migrations.RenameField( + model_name='task', + old_name='min_study_edge_weight', + new_name='min_task_edge_weight', + ), + migrations.RenameField( + model_name='task', + old_name='std_study_edge_weight', + new_name='std_task_edge_weight', + ), + migrations.RemoveField( + model_name='algorithminstance', + name='hyperparameters', + ), + migrations.CreateModel( + name='HyperparameterInstance', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('value_str', models.CharField(max_length=128)), + ('algorithm_instance', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='hyperparameters', to='gennifer.algorithminstance')), + ('hyperparameter', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='instances', to='gennifer.hyperparameter')), + ], + ), + ] diff --git a/chp_api/gennifer/migrations/0011_annotated_annotation_dataset_public_publication_and_more.py b/chp_api/gennifer/migrations/0011_annotated_annotation_dataset_public_publication_and_more.py new file mode 100755 index 0000000..3a33b2b --- /dev/null +++ b/chp_api/gennifer/migrations/0011_annotated_annotation_dataset_public_publication_and_more.py @@ -0,0 +1,65 @@ +# Generated by Django 4.2.5 on 2023-09-11 21:24 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('gennifer', '0010_hyperparameter_and_more'), + ] + + operations = [ + migrations.CreateModel( + name='Annotated', + fields=[ + ('id', models.BigAutoField(auto_created=True, 
primary_key=True, serialize=False, verbose_name='ID')), + ], + ), + migrations.CreateModel( + name='Annotation', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('type', models.CharField(choices=[('openai', 'OpenAI'), ('translator', 'Translator')], default='translator', max_length=32)), + ('timestamp', models.DateTimeField(auto_now_add=True)), + ('tr_formatted_relation_string', models.CharField(blank=True, max_length=256, null=True)), + ('tr_predicate', models.CharField(blank=True, max_length=128, null=True)), + ('tr_qualified_predicate', models.CharField(blank=True, max_length=128, null=True)), + ('tr_object_modifier', models.CharField(blank=True, max_length=128, null=True)), + ('tr_object_aspect', models.CharField(blank=True, max_length=128, null=True)), + ('tr_resource_id', models.CharField(blank=True, max_length=128, null=True)), + ('tr_primary_source', models.CharField(blank=True, max_length=128, null=True)), + ('oai_justification', models.TextField(blank=True, null=True)), + ('results', models.ManyToManyField(through='gennifer.Annotated', to='gennifer.result')), + ], + ), + migrations.AddField( + model_name='dataset', + name='public', + field=models.BooleanField(default=False), + ), + migrations.CreateModel( + name='Publication', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('curie', models.CharField(max_length=128)), + ('annotation', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='publications', to='gennifer.annotation')), + ], + ), + migrations.AddField( + model_name='annotated', + name='annotation', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='gennifer.annotation'), + ), + migrations.AddField( + model_name='annotated', + name='result', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='gennifer.result'), + ), + migrations.AddField( + 
model_name='result', + name='annotations', + field=models.ManyToManyField(through='gennifer.Annotated', to='gennifer.annotation'), + ), + ] diff --git a/chp_api/gennifer/migrations/__init__.py b/chp_api/gennifer/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/chp_api/gennifer/models.py b/chp_api/gennifer/models.py new file mode 100644 index 0000000..3c09378 --- /dev/null +++ b/chp_api/gennifer/models.py @@ -0,0 +1,199 @@ +import requests +import uuid + +from django.conf import settings +from django.db import models +from django.contrib.auth.models import User + + +class UserAnalysisSession(models.Model): + id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) + name = models.CharField(max_length=128) + user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE) + session_data = models.JSONField() + is_saved = models.BooleanField(default=False) + + def update_session_data(self, new_data): + self.session_data.update(new_data) + self.save() + + def __str__(self): + return self.name + +class Algorithm(models.Model): + name = models.CharField(max_length=128) + url = models.CharField(max_length=128) + edge_weight_description = models.TextField(null=True, blank=True) + edge_weight_type = models.CharField(max_length=128, null=True, blank=True) + description = models.TextField(null=True, blank=True) + directed = models.BooleanField() + + def __str__(self): + return self.name + + +class AlgorithmInstance(models.Model): + algorithm = models.ForeignKey(Algorithm, on_delete=models.CASCADE, related_name='instances') + + def __str__(self): + if self.hyperparameters: + hypers = tuple([f'{h}' for h in self.hyperparameters.all()]) + else: + hypers = '()' + return f'{self.algorithm.name}{hypers}' + + +class Hyperparameter(models.Model): + INT = "int" + BOOL = "bool" + STR = "str" + FLOAT = "float" + TYPE_CHOICES = ( + (INT, "Integer"), + (BOOL, "Boolean"), + (STR, "String"), + (FLOAT, "Float"), + ) + name = 
models.CharField(max_length=128) + type = models.CharField(max_length=5, choices=TYPE_CHOICES, default=FLOAT) + algorithm = models.ForeignKey(Algorithm, on_delete=models.CASCADE, related_name='hyperparameters') + info = models.TextField(null=True, blank=True) + + def get_type(self): + known_types = { + "int": int, + "bool": bool, + "str": str, + "float": float, + } + return known_types[self.type] + + def __str__(self): + return self.name + + +class HyperparameterInstance(models.Model): + hyperparameter = models.ForeignKey(Hyperparameter, on_delete=models.CASCADE, related_name='instances') + value_str = models.CharField(max_length=128) + algorithm_instance = models.ForeignKey(AlgorithmInstance, on_delete=models.CASCADE, related_name='hyperparameters') + + def get_value(self): + return self.hyperparameter.get_type()(self.value_str) + + def __str__(self): + return f'{self.hyperparameter.name}={self.value_str}' + +class Dataset(models.Model): + title = models.CharField(max_length=128) + zenodo_id = models.CharField(max_length=128, primary_key=True) + doi = models.CharField(max_length=128) + description = models.TextField(null=True, blank=True) + user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, null=True, blank=True) + public = models.BooleanField(default=False) + + def save(self, *args, **kwargs): + import re + + CLEANR = re.compile('<.*?>') + + info = self.get_record() + if 'status' in info and 'message' in info and len(info) == 2: + # This means that retrieval failed + raise ValueError(f'Could not retrieve zenodo record {self.zenodo_id}. 
Failed with message: {info["message"]}') + self.doi = info["doi"] + self.description = re.sub(CLEANR, '', info["metadata"]["description"]) + self.title = re.sub(CLEANR, '', info["metadata"]["title"]) + + super(Dataset, self).save(*args, **kwargs) + + def get_record(self): + return requests.get(f"https://zenodo.org/api/records/{self.zenodo_id}").json() + + def __str__(self): + return f'zenodo:{self.zenodo_id}' + +class Gene(models.Model): + name = models.CharField(max_length=128) + curie = models.CharField(max_length=128) + variant = models.TextField(null=True, blank=True) + chp_preferred_curie = models.CharField(max_length=128, null=True, blank=True) + + def __str__(self): + return self.name + +class Study(models.Model): + class Meta: + verbose_name_plural = "studies" + + name = models.CharField(max_length=128) + description = models.TextField(null=True, blank=True) + status = models.CharField(max_length=10) + timestamp = models.DateTimeField(auto_now_add=True) + user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, null=True, blank=True, related_name='studies') + + def __str__(self): + return self.name + + +class Task(models.Model): + algorithm_instance = models.ForeignKey(AlgorithmInstance, on_delete=models.CASCADE, related_name='tasks') + user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, null=True, blank=True, related_name='tasks') + dataset = models.ForeignKey(Dataset, on_delete=models.CASCADE, related_name='tasks') + timestamp = models.DateTimeField(auto_now_add=True) + # Study characteristics for all edge weights in a given study over a dataset + max_task_edge_weight = models.FloatField(null=True) + min_task_edge_weight = models.FloatField(null=True) + avg_task_edge_weight = models.FloatField(null=True) + std_task_edge_weight = models.FloatField(null=True) + is_public = models.BooleanField(default=False) + status = models.CharField(max_length=10) + error_message = models.TextField(null=True, blank=True) + 
study = models.ForeignKey(Study, on_delete=models.CASCADE, related_name='tasks') + + def __str__(self): + return f'{self.algorithm_instance} on {self.dataset.zenodo_id}' + + +class Result(models.Model): + # Stands for transcription factor + tf = models.ForeignKey(Gene, on_delete=models.CASCADE, related_name='inference_result_tf') + # Target is the gene that is regulated by the transcription factor + target = models.ForeignKey(Gene, on_delete=models.CASCADE, related_name='inference_result_target') + edge_weight = models.FloatField() + task = models.ForeignKey(Task, on_delete=models.CASCADE, related_name='results') + is_public = models.BooleanField(default=False) + user = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, null=True, blank=True, related_name='results') + annotations = models.ManyToManyField('Annotation', through='Annotated') + + def __str__(self): + return f'{self.tf}:{self.tf.curie} -> regulates -> {self.target}:{self.target.curie}' + +class Annotation(models.Model): + # Type of annotation + TYPE_CHOICES = ( + ('openai', "OpenAI"), + ('translator', "Translator"), + ) + type = models.CharField(max_length=32, choices=TYPE_CHOICES, default='translator') + timestamp = models.DateTimeField(auto_now_add=True) + results = models.ManyToManyField('Result', through='Annotated') + # Translator fields + tr_formatted_relation_string = models.CharField(max_length=256, null=True, blank=True) + tr_predicate = models.CharField(max_length=128, null=True, blank=True) + tr_qualified_predicate = models.CharField(max_length=128, null=True, blank=True) + tr_object_modifier = models.CharField(max_length=128, null=True, blank=True) + tr_object_aspect = models.CharField(max_length=128, null=True, blank=True) + tr_resource_id = models.CharField(max_length=128, null=True, blank=True) + tr_primary_source = models.CharField(max_length=128, null=True, blank=True) + # OpenAI fields + oai_justification = models.TextField(null=True, blank=True) + + +class 
Publication(models.Model): + curie = models.CharField(max_length=128) + annotation = models.ForeignKey(Annotation, on_delete=models.CASCADE, related_name='publications') + +class Annotated(models.Model): + result = models.ForeignKey(Result, on_delete=models.CASCADE) + annotation = models.ForeignKey(Annotation, on_delete=models.CASCADE) + diff --git a/chp_api/gennifer/permissions.py b/chp_api/gennifer/permissions.py new file mode 100644 index 0000000..6001dfe --- /dev/null +++ b/chp_api/gennifer/permissions.py @@ -0,0 +1,29 @@ +from rest_framework import permissions + + +class IsOwnerOrReadOnly(permissions.BasePermission): + """ + Object-level permission to only allow owners of an object to edit it. + Assumes the model instance has an `owner` attribute. + """ + + def has_object_permission(self, request, view, obj): + # Read permissions are allowed to any request, + # so we'll always allow GET, HEAD or OPTIONS requests. + if request.method in permissions.SAFE_METHODS: + return True + + # Instance must have an attribute named `owner`. + return obj.user == request.user + +class IsAdminOrReadOnly(permissions.BasePermission): + + def has_object_permission(self, request, view, obj): + # Read permissions are allowed to any request, + # so we'll always allow GET, HEAD or OPTIONS requests. + if request.method in permissions.SAFE_METHODS: + return True + + # Instance must have an attribute named `owner`. 
+ return request.user.is_staff + diff --git a/chp_api/gennifer/scripts/algorithm_loader.py b/chp_api/gennifer/scripts/algorithm_loader.py new file mode 100644 index 0000000..9a9dab8 --- /dev/null +++ b/chp_api/gennifer/scripts/algorithm_loader.py @@ -0,0 +1,29 @@ +import requests +from django.conf import settings + +from ..models import Algorithm, Hyperparameter + + +def run(): + Algorithm.objects.all().delete() + for url in settings.GENNIFER_ALGORITHM_URLS: + algo_info = requests.get(f'{url}/info').json() + algo = Algorithm.objects.create( + name=algo_info["name"], + url=url, + edge_weight_description=algo_info["edge_weight_description"], + edge_weight_type=algo_info["edge_weight_type"], + description=algo_info["description"], + directed=algo_info["directed"], + ) + algo.save() + # Load Hyperparameters + if algo_info["hyperparameters"]: + for hp_name, hp_info in algo_info["hyperparameters"].items(): + hp = Hyperparameter.objects.create( + name=hp_name, + type=getattr(Hyperparameter, hp_info["type"]), + algorithm=algo, + info=hp_info["info"], + ) + hp.save() diff --git a/chp_api/gennifer/serializers.py b/chp_api/gennifer/serializers.py new file mode 100644 index 0000000..155cacd --- /dev/null +++ b/chp_api/gennifer/serializers.py @@ -0,0 +1,168 @@ +from collections import defaultdict +from rest_framework import serializers + +from .models import ( + Dataset, + Study, + Task, + Result, + Algorithm, + Gene, + UserAnalysisSession, + AlgorithmInstance, + Hyperparameter, + HyperparameterInstance, + Annotation, + ) + + +class UserAnalysisSessionSerializer(serializers.ModelSerializer): + class Meta: + model = UserAnalysisSession + fields = ['id', 'user', 'name', 'session_data', 'is_saved'] + +class DatasetSerializer(serializers.ModelSerializer): + class Meta: + model = Dataset + fields = ['pk', 'title', 'zenodo_id', 'doi', 'description'] + read_only_fields = ['pk', 'title', 'doi', 'description'] + +class StudySerializer(serializers.ModelSerializer): + task_status = 
serializers.SerializerMethodField('get_task_status') + + def get_task_status(self, study): + status = defaultdict(int) + for task in study.tasks.all(): + status[task.status] += 1 + status = dict(status) + status = sorted([f'{count} {state}'.title() for state, count in status.items()]) + if len(status) == 0: + return '' + elif len(status) == 1: + return status[0] + return ' and '.join([', '.join(status[:-1]), status[-1]]) + + class Meta: + model = Study + fields = ['pk', 'name', 'status', 'task_status', 'description', 'timestamp', 'user', 'tasks'] + read_only_fields = ['pk', 'status', 'task_status'] + + +class TaskSerializer(serializers.ModelSerializer): + name = serializers.SerializerMethodField('get_name') + + def get_name(self, study): + return f'{study.algorithm_instance.algorithm.name} on {study.dataset.title}' + + class Meta: + model = Task + fields = [ + 'pk', + 'algorithm_instance', + 'dataset', + 'timestamp', + 'max_task_edge_weight', + 'min_task_edge_weight', + 'avg_task_edge_weight', + 'std_task_edge_weight', + 'name', + 'study', + 'status', + ] + read_only_fields = [ + 'pk', + 'max_task_edge_weight', + 'min_task_edge_weight', + 'avg_task_edge_weight', + 'std_task_edge_weight', + 'name', + 'status', + ] + +class ResultSerializer(serializers.ModelSerializer): + class Meta: + model = Result + fields = [ + 'pk', + 'tf', + 'target', + 'edge_weight', + 'task', + ] + +class AlgorithmSerializer(serializers.ModelSerializer): + class Meta: + model = Algorithm + fields = [ + 'pk', + 'name', + 'description', + 'edge_weight_type', + 'directed', + ] + +class AlgorithmInstanceSerializer(serializers.ModelSerializer): + class Meta: + model = AlgorithmInstance + fields = [ + 'pk', + 'algorithm', + ] + read_only_fields = ['pk'] + + def create(self, validated_data): + instance, _ = AlgorithmInstance.objects.get_or_create(**validated_data) + return instance + +class HyperparameterSerializer(serializers.ModelSerializer): + class Meta: + model = Hyperparameter + fields = [ + 
'pk', + 'name', + 'algorithm', + 'type', + ] + read_only_fields = ['pk', 'name', 'algorithm', 'type'] + +class HyperparameterInstanceSerializer(serializers.ModelSerializer): + class Meta: + model = HyperparameterInstance + fields = [ + 'pk', + 'algorithm_instance', + 'value_str', + 'hyperparameter', + ] + read_only_fields = ['pk'] + + def create(self, validated_data): + instance, _ = HyperparameterInstance.objects.get_or_create(**validated_data) + return instance + +class GeneSerializer(serializers.ModelSerializer): + class Meta: + model = Gene + fields = [ + 'pk', + 'name', + 'curie', + 'variant', + 'chp_preferred_curie', + ] + +class AnnotationSerializer(serializers.ModelSerializer): + class Meta: + model = Annotation + fields = [ + 'type', + 'timestamp', + 'tr_formatted_relation_string', + 'tr_predicate', + 'tr_qualified_predicate', + 'tr_object_modifier', + 'tr_object_aspect', + 'tr_resource_id', + 'tr_primary_source', + 'oai_justification', + ] \ No newline at end of file diff --git a/chp_api/gennifer/tasks.py b/chp_api/gennifer/tasks.py new file mode 100644 index 0000000..af28b2e --- /dev/null +++ b/chp_api/gennifer/tasks.py @@ -0,0 +1,358 @@ +import os +import time +import pandas as pd +import requests + +from django.db import transaction +from django.contrib.auth import get_user_model +from django.db.models import Q +from celery import shared_task +from celery.utils.log import get_task_logger +from copy import deepcopy +from nltk.stem import WordNetLemmatizer +from pattern.en import conjugate + +from .models import Dataset, Gene, Study, Task, Result, Algorithm, AlgorithmInstance, Annotated, Annotation, Publication +from dispatcher.models import DispatcherSetting + +logger = get_task_logger(__name__) +User = get_user_model() +wnl = WordNetLemmatizer() + +def normalize_nodes(curies): + dispatcher_settings = DispatcherSetting.load() + base_url = dispatcher_settings.sri_node_normalizer_baseurl + res = requests.post(f'{base_url}/get_normalized_nodes', 
json={"curies": curies}) + return res.json() + +def extract_variant_info(gene_id): + split = gene_id.split('(') + gene_id = split[0] + if len(split) > 1: + variant_info = split[1][:-1] + else: + variant_info = None + return gene_id, variant_info + +def get_chp_preferred_curie(info): + for _id in info['equivalent_identifiers']: + if 'ENSEMBL' in _id['identifier']: + return _id['identifier'] + return None + +def save_inference_task(task, status, failed=False): + task.status = status["task_status"] + if failed: + task.message = status["task_result"] + else: + # Construct Dataframe from result + df = pd.DataFrame.from_records(status["task_result"]) + + # Add task edge weight features + stats = df["EdgeWeight"].astype(float).describe() + task.max_task_edge_weight = stats["max"] + task.min_task_edge_weight = stats["min"] + task.avg_task_edge_weight = stats["mean"] + task.std_task_edge_weight = stats["std"] + + # Collect all genes + genes = set() + for _, row in df.iterrows(): + gene1, _ = extract_variant_info(row["Gene1"]) + gene2, _ = extract_variant_info(row["Gene2"]) + genes.add(gene1) + genes.add(gene2) + + # Normalize + res = normalize_nodes(list(genes)) + + # Now Extract results + for _, row in df.iterrows(): + # Construct Gene Objects + gene1, variant_info1 = extract_variant_info(row["Gene1"]) + gene2, variant_info2 = extract_variant_info(row["Gene2"]) + try: + gene1_name = res[gene1]["id"]["label"] + gene1_chp_preferred_curie = get_chp_preferred_curie(res[gene1]) + except TypeError: + gene1_name = 'Not found in SRI Node Normalizer.' + gene1_chp_preferred_curie = None + except KeyError: + _, gene1_name = res[gene1]["id"]["identifier"].split(':') + gene1_chp_preferred_curie = get_chp_preferred_curie(res[gene1]) + try: + gene2_name = res[gene2]["id"]["label"] + gene2_chp_preferred_curie = get_chp_preferred_curie(res[gene2]) + except TypeError: + gene2_name = 'Not found in SRI Node Normalizer.' 
+ gene2_chp_preferred_curie = None + except KeyError: + _, gene2_name = res[gene2]["id"]["identifier"].split(':') + gene2_chp_preferred_curie = get_chp_preferred_curie(res[gene2]) + gene1_obj, created = Gene.objects.get_or_create( + name=gene1_name, + curie=gene1, + variant=variant_info1, + chp_preferred_curie=gene1_chp_preferred_curie, + ) + if created: + gene1_obj.save() + gene2_obj, created = Gene.objects.get_or_create( + name=gene2_name, + curie=gene2, + variant=variant_info2, + chp_preferred_curie=gene2_chp_preferred_curie, + ) + if created: + gene2_obj.save() + # Construct and save Result + result, created = Result.objects.get_or_create( + tf=gene1_obj, + target=gene2_obj, + edge_weight=row["EdgeWeight"], + task=task, + user=task.user, + ) + if created: + result.save() + task.save() + # Collect all result PKs for this task + result_pks = [res.pk for res in task.results.all()] + # Send to annotation worker + create_annotations_task(result_pks, task.algorithm_instance.algorithm.directed) + return True + +def get_status(algo, task_id, url=None): + if url: + return requests.get(f'{url}/status/{task_id}', headers={'Cache-Control': 'no-cache'}).json() + return requests.get(f'{algo.url}/status/{task_id}', headers={'Cache-Control': 'no-cache'}).json() + +def return_saved_task(tasks, user): + task = tasks[0] + # Copy task results + results = deepcopy(task.results) + # Create a new task that is a duplicate but assign to this user. + task.pk = None + task.results = None + task.save() + + # Now go through and assign all results to this task and user. 
+ for result in results: + result.pk = None + result.task = task + result.user = user + result.save() + return True + +def construct_annotation_request(results, directed): + data = [] + for result in results: + data.append({ + "source": { + "id": result.tf.curie, + "name": result.tf.name, + }, + "target": { + "id": result.target.curie, + "name": result.target.name, + }, + "result_pk": result.pk, + }) + return {"data": data, "directed": directed} + +def make_tr_formatted_relation( + predicate, + qualified_predicate, + object_modifier, + object_aspect, + ): + formatted_str = predicate.replace('biolink:', '').replace('_', ' ') + if qualified_predicate: + qp = wnl.lemmatize(qualified_predicate.replace('biolink:', ''), 'v') + try: + qp = conjugate(qp, 'part') + except RuntimeError: + # This function fails the first time its run so just run again, see: https://github.com/clips/pattern/issues/295 + qp = conjugate(qp, 'part') + pass + formatted_str += f' By {qp}' + if object_modifier: + om = object_modifier.replace('_', ' ') + formatted_str += f' {om}' + if object_aspect: + oa = object_aspect.replace('_', ' ') + formatted_str += f' {oa}' + return formatted_str.title() + +def save_annotation_task(status, failed=False): + if failed: + print('Annotation Failed') + return + annotations = status["task_result"] + for annotation in annotations: + result = Result.objects.get(pk=annotation["result_pk"]) + if annotation["justification"]: + # Make OpenAI Annotation + oai_justification = Annotation.objects.create( + type='openai', + oai_justification=annotation["justification"] + ) + oai_annotated = Annotated.objects.create( + result=result, + annotation=oai_justification, + ) + oai_justification.save() + oai_annotated.save() + # Make translator annotations + for tr_result in annotation["results"]: + tr_annotation = Annotation.objects.create( + type='translator', + tr_formatted_relation_string=make_tr_formatted_relation( + tr_result["predicate"], + tr_result["qualified_predicate"], + 
tr_result["object_modifier"], + tr_result["object_aspect"], + ), + tr_predicate= tr_result["predicate"], + tr_qualified_predicate=tr_result["qualified_predicate"], + tr_object_modifier=tr_result["object_modifier"], + tr_object_aspect=tr_result["object_aspect"], + tr_resource_id=tr_result["resource_id"], + tr_primary_source=tr_result["primary_source"], + ) + tr_annotation.save() + tr_annotated = Annotated.objects.create( + result=result, + annotation=tr_annotation, + ) + tr_annotated.save() + print('Saved annotations.') + return + +@shared_task(name="create_annotations_task") +def create_annotations_task(result_pks, directed): + results = Result.objects.filter(pk__in = result_pks) + results_to_be_annotated = [] + # First go through results and ensure we haven't already made an annotation request + for result in results: + matched_annotations = [a.annotation for a in Annotated.objects.filter( + result__tf__curie=result.tf.curie, + result__target__curie=result.target.curie, + result__task__algorithm_instance__algorithm__directed=result.task.algorithm_instance.algorithm.directed + )] + if len(matched_annotations) == 0: + results_to_be_annotated.append(result) + continue + for ma in matched_annotations: + annotated = Annotated.objects.create( + annotation = ma, + result=result, + ) + annotated.save() + # Construct annotation service request + r = construct_annotation_request(results_to_be_annotated, directed) + # Send to annotation service and wait + annotate_id = requests.post('http://annotator:5000/run', json=r).json()["task_id"] + # Get initial status + status = get_status(None, annotate_id, url='http://annotator:5000') + + # Enter a loop to keep checking back in and populate the task once it has completed. 
+ #TODO: Not sure if this is best practice + while True: + # Check in every 10 seconds + time.sleep(10) + status = get_status(None, annotate_id, url='http://annotator:5000') + if status["task_status"] == 'SUCCESS': + return save_annotation_task(status) + if status["task_status"] == "FAILURE": + return save_annotation_task(status, failed=True) + +@shared_task(name="create_gennifer_task") +def create_task(task_pk): + # Get task + task = Task.objects.get(pk=task_pk) + algo = task.algorithm_instance.algorithm + user = task.user + ## Get algorithm obj + #algo = Algorithm.objects.get(name=algorithm_name) + + ## Get or create a new algorithm instance based on the hyperparameters + #if not hyperparameters: + # algo_instance, algo_instance_created = AlgorithmInstance.objects.get_or_create( + # algorithm=algo, + # hyperparameters__isnull=True, + # ) + #else: + # algo_instance, algo_instance_created = AlgorithmInstance.objects.get_or_create( + # algorithm=algo, + # hyperparameters=hyperparameters, + # ) + + # Get User obj + #user = User.objects.get(pk=user_pk) + + # Get Study obj + #study = Study.objects.get(pk=study_pk) + + # Initialize dataset instance + #dataset, dataset_created = Dataset.objects.get_or_create( + # zenodo_id=zenodo_id, + # user=user, + # ) + + #if dataset_created: + # dataset.save() + + #if not algo_instance_created and not dataset_created: + # # This means we've already run the task. So let's just return that and not bother our workers. 
+    #    tasks = Task.objects.filter(
+    #        algorithm_instance=algo_instance,
+    #        dataset=dataset,
+    #        status='SUCCESS',
+    #        )
+    #    #TODO: Probably should add some timestamp handling here
+    #    if len(tasks) > 0:
+    #        return_saved_task(tasks, user)
+
+    # Create Hyperparameter serialization
+    hyperparameters = {}
+    for h in task.algorithm_instance.hyperparameters.all():
+        hyperparameters[h.hyperparameter.name] = h.get_value()
+
+    # Send to gennifer app
+    gennifer_request = {
+            "zenodo_id": task.dataset.zenodo_id,
+            "hyperparameters": hyperparameters,
+            }
+    task_id = requests.post(f'{algo.url}/run', json=gennifer_request).json()["task_id"]
+
+    logger.info(f'TASK_ID: {task_id}')
+
+    # Get initial status
+    status = get_status(algo, task_id)
+
+
+    #task = Task.objects.create(
+    #        algorithm_instance=algo_instance,
+    #        user=user,
+    #        dataset=dataset,
+    #        status=status["task_status"],
+    #        study=study,
+    #        )
+    # Save initial task
+    #task.save()
+
+    # Enter a loop to keep checking back in and populate the task once it has completed.
+    #TODO: Not sure if this is best practice
+    while True:
+        # Check in every 5 seconds
+        time.sleep(5)
+        status = get_status(algo, task_id)
+        print(status)
+        if status["task_status"] == 'SUCCESS':
+            return save_inference_task(task, status)
+        if status["task_status"] == "FAILURE":
+            return save_inference_task(task, status, failed=True)
+        if status["task_status"] != task.status:
+            task.status = status["task_status"]
+            task.save()
diff --git a/chp_api/gennifer/tests.py b/chp_api/gennifer/tests.py
new file mode 100644
index 0000000..7ce503c
--- /dev/null
+++ b/chp_api/gennifer/tests.py
@@ -0,0 +1,3 @@
+from django.test import TestCase
+
+# Create your tests here. 
diff --git a/chp_api/gennifer/trapi_interface.py b/chp_api/gennifer/trapi_interface.py new file mode 100644 index 0000000..64f7710 --- /dev/null +++ b/chp_api/gennifer/trapi_interface.py @@ -0,0 +1,228 @@ +'''trapi interface''' +import os +import uuid +import json +import pkgutil +import logging + +from typing import Tuple, Union +from pydantic import parse_obj_as +from django.db.models import QuerySet +from reasoner_pydantic.utils import HashableMapping +from django.core.exceptions import ObjectDoesNotExist +from reasoner_pydantic import MetaKnowledgeGraph, Message, KnowledgeGraph +from reasoner_pydantic.kgraph import RetrievalSource, Attribute +from reasoner_pydantic.results import NodeBinding, EdgeBinding, Result, Results, Analysis + +from .models import Result, Gene + +# Setup logging +logging.addLevelName(25, "NOTE") +# Add a special logging function +def note(self, message, *args, **kwargs): + self._log(25, message, args, kwargs) +logging.Logger.note = note +internal_logger = logging.getLogger(__name__) + +APP_PATH = os.path.dirname(os.path.abspath(__file__)) + +class TrapiInterface: + def __init__(self, trapi_version: str = '1.5'): + self.trapi_version = trapi_version + + def get_meta_knowledge_graph(self) -> MetaKnowledgeGraph: + return self._read_meta_knowledge_graph() + + def _read_meta_knowledge_graph(self) -> MetaKnowledgeGraph: + with open(os.path.join(APP_PATH, 'app_meta_data', 'meta_knowledge_graph.json'), 'r') as mkg_file: + mkg_json = json.load(mkg_file) + return MetaKnowledgeGraph.parse_obj(mkg_json) + + def get_name(self) -> str: + return 'gennifer' + + def _get_sources(self): + source_1 = RetrievalSource(resource_id = "infores:connections-hypothesis", + resource_role="primary_knowledge_source") + return [source_1] + + def _get_attributes(self, val, algorithm_instance, dataset): + att_1 = Attribute( + attribute_type_id = algorithm_instance.algorithm.edge_weight_type, + value_type_id='biolink:has_evidence', + value=val, + 
description=algorithm_instance.algorithm.edge_weight_description, + ) + att_2 = Attribute( + attribute_type_id='grn_inference_algorithm', + value_type_id='biolink:supporting_study_method_type', + value=str(algorithm_instance), + description=algorithm_instance.algorithm.description, + ) + att_3 = Attribute( + attribute_type_id='inferenced_dataset', + value_type_id='biolink:supporting_data_set', + value=f'zenodo:{dataset.zenodo_id}', + description=f'{dataset.title}: {dataset.description}', + ) + att_4 = Attribute( + attribute_type_id = 'knowledge_level', + value='statistical_association' + ) + return [att_1, att_2, att_3, att_4] + + def _add_results( + self, + message, + qg_subject_id, + subject_curies, + subject_category, + predicate, + qg_edge_id, + qg_object_id, + object_curies, + object_category, + vals, + algorithms, + datasets, + ): + node_binding_group = [] + edge_binding_group = [] + nodes = dict() + edges = dict() + val_id = 0 + for subject_curie in subject_curies: + for object_curie in object_curies: + nodes[subject_curie] = {"categories": [subject_category], "attributes" : []} + nodes[object_curie] = {"categories": [object_category], "attributes" : []} + kg_edge_id = str(uuid.uuid4()) + edges[kg_edge_id] = {"predicate": predicate, + "subject": subject_curie, + "object": object_curie, + "sources": self._get_sources(), + "attributes": self._get_attributes( + vals[val_id], + algorithms[val_id], + datasets[val_id], + )} + val_id += 1 + node_bindings = {qg_subject_id: set(), qg_object_id: set()} + edge_bindings = {qg_edge_id : set()} + node_bindings[qg_subject_id].add(NodeBinding(id = subject_curie, attributes=[])) + node_bindings[qg_object_id].add(NodeBinding(id = object_curie, attributes=[])) + edge_bindings[qg_edge_id].add(EdgeBinding(id = kg_edge_id, attributes=[])) + node_binding_group.append(node_bindings) + edge_binding_group.append(edge_bindings) + kgraph = KnowledgeGraph(nodes=nodes, edges=edges) + if message.knowledge_graph is not None: + 
message.knowledge_graph.update(kgraph) + else: + message.knowledge_graph = kgraph + return node_binding_group, edge_binding_group + + def _extract_qnode_info(self, qnode): + return qnode.ids, qnode.categories[0] + + def get_response(self, message: Message, logger): + for edge_id, edge in message.query_graph.edges.items(): + predicate = edge.predicates[0] + qg_edge_id = edge_id + qg_subject_id = edge.subject + qg_object_id = edge.object + subject_curies, subject_category = self._extract_qnode_info(message.query_graph.nodes[qg_subject_id]) + object_curies, object_category = self._extract_qnode_info(message.query_graph.nodes[qg_object_id]) + # annotation + node_bindings = [] + edge_bindings = [] + #TODO: Should probably offer support to return all results + if subject_curies is not None and object_curies is not None: + logger.info('Annotation edges detected') + logger.info('Annotate edge not currently supported') + return message + elif object_curies is not None: + logger.info('Wildcard detected') + for curie in object_curies: + # Get object gene, if we don't have then continue + obj_genes = Gene.objects.filter(chp_preferred_curie=curie) + if len(obj_genes) == 0: + continue + if predicate == 'biolink:regulates': + results = [] + for obj_gene in obj_genes: + results.extend(Result.objects.filter(target=obj_gene, is_public=True)) + subject_curies = [r.tf.chp_preferred_curie for r in results] + elif predicate == 'biolink:regulated_by': + results = [] + for obj_gene in obj_genes: + results.extend(Result.objects.filter(tf=obj_gene, is_public=True)) + subject_curies = [r.target.chp_preferred_curie for r in results] + else: + raise ValueError(f'Unknown predicate: {predicate}.') + vals = [r.edge_weight for r in results] + algorithms = [r.study.algorithm_instance for r in results] + datasets = [r.study.dataset for r in results] + node_binding_group, edge_binding_group = self._add_results( + message, + qg_subject_id, + subject_curies, + subject_category, + predicate, + 
                            qg_edge_id,
+                            qg_object_id,
+                            [curie],
+                            object_category,
+                            vals,
+                            algorithms,
+                            datasets
+                            )
+                    node_bindings.extend(node_binding_group)
+                    edge_bindings.extend(edge_binding_group)
+            elif subject_curies is not None:
+                logger.info('Wildcard detected')
+                for curie in subject_curies:
+                    # Get object gene, if we don't have then continue
+                    sub_genes = Gene.objects.filter(chp_preferred_curie=curie)
+                    if len(sub_genes) == 0:
+                        continue
+                    if predicate == 'biolink:regulates':
+                        results = []
+                        for sub_gene in sub_genes:
+                            results.extend(Result.objects.filter(tf=sub_gene, is_public=True))
+                        object_curies = [r.target.chp_preferred_curie for r in results]
+                    elif predicate == 'biolink:regulated_by':
+                        results = []
+                        for sub_gene in sub_genes:
+                            results.extend(Result.objects.filter(target=sub_gene, is_public=True))
+                        object_curies = [r.tf.chp_preferred_curie for r in results]
+                    else:
+                        raise ValueError(f'Unknown predicate: {predicate}.')
+                    vals = [r.edge_weight for r in results]
+                    algorithms = [r.study.algorithm_instance for r in results]
+                    datasets = [r.study.dataset for r in results]
+                    node_binding_group, edge_binding_group = self._add_results(
+                            message,
+                            qg_subject_id,
+                            [curie],
+                            subject_category,
+                            predicate,
+                            qg_edge_id,
+                            qg_object_id,
+                            object_curies,
+                            object_category,
+                            vals,
+                            algorithms,
+                            datasets,
+                            )
+                    node_bindings.extend(node_binding_group)
+                    edge_bindings.extend(edge_binding_group)
+            else:
+                logger.info('No curies detected. 
Returning no results') + return message + results = Results(__root__ = parse_obj_as(HashableMapping, {})) + for node_binding_dict, edge_binding_dict in zip(node_bindings, edge_bindings): + analysis = Analysis(resource_id='infores:connections-hypothesis', edge_bindings = edge_binding_dict, attributes=[]) + result = Result(node_bindings = node_binding_dict, analyses=[analysis]) + results.add(result) + message.results = results + return message diff --git a/chp_api/gennifer/urls.py b/chp_api/gennifer/urls.py new file mode 100644 index 0000000..3e380c0 --- /dev/null +++ b/chp_api/gennifer/urls.py @@ -0,0 +1,24 @@ +from django.urls import path, include +from rest_framework.routers import DefaultRouter + +from . import views + +# Create router and register viewsets +router = DefaultRouter() +router.register(r'datasets', views.DatasetViewSet, basename='dataset') +router.register(r'studies', views.StudyViewSet, basename='study') +router.register(r'tasks', views.TaskViewSet, basename='task') +router.register(r'results', views.ResultViewSet, basename='result') +router.register(r'algorithms', views.AlgorithmViewSet, basename='algorithm') +router.register(r'algorithm_instances', views.AlgorithmInstanceViewSet, basename='algorithm_instance') +router.register(r'hyperparameters', views.HyperparameterViewSet, basename='hyperparameter') +router.register(r'hyperparameter_instances', views.HyperparameterInstanceViewSet, basename='hyperparameter_instance') +router.register(r'genes', views.GeneViewSet, basename='genes') +router.register(r'analyses', views.UserAnalysisSessionViewSet, basename='analyses') + +urlpatterns = [ + path('', include(router.urls)), + path('run/', views.run.as_view()), + path('graph/', views.CytoscapeView.as_view()), + path('download_study/', views.StudyDownloadView.as_view()) + ] diff --git a/chp_api/gennifer/views.py b/chp_api/gennifer/views.py new file mode 100644 index 0000000..a5ad669 --- /dev/null +++ b/chp_api/gennifer/views.py @@ -0,0 +1,432 @@ +import 
requests + +from collections import defaultdict + +from django.http import HttpResponse, JsonResponse +from django.shortcuts import get_object_or_404 +from django.core.exceptions import ObjectDoesNotExist +from django.db.models import Q + +from rest_framework import viewsets +from rest_framework.views import APIView +from rest_framework.response import Response +from rest_framework.permissions import IsAuthenticated, IsAuthenticatedOrReadOnly +from rest_framework.exceptions import ValidationError +from django_filters.rest_framework import DjangoFilterBackend +#from oauth2_provider.contrib.rest_framework import TokenHasReadWriteScope, TokenHasScope + +from .models import ( + Dataset, + Study, + Task, + Result, + Algorithm, + Gene, + UserAnalysisSession, + AlgorithmInstance, + Hyperparameter, + HyperparameterInstance, + ) +from .serializers import ( + DatasetSerializer, + StudySerializer, + TaskSerializer, + ResultSerializer, + AlgorithmSerializer, + GeneSerializer, + UserAnalysisSessionSerializer, + AlgorithmInstanceSerializer, + HyperparameterSerializer, + HyperparameterInstanceSerializer, + ) +from .tasks import create_task +from .permissions import IsOwnerOrReadOnly, IsAdminOrReadOnly + + +class UserAnalysisSessionViewSet(viewsets.ModelViewSet): + serializer_class = UserAnalysisSessionSerializer + #filter_backends = [DjangoFilterBackend] + #filterset_fields = ['id', 'name', 'is_saved'] + permission_classes = [IsAuthenticated, IsOwnerOrReadOnly]#, TokenHasReadWriteScope] + + def get_queryset(self): + user = self.request.user + print(f'User is {user}') + return UserAnalysisSession.objects.filter(user=user) + + +class DatasetViewSet(viewsets.ModelViewSet): + queryset = Dataset.objects.all() + serializer_class = DatasetSerializer + filter_backends = [DjangoFilterBackend] + filterset_fields = ['user', 'zenodo_id'] + permission_classes = [IsAuthenticated, IsOwnerOrReadOnly]#, TokenHasReadWriteScope] + + def perform_create(self, serializers): + try: + 
serializers.save(user=self.request.user) + except ValueError as e: + raise ValidationError(str(e)) + +class StudyViewSet(viewsets.ModelViewSet): + #queryset = Study.objects.all() + serializer_class = StudySerializer + permission_classes = [IsAuthenticated, IsOwnerOrReadOnly]#, TokenHasReadWriteScope] + + def get_queryset(self): + user = self.request.user + return Study.objects.filter(user=user) + + def perform_create(self, serializers): + try: + serializers.save(user=self.request.user, status='RECEIVED') + except ValueError as e: + raise ValidationError(str(e)) + +class TaskViewSet(viewsets.ModelViewSet): + #queryset = Task.objects.all() + serializer_class = TaskSerializer + filter_backends = [DjangoFilterBackend] + filterset_fields = ['is_public', 'dataset', 'algorithm_instance'] + permission_classes = [IsAuthenticated, IsOwnerOrReadOnly]#, TokenHasReadWriteScope] + + def get_queryset(self): + user = self.request.user + return Task.objects.filter(user=user) + + def perform_create(self, serializers): + try: + serializers.save(user=self.request.user, status='RECEIVED') + except ValueError as e: + raise ValidationError(str(e)) + + +class ResultViewSet(viewsets.ModelViewSet): + #queryset = Result.objects.all() + serializer_class = ResultSerializer + filter_backends = [DjangoFilterBackend] + filterset_fields = ['is_public', 'task', 'tf', 'target'] + permission_classes = [IsAuthenticated, IsOwnerOrReadOnly]#, TokenHasReadWriteScope] + + def get_queryset(self): + user = self.request.user + return Result.objects.filter(user=user) + +class AlgorithmViewSet(viewsets.ModelViewSet): + serializer_class = AlgorithmSerializer + queryset = Algorithm.objects.all() + permission_classes = [IsAuthenticated, IsAdminOrReadOnly]#, TokenHasReadWriteScope] + #required_scopes = ['read'] + +class AlgorithmInstanceViewSet(viewsets.ModelViewSet): + queryset = AlgorithmInstance.objects.all() + serializer_class = AlgorithmInstanceSerializer + permission_classes = [IsAuthenticated, 
IsOwnerOrReadOnly]#, TokenHasReadWriteScope] + +class HyperparameterViewSet(viewsets.ModelViewSet): + serializer_class = HyperparameterSerializer + queryset = Hyperparameter.objects.all() + filter_backends = [DjangoFilterBackend] + filterset_fields = ['algorithm'] + permission_classes = [IsAuthenticated, IsAdminOrReadOnly]#, TokenHasReadWriteScope] + #required_scopes = ['read'] + +class HyperparameterInstanceViewSet(viewsets.ModelViewSet): + queryset = HyperparameterInstance.objects.all() + serializer_class = HyperparameterInstanceSerializer + permission_classes = [IsAuthenticated, IsOwnerOrReadOnly]#, TokenHasReadWriteScope] + +class GeneViewSet(viewsets.ModelViewSet): + serializer_class = GeneSerializer + #queryset = Gene.objects.all() + permission_classes = [IsAuthenticated, IsAdminOrReadOnly]#, TokenHasReadWriteScope] + + def get_queryset(self): + user = self.request.user + # Get user results + results = Result.objects.filter(user=user) + tf_genes = Gene.objects.filter(inference_result_tf__pk__in=results) + target_genes = Gene.objects.filter(inference_result_target__pk__in=results) + genes_union = tf_genes.union(target_genes) + print(len(genes_union)) + return genes_union + +class CytoscapeHandler: + def __init__(self, results): + self.results = results + + def construct_node(self, gene_obj): + if gene_obj.variant: + name = f'{gene_obj.name}({gene_obj.variant})' + curie = f'{gene_obj.curie}({gene_obj.variant})' + chp_preferred_curie = f'{gene_obj.chp_preferred_curie}({gene_obj.variant})' + else: + name = gene_obj.name + curie = gene_obj.curie + chp_preferred_curie = gene_obj.chp_preferred_curie + + node = { + "data": { + "id": str(gene_obj.pk), + "name": name, + "curie": curie, + "chp_preferred_curie": chp_preferred_curie + } + } + return node, str(gene_obj.pk) + + def construct_edge_annotations(self, annotations): + obj = {"openai": {"justification": None}, "translator": []} + tr_dict = defaultdict(list) + for a in annotations: + if a.type == 'openai': + 
obj["openai"]["justification"] = a.oai_justification + continue + if a.type == 'translator': + tr_dict[a.tr_formatted_relation_string].append( + { + "predicate": a.tr_predicate, + "qualified_predicate": a.tr_qualified_predicate, + "object_modifier": a.tr_object_modifier, + "object_aspect": a.tr_object_aspect, + "resource_id": a.tr_resource_id, + "primary_source": a.tr_primary_source, + } + ) + # Reformat to list + for relation, tr_results in tr_dict.items(): + obj["translator"].append( + { + "formatted_relation": relation, + "results": tr_results + } + ) + return obj + + def construct_edge(self, res, source_id, target_id): + # Normalize edge weight based on the study + normalized_weight = (res.edge_weight - res.task.min_task_edge_weight) / (res.task.max_task_edge_weight - res.task.min_task_edge_weight) + directed = res.task.algorithm_instance.algorithm.directed + edge_tuple = tuple(sorted([source_id, target_id])) + annotations = res.annotations.all() + edge = { + "data": { + "id": str(res.pk), + "source": source_id, + "target": target_id, + "dataset": str(res.task.dataset), + "weight": normalized_weight, + "algorithm": str(res.task.algorithm_instance), + "directed": directed, + "annotations": self.construct_edge_annotations(annotations), + } + } + return edge, edge_tuple, directed + + def add(self, res, nodes, edges, processed_node_ids, processed_undirected_edges): + # Construct nodes + tf_node, tf_id = self.construct_node(res.tf) + target_node, target_id = self.construct_node(res.target) + # Add nodes if not already added by another result + if tf_id not in processed_node_ids: + nodes.append(tf_node) + processed_node_ids.add(tf_id) + if target_id not in processed_node_ids: + nodes.append(target_node) + processed_node_ids.add(target_id) + # Add and construct edge + edge, edge_tuple, edge_is_directed = self.construct_edge(res, tf_id, target_id) + if edge_is_directed: + edges.append(edge) + elif not edge_is_directed and edge_tuple not in processed_undirected_edges: + 
edges.append(edge) + processed_undirected_edges.add(edge_tuple) + else: + pass + return nodes, edges, processed_node_ids, processed_undirected_edges + + def construct_cytoscape_data(self): + nodes = [] + edges = [] + processed_node_ids = set() + processed_undirected_edges = set() + elements = [] + # Construct graph + for res in self.results: + nodes, edges, processed_node_ids, processed_undirected_edges = self.add( + res, + nodes, + edges, + processed_node_ids, + processed_undirected_edges, + ) + elements.extend(nodes) + elements.extend(edges) + return { + "elements": elements + } + + +class CytoscapeView(APIView): + + + def get(self, request): + results = Result.objects.all() + cyto_handler = CytoscapeHandler(results) + cyto = cyto_handler.construct_cytoscape_data() + return JsonResponse(cyto) + + def post(self, request): + elements = [] + gene_ids = request.data.get("gene_ids", None) + study_ids = request.data.get("study_ids", None) + algorithm_ids = request.data.get("algorithm_ids", None) + dataset_ids = request.data.get("dataset_ids", None) + cached_inference_result_ids = request.data.get("cached_results", None) + + if not (study_ids and gene_ids) and not (algorithm_ids and dataset_ids and gene_ids): + return JsonResponse({"elements": elements, "result_ids": []}) + + # Create Filter + filters = [] + if gene_ids: + filters.extend( + [ + {"field": 'tf__pk', "operator": 'in', "value": gene_ids}, + {"field": 'target__pk', "operator": 'in', "value": gene_ids}, + ] + ) + if study_ids: + tasks = Task.objects.filter(study__pk__in=study_ids) + task_ids = [task.pk for task in tasks] + filters.append({"field": 'task__pk', "operator": 'in', "value": task_ids}) + if algorithm_ids: + filters.append({"field": 'task__algorithm_instance__algorithm__pk', "operator": 'in', "value": algorithm_ids}) + if dataset_ids: + filters.append({"field": 'task__dataset__zenodo_id', "operator": 'in', "value": dataset_ids}) + + # Construct Query + query = Q() + for filter_item in filters: + 
field = filter_item["field"] + operator = filter_item["operator"] + value = filter_item["value"] + query &= Q(**{f'{field}__{operator}': value}) + + # Get matching results + results = Result.objects.filter(query) + + if len(results) == 0: + return JsonResponse({"elements": elements, "result_ids": []}) + + # Exclude results that have already been sent to user + if cached_inference_result_ids: + results = results.exclude(pk__in=cached_inference_result_ids) + + # Capture result_ids + result_ids = [res.pk for res in results] + + # Initialize Cytoscape Handler + cyto_handler = CytoscapeHandler(results) + elements_dict = cyto_handler.construct_cytoscape_data() + + elements_dict["result_ids"] = result_ids + return JsonResponse(elements_dict) + +class StudyDownloadView(APIView): + permission_classes = [IsAuthenticated] + + def get(self, request, study_id=None): + if not study_id: + return JsonResponse({"detail": 'No study ID was passed.'}) + # Set response + response = { + "study_id": study_id, + } + # Get study + try: + study = Study.objects.get(pk=study_id, user=request.user) + except ObjectDoesNotExist: + response["error"] = 'The study does not exist for request user.' 
+            return JsonResponse(response)
+
+        response["description"] = study.description
+        response["status"] = study.status
+        response["tasks"] = []
+
+        # Get tasks associated with study
+        tasks = Task.objects.filter(study=study)
+
+        # Collect task information
+        for task in tasks:
+            task_json = {}
+            # Collect task edge-weight statistics and status
+            task_json["max_task_edge_weight"] = task.max_task_edge_weight
+            task_json["min_task_edge_weight"] = task.min_task_edge_weight
+            task_json["avg_task_edge_weight"] = task.avg_task_edge_weight
+            task_json["std_task_edge_weight"] = task.std_task_edge_weight
+            task_json["status"] = task.status
+            # Collect algo hyperparameters
+            hyper_instance_objs = task.algorithm_instance.hyperparameters.all()
+            hypers = {}
+            for hyper in hyper_instance_objs:
+                hypers[hyper.hyperparameter.name] = {
+                        "value": hyper.value_str,
+                        "info": hyper.hyperparameter.info
+                        }
+            # Collect Algorithm instance information
+            task_json["algorithm"] = {
+                    "name": task.algorithm_instance.algorithm.name,
+                    "description": task.algorithm_instance.algorithm.description,
+                    "edge_weight_description": task.algorithm_instance.algorithm.edge_weight_description,
+                    "edge_weight_type": task.algorithm_instance.algorithm.edge_weight_type,
+                    "directed": task.algorithm_instance.algorithm.directed,
+                    "hyperparameters": hypers
+                    }
+            # Collect Dataset information
+            task_json["dataset"] = {
+                    "title": task.dataset.title,
+                    "zenodo_id": task.dataset.zenodo_id,
+                    "description": task.dataset.description,
+                    }
+            # Build cytoscape graph
+            if task.status == 'SUCCESS':
+                results = Result.objects.filter(task=task)
+                cyto_handler = CytoscapeHandler(results)
+                task_json["graph"] = cyto_handler.construct_cytoscape_data()
+            else:
+                task_json["graph"] = None
+            response["tasks"].append(task_json)
+
+        return JsonResponse(response)
+
+
+class run(APIView):
+    permission_classes = [IsAuthenticated]
+
+    def post(self, request):
+        """ Request comes in as a list of algorithms to run. 
+ """ + study_id = request.data.get("study_id", None) + if not study_id: + return JsonResponse({"error": 'Must pass a study_id.'}) + response = { + "study_id": study_id, + "task_status": [], + } + # Get study + try: + study = Study.objects.get(pk=study_id, user=request.user) + except ObjectDoesNotExist: + response["error"] = 'The study does not exist for request user.' + return JsonResponse(response) + # Set Study Status to Started. + study.status = 'STARTED' + study.save() + # Build gennifer requests + tasks = Task.objects.filter(study=study) + for task in tasks: + # If all pass, now send to gennifer services + task_id = create_task.delay(task.pk).id + response["task_status"].append(task_id) + return JsonResponse(response) diff --git a/gunicorn.config-prod.py b/chp_api/gunicorn.config.py similarity index 93% rename from gunicorn.config-prod.py rename to chp_api/gunicorn.config.py index 32f99a5..6834b98 100644 --- a/gunicorn.config-prod.py +++ b/chp_api/gunicorn.config.py @@ -9,5 +9,4 @@ workers=10 errorlog='gunicorn-error.log' accesslog='gunicorn-access.log' -loglevel='debug' - +loglevel='debug' \ No newline at end of file diff --git a/chp_api/manage.py b/chp_api/manage.py index 81b455b..59872a5 100644 --- a/chp_api/manage.py +++ b/chp_api/manage.py @@ -3,9 +3,14 @@ import os import sys +from opentelemetry.instrumentation.django import DjangoInstrumentor def main(): os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'chp_api.settings') + + # This call is what makes the Django application be instrumented + DjangoInstrumentor().instrument() + try: from django.core.management import execute_from_command_line except ImportError as exc: @@ -16,6 +21,5 @@ def main(): ) from exc execute_from_command_line(sys.argv) - if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/chp_api/requirements.txt b/chp_api/requirements.txt new file mode 100644 index 0000000..2e5c27f --- /dev/null +++ b/chp_api/requirements.txt @@ -0,0 +1,123 @@ +amqp==5.2.0 
+asgiref==3.8.1 +async-timeout==4.0.3 +attrs==23.2.0 +autocommand==2.2.2 +backports.csv==1.0.7 +backports.tarfile==1.1.1 +beautifulsoup4==4.12.3 +billiard==4.2.0 +bmt==1.4.0 +cattrs==23.2.3 +celery==5.4.0 +certifi==2024.2.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +cheroot==10.0.1 +CherryPy==18.9.0 +click==8.1.7 +click-didyoumean==0.3.1 +click-plugins==1.1.1 +click-repl==0.3.0 +cryptography==42.0.5 +curies==0.7.9 +Deprecated==1.2.14 +deprecation==2.1.0 +Django==4.2.11 +django-cors-headers==4.3.1 +django-environ==0.11.2 +django-extensions==3.2.3 +django-filter==24.2 +django-hosts==6.0 +django-oauth-toolkit==2.3.0 +djangorestframework==3.15.1 +djangorestframework-simplejwt==5.3.1 +exceptiongroup==1.2.1 +feedparser==6.0.11 +flower==2.0.1 +future==1.0.0 +hbreader==0.9.1 +humanize==4.9.0 +idna==3.7 +importlib-metadata==7.0.0 +inflect==7.2.0 +iniconfig==2.0.0 +isodate==0.6.1 +jaraco.collections==5.0.1 +jaraco.context==5.3.0 +jaraco.functools==4.0.1 +jaraco.text==3.12.0 +joblib==1.4.0 +json-flattener==0.1.9 +jsonasobj2==1.0.4 +jsonschema==4.21.1 +jsonschema-specifications==2023.12.1 +jwcrypto==1.5.6 +kombu==5.3.7 +linkml-runtime==1.7.5 +lxml==5.2.1 +more-itertools==10.2.0 +mysqlclient==2.2.4 +nltk==3.8.1 +numpy==1.26.4 +oauthlib==3.2.2 +opentelemetry-api==1.24.0 +opentelemetry-instrumentation==0.45b0 +opentelemetry-instrumentation-django==0.45b0 +opentelemetry-instrumentation-wsgi==0.45b0 +opentelemetry-sdk==1.24.0 +opentelemetry-semantic-conventions==0.45b0 +opentelemetry-util-http==0.45b0 +packaging==24.0 +pandas==2.2.2 +Pattern==3.6 +pdfminer.six==20231228 +platformdirs==4.2.1 +pluggy==1.5.0 +portend==3.2.0 +prefixcommons==0.1.12 +prefixmaps==0.2.4 +prometheus_client==0.20.0 +prompt-toolkit==3.0.43 +psycopg2-binary==2.9.9 +pycparser==2.22 +pydantic==1.10.12 +PyJWT==2.8.0 +pyparsing==3.1.2 +pytest==8.1.1 +pytest-logging==2015.11.4 +python-dateutil==2.9.0.post0 +python-docx==1.1.0 +PyTrie==0.4.0 +pytz==2024.1 +PyYAML==6.0.1 +rdflib==7.0.0 +reasoner-pydantic==5.0.2 
+redis==5.0.4 +referencing==0.34.0 +regex==2024.4.16 +requests==2.31.0 +requests-cache==1.2.0 +rpds-py==0.18.0 +scipy==1.13.0 +sgmllib3k==1.0.0 +six==1.16.0 +sortedcontainers==2.4.0 +soupsieve==2.5 +sqlparse==0.5.0 +stringcase==1.2.0 +tempora==5.5.1 +tomli==2.0.1 +tornado==6.4 +tqdm==4.66.2 +typeguard==4.2.1 +typing_extensions==4.11.0 +tzdata==2024.1 +url-normalize==1.4.3 +urllib3==2.2.1 +uWSGI==2.0.25.1 +vine==5.1.0 +wcwidth==0.2.13 +wrapt==1.16.0 +zc.lockfile==3.0.post1 +zipp==3.18.1 diff --git a/chp_api/requirements.txt.base b/chp_api/requirements.txt.base new file mode 100644 index 0000000..4afc956 --- /dev/null +++ b/chp_api/requirements.txt.base @@ -0,0 +1,23 @@ +tqdm +djangorestframework +djangorestframework-simplejwt +psycopg2-binary +bmt +reasoner_pydantic +django-environ +django-hosts +django +requests +requests-cache +django-filter +celery +flower +redis +pandas +django-cors-headers +django-oauth-toolkit +nltk +pattern +opentelemetry-sdk +opentelemetry-instrumentation-django +uwsgi diff --git a/chp_api/users/__init__.py b/chp_api/users/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/chp_api/users/admin.py b/chp_api/users/admin.py new file mode 100644 index 0000000..88e1740 --- /dev/null +++ b/chp_api/users/admin.py @@ -0,0 +1,6 @@ +from django.contrib import admin +from django.contrib.auth.admin import UserAdmin +from .models import User + +admin.site.register(User, UserAdmin) + diff --git a/chp_api/users/apps.py b/chp_api/users/apps.py new file mode 100644 index 0000000..72b1401 --- /dev/null +++ b/chp_api/users/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + + +class UsersConfig(AppConfig): + default_auto_field = 'django.db.models.BigAutoField' + name = 'users' diff --git a/chp_api/users/migrations/0001_initial.py b/chp_api/users/migrations/0001_initial.py new file mode 100755 index 0000000..b754ae9 --- /dev/null +++ b/chp_api/users/migrations/0001_initial.py @@ -0,0 +1,44 @@ +# Generated by Django 4.2.2 on 2023-06-25 
23:23 + +import django.contrib.auth.models +import django.contrib.auth.validators +from django.db import migrations, models +import django.utils.timezone + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ('auth', '0012_alter_user_first_name_max_length'), + ] + + operations = [ + migrations.CreateModel( + name='User', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('password', models.CharField(max_length=128, verbose_name='password')), + ('last_login', models.DateTimeField(blank=True, null=True, verbose_name='last login')), + ('is_superuser', models.BooleanField(default=False, help_text='Designates that this user has all permissions without explicitly assigning them.', verbose_name='superuser status')), + ('username', models.CharField(error_messages={'unique': 'A user with that username already exists.'}, help_text='Required. 150 characters or fewer. Letters, digits and @/./+/-/_ only.', max_length=150, unique=True, validators=[django.contrib.auth.validators.UnicodeUsernameValidator()], verbose_name='username')), + ('first_name', models.CharField(blank=True, max_length=150, verbose_name='first name')), + ('last_name', models.CharField(blank=True, max_length=150, verbose_name='last name')), + ('email', models.EmailField(blank=True, max_length=254, verbose_name='email address')), + ('is_staff', models.BooleanField(default=False, help_text='Designates whether the user can log into this admin site.', verbose_name='staff status')), + ('is_active', models.BooleanField(default=True, help_text='Designates whether this user should be treated as active. Unselect this instead of deleting accounts.', verbose_name='active')), + ('date_joined', models.DateTimeField(default=django.utils.timezone.now, verbose_name='date joined')), + ('groups', models.ManyToManyField(blank=True, help_text='The groups this user belongs to. 
A user will get all permissions granted to each of their groups.', related_name='user_set', related_query_name='user', to='auth.group', verbose_name='groups')), + ('user_permissions', models.ManyToManyField(blank=True, help_text='Specific permissions for this user.', related_name='user_set', related_query_name='user', to='auth.permission', verbose_name='user permissions')), + ], + options={ + 'verbose_name': 'user', + 'verbose_name_plural': 'users', + 'abstract': False, + }, + managers=[ + ('objects', django.contrib.auth.models.UserManager()), + ], + ), + ] diff --git a/chp_api/users/migrations/__init__.py b/chp_api/users/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/chp_api/users/models.py b/chp_api/users/models.py new file mode 100644 index 0000000..da5acd7 --- /dev/null +++ b/chp_api/users/models.py @@ -0,0 +1,5 @@ +from django.db import models +from django.contrib.auth.models import AbstractUser + +class User(AbstractUser): + pass diff --git a/chp_api/users/serializers.py b/chp_api/users/serializers.py new file mode 100644 index 0000000..53309e2 --- /dev/null +++ b/chp_api/users/serializers.py @@ -0,0 +1,9 @@ +from django.contrib.auth import get_user_model + +from rest_framework import serializers + + +class UserSerializer(serializers.ModelSerializer): + class Meta: + model = get_user_model() + fields = ['username', 'email', 'first_name', 'last_name'] diff --git a/chp_api/users/tests.py b/chp_api/users/tests.py new file mode 100644 index 0000000..7ce503c --- /dev/null +++ b/chp_api/users/tests.py @@ -0,0 +1,3 @@ +from django.test import TestCase + +# Create your tests here. 
diff --git a/chp_api/users/urls.py b/chp_api/users/urls.py new file mode 100644 index 0000000..a1c2f77 --- /dev/null +++ b/chp_api/users/urls.py @@ -0,0 +1,7 @@ +from django.urls import path, include + +from .views import UserDetails + +urlpatterns = [ + path('me/', UserDetails.as_view()) + ] diff --git a/chp_api/users/views.py b/chp_api/users/views.py new file mode 100644 index 0000000..8aaaf81 --- /dev/null +++ b/chp_api/users/views.py @@ -0,0 +1,14 @@ +from rest_framework import permissions +from rest_framework.views import APIView +from rest_framework.response import Response +from oauth2_provider.contrib.rest_framework import TokenHasReadWriteScope + +from .serializers import UserSerializer + + +class UserDetails(APIView): + permission_classes = [permissions.IsAuthenticated, TokenHasReadWriteScope] + + def get(self, request): + user = request.user + return Response(UserSerializer(user).data) diff --git a/compose.chp-api.yaml b/compose.chp-api.yaml new file mode 100644 index 0000000..b768bba --- /dev/null +++ b/compose.chp-api.yaml @@ -0,0 +1,229 @@ +version: '3.8' + +services: + + nginx-proxy: + build: nginx + restart: always + volumes: + - ./nginx/default.conf:/tmp/default.conf + environment: + - DJANGO_SERVER_ADDR=api:8000 + - STATIC_SERVER_ADDR=static-fs:8080 + - FLOWER_DASHBOARD_ADDR=dashboard:5556 + - NEXTJS_SERVER_ADDR=frontend:3000 + #- NEXTJS_SERVER_ADDR=api:8000 + ports: + - "80:80" + depends_on: + - api + healthcheck: + test: ["CMD-SHELL", "curl --silent --fail localhost:80/health-check || exit 1"] + interval: 10s + timeout: 10s + retries: 3 + command: /app/start.sh + + api: + build: + context: ./chp_api + dockerfile: Dockerfile + restart: always + user: chp_api + ports: + - '8000:8000' + secrets: + - db-password + - django-key + - allowed-hosts + - csrf-trusted-origins + - django-superuser-username + - django-superuser-email + - django-superuser-password + environment: + - POSTGRES_DB=chpapi + - POSTGRES_USER=chpapi_user + - 
POSTGRES_PASSWORD_FILE=/run/secrets/db-password + - POSTGRES_HOST=db + - POSTGRES_PORT=5432 + - DEBUG=0 + # For Helm testing purposes + #- POSTGRES_PASSWORD=31173e51d8f78b56606d06dfb66a1b126630cdf4711bed9427025d8979976f31 + #- SECRET_KEY=e1743ca40af220389cd1165d213e3d677f2d59c00d7b0f94e7a302c91f95f029 + #- DJANGO_ALLOWED_HOSTS=localhost,chp.thayer.dartmouth.edu + - CSRF_TRUSTED_ORIGINS=http://localhost,https://chp.thayer.dartmouth.edu + #- DJANGO_SUPERUSER_USERNAME=chp_admin + #- DJANGO_SUPERUSER_EMAIL=chp_admin@chp.com + #- DJANGO_SUPERUSER_PASSWORD=e12ff26f070819d9a72e317898148679680e6b3976e464b4102bd6eb18357919 + - SECRET_KEY_FILE=/run/secrets/django-key + #- CSRF_TRUSTED_ORIGINS_FILE=/run/secrets/csrf-trusted-origins + - DJANGO_ALLOWED_HOSTS_FILE=/run/secrets/allowed-hosts + - DJANGO_SUPERUSER_USERNAME_FILE=/run/secrets/django-superuser-username + - DJANGO_SUPERUSER_EMAIL_FILE=/run/secrets/django-superuser-email + - DJANGO_SUPERUSER_PASSWORD_FILE=/run/secrets/django-superuser-password + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/0 + # Uncomment this for production + #- DJANGO_SETTINGS_MODULE=mysite.settings.production + # Comment this for development + - DJANGO_SETTINGS_MODULE=chp_api.settings + # For Open Telemetry + - OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED=true + - OTEL_TRACES_EXPORTER=jaeger + - OTEL_EXPORTER_JAEGER_AGENT_HOST=jaeger-otel-agent.sri + - OTEL_EXPORTER_JAEGER_AGENT_PORT=6831 + depends_on: + - static-fs + - db + #condition: service_healthy + healthcheck: + #test: ["CMD-SHELL", "curl --silent --fail localhost:8000/flask-health-check || exit 1"] + interval: 10s + timeout: 10s + retries: 3 + volumes: + - static-files:/home/chp_api/staticfiles + #command: uwsgi --http :8000 --max-requests=200 --master --pidfile=/tmp/project-master.pid --logto /tmp/mylog.log --module chp_api.wsgi:application + #command: opentelemetry-instrument --traces_exporter console --metrics_exporter console uwsgi --http :8000 
--max-requests=200 --master --pidfile=/tmp/project-master.pid --module chp_api.wsgi:application + command: opentelemetry-instrument --traces_exporter jaeger --metrics_exporter console uwsgi --http :8000 --max-requests=200 --master --pidfile=/tmp/project-master.pid --module chp_api.wsgi:application + #command: gunicorn -c gunicorn.config.py -b 0.0.0.0:8000 chp_api.wsgi:application + #command: python3 manage.py runserver 0.0.0.0:8000 + + worker-api: + build: + context: ./chp_api + dockerfile: Dockerfile + restart: always + secrets: + - db-password + - django-key + - allowed-hosts + - django-superuser-username + - django-superuser-email + - django-superuser-password + environment: + - POSTGRES_DB=chpapi + - POSTGRES_USER=chpapi_user + - POSTGRES_PASSWORD_FILE=/run/secrets/db-password + - POSTGRES_HOST=db + - POSTGRES_PORT=5432 + - DEBUG=0 + # For Helm testing purposes + #- POSTGRES_PASSWORD=31173e51d8f78b56606d06dfb66a1b126630cdf4711bed9427025d8979976f31 + #- SECRET_KEY=e1743ca40af220389cd1165d213e3d677f2d59c00d7b0f94e7a302c91f95f029 + #- DJANGO_ALLOWED_HOSTS=localhost,chp.thayer.dartmouth.edu + - CSRF_TRUSTED_ORIGINS=http://localhost,https://chp.thayer.dartmouth.edu + #- DJANGO_SUPERUSER_USERNAME=chp_admin + #- DJANGO_SUPERUSER_EMAIL=chp_admin@chp.com + #- DJANGO_SUPERUSER_PASSWORD=e12ff26f070819d9a72e317898148679680e6b3976e464b4102bd6eb18357919 + - SECRET_KEY_FILE=/run/secrets/django-key + #- CSRF_TRUSTED_ORIGINS_FILE=/run/secrets/csrf-trusted-origins + - DJANGO_ALLOWED_HOSTS_FILE=/run/secrets/allowed-hosts + - DJANGO_SUPERUSER_USERNAME_FILE=/run/secrets/django-superuser-username + - DJANGO_SUPERUSER_EMAIL_FILE=/run/secrets/django-superuser-email + - DJANGO_SUPERUSER_PASSWORD_FILE=/run/secrets/django-superuser-password + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/0 + command: celery -A chp_api worker -Q chp_api --loglevel=info + depends_on: + - api + - redis + + dashboard: + build: + context: ./chp_api + dockerfile: 
Dockerfile
+    restart: always
+    secrets:
+      - db-password
+      - django-key
+      - allowed-hosts
+      - django-superuser-username
+      - django-superuser-email
+      - django-superuser-password
+    environment:
+      - POSTGRES_DB=chpapi
+      - POSTGRES_USER=chpapi_user
+      - POSTGRES_PASSWORD_FILE=/run/secrets/db-password
+      - POSTGRES_HOST=db
+      - POSTGRES_PORT=5432
+      - DEBUG=0
+      # For Helm testing purposes
+      #- POSTGRES_PASSWORD=31173e51d8f78b56606d06dfb66a1b126630cdf4711bed9427025d8979976f31
+      #- SECRET_KEY=e1743ca40af220389cd1165d213e3d677f2d59c00d7b0f94e7a302c91f95f029
+      #- DJANGO_ALLOWED_HOSTS=localhost,chp.thayer.dartmouth.edu
+      - CSRF_TRUSTED_ORIGINS=http://localhost,https://chp.thayer.dartmouth.edu
+      #- DJANGO_SUPERUSER_USERNAME=chp_admin
+      #- DJANGO_SUPERUSER_EMAIL=chp_admin@chp.com
+      #- DJANGO_SUPERUSER_PASSWORD=e12ff26f070819d9a72e317898148679680e6b3976e464b4102bd6eb18357919
+      - SECRET_KEY_FILE=/run/secrets/django-key
+      #- CSRF_TRUSTED_ORIGINS_FILE=/run/secrets/csrf-trusted-origins
+      - DJANGO_ALLOWED_HOSTS_FILE=/run/secrets/allowed-hosts
+      - DJANGO_SUPERUSER_USERNAME_FILE=/run/secrets/django-superuser-username
+      - DJANGO_SUPERUSER_EMAIL_FILE=/run/secrets/django-superuser-email
+      - DJANGO_SUPERUSER_PASSWORD_FILE=/run/secrets/django-superuser-password
+      - CELERY_BROKER_URL=redis://redis:6379/0
+      - CELERY_RESULT_BACKEND=redis://redis:6379/0
+    command: celery -A chp_api --broker=redis://redis:6379/0 flower --port=5555
+    ports:
+      - 5556:5555
+    depends_on:
+      - api
+      - redis
+      - worker-api
+
+  redis:
+    restart: always
+    image: redis:6-alpine
+
+  db:
+    image: postgres
+    restart: always
+    secrets:
+      - db-password
+    volumes:
+      - db-data:/var/lib/postgresql/data
+    environment:
+      - POSTGRES_DB=chpapi
+      - POSTGRES_USER=chpapi_user
+      - POSTGRES_PASSWORD_FILE=/run/secrets/db-password
+      # For Helm testing purposes
+      #- POSTGRES_PASSWORD=31173e51d8f78b56606d06dfb66a1b126630cdf4711bed9427025d8979976f31
+    expose:
+      - 5432
+    healthcheck:
+      test: [ "CMD-SHELL", "pg_isready -d chpapi -U chpapi_user" ]
+ 
interval: 10s + timeout: 5s + retries: 5 + + static-fs: + image: halverneus/static-file-server:latest + restart: always + environment: + - FOLDER=/var/www + - DEBUG=true + expose: + - 8080 + volumes: + - static-files:/var/www/static + +volumes: + db-data: + static-files: + +secrets: + allowed-hosts: + file: secrets/chp_api/allowed_hosts.txt + csrf-trusted-origins: + file: secrets/chp_api/csrf_trusted_origins.txt + db-password: + file: secrets/db/password.txt + django-key: + file: secrets/chp_api/secret_key.txt + django-superuser-username: + file: secrets/chp_api/django_superuser_username.txt + django-superuser-email: + file: secrets/chp_api/django_superuser_email.txt + django-superuser-password: + file: secrets/chp_api/django_superuser_password.txt diff --git a/compose.gennifer.yaml b/compose.gennifer.yaml new file mode 100644 index 0000000..28230bd --- /dev/null +++ b/compose.gennifer.yaml @@ -0,0 +1,231 @@ +version: '3.8' + +services: + frontend: + build: + context: ./gennifer/frontend + dockerfile: Dockerfile + restart: always + ports: + - 3000:3000 + environment: + - NEXTAUTH_SECRET=X/NTPIqf088gXiYFi7WF0iH3NRJRPE3nZ0oOkRXf5es= + - NEXTAUTH_URL=https://chp.thayer.dartmouth.edu + - NEXTAUTH_URL_INTERNAL=http://127.0.0.1:3000 + - CREDENTIALS_URL=https://chp.thayer.dartmouth.edu/o/token/ + - GENNIFER_CLIENT_ID=jHM4ETk5wi2WUVPElpMFJtZqwY2oBKHVMmTsY9ry + - GENNIFER_CLIENT_SECRET=hY0XfS8YLGMojWuvUOPga4sJpEO9isltF7Xk7wXjyFHwWmxkifRXcPbnmhUM0oVO4Zlz349jbtBePIlaafkWubReqEBCoIcCzaZLa2a9pIlq55yow2TBvMDHnImrXvig + - GENNIFER_USER_DETAILS_URL=https://chp.thayer.dartmouth.edu/users/me/ + - GENNIFER_BASE_URL=https://chp.thayer.dartmouth.edu/gennifer/api/ + - NEXT_PUBLIC_GENNIFER_BASE_URL=https://chp.thayer.dartmouth.edu/gennifer/api/ + + pidc: + build: + context: ./gennifer/pidc + dockerfile: Dockerfile + restart: always + user: gennifer_user + ports: + - 5004:5000 + secrets: + - gennifer_key + environment: + - SECRET_KEY_FILE=/run/secrets/gennifer_key + - 
CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/0 + command: gunicorn -c gunicorn.config.py -b 0.0.0.0:5000 'pidc:create_app()' + #command: flask --app pidc run --debug --host 0.0.0.0 + depends_on: + - redis + + worker-pidc: + build: + context: ./gennifer/pidc + dockerfile: Dockerfile + restart: always + command: celery --app pidc.tasks.celery worker -Q pidc --loglevel=info + environment: + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/0 + depends_on: + - pidc + - redis + + grisli: + build: + context: ./gennifer/grisli + dockerfile: Dockerfile + restart: always + user: gennifer_user + ports: + - 5005:5000 + secrets: + - gennifer_key + environment: + - SECRET_KEY_FILE=/run/secrets/gennifer_key + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/0 + - PYTHONUNBUFFERED=1 + command: gunicorn -c gunicorn.config.py -b 0.0.0.0:5000 'grisli:create_app()' + #command: flask --app grisli run --debug --host 0.0.0.0 + depends_on: + - redis + + worker-grisli: + build: + context: ./gennifer/grisli + dockerfile: Dockerfile + restart: always + command: celery --app grisli.tasks.celery worker -Q grisli --loglevel=info + environment: + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/0 + - PYTHONUNBUFFERED=1 + depends_on: + - grisli + - redis + + genie3: + build: + context: ./gennifer/genie3 + dockerfile: Dockerfile + restart: always + user: gennifer_user + ports: + - 5006:5000 + secrets: + - gennifer_key + environment: + - SECRET_KEY_FILE=/run/secrets/gennifer_key + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/0 + command: gunicorn -c gunicorn.config.py -b 0.0.0.0:5000 'genie3:create_app()' + #command: flask --app genie3 run --debug --host 0.0.0.0 + depends_on: + - redis + + worker-genie3: + build: + context: ./gennifer/genie3 + dockerfile: Dockerfile + restart: always + command: celery --app 
genie3.tasks.celery worker -Q genie3 --loglevel=info + environment: + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/0 + depends_on: + - genie3 + - redis + + grnboost2: + build: + context: ./gennifer/grnboost2 + dockerfile: Dockerfile + restart: always + user: gennifer_user + ports: + - 5007:5000 + secrets: + - gennifer_key + environment: + - SECRET_KEY_FILE=/run/secrets/gennifer_key + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/0 + command: gunicorn -c gunicorn.config.py -b 0.0.0.0:5000 'grnboost2:create_app()' + #command: flask --app grnboost2 run --debug --host 0.0.0.0 + depends_on: + - redis + + worker-grnboost2: + build: + context: ./gennifer/grnboost2 + dockerfile: Dockerfile + restart: always + command: celery --app grnboost2.tasks.celery worker -Q grnboost2 --loglevel=info + environment: + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/0 + depends_on: + - grnboost2 + - redis + + bkb-grn: + build: + context: ./gennifer/bkb-grn + dockerfile: Dockerfile + restart: always + user: gennifer_user + ports: + - 5008:5000 + secrets: + - gennifer_key + environment: + - PYTHONUNBUFFERED=1 + - SECRET_KEY_FILE=/run/secrets/gennifer_key + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/0 + command: gunicorn -c gunicorn.config.py -b 0.0.0.0:5000 'bkb_grn:create_app()' + #command: flask --app bkb_grn run --debug --host 0.0.0.0 + depends_on: + - redis + + worker-bkb-grn: + build: + context: ./gennifer/bkb-grn + dockerfile: Dockerfile + restart: always + secrets: + - gurobi_lic + command: celery --app bkb_grn.tasks.celery worker -Q bkb_grn --loglevel=info + environment: + - PYTHONUNBUFFERED=1 + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/0 + - GRB_LICENSE_FILE=/run/secrets/gurobi_lic + depends_on: + - bkb-grn + - redis + + annotator: + build: + context: ./gennifer/annotator + 
dockerfile: Dockerfile + restart: always + user: gennifer_user + ports: + - 5009:5000 + secrets: + - gennifer_key + - openai_api_key + environment: + - SECRET_KEY_FILE=/run/secrets/gennifer_key + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/0 + - OPENAI_API_KEY_FILE=/run/secrets/openai_api_key + command: gunicorn -c gunicorn.config.py -b 0.0.0.0:5000 'annotator:create_app()' + #command: flask --app annotator run --debug --host 0.0.0.0 + depends_on: + - redis + + worker-annotator: + build: + context: ./gennifer/annotator + dockerfile: Dockerfile + command: celery --app annotator.tasks.celery worker -Q annotation --loglevel=info + secrets: + - openai_api_key + environment: + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/0 + - OPENAI_API_KEY_FILE=/run/secrets/openai_api_key + depends_on: + - annotator + - redis + +secrets: + gennifer_key: + file: secrets/gennifer/secret_key.txt + gurobi_lic: + file: secrets/gennifer/gurobi.lic + openai_api_key: + file: secrets/gennifer/openai_api_key.txt diff --git a/copy-migrations b/copy-migrations new file mode 100755 index 0000000..7f9244c --- /dev/null +++ b/copy-migrations @@ -0,0 +1,5 @@ +#!/bin/bash + +docker compose -f compose.chp-api.yaml run -v migrations:/home/migrations \ + --user root api \ + bash -c "python3 manage.py makemigrations && cp -r /home/chp_api/web/dispatcher/migrations /home/migrations/dispatcher && cp -r /home/chp_api/web/gennifer/migrations /home/migrations/gennifer && cp -r /home/chp_api/web/users/migrations /home/migrations/users" diff --git a/deploy/chp-api/Chart.yaml b/deploy/chp-api/Chart.yaml index 61b8921..6066d70 100644 --- a/deploy/chp-api/Chart.yaml +++ b/deploy/chp-api/Chart.yaml @@ -15,7 +15,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. 
# Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.0 +version: 0.1.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to diff --git a/deploy/chp-api/Jenkinsfile b/deploy/chp-api/Jenkinsfile index ac12cbd..799a5de 100644 --- a/deploy/chp-api/Jenkinsfile +++ b/deploy/chp-api/Jenkinsfile @@ -1,11 +1,10 @@ pipeline { options { timestamps() - skipDefaultCheckout() disableConcurrentBuilds() } agent { - node { label 'translator && aws && build' } + node { label 'translator && aws && build && chp' } } parameters { string(name: 'BUILD_VERSION', defaultValue: '', description: 'The build version to deploy (optional)') @@ -16,7 +15,7 @@ pipeline { pollSCM('H/5 * * * *') } environment { - DOCKER_REPO_NAME = "translator-ea-chp-api" + DOCKER_REPO_NAME = "853771734544.dkr.ecr.us-east-1.amazonaws.com/translator-ea-chp-api" } stages { stage('Build Version'){ @@ -33,42 +32,49 @@ pipeline { } } } - stage('Checkout source code') { - steps { - cleanWs() - checkout scm - } - } stage('Build Docker') { when { expression { return env.BUILD == 'true' }} steps { script { - docker.build(env.DOCKER_REPO_NAME, "--no-cache -f ./chp_api/Dockerfile.prod ./chp_api") - docker.withRegistry('https://853771734544.dkr.ecr.us-east-1.amazonaws.com', 'ecr:us-east-1:aws-ifx-deploy') { - docker.image(env.DOCKER_REPO_NAME).push("${BUILD_VERSION}") + dir('chp_api') { + script { + docker.build(env.DOCKER_REPO_NAME, "--no-cache -f ./Dockerfile ./") + } } + sh 'docker login -u AWS -p $(aws ecr get-login-password --region us-east-1) 853771734544.dkr.ecr.us-east-1.amazonaws.com' + docker.image(env.DOCKER_REPO_NAME).push("${BUILD_VERSION}") + sh 'cp deploy/chp-api/configs/nginx.conf deploy/chp-api/nginx/' + docker.build(env.DOCKER_REPO_NAME, "--no-cache ./deploy/chp-api/nginx") + docker.image(env.DOCKER_REPO_NAME).push("${BUILD_VERSION}-nginx") sh 
''' - cp deploy/chp-api/configs/nginx.conf deploy/chp-api/nginx/ + docker pull halverneus/static-file-server:latest + docker tag halverneus/static-file-server:latest $DOCKER_REPO_NAME ''' - docker.build(env.DOCKER_REPO_NAME, "--no-cache ./deploy/chp-api/nginx") - docker.withRegistry('https://853771734544.dkr.ecr.us-east-1.amazonaws.com', 'ecr:us-east-1:aws-ifx-deploy') { - docker.image(env.DOCKER_REPO_NAME).push("${BUILD_VERSION}-nginx") - } + docker.image(env.DOCKER_REPO_NAME).push("${BUILD_VERSION}-staticfs") } } } stage('Deploy to AWS EKS') { + agent { label 'translator && ci && deploy'} steps { - configFileProvider([ - configFile(fileId: 'values-ci.yaml', targetLocation: 'deploy/chp-api/values.ncats.yaml') - ]){ - withAWS(credentials:'aws-ifx-deploy') - { - sh ''' + configFileProvider([ + configFile(fileId: 'values-ci.yaml', targetLocation: 'deploy/chp-api/values-ncats.yaml'), + configFile(fileId: 'prepare.sh', targetLocation: 'deploy/chp-api/prepare.sh') + ]){ + script { + sh '''#!/bin/bash aws --region ${AWS_REGION} eks update-kubeconfig --name ${KUBERNETES_CLUSTER_NAME} - cd deploy/chp-api && /bin/bash deploy.sh + cd deploy/chp-api + source prepare.sh + /bin/bash deploy.sh ''' - } + } + } + } + post { + always { + echo " Clean up the workspace in deploy node!" 
+ cleanWs() } } } diff --git a/deploy/chp-api/configs/nginx.conf b/deploy/chp-api/configs/nginx.conf index 2a2e900..3efc112 100644 --- a/deploy/chp-api/configs/nginx.conf +++ b/deploy/chp-api/configs/nginx.conf @@ -2,6 +2,10 @@ upstream chp_api_app { server localhost:8000; } +upstream chp_staticfs { + server localhost:8080; +} + server { listen 80; @@ -16,8 +20,11 @@ server { proxy_send_timeout 360; proxy_connect_timeout 360; } - - location /staticfiles/ { - alias /home/chp_api/web/staticfiles/; - } + + location /static { + proxy_pass http://chp_staticfs; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + } } diff --git a/deploy/chp-api/deploy.sh b/deploy/chp-api/deploy.sh index eb2a20d..5c062dc 100644 --- a/deploy/chp-api/deploy.sh +++ b/deploy/chp-api/deploy.sh @@ -22,7 +22,15 @@ do rm values.yaml.bak done +sed -i.bak \ + -e "s/APP_SECRET_KEY_VALUE/$APP_SECRET_KEY/g" \ + -e "s/DB_USERNAME_VALUE/$DB_USERNAME/g" \ + -e "s/DB_PASSWORD_VALUE/$DB_PASSWORD/g" \ + -e "s/DJANGO_SUPERUSER_PASSWORD_VALUE/$DJANGO_SUPERUSER_PASSWORD/g" \ + values-ncats.yaml +rm values-ncats.yaml.bak + kubectl apply -f namespace.yaml # deploy helm chart -helm -n ${namespace} upgrade --install ${projectName} -f values.ncats.yaml ./ \ No newline at end of file +helm -n ${namespace} upgrade --install ${projectName} -f values-ncats.yaml ./ \ No newline at end of file diff --git a/deploy/chp-api/static-file-server/Dockerfile b/deploy/chp-api/static-file-server/Dockerfile new file mode 100755 index 0000000..ff58c95 --- /dev/null +++ b/deploy/chp-api/static-file-server/Dockerfile @@ -0,0 +1,41 @@ +################################################################################ +## GO BUILDER +################################################################################ +FROM golang:1.20.2 as builder + +ENV VERSION 1.8.8 +ENV CGO_ENABLED 0 +ENV BUILD_DIR /build + +RUN mkdir -p ${BUILD_DIR} +WORKDIR ${BUILD_DIR} 
+ +COPY go.* ./ +RUN go mod download +COPY . . + +RUN go test -cover ./... +RUN go build -a -tags netgo -installsuffix netgo -ldflags "-s -w -X github.com/halverneus/static-file-server/cli/version.version=${VERSION}" -o /serve /build/bin/serve + +RUN adduser --system --no-create-home --uid 1000 --shell /usr/sbin/nologin static + +################################################################################ +## DEPLOYMENT CONTAINER +################################################################################ +FROM scratch + +EXPOSE 8080 +COPY --from=builder /serve / +COPY --from=builder /etc/passwd /etc/passwd + +USER static +ENTRYPOINT ["/serve"] +CMD [] + +# Metadata +LABEL life.apets.vendor="Halverneus" \ + life.apets.url="https://github.com/halverneus/static-file-server" \ + life.apets.name="Static File Server" \ + life.apets.description="A tiny static file server" \ + life.apets.version="v1.8.8" \ + life.apets.schema-version="1.0" diff --git a/deploy/chp-api/static-file-server/Dockerfile.all b/deploy/chp-api/static-file-server/Dockerfile.all new file mode 100644 index 0000000..79d17c7 --- /dev/null +++ b/deploy/chp-api/static-file-server/Dockerfile.all @@ -0,0 +1,26 @@ +FROM golang:1.20.2 as builder + +ENV VERSION 1.8.8 +ENV BUILD_DIR /build +ENV CGO_ENABLED 0 + +RUN mkdir -p ${BUILD_DIR} +WORKDIR ${BUILD_DIR} + +COPY . . +RUN go test -cover ./... 
+RUN GOOS=linux GOARCH=amd64 go build -a -tags netgo -installsuffix netgo -ldflags "-s -w -X github.com/halverneus/static-file-server/cli/version.version=${VERSION}" -o pkg/linux-amd64/serve /build/bin/serve +RUN GOOS=linux GOARCH=386 go build -a -tags netgo -installsuffix netgo -ldflags "-s -w -X github.com/halverneus/static-file-server/cli/version.version=${VERSION}" -o pkg/linux-i386/serve /build/bin/serve +RUN GOOS=linux GOARCH=arm GOARM=6 go build -a -tags netgo -installsuffix netgo -ldflags "-s -w -X github.com/halverneus/static-file-server/cli/version.version=${VERSION}" -o pkg/linux-arm6/serve /build/bin/serve +RUN GOOS=linux GOARCH=arm GOARM=7 go build -a -tags netgo -installsuffix netgo -ldflags "-s -w -X github.com/halverneus/static-file-server/cli/version.version=${VERSION}" -o pkg/linux-arm7/serve /build/bin/serve +RUN GOOS=linux GOARCH=arm64 go build -a -tags netgo -installsuffix netgo -ldflags "-s -w -X github.com/halverneus/static-file-server/cli/version.version=${VERSION}" -o pkg/linux-arm64/serve /build/bin/serve +RUN GOOS=darwin GOARCH=amd64 go build -a -tags netgo -installsuffix netgo -ldflags "-s -w -X github.com/halverneus/static-file-server/cli/version.version=${VERSION}" -o pkg/darwin-amd64/serve /build/bin/serve +RUN GOOS=windows GOARCH=amd64 go build -a -tags netgo -installsuffix netgo -ldflags "-s -w -X github.com/halverneus/static-file-server/cli/version.version=${VERSION}" -o pkg/win-amd64/serve.exe /build/bin/serve + +# Metadata +LABEL life.apets.vendor="Halverneus" \ + life.apets.url="https://github.com/halverneus/static-file-server" \ + life.apets.name="Static File Server" \ + life.apets.description="A tiny static file server" \ + life.apets.version="v1.8.8" \ + life.apets.schema-version="1.0" diff --git a/deploy/chp-api/static-file-server/LICENSE b/deploy/chp-api/static-file-server/LICENSE new file mode 100644 index 0000000..dd8824a --- /dev/null +++ b/deploy/chp-api/static-file-server/LICENSE @@ -0,0 +1,21 @@ +MIT License + 
+Copyright (c) 2017 Jeromy Streets + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/deploy/chp-api/static-file-server/README.md b/deploy/chp-api/static-file-server/README.md new file mode 100644 index 0000000..abd3757 --- /dev/null +++ b/deploy/chp-api/static-file-server/README.md @@ -0,0 +1,153 @@ +# static-file-server + + + + + +## Introduction + +Tiny, simple static file server using environment variables for configuration. +Install from any of the following locations: + +- Docker Hub: https://hub.docker.com/r/halverneus/static-file-server/ +- GitHub: https://github.com/halverneus/static-file-server + +## Configuration + +### Environment Variables + +Default values are shown with the associated environment variable. + +```bash +# Enables resource access from any domain. +CORS=false + +# Enable debugging for troubleshooting. If set to 'true' this prints extra +# information during execution. 
IMPORTANT NOTE: The configuration summary is
+# printed to stdout while logs generated during execution are printed to stderr.
+DEBUG=false
+
+# Optional Hostname for binding. Leave blank to accept any incoming HTTP request
+# on the prescribed port.
+HOST=
+
+# If assigned, must be a valid port number.
+PORT=8080
+
+# When set to 'true' the index.html file in the folder will be served. And
+# the file list will not be served.
+ALLOW_INDEX=true
+
+# Automatically serve the index of file list for a given directory (default).
+SHOW_LISTING=true
+
+# Folder with the content to serve.
+FOLDER=/web
+
+# URL path prefix. If 'my.file' is in the root of $FOLDER and $URL_PREFIX is
+# '/my/place' then file is retrieved with 'http://$HOST:$PORT/my/place/my.file'.
+URL_PREFIX=
+
+# Paths to the TLS certificate and key. If one is set then both must be set. If
+# both set then files are served using HTTPS. If neither are set then files are
+# served using HTTP.
+TLS_CERT=
+TLS_KEY=
+
+# If TLS certificates are set then the minimum TLS version may also be set. If
+# the value isn't set then the default minimum TLS version is 1.0. Allowed
+# values include "TLS10", "TLS11", "TLS12" and "TLS13" for TLS1.0, TLS1.1,
+# TLS1.2 and TLS1.3, respectively. The value is not case-sensitive.
+TLS_MIN_VERS=
+
+# List of accepted HTTP referrers. Return 403 if HTTP header `Referer` does not
+# match prefixes provided in the list.
+# Examples:
+# 'REFERRERS=http://localhost,https://...,https://another.name'
+# To accept missing referrer header, add a blank entry (start comma):
+# 'REFERRERS=,http://localhost,https://another.name'
+REFERRERS=
+
+# Use key / code parameter in the request URL for access control. The code is
+# computed by requested PATH and your key. 
+# Example: +# ACCESS_KEY=username +# To access your file, either access: +# http://$HOST:$PORT/my/place/my.file?key=username +# or access (md5sum of "/my/place/my.fileusername"): +# http://$HOST:$PORT/my/place/my.file?code=44356A355E89D9EE7B2D5687E48024B0 +ACCESS_KEY= +``` + +### YAML Configuration File + +YAML settings are individually overridden by the corresponding environment +variable. The following is an example configuration file with defaults. Pass in +the path to the configuration file using the command line option +('-c', '-config', '--config'). + +```yaml +cors: false +debug: false +folder: /web +host: "" +port: 8080 +referrers: [] +show-listing: true +tls-cert: "" +tls-key: "" +tls-min-vers: "" +url-prefix: "" +access-key: "" +``` + +Example configuration with possible alternative values: + +```yaml +debug: true +folder: /var/www +port: 80 +referrers: + - http://localhost + - https://mydomain.com +``` + +## Deployment + +### Without Docker + +```bash +PORT=8888 FOLDER=. ./serve +``` + +Files can then be accessed by going to http://localhost:8888/my/file.txt + +### With Docker + +```bash +docker run -d \ + -v /my/folder:/web \ + -p 8080:8080 \ + halverneus/static-file-server:latest +``` + +This will serve the folder "/my/folder" over http://localhost:8080/my/file.txt + +Any of the variables can also be modified: + +```bash +docker run -d \ + -v /home/me/dev/source:/content/html \ + -v /home/me/dev/files:/content/more/files \ + -e FOLDER=/content \ + -p 8080:8080 \ + halverneus/static-file-server:latest +``` + +### Getting Help + +```bash +./serve help +# OR +docker run -it halverneus/static-file-server:latest help +``` diff --git a/deploy/chp-api/static-file-server/bin/serve/main.go b/deploy/chp-api/static-file-server/bin/serve/main.go new file mode 100644 index 0000000..127b879 --- /dev/null +++ b/deploy/chp-api/static-file-server/bin/serve/main.go @@ -0,0 +1,13 @@ +package main + +import ( + "log" + + "github.com/halverneus/static-file-server/cli" +) 
+ +func main() { + if err := cli.Execute(); nil != err { + log.Fatalf("Error: %v\n", err) + } +} diff --git a/deploy/chp-api/static-file-server/cli/args.go b/deploy/chp-api/static-file-server/cli/args.go new file mode 100644 index 0000000..3e160f9 --- /dev/null +++ b/deploy/chp-api/static-file-server/cli/args.go @@ -0,0 +1,27 @@ +package cli + +// Args parsed from the command-line. +type Args []string + +// Parse command-line arguments into Args. Value is returned to support daisy +// chaining. +func Parse(values []string) Args { + args := Args(values) + return args +} + +// Matches is used to determine if the arguments match the provided pattern. +func (args Args) Matches(pattern ...string) bool { + // If lengths don't match then nothing does. + if len(pattern) != len(args) { + return false + } + + // Compare slices using '*' as a wildcard. + for index, value := range pattern { + if "*" != value && value != args[index] { + return false + } + } + return true +} diff --git a/deploy/chp-api/static-file-server/cli/args_test.go b/deploy/chp-api/static-file-server/cli/args_test.go new file mode 100644 index 0000000..90e5679 --- /dev/null +++ b/deploy/chp-api/static-file-server/cli/args_test.go @@ -0,0 +1,81 @@ +package cli + +import ( + "testing" +) + +func TestParse(t *testing.T) { + matches := func(args Args, orig []string) bool { + if nil == orig { + return nil == args + } + if len(orig) != len(args) { + return false + } + for index, value := range args { + if orig[index] != value { + return false + } + } + return true + } + + testCases := []struct { + name string + value []string + }{ + {"Nil arguments", nil}, + {"No arguments", []string{}}, + {"Arguments", []string{"first", "second", "*"}}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + if args := Parse(tc.value); !matches(args, tc.value) { + t.Errorf("Expected [%v] but got [%v]", tc.value, args) + } + }) + } +} + +func TestMatches(t *testing.T) { + testCases := []struct { + name 
string
+		value   []string
+		pattern []string
+		result  bool
+	}{
+		{"Nil args and nil pattern", nil, nil, true},
+		{"No args and nil pattern", []string{}, nil, true},
+		{"Nil args and no pattern", nil, []string{}, true},
+		{"No args and no pattern", []string{}, []string{}, true},
+		{"Nil args and pattern", nil, []string{"test"}, false},
+		{"No args and pattern", []string{}, []string{"test"}, false},
+		{"Args and nil pattern", []string{"test"}, nil, false},
+		{"Args and no pattern", []string{"test"}, []string{}, false},
+		{"Simple single compare", []string{"test"}, []string{"test"}, true},
+		{"Simple double compare", []string{"one", "two"}, []string{"one", "two"}, true},
+		{"Bad single", []string{"one"}, []string{"two"}, false},
+		{"Bad double", []string{"one", "two"}, []string{"one", "owt"}, false},
+		{"Count mismatch", []string{"one", "two"}, []string{"one"}, false},
+		{"Nil args and wild", nil, []string{"*"}, false},
+		{"No args and wild", []string{}, []string{"*"}, false},
+		{"Single arg and wild", []string{"one"}, []string{"*"}, true},
+		{"Double arg and first wild", []string{"one", "two"}, []string{"*", "two"}, true},
+		{"Double arg and second wild", []string{"one", "two"}, []string{"one", "*"}, true},
+		{"Double arg and first wild mismatched", []string{"one", "two"}, []string{"*", "owt"}, false},
+		{"Double arg and second wild mismatched", []string{"one", "two"}, []string{"eno", "*"}, false},
+		{"Double arg and double wild", []string{"one", "two"}, []string{"*", "*"}, true},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			args := Parse(tc.value)
+			if resp := args.Matches(tc.pattern...); tc.result != resp {
+				msg := "For arguments [%v] matched to pattern [%v] expected " +
+					"%t but got %t"
+				t.Errorf(msg, tc.value, tc.pattern, tc.result, resp)
+			}
+		})
+	}
+}
diff --git a/deploy/chp-api/static-file-server/cli/execute.go b/deploy/chp-api/static-file-server/cli/execute.go
new file mode 100644
index 0000000..cf54266
--- /dev/null
+++ 
b/deploy/chp-api/static-file-server/cli/execute.go @@ -0,0 +1,95 @@ +package cli + +import ( + "flag" + "fmt" + + "github.com/halverneus/static-file-server/cli/help" + "github.com/halverneus/static-file-server/cli/server" + "github.com/halverneus/static-file-server/cli/version" + "github.com/halverneus/static-file-server/config" +) + +var ( + option struct { + configFile string + helpFlag bool + versionFlag bool + } +) + +// Assignments used to simplify testing. +var ( + selectRoutine = selectionRoutine + unknownArgsFunc = unknownArgs + runServerFunc = server.Run + runHelpFunc = help.Run + runVersionFunc = version.Run + loadConfig = config.Load +) + +func init() { + setupFlags() +} + +func setupFlags() { + flag.StringVar(&option.configFile, "config", "", "") + flag.StringVar(&option.configFile, "c", "", "") + flag.BoolVar(&option.helpFlag, "help", false, "") + flag.BoolVar(&option.helpFlag, "h", false, "") + flag.BoolVar(&option.versionFlag, "version", false, "") + flag.BoolVar(&option.versionFlag, "v", false, "") +} + +// Execute CLI arguments. +func Execute() (err error) { + // Parse flag options, then parse commands arguments. + flag.Parse() + args := Parse(flag.Args()) + + job := selectRoutine(args) + return job() +} + +func selectionRoutine(args Args) func() error { + switch { + + // serve help + // serve --help + // serve -h + case args.Matches("help") || option.helpFlag: + return runHelpFunc + + // serve version + // serve --version + // serve -v + case args.Matches("version") || option.versionFlag: + return runVersionFunc + + // serve + case args.Matches(): + return withConfig(runServerFunc) + + // Unknown arguments. 
+ default: + return unknownArgsFunc(args) + } +} + +func unknownArgs(args Args) func() error { + return func() error { + return fmt.Errorf( + "unknown arguments provided [%v], try: 'help'", + args, + ) + } +} + +func withConfig(routine func() error) func() error { + return func() (err error) { + if err = loadConfig(option.configFile); nil != err { + return + } + return routine() + } +} diff --git a/deploy/chp-api/static-file-server/cli/execute_test.go b/deploy/chp-api/static-file-server/cli/execute_test.go new file mode 100644 index 0000000..e8ac885 --- /dev/null +++ b/deploy/chp-api/static-file-server/cli/execute_test.go @@ -0,0 +1,162 @@ +package cli + +import ( + "errors" + "flag" + "os" + "testing" +) + +func TestSetupFlags(t *testing.T) { + app := os.Args[0] + + file := "file.txt" + wConfig := "Config (file.txt)" + + testCases := []struct { + name string + args []string + config string + help bool + version bool + }{ + {"Empty args", []string{app}, "", false, false}, + {"Help (--help)", []string{app, "--help"}, "", true, false}, + {"Help (-help)", []string{app, "-help"}, "", true, false}, + {"Help (-h)", []string{app, "-h"}, "", true, false}, + {"Version (--version)", []string{app, "--version"}, "", false, true}, + {"Version (-version)", []string{app, "-version"}, "", false, true}, + {"Version (-v)", []string{app, "-v"}, "", false, true}, + {"Config ()", []string{app, "--config", ""}, "", false, false}, + {wConfig, []string{app, "--config", file}, file, false, false}, + {wConfig, []string{app, "--config=file.txt"}, file, false, false}, + {wConfig, []string{app, "-config", file}, file, false, false}, + {wConfig, []string{app, "-config=file.txt"}, file, false, false}, + {wConfig, []string{app, "-c", file}, file, false, false}, + {"All set", []string{app, "-h", "-v", "-c", file}, file, true, true}, + } + + reset := func() { + option.configFile = "" + option.helpFlag = false + option.versionFlag = false + } + + for _, tc := range testCases { + t.Run(tc.name, 
func(t *testing.T) { + reset() + os.Args = tc.args + flag.Parse() + + if option.configFile != tc.config { + t.Errorf( + "For options [%v] expected a config file of %s but got %s", + tc.args, tc.config, option.configFile, + ) + } + if option.helpFlag != tc.help { + t.Errorf( + "For options [%v] expected help flag of %t but got %t", + tc.args, tc.help, option.helpFlag, + ) + } + if option.versionFlag != tc.version { + t.Errorf( + "For options [%v] expected version flag of %t but got %t", + tc.args, tc.version, option.versionFlag, + ) + } + }) + } +} + +func TestExecuteAndSelection(t *testing.T) { + app := os.Args[0] + + runHelpFuncError := errors.New("help") + runHelpFunc = func() error { + return runHelpFuncError + } + runVersionFuncError := errors.New("version") + runVersionFunc = func() error { + return runVersionFuncError + } + runServerFuncError := errors.New("server") + runServerFunc = func() error { + return runServerFuncError + } + unknownArgsFuncError := errors.New("unknown") + unknownArgsFunc = func(Args) func() error { + return func() error { + return unknownArgsFuncError + } + } + + reset := func() { + option.configFile = "" + option.helpFlag = false + option.versionFlag = false + } + + testCases := []struct { + name string + args []string + result error + }{ + {"Help", []string{app, "help"}, runHelpFuncError}, + {"Help", []string{app, "--help"}, runHelpFuncError}, + {"Version", []string{app, "version"}, runVersionFuncError}, + {"Version", []string{app, "--version"}, runVersionFuncError}, + {"Serve", []string{app}, runServerFuncError}, + {"Unknown", []string{app, "unknown"}, unknownArgsFuncError}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + reset() + os.Args = tc.args + + if err := Execute(); tc.result != err { + t.Errorf( + "Expected error for %v but got %v", + tc.result, err, + ) + } + }) + } +} + +func TestUnknownArgs(t *testing.T) { + errFunc := unknownArgs(Args{"unknown"}) + if err := errFunc(); nil == err { + 
t.Errorf( + "Expected a given unknown argument error but got %v", + err, + ) + } +} + +func TestWithConfig(t *testing.T) { + configError := errors.New("config") + routineError := errors.New("routine") + routine := func() error { return routineError } + + testCases := []struct { + name string + loadConfig func(string) error + result error + }{ + {"Config error", func(string) error { return configError }, configError}, + {"Routine error", func(string) error { return nil }, routineError}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + loadConfig = tc.loadConfig + errFunc := withConfig(routine) + if err := errFunc(); tc.result != err { + t.Errorf("Expected error %v but got %v", tc.result, err) + } + }) + } +} diff --git a/deploy/chp-api/static-file-server/cli/help/help.go b/deploy/chp-api/static-file-server/cli/help/help.go new file mode 100644 index 0000000..9dee86e --- /dev/null +++ b/deploy/chp-api/static-file-server/cli/help/help.go @@ -0,0 +1,190 @@ +package help + +import ( + "fmt" +) + +// Run print operation. +func Run() error { + fmt.Println(Text) + return nil +} + +var ( + // Text for directly accessing help. + Text = ` +NAME + static-file-server + +SYNOPSIS + static-file-server + static-file-server [ -c | -config | --config ] /path/to/config.yml + static-file-server [ help | -help | --help ] + static-file-server [ version | -version | --version ] + +DESCRIPTION + The Static File Server is intended to be a tiny, fast and simple solution + for serving files over HTTP. The features included are limited to make to + binding to a host name and port, selecting a folder to serve, choosing a + URL path prefix and selecting TLS certificates. If you want really awesome + reverse proxy features, I recommend Nginx. + +DEPENDENCIES + None... not even libc! + +ENVIRONMENT VARIABLES + CORS + When set to 'true' it enables resource access from any domain. 
All
+        responses will include the headers 'Access-Control-Allow-Origin' and
+        'Access-Control-Allow-Headers' with a wildcard value ('*').
+    DEBUG
+        When set to 'true' enables additional logging, including the
+        configuration used and an access log for each request. IMPORTANT NOTE:
+        The configuration summary is printed to stdout while logs generated
+        during execution are printed to stderr. Default value is 'false'.
+    FOLDER
+        The path to the folder containing the contents to be served over
+        HTTP(s). If not supplied, defaults to '/web' (for Docker reasons).
+    HOST
+        The hostname used for binding. If not supplied, contents will be served
+        to a client without regard for the hostname.
+    PORT
+        The port used for binding. If not supplied, defaults to port '8080'.
+    REFERRERS
+        A comma-separated list of accepted Referrers based on the 'Referer' HTTP
+        header. If incoming header value is not in the list, a 403 HTTP error is
+        returned. To accept requests without a 'Referer' HTTP header in addition
+        to the whitelisted values, include an empty value (either with a leading
+        comma in the environment variable or with an empty list item in the YAML
+        configuration file) as demonstrated in the second example. If not
+        supplied the 'Referer' HTTP header is ignored.
+        Examples:
+          REFERRERS='http://localhost,https://some.site,http://other.site:8080'
+          REFERRERS=',http://localhost,https://some.site,http://other.site:8080'
+    ALLOW_INDEX
+        When set to 'true' the index.html file in the folder (not including the
+        sub folders) will be served. And the file list will not be served.
+        For example, if the client requests 'http://127.0.0.1/' the 'index.html'
+        file in the root of the directory being served is returned. Default value
+        is 'true'.
+    SHOW_LISTING
+        Automatically serve the index file for the directory if requested. For
+        example, if the client requests 'http://127.0.0.1/' the 'index.html'
+        file in the root of the directory being served is returned. 
If the value
+        is set to 'false', the same request will return a 'NOT FOUND'. Default
+        value is 'true'.
+    TLS_CERT
+        Path to the TLS certificate file to serve files using HTTPS. If supplied
+        then TLS_KEY must also be supplied. If not supplied, contents will be
+        served via HTTP.
+    TLS_KEY
+        Path to the TLS key file to serve files using HTTPS. If supplied then
+        TLS_CERT must also be supplied. If not supplied, contents will be served
+        via HTTP.
+    TLS_MIN_VERS
+        The minimum TLS version to use. If not supplied, defaults to TLS1.0.
+        Acceptable values are 'TLS10', 'TLS11', 'TLS12' and 'TLS13' for TLS1.0,
+        TLS1.1, TLS1.2 and TLS1.3, respectively. Values are not case-sensitive.
+    URL_PREFIX
+        The prefix to use in the URL path. If supplied, then the prefix must
+        start with a forward-slash and NOT end with a forward-slash. If not
+        supplied then no prefix is used.
+
+CONFIGURATION FILE
+    Configuration can also be managed using a YAML configuration file. To select
+    the configuration values using the YAML file, pass in the path to the file
+    using the appropriate flags (-c, --config). Environment variables take
+    priority over the configuration file. The following is an example
+    configuration using the default values. 
+ + Example config.yml with defaults: + ---------------------------------------------------------------------------- + cors: false + debug: false + folder: /web + host: "" + port: 8080 + referrers: [] + show-listing: true + tls-cert: "" + tls-key: "" + tls-min-vers: "" + url-prefix: "" + ---------------------------------------------------------------------------- + + Example config.yml with possible alternative values: + ---------------------------------------------------------------------------- + debug: true + folder: /var/www + port: 80 + referrers: + - http://localhost + - https://mydomain.com + ---------------------------------------------------------------------------- + +USAGE + FILE LAYOUT + /var/www/sub/my.file + /var/www/index.html + + COMMAND + export FOLDER=/var/www/sub + static-file-server + Retrieve with: wget http://localhost:8080/my.file + wget http://my.machine:8080/my.file + + export FOLDER=/var/www + export HOST=my.machine + export PORT=80 + static-file-server + Retrieve with: wget http://my.machine/sub/my.file + + export FOLDER=/var/www + static-file-server -c config.yml + Result: Runs with values from config.yml, but with the folder being + served overridden by the FOLDER environment variable. 
+ + export FOLDER=/var/www/sub + export HOST=my.machine + export PORT=80 + export URL_PREFIX=/my/stuff + static-file-server + Retrieve with: wget http://my.machine/my/stuff/my.file + + export FOLDER=/var/www/sub + export TLS_CERT=/etc/server/my.machine.crt + export TLS_KEY=/etc/server/my.machine.key + static-file-server + Retrieve with: wget https://my.machine:8080/my.file + + export FOLDER=/var/www/sub + export PORT=443 + export TLS_CERT=/etc/server/my.machine.crt + export TLS_KEY=/etc/server/my.machine.key + export TLS_MIN_VERS=TLS12 + static-file-server + Retrieve with: wget https://my.machine/my.file + + export FOLDER=/var/www + export PORT=80 + export ALLOW_INDEX=true # Default behavior + export SHOW_LISTING=true # Default behavior + static-file-server + Retrieve 'index.html' with: wget http://my.machine/ + + export FOLDER=/var/www + export PORT=80 + export ALLOW_INDEX=true # Default behavior + export SHOW_LISTING=false + static-file-server + Retrieve 'index.html' with: wget http://my.machine/ + Returns 'NOT FOUND': wget http://my.machine/dir/ + + export FOLDER=/var/www + export PORT=80 + export ALLOW_INDEX=false + export SHOW_LISTING=false + static-file-server + Returns 'NOT FOUND': wget http://my.machine/ +` +) diff --git a/deploy/chp-api/static-file-server/cli/help/help_test.go b/deploy/chp-api/static-file-server/cli/help/help_test.go new file mode 100644 index 0000000..371673b --- /dev/null +++ b/deploy/chp-api/static-file-server/cli/help/help_test.go @@ -0,0 +1,9 @@ +package help + +import "testing" + +func TestRun(t *testing.T) { + if err := Run(); nil != err { + t.Errorf("While running help got %v", err) + } +} diff --git a/deploy/chp-api/static-file-server/cli/server/server.go b/deploy/chp-api/static-file-server/cli/server/server.go new file mode 100644 index 0000000..a1b008e --- /dev/null +++ b/deploy/chp-api/static-file-server/cli/server/server.go @@ -0,0 +1,96 @@ +package server + +import ( + "fmt" + "net/http" + + 
"github.com/halverneus/static-file-server/config" + "github.com/halverneus/static-file-server/handle" +) + +var ( + // Values to be overridden to simplify unit testing. + selectHandler = handlerSelector + selectListener = listenerSelector +) + +// Run server. +func Run() error { + if config.Get.Debug { + config.Log() + } + // Choose and set the appropriate, optimized static file serving function. + handler := selectHandler() + + // Serve files over HTTP or HTTPS based on paths to TLS files being + // provided. + listener := selectListener() + + binding := fmt.Sprintf("%s:%d", config.Get.Host, config.Get.Port) + return listener(binding, handler) +} + +// handlerSelector returns the appropriate request handler based on +// configuration. +func handlerSelector() (handler http.HandlerFunc) { + var serveFileHandler handle.FileServerFunc + + serveFileHandler = http.ServeFile + if config.Get.Debug { + serveFileHandler = handle.WithLogging(serveFileHandler) + } + + if 0 != len(config.Get.Referrers) { + serveFileHandler = handle.WithReferrers( + serveFileHandler, config.Get.Referrers, + ) + } + + // Choose and set the appropriate, optimized static file serving function. + if 0 == len(config.Get.URLPrefix) { + handler = handle.Basic(serveFileHandler, config.Get.Folder) + } else { + handler = handle.Prefix( + serveFileHandler, + config.Get.Folder, + config.Get.URLPrefix, + ) + } + + // Determine whether index files should hidden. + if !config.Get.ShowListing { + if config.Get.AllowIndex { + handler = handle.PreventListings(handler, config.Get.Folder, config.Get.URLPrefix) + } else { + handler = handle.IgnoreIndex(handler) + } + } + // If configured, apply wildcard CORS support. + if config.Get.Cors { + handler = handle.AddCorsWildcardHeaders(handler) + } + + // If configured, apply key code access control. 
+ if "" != config.Get.AccessKey { + handler = handle.AddAccessKey(handler, config.Get.AccessKey) + } + + return +} + +// listenerSelector returns the appropriate listener handler based on +// configuration. +func listenerSelector() (listener handle.ListenerFunc) { + // Serve files over HTTP or HTTPS based on paths to TLS files being + // provided. + if 0 < len(config.Get.TLSCert) { + handle.SetMinimumTLSVersion(config.Get.TLSMinVers) + listener = handle.TLSListening( + config.Get.TLSCert, + config.Get.TLSKey, + ) + } else { + listener = handle.Listening() + } + return +} diff --git a/deploy/chp-api/static-file-server/cli/server/server_test.go b/deploy/chp-api/static-file-server/cli/server/server_test.go new file mode 100644 index 0000000..9f78554 --- /dev/null +++ b/deploy/chp-api/static-file-server/cli/server/server_test.go @@ -0,0 +1,127 @@ +package server + +import ( + "errors" + "net/http" + "testing" + + "github.com/halverneus/static-file-server/config" + "github.com/halverneus/static-file-server/handle" +) + +func TestRun(t *testing.T) { + listenerError := errors.New("listener") + selectListener = func() handle.ListenerFunc { + return func(string, http.HandlerFunc) error { + return listenerError + } + } + + config.Get.Debug = false + if err := Run(); listenerError != err { + t.Errorf("Without debug expected %v but got %v", listenerError, err) + } + + config.Get.Debug = true + if err := Run(); listenerError != err { + t.Errorf("With debug expected %v but got %v", listenerError, err) + } +} + +func TestHandlerSelector(t *testing.T) { + // This test only exercises function branches. 
+ testFolder := "/web" + testPrefix := "/url/prefix" + var ignoreReferrer []string + testReferrer := []string{"http://localhost"} + testAccessKey := "access-key" + + testCases := []struct { + name string + folder string + prefix string + listing bool + debug bool + refer []string + cors bool + accessKey string + }{ + {"Basic handler w/o debug", testFolder, "", true, false, ignoreReferrer, false, ""}, + {"Prefix handler w/o debug", testFolder, testPrefix, true, false, ignoreReferrer, false, ""}, + {"Basic and hide listing handler w/o debug", testFolder, "", false, false, ignoreReferrer, false, ""}, + {"Prefix and hide listing handler w/o debug", testFolder, testPrefix, false, false, ignoreReferrer, false, ""}, + {"Basic handler w/debug", testFolder, "", true, true, ignoreReferrer, false, ""}, + {"Prefix handler w/debug", testFolder, testPrefix, true, true, ignoreReferrer, false, ""}, + {"Basic and hide listing handler w/debug", testFolder, "", false, true, ignoreReferrer, false, ""}, + {"Prefix and hide listing handler w/debug", testFolder, testPrefix, false, true, ignoreReferrer, false, ""}, + {"Basic handler w/o debug w/refer", testFolder, "", true, false, testReferrer, false, ""}, + {"Prefix handler w/o debug w/refer", testFolder, testPrefix, true, false, testReferrer, false, ""}, + {"Basic and hide listing handler w/o debug w/refer", testFolder, "", false, false, testReferrer, false, ""}, + {"Prefix and hide listing handler w/o debug w/refer", testFolder, testPrefix, false, false, testReferrer, false, ""}, + {"Basic handler w/debug w/refer w/o cors", testFolder, "", true, true, testReferrer, false, ""}, + {"Prefix handler w/debug w/refer w/o cors", testFolder, testPrefix, true, true, testReferrer, false, ""}, + {"Basic and hide listing handler w/debug w/refer w/o cors", testFolder, "", false, true, testReferrer, false, ""}, + {"Prefix and hide listing handler w/debug w/refer w/o cors", testFolder, testPrefix, false, true, testReferrer, false, ""}, + {"Basic 
handler w/debug w/refer w/cors", testFolder, "", true, true, testReferrer, true, ""}, + {"Prefix handler w/debug w/refer w/cors", testFolder, testPrefix, true, true, testReferrer, true, ""}, + {"Basic and hide listing handler w/debug w/refer w/cors", testFolder, "", false, true, testReferrer, true, ""}, + {"Prefix and hide listing handler w/debug w/refer w/cors", testFolder, testPrefix, false, true, testReferrer, true, ""}, + {"Access Key and Basic handler w/o debug", testFolder, "", true, false, ignoreReferrer, false, testAccessKey}, + {"Access Key and Prefix handler w/o debug", testFolder, testPrefix, true, false, ignoreReferrer, false, testAccessKey}, + {"Access Key and Basic and hide listing handler w/o debug", testFolder, "", false, false, ignoreReferrer, false, testAccessKey}, + {"Access Key and Prefix and hide listing handler w/o debug", testFolder, testPrefix, false, false, ignoreReferrer, false, testAccessKey}, + {"Access Key and Basic handler w/debug", testFolder, "", true, true, ignoreReferrer, false, testAccessKey}, + {"Access Key and Prefix handler w/debug", testFolder, testPrefix, true, true, ignoreReferrer, false, testAccessKey}, + {"Access Key and Basic and hide listing handler w/debug", testFolder, "", false, true, ignoreReferrer, false, testAccessKey}, + {"Access Key and Prefix and hide listing handler w/debug", testFolder, testPrefix, false, true, ignoreReferrer, false, testAccessKey}, + {"Access Key and Basic handler w/o debug w/refer", testFolder, "", true, false, testReferrer, false, testAccessKey}, + {"Access Key and Prefix handler w/o debug w/refer", testFolder, testPrefix, true, false, testReferrer, false, testAccessKey}, + {"Access Key and Basic and hide listing handler w/o debug w/refer", testFolder, "", false, false, testReferrer, false, testAccessKey}, + {"Access Key and Prefix and hide listing handler w/o debug w/refer", testFolder, testPrefix, false, false, testReferrer, false, testAccessKey}, + {"Access Key and Basic handler w/debug 
w/refer w/o cors", testFolder, "", true, true, testReferrer, false, testAccessKey}, + {"Access Key and Prefix handler w/debug w/refer w/o cors", testFolder, testPrefix, true, true, testReferrer, false, testAccessKey}, + {"Access Key and Basic and hide listing handler w/debug w/refer w/o cors", testFolder, "", false, true, testReferrer, false, testAccessKey}, + {"Access Key and Prefix and hide listing handler w/debug w/refer w/o cors", testFolder, testPrefix, false, true, testReferrer, false, testAccessKey}, + {"Access Key and Basic handler w/debug w/refer w/cors", testFolder, "", true, true, testReferrer, true, testAccessKey}, + {"Access Key and Prefix handler w/debug w/refer w/cors", testFolder, testPrefix, true, true, testReferrer, true, testAccessKey}, + {"Access Key and Basic and hide listing handler w/debug w/refer w/cors", testFolder, "", false, true, testReferrer, true, testAccessKey}, + {"Access Key and Prefix and hide listing handler w/debug w/refer w/cors", testFolder, testPrefix, false, true, testReferrer, true, testAccessKey}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + config.Get.Debug = tc.debug + config.Get.Folder = tc.folder + config.Get.ShowListing = tc.listing + config.Get.URLPrefix = tc.prefix + config.Get.Referrers = tc.refer + config.Get.Cors = tc.cors + config.Get.AccessKey = tc.accessKey + + handlerSelector() + }) + } +} + +func TestListenerSelector(t *testing.T) { + // This test only exercises function branches. 
+ testCert := "file.crt" + testKey := "file.key" + + testCases := []struct { + name string + cert string + key string + }{ + {"HTTP", "", ""}, + {"HTTPS", testCert, testKey}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + config.Get.TLSCert = tc.cert + config.Get.TLSKey = tc.key + listenerSelector() + }) + } +} diff --git a/deploy/chp-api/static-file-server/cli/version/version.go b/deploy/chp-api/static-file-server/cli/version/version.go new file mode 100644 index 0000000..b1956c4 --- /dev/null +++ b/deploy/chp-api/static-file-server/cli/version/version.go @@ -0,0 +1,24 @@ +package version + +import ( + "fmt" + "runtime" +) + +// Run print operation. +func Run() error { + fmt.Printf("%s\n%s\n", VersionText, GoVersionText) + return nil +} + +var ( + // version is the application version set during build. + version string + + // VersionText for directly accessing the static-file-server version. + VersionText = fmt.Sprintf("v%s", version) + + // GoVersionText for directly accessing the version of the Go runtime + // compiled with the static-file-server. 
+ GoVersionText = runtime.Version() +) diff --git a/deploy/chp-api/static-file-server/cli/version/version_test.go b/deploy/chp-api/static-file-server/cli/version/version_test.go new file mode 100644 index 0000000..2bc47d5 --- /dev/null +++ b/deploy/chp-api/static-file-server/cli/version/version_test.go @@ -0,0 +1,9 @@ +package version + +import "testing" + +func TestVersion(t *testing.T) { + if err := Run(); nil != err { + t.Errorf("While running version got %v", err) + } +} diff --git a/deploy/chp-api/static-file-server/config/config.go b/deploy/chp-api/static-file-server/config/config.go new file mode 100644 index 0000000..9b3ec59 --- /dev/null +++ b/deploy/chp-api/static-file-server/config/config.go @@ -0,0 +1,301 @@ +package config + +import ( + "crypto/tls" + "errors" + "fmt" + "io/ioutil" + "log" + "os" + "strconv" + "strings" + + yaml "gopkg.in/yaml.v3" +) + +var ( + // Get the desired configuration value. + Get struct { + Cors bool `yaml:"cors"` + Debug bool `yaml:"debug"` + Folder string `yaml:"folder"` + Host string `yaml:"host"` + Port uint16 `yaml:"port"` + AllowIndex bool `yaml:"allow-index"` + ShowListing bool `yaml:"show-listing"` + TLSCert string `yaml:"tls-cert"` + TLSKey string `yaml:"tls-key"` + TLSMinVers uint16 `yaml:"-"` + TLSMinVersStr string `yaml:"tls-min-vers"` + URLPrefix string `yaml:"url-prefix"` + Referrers []string `yaml:"referrers"` + AccessKey string `yaml:"access-key"` + } +) + +const ( + corsKey = "CORS" + debugKey = "DEBUG" + folderKey = "FOLDER" + hostKey = "HOST" + portKey = "PORT" + referrersKey = "REFERRERS" + allowIndexKey = "ALLOW_INDEX" + showListingKey = "SHOW_LISTING" + tlsCertKey = "TLS_CERT" + tlsKeyKey = "TLS_KEY" + tlsMinVersKey = "TLS_MIN_VERS" + urlPrefixKey = "URL_PREFIX" + accessKeyKey = "ACCESS_KEY" +) + +var ( + defaultDebug = false + defaultFolder = "/web" + defaultHost = "" + defaultPort = uint16(8080) + defaultReferrers = []string{} + defaultAllowIndex = true + defaultShowListing = true + defaultTLSCert = "" 
+ defaultTLSKey = "" + defaultTLSMinVers = "" + defaultURLPrefix = "" + defaultCors = false + defaultAccessKey = "" +) + +func init() { + // init calls setDefaults to better support testing. + setDefaults() +} + +func setDefaults() { + Get.Debug = defaultDebug + Get.Folder = defaultFolder + Get.Host = defaultHost + Get.Port = defaultPort + Get.Referrers = defaultReferrers + Get.AllowIndex = defaultAllowIndex + Get.ShowListing = defaultShowListing + Get.TLSCert = defaultTLSCert + Get.TLSKey = defaultTLSKey + Get.TLSMinVersStr = defaultTLSMinVers + Get.URLPrefix = defaultURLPrefix + Get.Cors = defaultCors + Get.AccessKey = defaultAccessKey +} + +// Load the configuration file. +func Load(filename string) (err error) { + // If no filename provided, assign envvars. + if filename == "" { + overrideWithEnvVars() + return validate() + } + + // Read contents from configuration file. + var contents []byte + if contents, err = ioutil.ReadFile(filename); nil != err { + return + } + + // Parse contents into 'Get' configuration. + if err = yaml.Unmarshal(contents, &Get); nil != err { + return + } + + overrideWithEnvVars() + return validate() +} + +// Log the current configuration. +func Log() { + // YAML marshalling should never error, but if it could, the result is that + // the contents of the configuration are not logged. + contents, _ := yaml.Marshal(&Get) + + // Log the configuration. + fmt.Println("Using the following configuration:") + fmt.Println(string(contents)) +} + +// overrideWithEnvVars the default values and the configuration file values. +func overrideWithEnvVars() { + // Assign envvars, if set. 
+ Get.Cors = envAsBool(corsKey, Get.Cors) + Get.Debug = envAsBool(debugKey, Get.Debug) + Get.Folder = envAsStr(folderKey, Get.Folder) + Get.Host = envAsStr(hostKey, Get.Host) + Get.Port = envAsUint16(portKey, Get.Port) + Get.AllowIndex = envAsBool(allowIndexKey, Get.AllowIndex) + Get.ShowListing = envAsBool(showListingKey, Get.ShowListing) + Get.TLSCert = envAsStr(tlsCertKey, Get.TLSCert) + Get.TLSKey = envAsStr(tlsKeyKey, Get.TLSKey) + Get.TLSMinVersStr = envAsStr(tlsMinVersKey, Get.TLSMinVersStr) + Get.URLPrefix = envAsStr(urlPrefixKey, Get.URLPrefix) + Get.Referrers = envAsStrSlice(referrersKey, Get.Referrers) + Get.AccessKey = envAsStr(accessKeyKey, Get.AccessKey) +} + +// validate the configuration. +func validate() error { + // If HTTPS is to be used, verify both TLS_* environment variables are set. + useTLS := false + if 0 < len(Get.TLSCert) || 0 < len(Get.TLSKey) { + if len(Get.TLSCert) == 0 || len(Get.TLSKey) == 0 { + msg := "if value for either 'TLS_CERT' or 'TLS_KEY' is set then " + + "then value for the other must also be set (values are " + + "currently '%s' and '%s', respectively)" + return fmt.Errorf(msg, Get.TLSCert, Get.TLSKey) + } + if _, err := os.Stat(Get.TLSCert); nil != err { + msg := "value of TLS_CERT is set with filename '%s' that returns %v" + return fmt.Errorf(msg, Get.TLSCert, err) + } + if _, err := os.Stat(Get.TLSKey); nil != err { + msg := "value of TLS_KEY is set with filename '%s' that returns %v" + return fmt.Errorf(msg, Get.TLSKey, err) + } + useTLS = true + } + + // Verify TLS_MIN_VERS is only (optionally) set if TLS is to be used. + Get.TLSMinVers = tls.VersionTLS10 + if useTLS { + if 0 < len(Get.TLSMinVersStr) { + var err error + if Get.TLSMinVers, err = tlsMinVersAsUint16( + Get.TLSMinVersStr, + ); nil != err { + return err + } + } + + // For logging minimum TLS version being used while debugging, backfill + // the TLSMinVersStr field. 
+ switch Get.TLSMinVers {
+ case tls.VersionTLS10:
+ Get.TLSMinVersStr = "TLS1.0"
+ case tls.VersionTLS11:
+ Get.TLSMinVersStr = "TLS1.1"
+ case tls.VersionTLS12:
+ Get.TLSMinVersStr = "TLS1.2"
+ case tls.VersionTLS13:
+ Get.TLSMinVersStr = "TLS1.3"
+ }
+ } else {
+ if 0 < len(Get.TLSMinVersStr) {
+ msg := "value for 'TLS_MIN_VERS' is set but 'TLS_CERT' and 'TLS_KEY' are not"
+ return errors.New(msg)
+ }
+ }
+
+ // If the URL path prefix is to be used, verify it is properly formatted.
+ if 0 < len(Get.URLPrefix) &&
+ (!strings.HasPrefix(Get.URLPrefix, "/") || strings.HasSuffix(Get.URLPrefix, "/")) {
+ msg := "if value for 'URL_PREFIX' is set then the value must start " +
+ "with '/' and not end with '/' (current value of '%s' vs valid " +
+ "example of '/my/prefix')"
+ return fmt.Errorf(msg, Get.URLPrefix)
+ }
+
+ return nil
+}
+
+// envAsStr returns the value of the environment variable as a string if set.
+func envAsStr(key, fallback string) string {
+ if value := os.Getenv(key); value != "" {
+ return value
+ }
+ return fallback
+}
+
+// envAsStrSlice returns the value of the environment variable as a slice of
+// strings if set.
+func envAsStrSlice(key string, fallback []string) []string {
+ if value := os.Getenv(key); value != "" {
+ return strings.Split(value, ",")
+ }
+ return fallback
+}
+
+// envAsUint16 returns the value of the environment variable as a uint16 if set.
+func envAsUint16(key string, fallback uint16) uint16 {
+ // Retrieve the string value of the environment variable. If not set,
+ // fallback is used.
+ valueStr := os.Getenv(key)
+ if valueStr == "" {
+ return fallback
+ }
+
+ // Parse the string into a uint16. 
+ base := 10 + bitSize := 16 + valueAsUint64, err := strconv.ParseUint(valueStr, base, bitSize) + if nil != err { + log.Printf( + "Invalid value for '%s': %v\nUsing fallback: %d", + key, err, fallback, + ) + return fallback + } + return uint16(valueAsUint64) +} + +// envAsBool returns the value for an environment variable or, if not set, a +// fallback value as a boolean. +func envAsBool(key string, fallback bool) bool { + // Retrieve the string value of the environment variable. If not set, + // fallback is used. + valueStr := os.Getenv(key) + if valueStr == "" { + return fallback + } + + // Parse the string into a boolean. + value, err := strAsBool(valueStr) + if nil != err { + log.Printf( + "Invalid value for '%s': %v\nUsing fallback: %t", + key, err, fallback, + ) + return fallback + } + return value +} + +// strAsBool converts the intent of the passed value into a boolean +// representation. +func strAsBool(value string) (result bool, err error) { + lvalue := strings.ToLower(value) + switch lvalue { + case "0", "false", "f", "no", "n": + result = false + case "1", "true", "t", "yes", "y": + result = true + default: + result = false + msg := "unknown conversion from string to bool for value '%s'" + err = fmt.Errorf(msg, value) + } + return +} + +// tlsMinVersAsUint16 converts the intent of the passed value into an +// enumeration for the crypto/tls package. 
+func tlsMinVersAsUint16(value string) (result uint16, err error) { + switch strings.ToLower(value) { + case "tls10": + result = tls.VersionTLS10 + case "tls11": + result = tls.VersionTLS11 + case "tls12": + result = tls.VersionTLS12 + case "tls13": + result = tls.VersionTLS13 + default: + err = fmt.Errorf("unknown value for TLS_MIN_VERS: %s", value) + } + return +} diff --git a/deploy/chp-api/static-file-server/config/config_test.go b/deploy/chp-api/static-file-server/config/config_test.go new file mode 100644 index 0000000..a4c541b --- /dev/null +++ b/deploy/chp-api/static-file-server/config/config_test.go @@ -0,0 +1,513 @@ +package config + +import ( + "crypto/tls" + "fmt" + "io/ioutil" + "os" + "strconv" + "testing" + + yaml "gopkg.in/yaml.v3" +) + +func TestLoad(t *testing.T) { + // Verify envvars are set. + testFolder := "/my/directory" + os.Setenv(folderKey, testFolder) + if err := Load(""); nil != err { + t.Errorf( + "While loading an empty file name expected no error but got %v", + err, + ) + } + if Get.Folder != testFolder { + t.Errorf( + "While loading an empty file name expected folder %s but got %s", + testFolder, Get.Folder, + ) + } + + // Verify error if file doesn't exist. + if err := Load("/this/file/should/never/exist"); nil == err { + t.Error("While loading non-existing file expected error but got nil") + } + + // Verify bad YAML returns an error. + func(t *testing.T) { + filename := "testing.tmp" + contents := []byte("{") + defer os.Remove(filename) + + if err := ioutil.WriteFile(filename, contents, 0666); nil != err { + t.Errorf("Failed to save bad YAML file with: %v\n", err) + } + if err := Load(filename); nil == err { + t.Error("While loading bad YAML expected error but got nil") + } + }(t) + + // Verify good YAML returns no error and sets value. 
+ func(t *testing.T) { + filename := "testing.tmp" + testFolder := "/test/folder" + contents := []byte(fmt.Sprintf( + `{"folder": "%s"}`, testFolder, + )) + defer os.Remove(filename) + + if err := ioutil.WriteFile(filename, contents, 0666); nil != err { + t.Errorf("Failed to save good YAML file with: %v\n", err) + } + if err := Load(filename); nil != err { + t.Errorf( + "While loading good YAML expected nil but got %v", + err, + ) + } + }(t) +} + +func TestLog(t *testing.T) { + // Test whether YAML marshalling works, as that is the only error case. + if _, err := yaml.Marshal(&Get); nil != err { + t.Errorf("While testing YAML marshalling for config Log() got %v", err) + } + Log() +} + +func TestOverrideWithEnvvars(t *testing.T) { + // Choose values that are different than defaults. + testDebug := true + testFolder := "/my/directory" + testHost := "apets.life" + testPort := uint16(666) + testAllowIndex := false + testShowListing := false + testTLSCert := "my.pem" + testTLSKey := "my.key" + testURLPrefix := "/url/prefix" + + // Set all environment variables with test values. + os.Setenv(debugKey, fmt.Sprintf("%t", testDebug)) + os.Setenv(folderKey, testFolder) + os.Setenv(hostKey, testHost) + os.Setenv(portKey, strconv.Itoa(int(testPort))) + os.Setenv(allowIndexKey, fmt.Sprintf("%t", testAllowIndex)) + os.Setenv(showListingKey, fmt.Sprintf("%t", testShowListing)) + os.Setenv(tlsCertKey, testTLSCert) + os.Setenv(tlsKeyKey, testTLSKey) + os.Setenv(urlPrefixKey, testURLPrefix) + + // Verification functions. 
+ equalStrings := func(t *testing.T, name, key, expected, result string) { + if expected != result { + t.Errorf( + "While checking %s for '%s' expected '%s' but got '%s'", + name, key, expected, result, + ) + } + } + equalUint16 := func(t *testing.T, name, key string, expected, result uint16) { + if expected != result { + t.Errorf( + "While checking %s for '%s' expected %d but got %d", + name, key, expected, result, + ) + } + } + equalBool := func(t *testing.T, name, key string, expected, result bool) { + if expected != result { + t.Errorf( + "While checking %s for '%s' expected %t but got %t", + name, key, expected, result, + ) + } + } + + // Verify defaults. + setDefaults() + phase := "defaults" + equalBool(t, phase, debugKey, defaultDebug, Get.Debug) + equalStrings(t, phase, folderKey, defaultFolder, Get.Folder) + equalStrings(t, phase, hostKey, defaultHost, Get.Host) + equalUint16(t, phase, portKey, defaultPort, Get.Port) + equalBool(t, phase, showListingKey, defaultShowListing, Get.ShowListing) + equalStrings(t, phase, tlsCertKey, defaultTLSCert, Get.TLSCert) + equalStrings(t, phase, tlsKeyKey, defaultTLSKey, Get.TLSKey) + equalStrings(t, phase, urlPrefixKey, defaultURLPrefix, Get.URLPrefix) + + // Apply overrides. + overrideWithEnvVars() + + // Verify overrides. 
+ phase = "overrides" + equalBool(t, phase, debugKey, testDebug, Get.Debug) + equalStrings(t, phase, folderKey, testFolder, Get.Folder) + equalStrings(t, phase, hostKey, testHost, Get.Host) + equalUint16(t, phase, portKey, testPort, Get.Port) + equalBool(t, phase, showListingKey, testShowListing, Get.ShowListing) + equalStrings(t, phase, tlsCertKey, testTLSCert, Get.TLSCert) + equalStrings(t, phase, tlsKeyKey, testTLSKey, Get.TLSKey) + equalStrings(t, phase, urlPrefixKey, testURLPrefix, Get.URLPrefix) +} + +func TestValidate(t *testing.T) { + validPath := "config.go" + invalidPath := "should/never/exist.txt" + empty := "" + prefix := "/my/prefix" + + testCases := []struct { + name string + cert string + key string + prefix string + minTLS string + isError bool + }{ + {"Valid paths w/prefix", validPath, validPath, prefix, "", false}, + {"Valid paths wo/prefix", validPath, validPath, empty, "", false}, + {"Empty paths w/prefix", empty, empty, prefix, "", false}, + {"Empty paths wo/prefix", empty, empty, empty, "", false}, + {"Mixed paths w/prefix", empty, validPath, prefix, "", true}, + {"Alt mixed paths w/prefix", validPath, empty, prefix, "", true}, + {"Mixed paths wo/prefix", empty, validPath, empty, "", true}, + {"Alt mixed paths wo/prefix", validPath, empty, empty, "", true}, + {"Invalid cert w/prefix", invalidPath, validPath, prefix, "", true}, + {"Invalid key w/prefix", validPath, invalidPath, prefix, "", true}, + {"Invalid cert & key w/prefix", invalidPath, invalidPath, prefix, "", true}, + {"Prefix missing leading /", empty, empty, "my/prefix", "", true}, + {"Prefix with trailing /", empty, empty, "/my/prefix/", "", true}, + {"Valid paths w/min ok TLS", validPath, validPath, prefix, "tls11", false}, + {"Valid paths w/min ok TLS", validPath, validPath, prefix, "tls12", false}, + {"Valid paths w/min ok TLS", validPath, validPath, prefix, "tls13", false}, + {"Valid paths w/min bad TLS", validPath, validPath, prefix, "bad", true}, + {"Empty paths w/min ok TLS", 
empty, empty, prefix, "tls11", true}, + {"Empty paths w/min bad TLS", empty, empty, prefix, "bad", true}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + Get.TLSCert = tc.cert + Get.TLSKey = tc.key + Get.TLSMinVersStr = tc.minTLS + Get.URLPrefix = tc.prefix + err := validate() + hasError := nil != err + if hasError && !tc.isError { + t.Errorf("Expected no error but got %v", err) + } + if !hasError && tc.isError { + t.Error("Expected an error but got no error") + } + }) + } +} + +func TestEnvAsStr(t *testing.T) { + sv := "STRING_VALUE" + fv := "FLOAT_VALUE" + iv := "INT_VALUE" + bv := "BOOL_VALUE" + ev := "EMPTY_VALUE" + uv := "UNSET_VALUE" + + sr := "String Cheese" // String result + fr := "123.456" // Float result + ir := "-123" // Int result + br := "true" // Bool result + er := "" // Empty result + fbr := "fallback result" // Fallback result + efbr := "" // Empty fallback result + + os.Setenv(sv, sr) + os.Setenv(fv, fr) + os.Setenv(iv, ir) + os.Setenv(bv, br) + os.Setenv(ev, er) + + testCases := []struct { + name string + key string + fallback string + result string + }{ + {"Good string", sv, fbr, sr}, + {"Float string", fv, fbr, fr}, + {"Int string", iv, fbr, ir}, + {"Bool string", bv, fbr, br}, + {"Empty string", ev, fbr, fbr}, + {"Unset", uv, fbr, fbr}, + {"Good string with empty fallback", sv, efbr, sr}, + {"Unset with empty fallback", uv, efbr, efbr}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := envAsStr(tc.key, tc.fallback) + if tc.result != result { + t.Errorf( + "For %s with a '%s' fallback expected '%s' but got '%s'", + tc.key, tc.fallback, tc.result, result, + ) + } + }) + } +} + +func TestEnvAsStrSlice(t *testing.T) { + oe := "ONE_ENTRY" + oewc := "ONE_ENTRY_WITH_COMMA" + oewtc := "ONE_ENTRY_WITH_TRAILING_COMMA" + te := "TWO_ENTRY" + tewc := "TWO_ENTRY_WITH_COMMA" + oc := "ONLY_COMMA" + ev := "EMPTY_VALUE" + uv := "UNSET_VALUE" + + fs := "http://my.site" + ts := 
"http://other.site" + fbr := []string{"one", "two"} + var efbr []string + + oes := fs + oer := []string{fs} + oewcs := "," + fs + oewcr := []string{"", fs} + oewtcs := fs + "," + oewtcr := []string{fs, ""} + tes := fs + "," + ts + ter := []string{fs, ts} + tewcs := "," + fs + "," + ts + tewcr := []string{"", fs, ts} + ocs := "," + ocr := []string{"", ""} + evs := "" + + os.Setenv(oe, oes) + os.Setenv(oewc, oewcs) + os.Setenv(oewtc, oewtcs) + os.Setenv(te, tes) + os.Setenv(tewc, tewcs) + os.Setenv(oc, ocs) + os.Setenv(ev, evs) + + testCases := []struct { + name string + key string + fallback []string + result []string + }{ + {"One entry", oe, fbr, oer}, + {"One entry w/comma", oewc, fbr, oewcr}, + {"One entry w/trailing comma", oewtc, fbr, oewtcr}, + {"Two entry", te, fbr, ter}, + {"Two entry w/comma", tewc, fbr, tewcr}, + {"Only comma", oc, fbr, ocr}, + {"Empty value w/fallback", ev, fbr, fbr}, + {"Empty value wo/fallback", ev, efbr, efbr}, + {"Unset w/fallback", uv, fbr, fbr}, + {"Unset wo/fallback", uv, efbr, efbr}, + } + + matches := func(a, b []string) bool { + if len(a) != len(b) { + return false + } + tally := make(map[int]bool) + for i := range a { + tally[i] = false + } + for _, val := range a { + for i, other := range b { + if other == val && !tally[i] { + tally[i] = true + break + } + } + } + for _, found := range tally { + if !found { + return false + } + } + return true + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := envAsStrSlice(tc.key, tc.fallback) + if !matches(tc.result, result) { + t.Errorf( + "For %s with a '%v' fallback expected '%v' but got '%v'", + tc.key, tc.fallback, tc.result, result, + ) + } + }) + } +} + +func TestEnvAsUint16(t *testing.T) { + ubv := "UPPER_BOUNDS_VALUE" + lbv := "LOWER_BOUNDS_VALUE" + hv := "HIGH_VALUE" + lv := "LOW_VALUE" + bv := "BOOL_VALUE" + sv := "STRING_VALUE" + uv := "UNSET_VALUE" + + fbr := uint16(666) // Fallback result + ubr := uint16(65535) // Upper bounds result + lbr 
:= uint16(0) // Lower bounds result + + os.Setenv(ubv, "65535") + os.Setenv(lbv, "0") + os.Setenv(hv, "65536") + os.Setenv(lv, "-1") + os.Setenv(bv, "true") + os.Setenv(sv, "Cheese") + + testCases := []struct { + name string + key string + fallback uint16 + result uint16 + }{ + {"Upper bounds", ubv, fbr, ubr}, + {"Lower bounds", lbv, fbr, lbr}, + {"Out-of-bounds high", hv, fbr, fbr}, + {"Out-of-bounds low", lv, fbr, fbr}, + {"Boolean", bv, fbr, fbr}, + {"String", sv, fbr, fbr}, + {"Unset", uv, fbr, fbr}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := envAsUint16(tc.key, tc.fallback) + if tc.result != result { + t.Errorf( + "For %s with a %d fallback expected %d but got %d", + tc.key, tc.fallback, tc.result, result, + ) + } + }) + } +} + +func TestEnvAsBool(t *testing.T) { + tv := "TRUE_VALUE" + fv := "FALSE_VALUE" + bv := "BAD_VALUE" + uv := "UNSET_VALUE" + + os.Setenv(tv, "True") + os.Setenv(fv, "NO") + os.Setenv(bv, "BAD") + + testCases := []struct { + name string + key string + fallback bool + result bool + }{ + {"True with true fallback", tv, true, true}, + {"True with false fallback", tv, false, true}, + {"False with true fallback", fv, true, false}, + {"False with false fallback", fv, false, false}, + {"Bad with true fallback", bv, true, true}, + {"Bad with false fallback", bv, false, false}, + {"Unset with true fallback", uv, true, true}, + {"Unset with false fallback", uv, false, false}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := envAsBool(tc.key, tc.fallback) + if tc.result != result { + t.Errorf( + "For %s with a %t fallback expected %t but got %t", + tc.key, tc.fallback, tc.result, result, + ) + } + }) + } +} + +func TestStrAsBool(t *testing.T) { + testCases := []struct { + name string + value string + result bool + isError bool + }{ + {"Empty value", "", false, true}, + {"False value", "0", false, false}, + {"True value", "1", true, false}, + } + + for _, tc := range 
testCases { + t.Run(tc.name, func(t *testing.T) { + result, err := strAsBool(tc.value) + if result != tc.result { + t.Errorf( + "Expected %t for %s but got %t", + tc.result, tc.value, result, + ) + } + if tc.isError && nil == err { + t.Errorf( + "Expected error for %s but got no error", + tc.value, + ) + } + if !tc.isError && nil != err { + t.Errorf( + "Expected no error for %s but got %v", + tc.value, err, + ) + } + }) + + } +} + +func TestTlsMinVersAsUint16(t *testing.T) { + testCases := []struct { + name string + value string + result uint16 + isError bool + }{ + {"Empty value", "", 0, true}, + {"Valid TLS1.0", "TLS10", tls.VersionTLS10, false}, + {"Valid TLS1.1", "tls11", tls.VersionTLS11, false}, + {"Valid TLS1.2", "tls12", tls.VersionTLS12, false}, + {"Valid TLS1.3", "tLS13", tls.VersionTLS13, false}, + {"Invalid TLS1.4", "tls14", 0, true}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result, err := tlsMinVersAsUint16(tc.value) + if result != tc.result { + t.Errorf( + "Expected %d for %s but got %d", + tc.result, tc.value, result, + ) + } + if tc.isError && nil == err { + t.Errorf( + "Expected error for %s but got no error", + tc.value, + ) + } else if !tc.isError && nil != err { + t.Errorf( + "Expected no error for %s but got %v", + tc.value, err, + ) + } + }) + } +} diff --git a/deploy/chp-api/static-file-server/go.mod b/deploy/chp-api/static-file-server/go.mod new file mode 100644 index 0000000..0225e99 --- /dev/null +++ b/deploy/chp-api/static-file-server/go.mod @@ -0,0 +1,5 @@ +module github.com/halverneus/static-file-server + +go 1.18 + +require gopkg.in/yaml.v3 v3.0.1 diff --git a/deploy/chp-api/static-file-server/go.sum b/deploy/chp-api/static-file-server/go.sum new file mode 100644 index 0000000..a62c313 --- /dev/null +++ b/deploy/chp-api/static-file-server/go.sum @@ -0,0 +1,4 @@ +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 
v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/deploy/chp-api/static-file-server/handle/handle.go b/deploy/chp-api/static-file-server/handle/handle.go new file mode 100644 index 0000000..6e85835 --- /dev/null +++ b/deploy/chp-api/static-file-server/handle/handle.go @@ -0,0 +1,252 @@ +package handle + +import ( + "crypto/md5" + "crypto/tls" + "fmt" + "log" + "net/http" + "os" + "path" + "strings" +) + +var ( + // These assignments are for unit testing. + listenAndServe = http.ListenAndServe + listenAndServeTLS = defaultListenAndServeTLS + setHandler = http.HandleFunc +) + +var ( + // Server options to be set prior to calling the listening function. + // minTLSVersion is the minimum allowed TLS version to be used by the + // server. + minTLSVersion uint16 = tls.VersionTLS10 +) + +// defaultListenAndServeTLS is the default implementation of the listening +// function for serving with TLS enabled. This is, effectively, a copy from +// the standard library but with the ability to set the minimum TLS version. +func defaultListenAndServeTLS( + binding, certFile, keyFile string, handler http.Handler, +) error { + if handler == nil { + handler = http.DefaultServeMux + } + server := &http.Server{ + Addr: binding, + Handler: handler, + TLSConfig: &tls.Config{ + MinVersion: minTLSVersion, + }, + } + return server.ListenAndServeTLS(certFile, keyFile) +} + +// SetMinimumTLSVersion to be used by the server. 
+func SetMinimumTLSVersion(version uint16) { + if version < tls.VersionTLS10 { + version = tls.VersionTLS10 + } else if version > tls.VersionTLS13 { + version = tls.VersionTLS13 + } + minTLSVersion = version +} + +// ListenerFunc accepts the {hostname:port} binding string required by HTTP +// listeners and the handler (router) function and returns any errors that +// occur. +type ListenerFunc func(string, http.HandlerFunc) error + +// FileServerFunc is used to serve the file from the local file system to the +// requesting client. +type FileServerFunc func(http.ResponseWriter, *http.Request, string) + +// WithReferrers returns a function that evaluates the HTTP 'Referer' header +// value and returns HTTP error 403 if the value is not found in the whitelist. +// If one of the whitelisted referrers are an empty string, then it is allowed +// for the 'Referer' HTTP header key to not be set. +func WithReferrers(serveFile FileServerFunc, referrers []string) FileServerFunc { + return func(w http.ResponseWriter, r *http.Request, name string) { + if !validReferrer(referrers, r.Referer()) { + http.Error( + w, + fmt.Sprintf("Invalid source '%s'", r.Referer()), + http.StatusForbidden, + ) + return + } + serveFile(w, r, name) + } +} + +// WithLogging returns a function that logs information about the request prior +// to serving the requested file. +func WithLogging(serveFile FileServerFunc) FileServerFunc { + return func(w http.ResponseWriter, r *http.Request, name string) { + referer := r.Referer() + if len(referer) == 0 { + log.Printf( + "REQ from '%s': %s %s %s%s -> %s\n", + r.RemoteAddr, + r.Method, + r.Proto, + r.Host, + r.URL.Path, + name, + ) + } else { + log.Printf( + "REQ from '%s' (REFERER: '%s'): %s %s %s%s -> %s\n", + r.RemoteAddr, + referer, + r.Method, + r.Proto, + r.Host, + r.URL.Path, + name, + ) + } + serveFile(w, r, name) + } +} + +// Basic file handler servers files from the passed folder. 
+func Basic(serveFile FileServerFunc, folder string) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + serveFile(w, r, folder+r.URL.Path) + } +} + +// Prefix file handler is an alternative to Basic where a URL prefix is removed +// prior to serving a file (http://my.machine/prefix/file.txt will serve +// file.txt from the root of the folder being served (ignoring 'prefix')). +func Prefix(serveFile FileServerFunc, folder, urlPrefix string) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + if !strings.HasPrefix(r.URL.Path, urlPrefix) { + http.NotFound(w, r) + return + } + serveFile(w, r, folder+strings.TrimPrefix(r.URL.Path, urlPrefix)) + } +} + +// PreventListings returns a function that prevents listing of directories but +// still allows index.html to be served. +func PreventListings(serve http.HandlerFunc, folder string, urlPrefix string) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + if strings.HasSuffix(r.URL.Path, "/") { + // If the directory does not contain an index.html file, then + // return 'NOT FOUND' to prevent listing of the directory. + stat, err := os.Stat(path.Join(folder, strings.TrimPrefix(r.URL.Path, urlPrefix), "index.html")) + if err != nil || (err == nil && !stat.Mode().IsRegular()) { + http.NotFound(w, r) + return + } + } + serve(w, r) + } +} + +// IgnoreIndex wraps an HTTP request. In the event of a folder root request, +// this function will automatically return 'NOT FOUND' as opposed to default +// behavior where the index file for that directory is retrieved. +func IgnoreIndex(serve http.HandlerFunc) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + if strings.HasSuffix(r.URL.Path, "/") { + http.NotFound(w, r) + return + } + serve(w, r) + } +} + +// AddCorsWildcardHeaders wraps an HTTP request to notify client browsers that +// resources should be allowed to be retrieved by any other domain. 
+func AddCorsWildcardHeaders(serve http.HandlerFunc) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Access-Control-Allow-Origin", "*") + w.Header().Set("Access-Control-Allow-Headers", "*") + serve(w, r) + } +} + +// AddAccessKey provides Access Control through url parameters. The access key +// is set by ACCESS_KEY. md5sum is computed by queried path + access key +// (e.g. "/my/file" + ACCESS_KEY) +func AddAccessKey(serve http.HandlerFunc, accessKey string) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + // Get key or md5sum from this access. + keys, keyOk := r.URL.Query()["key"] + var code string + if !keyOk || len(keys[0]) < 1 { + // In case a code is provided + codes, codeOk := r.URL.Query()["code"] + if !codeOk || len(codes[0]) < 1 { + http.NotFound(w, r) + return + } + code = strings.ToUpper(codes[0]) + } else { + // In case a key is provided, convert to code. + data := []byte(r.URL.Path + keys[0]) + hash := md5.Sum(data) + code = fmt.Sprintf("%X", hash) + } + + // Compute the correct md5sum of this access. + localData := []byte(r.URL.Path + accessKey) + hash := md5.Sum(localData) + localCode := fmt.Sprintf("%X", hash) + + // Compare the two. + if code != localCode { + http.NotFound(w, r) + return + } + serve(w, r) + } +} + +// Listening function for serving the handler function. +func Listening() ListenerFunc { + return func(binding string, handler http.HandlerFunc) error { + setHandler("/", handler) + return listenAndServe(binding, nil) + } +} + +// TLSListening function for serving the handler function with encryption. +func TLSListening(tlsCert, tlsKey string) ListenerFunc { + return func(binding string, handler http.HandlerFunc) error { + setHandler("/", handler) + return listenAndServeTLS(binding, tlsCert, tlsKey, nil) + } +} + +// validReferrer returns true if the passed referrer can be resolved by the +// passed list of referrers. 
+func validReferrer(s []string, e string) bool { + // Whitelisted referer list is empty. All requests are allowed. + if len(s) == 0 { + return true + } + + for _, a := range s { + // Handle blank HTTP Referer header, if configured + if a == "" { + if e == "" { + return true + } + // Continue loop (all strings start with "") + continue + } + + // Compare header with allowed prefixes + if strings.HasPrefix(e, a) { + return true + } + } + return false +} diff --git a/deploy/chp-api/static-file-server/handle/handle_test.go b/deploy/chp-api/static-file-server/handle/handle_test.go new file mode 100644 index 0000000..ef6158f --- /dev/null +++ b/deploy/chp-api/static-file-server/handle/handle_test.go @@ -0,0 +1,703 @@ +package handle + +import ( + "crypto/md5" + "crypto/tls" + "errors" + "fmt" + "io/ioutil" + "log" + "net/http" + "net/http/httptest" + "os" + "path" + "testing" +) + +var ( + baseDir = "tmp/" + subDir = "sub/" + subDeepDir = "sub/deep/" + tmpIndexName = "index.html" + tmpFileName = "file.txt" + tmpBadName = "bad.txt" + tmpSubIndexName = "sub/index.html" + tmpSubFileName = "sub/file.txt" + tmpSubBadName = "sub/bad.txt" + tmpSubDeepIndexName = "sub/deep/index.html" + tmpSubDeepFileName = "sub/deep/file.txt" + tmpSubDeepBadName = "sub/deep/bad.txt" + tmpNoIndexDir = "noindex/" + tmpNoIndexName = "noindex/noindex.txt" + + tmpIndex = "Space: the final frontier" + tmpFile = "These are the voyages of the starship Enterprise." 
+ tmpSubIndex = "Its continuing mission:" + tmpSubFile = "To explore strange new worlds" + tmpSubDeepIndex = "To seek out new life and new civilizations" + tmpSubDeepFile = "To boldly go where no one has gone before" + + nothing = "" + ok = http.StatusOK + missing = http.StatusNotFound + redirect = http.StatusMovedPermanently + notFound = "404 page not found\n" + + files = map[string]string{ + baseDir + tmpIndexName: tmpIndex, + baseDir + tmpFileName: tmpFile, + baseDir + tmpSubIndexName: tmpSubIndex, + baseDir + tmpSubFileName: tmpSubFile, + baseDir + tmpSubDeepIndexName: tmpSubDeepIndex, + baseDir + tmpSubDeepFileName: tmpSubDeepFile, + baseDir + tmpNoIndexName: tmpSubDeepFile, + } + + serveFileFuncs = []FileServerFunc{ + http.ServeFile, + WithLogging(http.ServeFile), + } +) + +func TestMain(m *testing.M) { + code := func(m *testing.M) int { + if err := setup(); nil != err { + log.Fatalf("While setting up test got: %v\n", err) + } + defer teardown() + return m.Run() + }(m) + os.Exit(code) +} + +func setup() (err error) { + for filename, contents := range files { + if err = os.MkdirAll(path.Dir(filename), 0700); nil != err { + return + } + if err = ioutil.WriteFile( + filename, + []byte(contents), + 0600, + ); nil != err { + return + } + } + return +} + +func teardown() (err error) { + return os.RemoveAll("tmp") +} + +func TestSetMinimumTLSVersion(t *testing.T) { + testCases := []struct { + name string + value uint16 + expected uint16 + }{ + {"Too low", tls.VersionTLS10 - 1, tls.VersionTLS10}, + {"Lower bounds", tls.VersionTLS10, tls.VersionTLS10}, + {"Upper bounds", tls.VersionTLS13, tls.VersionTLS13}, + {"Too high", tls.VersionTLS13 + 1, tls.VersionTLS13}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + SetMinimumTLSVersion(tc.value) + if tc.expected != minTLSVersion { + t.Errorf("Expected %d but got %d", tc.expected, minTLSVersion) + } + }) + } +} + +func TestWithReferrers(t *testing.T) { + forbidden := http.StatusForbidden + + 
ok1 := "http://valid.com" + ok2 := "https://valid.com" + ok3 := "http://localhost" + bad := "http://other.pl" + + var noRefer []string + emptyRefer := []string{} + onlyNoRefer := []string{""} + refer := []string{ok1, ok2, ok3} + noWithRefer := []string{"", ok1, ok2, ok3} + + testCases := []struct { + name string + refers []string + refer string + code int + }{ + {"Nil refer list", noRefer, bad, ok}, + {"Empty refer list", emptyRefer, bad, ok}, + {"Unassigned allowed & unassigned", onlyNoRefer, "", ok}, + {"Unassigned allowed & assigned", onlyNoRefer, bad, forbidden}, + {"Whitelist with unassigned", refer, "", forbidden}, + {"Whitelist with bad", refer, bad, forbidden}, + {"Whitelist with ok1", refer, ok1, ok}, + {"Whitelist with ok2", refer, ok2, ok}, + {"Whitelist with ok3", refer, ok3, ok}, + {"Whitelist and none with unassigned", noWithRefer, "", ok}, + {"Whitelist with bad", noWithRefer, bad, forbidden}, + {"Whitelist with ok1", noWithRefer, ok1, ok}, + {"Whitelist with ok2", noWithRefer, ok2, ok}, + {"Whitelist with ok3", noWithRefer, ok3, ok}, + } + + success := func(w http.ResponseWriter, r *http.Request, name string) { + defer r.Body.Close() + w.WriteHeader(ok) + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + handler := WithReferrers(success, tc.refers) + + fullpath := "http://localhost/" + tmpIndexName + req := httptest.NewRequest("GET", fullpath, nil) + req.Header.Add("Referer", tc.refer) + w := httptest.NewRecorder() + + handler(w, req, "") + + resp := w.Result() + _, err := ioutil.ReadAll(resp.Body) + if nil != err { + t.Errorf("While reading body got %v", err) + } + if tc.code != resp.StatusCode { + t.Errorf( + "With referer '%s' in '%v' expected status code %d but got %d", + tc.refer, tc.refers, tc.code, resp.StatusCode, + ) + } + }) + } +} + +func TestBasicWithAndWithoutLogging(t *testing.T) { + referer := "http://localhost" + noReferer := "" + testCases := []struct { + name string + path string + code int + refer string 
+ contents string + }{ + {"Good base dir", "", ok, referer, tmpIndex}, + {"Good base index", tmpIndexName, redirect, referer, nothing}, + {"Good base file", tmpFileName, ok, referer, tmpFile}, + {"Bad base file", tmpBadName, missing, referer, notFound}, + {"Good subdir dir", subDir, ok, referer, tmpSubIndex}, + {"Good subdir index", tmpSubIndexName, redirect, referer, nothing}, + {"Good subdir file", tmpSubFileName, ok, referer, tmpSubFile}, + {"Good base dir", "", ok, noReferer, tmpIndex}, + {"Good base index", tmpIndexName, redirect, noReferer, nothing}, + {"Good base file", tmpFileName, ok, noReferer, tmpFile}, + {"Bad base file", tmpBadName, missing, noReferer, notFound}, + {"Good subdir dir", subDir, ok, noReferer, tmpSubIndex}, + {"Good subdir index", tmpSubIndexName, redirect, noReferer, nothing}, + {"Good subdir file", tmpSubFileName, ok, noReferer, tmpSubFile}, + } + + for _, serveFile := range serveFileFuncs { + handler := Basic(serveFile, baseDir) + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + fullpath := "http://localhost/" + tc.path + req := httptest.NewRequest("GET", fullpath, nil) + req.Header.Add("Referer", tc.refer) + w := httptest.NewRecorder() + + handler(w, req) + + resp := w.Result() + body, err := ioutil.ReadAll(resp.Body) + if nil != err { + t.Errorf("While reading body got %v", err) + } + contents := string(body) + if tc.code != resp.StatusCode { + t.Errorf( + "While retrieving %s expected status code of %d but got %d", + fullpath, tc.code, resp.StatusCode, + ) + } + if tc.contents != contents { + t.Errorf( + "While retrieving %s expected contents '%s' but got '%s'", + fullpath, tc.contents, contents, + ) + } + }) + } + } +} + +func TestPrefix(t *testing.T) { + prefix := "/my/prefix/path/" + + testCases := []struct { + name string + path string + code int + contents string + }{ + {"Good base dir", prefix, ok, tmpIndex}, + {"Good base index", prefix + tmpIndexName, redirect, nothing}, + {"Good base file", prefix + 
tmpFileName, ok, tmpFile}, + {"Bad base file", prefix + tmpBadName, missing, notFound}, + {"Good subdir dir", prefix + subDir, ok, tmpSubIndex}, + {"Good subdir index", prefix + tmpSubIndexName, redirect, nothing}, + {"Good subdir file", prefix + tmpSubFileName, ok, tmpSubFile}, + {"Unknown prefix", tmpFileName, missing, notFound}, + } + + for _, serveFile := range serveFileFuncs { + handler := Prefix(serveFile, baseDir, prefix) + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + fullpath := "http://localhost" + tc.path + req := httptest.NewRequest("GET", fullpath, nil) + w := httptest.NewRecorder() + + handler(w, req) + + resp := w.Result() + body, err := ioutil.ReadAll(resp.Body) + if nil != err { + t.Errorf("While reading body got %v", err) + } + contents := string(body) + if tc.code != resp.StatusCode { + t.Errorf( + "While retrieving %s expected status code of %d but got %d", + fullpath, tc.code, resp.StatusCode, + ) + } + if tc.contents != contents { + t.Errorf( + "While retrieving %s expected contents '%s' but got '%s'", + fullpath, tc.contents, contents, + ) + } + }) + } + } +} + +func TestIgnoreIndex(t *testing.T) { + testCases := []struct { + name string + path string + code int + contents string + }{ + {"Good base dir", "", missing, notFound}, + {"Good base index", tmpIndexName, redirect, nothing}, + {"Good base file", tmpFileName, ok, tmpFile}, + {"Bad base file", tmpBadName, missing, notFound}, + {"Good subdir dir", subDir, missing, notFound}, + {"Good subdir index", tmpSubIndexName, redirect, nothing}, + {"Good subdir file", tmpSubFileName, ok, tmpSubFile}, + } + + for _, serveFile := range serveFileFuncs { + handler := IgnoreIndex(Basic(serveFile, baseDir)) + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + fullpath := "http://localhost/" + tc.path + req := httptest.NewRequest("GET", fullpath, nil) + w := httptest.NewRecorder() + + handler(w, req) + + resp := w.Result() + body, err := 
ioutil.ReadAll(resp.Body) + if nil != err { + t.Errorf("While reading body got %v", err) + } + contents := string(body) + if tc.code != resp.StatusCode { + t.Errorf( + "While retrieving %s expected status code of %d but got %d", + fullpath, tc.code, resp.StatusCode, + ) + } + if tc.contents != contents { + t.Errorf( + "While retrieving %s expected contents '%s' but got '%s'", + fullpath, tc.contents, contents, + ) + } + }) + } + } +} + +func TestPreventListings(t *testing.T) { + testCases := []struct { + name string + path string + code int + contents string + }{ + {"Good base dir", "", ok, tmpIndex}, + {"Good base index", tmpIndexName, redirect, nothing}, + {"Good base file", tmpFileName, ok, tmpFile}, + {"Bad base file", tmpBadName, missing, notFound}, + {"Good subdir dir", subDir, ok, tmpSubIndex}, + {"Good subdir index", tmpSubIndexName, redirect, nothing}, + {"Good subdir file", tmpSubFileName, ok, tmpSubFile}, + {"Dir without index", tmpNoIndexDir, missing, notFound}, + } + + for _, serveFile := range serveFileFuncs { + handler := PreventListings(Basic(serveFile, baseDir), baseDir, "") + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + fullpath := "http://localhost/" + tc.path + req := httptest.NewRequest("GET", fullpath, nil) + w := httptest.NewRecorder() + + handler(w, req) + + resp := w.Result() + body, err := ioutil.ReadAll(resp.Body) + if nil != err { + t.Errorf("While reading body got %v", err) + } + contents := string(body) + if tc.code != resp.StatusCode { + t.Errorf( + "While retrieving %s expected status code of %d but got %d", + fullpath, tc.code, resp.StatusCode, + ) + } + if tc.contents != contents { + t.Errorf( + "While retrieving %s expected contents '%s' but got '%s'", + fullpath, tc.contents, contents, + ) + } + }) + } + } +} + +func TestAddAccessKey(t *testing.T) { + // Prepare testing data. 
+ accessKey := "my-access-key" + + code := func(path, key string) string { + data := []byte("/" + path + key) + fmt.Printf("TEST: '%s'\n", data) + return fmt.Sprintf("%X", md5.Sum(data)) + } + + // Define test cases. + testCases := []struct { + name string + path string + key string + value string + code int + contents string + }{ + { + "Good base file with code", tmpFileName, + "code", code(tmpFileName, accessKey), + ok, tmpFile, + }, + { + "Good base file with key", tmpFileName, + "key", accessKey, + ok, tmpFile, + }, + { + "Bad base file with code", tmpBadName, + "code", code(tmpBadName, accessKey), + missing, notFound, + }, + { + "Bad base file with key", tmpBadName, + "key", accessKey, + missing, notFound, + }, + { + "Good base file with no code or key", tmpFileName, + "my", "value", + missing, notFound, + }, + { + "Good base file with bad code", tmpFileName, + "code", code(tmpFileName, "bad-access-key"), + missing, notFound, + }, + { + "Good base file with bad key", tmpFileName, + "key", "bad-access-key", + missing, notFound, + }, + } + + for _, serveFile := range serveFileFuncs { + handler := AddAccessKey(Basic(serveFile, baseDir), accessKey) + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + fullpath := fmt.Sprintf( + "http://localhost/%s?%s=%s", + tc.path, tc.key, tc.value, + ) + req := httptest.NewRequest("GET", fullpath, nil) + w := httptest.NewRecorder() + + handler(w, req) + + resp := w.Result() + body, err := ioutil.ReadAll(resp.Body) + if nil != err { + t.Errorf("While reading body got %v", err) + } + contents := string(body) + if tc.code != resp.StatusCode { + t.Errorf( + "While retrieving %s expected status code of %d but got %d", + fullpath, tc.code, resp.StatusCode, + ) + } + if tc.contents != contents { + t.Errorf( + "While retrieving %s expected contents '%s' but got '%s'", + fullpath, tc.contents, contents, + ) + } + }) + } + } +} + +func TestListening(t *testing.T) { + // Choose values for testing. 
+ called := false + testBinding := "host:port" + testError := errors.New("random problem") + + // Create an empty placeholder router function. + handler := func(http.ResponseWriter, *http.Request) {} + + // Override setHandler so that multiple calls to 'http.HandleFunc' doesn't + // panic. + setHandler = func(string, func(http.ResponseWriter, *http.Request)) {} + + // Override listenAndServe with a function with more introspection and + // control than 'http.ListenAndServe'. + listenAndServe = func( + binding string, handler http.Handler, + ) error { + if testBinding != binding { + t.Errorf( + "While serving expected binding of %s but got %s", + testBinding, binding, + ) + } + called = !called + if called { + return nil + } + return testError + } + + // Perform test. + listener := Listening() + if err := listener(testBinding, handler); nil != err { + t.Errorf("While serving first expected nil error but got %v", err) + } + if err := listener(testBinding, handler); nil == err { + t.Errorf( + "While serving second got nil while expecting %v", testError, + ) + } +} + +func TestTLSListening(t *testing.T) { + // Choose values for testing. + called := false + testBinding := "host:port" + testTLSCert := "test/file.pem" + testTLSKey := "test/file.key" + testError := errors.New("random problem") + + // Create an empty placeholder router function. + handler := func(http.ResponseWriter, *http.Request) {} + + // Override setHandler so that multiple calls to 'http.HandleFunc' doesn't + // panic. + setHandler = func(string, func(http.ResponseWriter, *http.Request)) {} + + // Override listenAndServeTLS with a function with more introspection and + // control than 'http.ListenAndServeTLS'. 
+ listenAndServeTLS = func( + binding, tlsCert, tlsKey string, handler http.Handler, + ) error { + if testBinding != binding { + t.Errorf( + "While serving TLS expected binding of %s but got %s", + testBinding, binding, + ) + } + if testTLSCert != tlsCert { + t.Errorf( + "While serving TLS expected TLS cert of %s but got %s", + testTLSCert, tlsCert, + ) + } + if testTLSKey != tlsKey { + t.Errorf( + "While serving TLS expected TLS key of %s but got %s", + testTLSKey, tlsKey, + ) + } + called = !called + if called { + return nil + } + return testError + } + + // Perform test. + listener := TLSListening(testTLSCert, testTLSKey) + if err := listener(testBinding, handler); nil != err { + t.Errorf("While serving first TLS expected nil error but got %v", err) + } + if err := listener(testBinding, handler); nil == err { + t.Errorf( + "While serving second TLS got nil while expecting %v", testError, + ) + } +} + +func TestValidReferrer(t *testing.T) { + ok1 := "http://valid.com" + ok2 := "https://valid.com" + ok3 := "http://localhost" + bad := "http://other.pl" + + var noRefer []string + emptyRefer := []string{} + onlyNoRefer := []string{""} + refer := []string{ok1, ok2, ok3} + noWithRefer := []string{"", ok1, ok2, ok3} + + testCases := []struct { + name string + refers []string + refer string + result bool + }{ + {"Nil refer list", noRefer, bad, true}, + {"Empty refer list", emptyRefer, bad, true}, + {"Unassigned allowed & unassigned", onlyNoRefer, "", true}, + {"Unassigned allowed & assigned", onlyNoRefer, bad, false}, + {"Whitelist with unassigned", refer, "", false}, + {"Whitelist with bad", refer, bad, false}, + {"Whitelist with ok1", refer, ok1, true}, + {"Whitelist with ok2", refer, ok2, true}, + {"Whitelist with ok3", refer, ok3, true}, + {"Whitelist and none with unassigned", noWithRefer, "", true}, + {"Whitelist with bad", noWithRefer, bad, false}, + {"Whitelist with ok1", noWithRefer, ok1, true}, + {"Whitelist with ok2", noWithRefer, ok2, true}, + {"Whitelist 
with ok3", noWithRefer, ok3, true}, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := validReferrer(tc.refers, tc.refer) + if result != tc.result { + t.Errorf( + "With referrers of '%v' and a value of '%s' expected %t but got %t", + tc.refers, tc.refer, tc.result, result, + ) + } + }) + } +} + +func TestAddCorsWildcardHeaders(t *testing.T) { + testCases := []struct { + name string + corsEnabled bool + }{ + {"CORS disabled", false}, + {"CORS enabled", true}, + } + + corsHeaders := map[string]string{ + "Access-Control-Allow-Origin": "*", + "Access-Control-Allow-Headers": "*", + } + + for _, serveFile := range serveFileFuncs { + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + var handler http.HandlerFunc + if tc.corsEnabled { + handler = AddCorsWildcardHeaders(Basic(serveFile, baseDir)) + } else { + handler = Basic(serveFile, baseDir) + } + + fullpath := "http://localhost/" + tmpFileName + req := httptest.NewRequest("GET", fullpath, nil) + w := httptest.NewRecorder() + + handler(w, req) + + resp := w.Result() + body, err := ioutil.ReadAll(resp.Body) + if nil != err { + t.Errorf("While reading body got %v", err) + } + contents := string(body) + if ok != resp.StatusCode { + t.Errorf( + "While retrieving %s expected status code of %d but got %d", + fullpath, ok, resp.StatusCode, + ) + } + if tmpFile != contents { + t.Errorf( + "While retrieving %s expected contents '%s' but got '%s'", + fullpath, tmpFile, contents, + ) + } + + if tc.corsEnabled { + for k, v := range corsHeaders { + if v != resp.Header.Get(k) { + t.Errorf( + "With CORS enabled expect header '%s' to return '%s' but got '%s'", + k, v, resp.Header.Get(k), + ) + } + } + } else { + for k := range corsHeaders { + if "" != resp.Header.Get(k) { + t.Errorf( + "With CORS disabled expected header '%s' to return '' but got '%s'", + k, resp.Header.Get(k), + ) + } + } + } + }) + } + } +} diff --git a/deploy/chp-api/static-file-server/img/sponsor.svg 
b/deploy/chp-api/static-file-server/img/sponsor.svg new file mode 100644 index 0000000..a10f598 --- /dev/null +++ b/deploy/chp-api/static-file-server/img/sponsor.svg @@ -0,0 +1,147 @@ + + diff --git a/deploy/chp-api/static-file-server/update.sh b/deploy/chp-api/static-file-server/update.sh new file mode 100755 index 0000000..a4fbb16 --- /dev/null +++ b/deploy/chp-api/static-file-server/update.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +set -e + +if [ $# -eq 0 ]; then + echo "Usage: ./update.sh v#.#.#" + exit +fi + +VERSION=$1 + +docker build -t sfs-builder -f ./Dockerfile.all . + +ID=$(docker create sfs-builder) + +rm -rf out +mkdir -p out +docker cp "${ID}:/build/pkg/linux-amd64/serve" "./out/static-file-server-${VERSION}-linux-amd64" +docker cp "${ID}:/build/pkg/linux-i386/serve" "./out/static-file-server-${VERSION}-linux-386" +docker cp "${ID}:/build/pkg/linux-arm6/serve" "./out/static-file-server-${VERSION}-linux-arm6" +docker cp "${ID}:/build/pkg/linux-arm7/serve" "./out/static-file-server-${VERSION}-linux-arm7" +docker cp "${ID}:/build/pkg/linux-arm64/serve" "./out/static-file-server-${VERSION}-linux-arm64" +docker cp "${ID}:/build/pkg/darwin-amd64/serve" "./out/static-file-server-${VERSION}-darwin-amd64" +docker cp "${ID}:/build/pkg/win-amd64/serve.exe" "./out/static-file-server-${VERSION}-windows-amd64.exe" + +docker rm -f "${ID}" +docker rmi sfs-builder + +docker buildx build --push --platform linux/arm/v7,linux/arm64/v8,linux/amd64 --tag "halverneus/static-file-server:${VERSION}" . +docker buildx build --push --platform linux/arm/v7,linux/arm64/v8,linux/amd64 --tag halverneus/static-file-server:latest . 
+ +echo "Done" diff --git a/deploy/chp-api/templates/deployment.yaml b/deploy/chp-api/templates/deployment.yaml index a9ed2d1..3c1aaf3 100644 --- a/deploy/chp-api/templates/deployment.yaml +++ b/deploy/chp-api/templates/deployment.yaml @@ -21,45 +21,54 @@ spec: {{- toYaml .Values.podSecurityContext | nindent 8 }} containers: - name: {{ .Chart.Name }} + {{- with .Values.chp_api.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} securityContext: {{- toYaml .Values.securityContext | nindent 12 }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" imagePullPolicy: {{ .Values.image.pullPolicy }} command: ["/bin/sh"] - args: ["-c", "/bin/bash /home/chp_api/web/entrypoint.prod.sh && gunicorn -c gunicorn.config-prod.py --log-file=- --env DJANGO_SETTINGS_MODULE=chp_api.settings.production chp_api.wsgi:application --bind 0.0.0.0:8000"] + args: ["-c", "opentelemetry-instrument --traces_exporter jaeger --metrics_exporter console uwsgi --http :8000 --max-requests=200 --master --pidfile=/tmp/project-master.pid --module chp_api.wsgi:application"] ports: - name: http-app containerPort: 8000 protocol: TCP volumeMounts: - name: {{ include "chp-api.fullname" . }}-pvc - mountPath: /home/chp_api/web/chp_api/staticfiles + mountPath: /home/chp_api/staticfiles env: - name: SECRET_KEY valueFrom: secretKeyRef: name: {{ include "chp-api.fullname" . }}-secret key: secret_key - - name: SQL_DATABASE + - name: POSTGRES_DB valueFrom: secretKeyRef: name: {{ include "chp-api.fullname" . }}-secret key: sql_database - - name: SQL_USER + - name: POSTGRES_USER valueFrom: secretKeyRef: name: {{ include "chp-api.fullname" . }}-secret key: sql_username - - name: SQL_PASSWORD + - name: POSTGRES_PASSWORD valueFrom: secretKeyRef: name: {{ include "chp-api.fullname" . }}-secret key: sql_password + - name: DJANGO_SUPERUSER_PASSWORD + valueFrom: + secretKeyRef: + name: {{ include "chp-api.fullname" . 
}}-secret + key: django_superuser_password - name: SQL_ENGINE value: "{{ .Values.db.engine }}" - - name: SQL_HOST + - name: POSTGRES_HOST value: "{{ .Values.db.host }}" - - name: SQL_PORT + - name: POSTGRES_PORT value: "{{ .Values.db.port }}" - name: DATABASE value: "{{ .Values.db.type }}" @@ -67,9 +76,27 @@ spec: value: "{{ .Values.app.debug }}" - name: DJANGO_ALLOWED_HOSTS value: "{{ .Values.app.djangoAllowedHosts }}" + - name: CSRF_TRUSTED_ORIGINS + value: "{{ .Values.app.djangoCSRFTrustedOrigins }}" - name: DJANGO_SETTINGS_MODULE value: "{{ .Values.app.djangoSettingsModule }}" + - name: DJANGO_SUPERUSER_USERNAME + value: "{{ .Values.app.djangoSuperuserUsername }}" + - name: DJANGO_SUPERUSER_EMAIL + value: "{{ .Values.app.djangoSuperuserEmail }}" + - name: OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED + value: "{{ .Values.app.OtelPythonLoggingAutoInstrumentationEnabled }}" + - name: OTEL_TRACES_EXPORTER + value: "{{ .Values.app.OtelTracesExporter }}" + - name: OTEL_EXPORTER_JAEGER_AGENT_HOST + value: "{{ .Values.app.OtelExporterJaegerAgentHost }}" + - name: OTEL_EXPORTER_JAEGER_AGENT_PORT + value: "{{ .Values.app.OtelExporterJaegerAgentPort }}" - name: {{ .Chart.Name }}-nginx + {{- with .Values.chp_api_nginx.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} securityContext: {{- toYaml .Values.securityContextNginx | nindent 12 }} image: "{{ .Values.image.repository }}:{{ .Values.image.nginxTag | default .Chart.AppVersion }}" @@ -79,11 +106,30 @@ spec: containerPort: 80 protocol: TCP volumeMounts: - - name: {{ include "chp-api.fullname" . }}-pvc - mountPath: /home/chp_api/web/staticfiles - name: config-vol mountPath: /etc/nginx/conf.d/default.conf subPath: nginx.conf + - name: {{ .Chart.Name }}-staticfs + {{- with .Values.chp_api_staticfs.resources }} + resources: + {{- toYaml . 
| nindent 12 }} + {{- end }} + securityContext: + {{- toYaml .Values.securityContextStaticfs | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.staticfsTag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http-staticfs + containerPort: 8080 + protocol: TCP + volumeMounts: + - name: {{ include "chp-api.fullname" . }}-pvc + mountPath: /var/www/static + env: + - name: FOLDER + value: "{{ .Values.app.staticfsFolder }}" + - name: DEBUG + value: "{{ .Values.app.staticfsDebug }}" volumes: - name: config-vol configMap: diff --git a/deploy/chp-api/templates/ingress.yaml b/deploy/chp-api/templates/ingress.yaml index f495b20..f46ba08 100644 --- a/deploy/chp-api/templates/ingress.yaml +++ b/deploy/chp-api/templates/ingress.yaml @@ -27,4 +27,4 @@ spec: number: {{ $svcPort }} {{- end }} {{- end }} -{{- end }} +{{- end }} \ No newline at end of file diff --git a/deploy/chp-api/templates/secret.yaml b/deploy/chp-api/templates/secret.yaml index e141878..c435a39 100644 --- a/deploy/chp-api/templates/secret.yaml +++ b/deploy/chp-api/templates/secret.yaml @@ -8,4 +8,5 @@ stringData: sql_database: {{ .Values.db.database }} sql_username: {{ .Values.db.username }} sql_password: {{ .Values.db.password }} - secret_key: {{ .Values.app.secret_key }} \ No newline at end of file + secret_key: {{ .Values.app.secret_key }} + django_superuser_password: {{ .Values.app.djangoSuperuserPassword }} diff --git a/deploy/chp-api/values.yaml b/deploy/chp-api/values.yaml index bc9b665..bfa0aae 100644 --- a/deploy/chp-api/values.yaml +++ b/deploy/chp-api/values.yaml @@ -3,6 +3,7 @@ # Declare variables to be passed into your templates. replicaCount: 1 +appname: chp-api image: repository: 853771734544.dkr.ecr.us-east-1.amazonaws.com/translator-ea-chp-api @@ -10,6 +11,7 @@ image: # Overrides the image tag whose default is the chart appVersion. 
tag: "BUILD_VERSION" nginxTag: "BUILD_VERSION-nginx" + staticfsTag: "BUILD_VERSION-staticfs" nameOverride: "" fullnameOverride: "" @@ -18,8 +20,14 @@ fullnameOverride: "" app: debug: "0" secret_key: "" + djangoCSRFTrustedOrigins: "" djangoAllowedHosts: "" - djangoSettingsModule: "chp_api.settings.production" + djangoSettingsModule: "chp_api.settings" + djangoSuperuserUsername: "chp_admin" + djangoSuperuserPassword: "" + djangoSuperuserEmail: "chp_admin@chp.com" + staticfsFolder: "/var/www" + staticfsDebug: "0" # database connection information db: @@ -65,10 +73,10 @@ ingress: paths: - path: / pathType: ImplementationSpecific - + tolerations: - key: "transltr" - value: "chp-api" + value: "chp" operator: "Equal" effect: "NoSchedule" @@ -78,12 +86,12 @@ affinity: requiredDuringSchedulingIgnoredDuringExecution: - labelSelector: matchExpressions: - - key: app + - key: applicaiton operator: In values: - - chp-api + - chp topologyKey: "kubernetes.io/hostname" - # this ensures pod only runs on node with label application=chp-api + # this ensures pod only runs on node with label application=managed-app nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: @@ -91,4 +99,4 @@ affinity: - key: application operator: In values: - - chp-api + - chp diff --git a/deployment-script b/deployment-script new file mode 100755 index 0000000..ab3de33 --- /dev/null +++ b/deployment-script @@ -0,0 +1,23 @@ +#!/bin/bash + +# Variables +django_superuser_username='cat secrets/chp_api/django_superuser_username.txt' +django_superuser_email='cat secrets/chp_api/django_superuser_email.txt' + +# Only to be run when building on dev machine +docker compose -f compose.chp-api.yaml -f compose.gennifer.yaml up -d --build --remove-orphans + +docker compose -f compose.chp-api.yaml run api python3 manage.py migrate + +# Create a database superuser +docker compose -f compose.chp-api.yaml run --user root api python3 manage.py createsuperuser --no-input #--username 
$django_superuser_username --email $django_superuser_email + +# Load apps +docker compose -f compose.chp-api.yaml run api python3 manage.py runscript load_db_apps +docker compose -f compose.chp-api.yaml run api python3 manage.py runscript templater +docker compose -f compose.chp-api.yaml run api python3 manage.py runscript gene_spec_curie_templater +docker compose -f compose.chp-api.yaml run api python3 manage.py runscript algorithm_loader + +docker compose -f compose.chp-api.yaml run --user root api python3 manage.py collectstatic --noinput + +echo "Check logs with: docker compose -f compose.chp-api.yaml -f compose.gennifer.yaml logs -f" diff --git a/deployment-script-jenkins b/deployment-script-jenkins deleted file mode 100644 index 3e26712..0000000 --- a/deployment-script-jenkins +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -intuition_user=$(whoami) - -echo "Taking down the NCATS Server." -docker-compose -f docker-compose.jenkins.yml down -v - -echo "Building back from scratch." -docker system prune - -docker-compose -f docker-compose.jenkins.yml build - -echo "Bringing up server." -docker-compose -f docker-compose.jenkins.yml up -d -docker-compose -f docker-compose.jenkins.yml exec web python3 manage.py makemigrations -docker-compose -f docker-compose.jenkins.yml exec web python3 manage.py migrate --noinput -docker-compose -f docker-compose.jenkins.yml exec web python3 manage.py collectstatic --no-input -echo "Server should now be up." - -echo "Check logs with: docker-compose -f docker-compose.prod.yml logs -f" diff --git a/deployment-script-prod b/deployment-script-prod deleted file mode 100644 index 4d01ffa..0000000 --- a/deployment-script-prod +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -INTUITION_USER=$1 - -GET_FIXTURE=${2:-0} - -echo "Taking down the NCATS Server." -docker-compose -f docker-compose.prod.yml down -v - -if [ $GET_FIXTURE -eq 1 ] -then - echo "Copying over database fixtures from intuition. If on AWS will need to SFTP to their servers." 
- scp $INTUITION_USER@intuition.thayer.dartmouth.edu:/home/public/data/ncats/chp_db/databases/chp_db_fixture.json.gz chp_api/chp_db_fixture.json.gz -fi - -echo "Building back from scratch." -docker system prune -docker-compose -f docker-compose.prod.yml build --no-cache - -echo "Bringing up server." -docker-compose -f docker-compose.prod.yml up -d -docker-compose -f docker-compose.prod.yml exec web python3 manage.py makemigrations --settings chp_api.settings.production -docker-compose -f docker-compose.prod.yml exec web python3 manage.py migrate --noinput --settings chp_api.settings.production -docker-compose -f docker-compose.prod.yml exec web python3 manage.py collectstatic --no-input --settings chp_api.settings.production - -echo "Loading in CHP DB fixture." -docker-compose -f docker-compose.prod.yml exec web python3 manage.py loaddata chp_db_fixture.json.gz -v3 --settings chp_api.settings.production - -echo "Server should now be up." - -echo "Check logs with: docker-compose -f docker-compose.prod.yml logs -f" diff --git a/deployment-script-stage b/deployment-script-stage deleted file mode 100644 index 0dbef9c..0000000 --- a/deployment-script-stage +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -INTUITION_USER=$1 - -GET_FIXTURE=${2:-0} - -echo "Taking down the NCATS Server." -docker-compose -f docker-compose.stage.yml down -v - -if [ $GET_FIXTURE -eq 1 ] -then - echo "Copying over database fixtures from intuition. If on AWS will need to SFTP to their servers." - scp $INTUITION_USER@intuition.thayer.dartmouth.edu:/home/public/data/ncats/chp_db/databases/chp_db_fixture.json.gz chp_api/chp_db_fixture.json.gz -fi - -echo "Building back from scratch." -docker system prune -docker-compose -f docker-compose.stage.yml build --no-cache - -echo "Bringing up server." 
-docker-compose -f docker-compose.stage.yml up -d -docker-compose -f docker-compose.stage.yml exec web python3 manage.py makemigrations --settings chp_api.settings.staging -docker-compose -f docker-compose.stage.yml exec web python3 manage.py migrate --noinput --settings chp_api.settings.staging -docker-compose -f docker-compose.stage.yml exec web python3 manage.py collectstatic --no-input --clear --settings chp_api.settings.staging - -echo "Loading in CHP DB fixture." -docker-compose -f docker-compose.stage.yml exec web python3 manage.py loaddata chp_db_fixture.json.gz -v3 --settings chp_api.settings.staging - -echo "Server should now be up." - -echo "Check logs with: docker-compose -f docker-compose.stage.yml logs -f" diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml deleted file mode 100644 index 6894d6e..0000000 --- a/docker-compose.dev.yml +++ /dev/null @@ -1,10 +0,0 @@ -version: '3.7' - -services: - db: - image: postgres:12.0-alpine - volumes: - - postgres_data:/var/lib/postgresql/data/ -volumes: - postgres_data: - static_volume: \ No newline at end of file diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml deleted file mode 100644 index 70f7c79..0000000 --- a/docker-compose.prod.yml +++ /dev/null @@ -1,17 +0,0 @@ -version: '3.7' - -services: - web: - build: - context: . - dockerfile: Dockerfile.prod - command: gunicorn -c gunicorn.config-prod.py --env DJANGO_SETTINGS_MODULE=chp_api.settings.production chp_api.wsgi:application --bind 0.0.0.0:8000 - volumes: - - static_volume:/home/chp_api/web/staticfiles - expose: - - 8000 - env_file: - - ./.env.prod -volumes: - postgres_data: - static_volume: \ No newline at end of file diff --git a/docker-compose.stage.yml b/docker-compose.stage.yml deleted file mode 100644 index cbd32b7..0000000 --- a/docker-compose.stage.yml +++ /dev/null @@ -1,17 +0,0 @@ -version: '3.7' - -services: - web: - build: - context: . 
- dockerfile: Dockerfile.stage - command: gunicorn -c gunicorn.config-stage.py --env DJANGO_SETTINGS_MODULE=chp_api.settings.staging chp_api.wsgi:application --bind 0.0.0.0:8000 --access-logfile gunicorn-access.log --error-logfile gunicorn-error.log --log-level debug - volumes: - - static_volume:/home/chp_api/web/staticfiles - expose: - - 8000 - env_file: - - ./.env.stage -volumes: - postgres_data: - static_volume: \ No newline at end of file diff --git a/entrypoint-dev.sh b/entrypoint-dev.sh deleted file mode 100644 index 2e626f9..0000000 --- a/entrypoint-dev.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/sh - -if [ "$DATABASE" = "postgres" ] -then - echo "Waiting for postgres..." - - while ! nc -z $SQL_HOST $SQL_PORT; do - sleep 0.1 - done - - echo "PostgreSQL started" -fi - -python3 manage.py flush --no-input -python3 manage.py migrate - -exec "$@" \ No newline at end of file diff --git a/entrypoint.prod.sh b/entrypoint.prod.sh deleted file mode 100644 index 40e5a12..0000000 --- a/entrypoint.prod.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/sh - -# Wait for Database image to start -if [ "$DATABASE" = "postgres" ] -then - echo "Waiting for postgres..." - - while ! nc -z $SQL_HOST $SQL_PORT; do - sleep 0.1 - done - - echo "PostgreSQL started" -fi - -# Run django migrations and collect static -echo "Collect static files" -python3 manage.py collectstatic --noinput - -echo "Make database migrations" -python3 manage.py makemigrations - -echo "Apply database migrations" -python3 manage.py migrate - -exec "$@" \ No newline at end of file diff --git a/entrypoint.stage.sh b/entrypoint.stage.sh deleted file mode 100644 index 40e5a12..0000000 --- a/entrypoint.stage.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/sh - -# Wait for Database image to start -if [ "$DATABASE" = "postgres" ] -then - echo "Waiting for postgres..." - - while ! 
nc -z $SQL_HOST $SQL_PORT; do - sleep 0.1 - done - - echo "PostgreSQL started" -fi - -# Run django migrations and collect static -echo "Collect static files" -python3 manage.py collectstatic --noinput - -echo "Make database migrations" -python3 manage.py makemigrations - -echo "Apply database migrations" -python3 manage.py migrate - -exec "$@" \ No newline at end of file diff --git a/gennifer b/gennifer new file mode 160000 index 0000000..2077a30 --- /dev/null +++ b/gennifer @@ -0,0 +1 @@ +Subproject commit 2077a3047e7260abaad32e1d242c53ecd40073bb diff --git a/gennifer-sample.json b/gennifer-sample.json new file mode 100644 index 0000000..e91e778 --- /dev/null +++ b/gennifer-sample.json @@ -0,0 +1,16 @@ +{ + "name": "GSD Study", + "description": "This is a test study with one task.", + "tasks": [ + { + "algorithm_name": "pidc", + "zenodo_id":"7988181", + "hyperparameters": null + }, + { + "algorithm_name": "grisli", + "zenodo_id":"8057216", + "hyperparameters": null + } + ] +} diff --git a/gs-sample.json b/gs-sample.json new file mode 100644 index 0000000..28ca984 --- /dev/null +++ b/gs-sample.json @@ -0,0 +1,31 @@ +{ + "message": { + "query_graph": { + "edges": { + "e0": { + "object": "n1", + "predicates": [ + "biolink:expressed_in" + ], + "subject": "n0" + } + }, + "nodes": { + "n0": { + "categories": [ + "biolink:Gene", + "biolink:Protein" + ] + }, + "n1": { + "categories": [ + "biolink:GrossAnatomicalStructure" + ], + "ids": [ + "UBERON:0000458" + ] + } + } + } + } +} diff --git a/gunicorn.config-stage.py b/gunicorn.config-stage.py deleted file mode 100644 index 2459b11..0000000 --- a/gunicorn.config-stage.py +++ /dev/null @@ -1,13 +0,0 @@ -### Gunicorn Configuration File ### - -timeout = 0 -graceful_timeout = 0 -limit_request_field_size = 0 -limit_request_line = 0 -limit_request_fields = 0 -proxy_allow_ips = '*' -workers=1 -errorlog='gunicorn-error.log' -accesslog='gunicorn-access.log' -loglevel='debug' - diff --git a/nginx/Dockerfile b/nginx/Dockerfile index 
c4570fd..bfb3338 100644 --- a/nginx/Dockerfile +++ b/nginx/Dockerfile @@ -1,4 +1,32 @@ -FROM nginx:1.19.0-alpine +FROM nginx:1.19.7-alpine -RUN rm /etc/nginx/conf.d/default.conf -COPY nginx.conf /etc/nginx/conf.d/default.conf +# Add bash for boot cmd +RUN apk add bash + +# Add nginx.conf to container +COPY --chown=nginx:nginx nginx.conf /etc/nginx/nginx.conf +COPY --chown=nginx:nginx start.sh /app/start.sh + +# set workdir +WORKDIR /app + +# permissions and nginx user for tightened security +RUN chown -R nginx:nginx /app && chmod -R 755 /app && \ + chown -R nginx:nginx /var/cache/nginx && \ + chown -R nginx:nginx /var/log/nginx && \ + chmod -R 755 /var/log/nginx; \ + chown -R nginx:nginx /etc/nginx/conf.d +RUN touch /var/run/nginx.pid && chown -R nginx:nginx /var/run/nginx.pid + +# # Uncomment to keep the nginx logs inside the container - Leave commented for logging to stdout and stderr +# RUN mkdir -p /var/log/nginx +# RUN unlink /var/log/nginx/access.log \ +# && unlink /var/log/nginx/error.log \ +# && touch /var/log/nginx/access.log \ +# && touch /var/log/nginx/error.log \ +# && chown nginx /var/log/nginx/*log \ +# && chmod 644 /var/log/nginx/*log + +USER nginx + +CMD ["nginx", "-g", "'daemon off;'"] diff --git a/nginx/default.conf b/nginx/default.conf new file mode 100644 index 0000000..ee86726 --- /dev/null +++ b/nginx/default.conf @@ -0,0 +1,75 @@ +proxy_cache_path /tmp/cache levels=1:2 keys_zone=cache:10m max_size=500m inactive=60m use_temp_path=off; + +server { + listen 80; + + location / { + proxy_pass http://$DJANGO_SERVER_ADDR; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + } + + location /cache-me { + proxy_pass http://$DJANGO_SERVER_ADDR; + proxy_cache cache; + proxy_cache_lock on; + proxy_cache_valid 200 30s; + proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504; + proxy_cache_revalidate on; + proxy_cache_background_update on; + 
expires 20s; + } + + location /health-check { + add_header Content-Type text/plain; + return 200 "success"; + } + + location /static { + proxy_pass http://$STATIC_SERVER_ADDR; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_cache cache; + proxy_ignore_headers Cache-Control; + proxy_cache_valid 60m; + } + + location /flower-dashboard { + proxy_pass http://$FLOWER_DASHBOARD_ADDR; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + } + + location /_next/static { + proxy_cache cache; + proxy_pass http://$NEXTJS_SERVER_ADDR; + } + + location /api { + proxy_pass http://$NEXTJS_SERVER_ADDR; + } + + location /ui { + proxy_pass http://$NEXTJS_SERVER_ADDR/; + } + + location /dashboard { + proxy_pass http://$NEXTJS_SERVER_ADDR; + } + + location /studies { + proxy_pass http://$NEXTJS_SERVER_ADDR; + } + + location /explore { + proxy_pass http://$NEXTJS_SERVER_ADDR; + } + + location /documentation { + proxy_pass http://$NEXTJS_SERVER_ADDR; + } + +} diff --git a/nginx/nginx.conf b/nginx/nginx.conf index b6613f4..ca4394f 100644 --- a/nginx/nginx.conf +++ b/nginx/nginx.conf @@ -1,23 +1,60 @@ -upstream chp_api { - server web:8000; +worker_processes auto; +pid /var/run/nginx.pid; + +events { + worker_connections 1024; } -server { +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + + # Define the format of log messages. 
+ log_format main_ext '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for" ' + '"$host" sn="$server_name" ' + 'rt=$request_time ' + 'ua="$upstream_addr" us="$upstream_status" ' + 'ut="$upstream_response_time" ul="$upstream_response_length" ' + 'cs=$upstream_cache_status' ; + + access_log /var/log/nginx/access.log main_ext; + error_log /var/log/nginx/error.log warn; + + sendfile on; + + keepalive_timeout 65; + + # Enable Compression + gzip on; + gzip_proxied any; + gzip_comp_level 4; + gzip_types text/css application/javascript image/svg+xml; + + # Disable Display of NGINX Version + server_tokens off; + + # Headers + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection 'upgrade'; + proxy_set_header Host $host; + proxy_cache_bypass $http_upgrade; + + # Size Limits + client_body_buffer_size 10K; + client_header_buffer_size 128k; + client_max_body_size 8m; + large_client_header_buffers 2 1k; - listen 80; - client_max_body_size 100M; + # # SSL / TLS Settings - Suggested for Security + # ssl_protocols TLSv1.2 TLSv1.3; + # ssl_session_timeout 15m; + # ssl_ciphers ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256; + # ssl_prefer_server_ciphers on; + # ssl_session_tickets off; - location / { - proxy_pass http://chp_api; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header Host $host; - proxy_redirect off; - proxy_read_timeout 360; - proxy_send_timeout 360; - proxy_connect_timeout 360; - } + include /etc/nginx/conf.d/*.conf; - location /staticfiles/ { - alias /home/chp_api/web/staticfiles/; - } } diff --git a/nginx/start.sh b/nginx/start.sh new file mode 100644 index 0000000..4fea892 --- /dev/null +++ b/nginx/start.sh @@ -0,0 +1,2 @@ +#!/bin/bash +envsubst 
'$DJANGO_SERVER_ADDR,$STATIC_SERVER_ADDR,$FLOWER_DASHBOARD_ADDR,$NEXTJS_SERVER_ADDR' < /tmp/default.conf > /etc/nginx/conf.d/default.conf && nginx -g 'daemon off;' diff --git a/requirements-dev.txt b/requirements-dev.txt deleted file mode 100644 index bf1faea..0000000 --- a/requirements-dev.txt +++ /dev/null @@ -1,6 +0,0 @@ -chp_learn @ git+https://github.com/di2ag/chp_learn.git@production -chp_utils @ git+https://github.com/di2ag/chp_utils.git@production -trapi_model @ git+https://github.com/di2ag/trapi_model.git@production -chp_look_up @ git+https://github.com/di2ag/chp_look_up.git@production -gene-specificity @ git+https://github.com/di2ag/gene-specificity.git@production -reasoner-validator @ git+https://github.com/di2ag/reasoner-validator.git@production \ No newline at end of file diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 21db109..0000000 --- a/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -djangorestframework -psycopg2-binary -django-environ -django-hosts -gunicorn -django \ No newline at end of file