diff --git a/docker/main/dataservice/Dockerfile b/docker/main/dataservice/Dockerfile index 804e4e2c1..113bd53b2 100644 --- a/docker/main/dataservice/Dockerfile +++ b/docker/main/dataservice/Dockerfile @@ -1,6 +1,6 @@ ARG docker_internal_registry ################################################################################################################ -FROM ${docker_internal_registry}/dmod-py-sources as sources +FROM ${docker_internal_registry}/dmod-py-sources:latest as sources ################################################################################################################ FROM python:3.8-alpine3.15 diff --git a/docker/main/dataservice/entrypoint.sh b/docker/main/dataservice/entrypoint.sh index e42ae462d..433ecc801 100644 --- a/docker/main/dataservice/entrypoint.sh +++ b/docker/main/dataservice/entrypoint.sh @@ -62,7 +62,7 @@ if [ -d ${UPDATED_PACKAGES_DIR:=/updated_packages} ]; then for srv in $(pip -qq freeze | grep dmod | awk -F= '{print $1}' | awk -F- '{print $2}'); do if [ $(ls ${UPDATED_PACKAGES_DIR} | grep dmod.${srv}- | wc -l) -eq 1 ]; then pip uninstall -y --no-input $(pip -qq freeze | grep dmod.${srv} | awk -F= '{print $1}') - pip install $(ls ${UPDATED_PACKAGES_DIR}/*.whl | grep dmod.${srv}-) + pip install --no-deps $(ls ${UPDATED_PACKAGES_DIR}/*.whl | grep dmod.${srv}-) fi done #pip install ${UPDATED_PACKAGES_DIR}/*.whl diff --git a/docker/main/docker-deploy.yml b/docker/main/docker-deploy.yml index 665668e58..13c93963d 100644 --- a/docker/main/docker-deploy.yml +++ b/docker/main/docker-deploy.yml @@ -88,6 +88,9 @@ services: - DATA_SERVICE_ENDPOINT_HOST=${DOCKER_REQUESTS_DATASERVICE_ENDPOINT_HOST:-data-service} - DATA_SERVICE_ENDPOINT_PORT=${DOCKER_DATASERVICE_CONTAINER_PORT:-3015} - DATA_SERVICE_CLIENT_SSL_DIR=${DOCKER_REQUESTS_CONTAINER_DATASERVICE_CLIENT_SSL_DIR:-/ssl/dataservice} + - EVALUATION_SERVICE_ENDPOINT_HOST=${DOCKER_REQUESTS_EVALUATIONSERVICE_ENDPOINT_HOST:-evaluation-service} + - 
EVALUATION_SERVICE_ENDPOINT_PORT=${DOCKER_EVALUATIONSERVICE_CONTAINER_PORT:-3015} + - EVALUATION_SERVICE_CLIENT_SSL_DIR=${DOCKER_REQUESTS_CONTAINER_EVALUATIONSERVICE_CLIENT_SSL_DIR:-/ssl/evaluationservice} - PARTITIONER_SERVICE_ENDPOINT_HOST=${DOCKER_REQUESTS_PARTITIONERSERVICE_ENDPOINT_HOST:-partitioner-service} - PARTITIONER_SERVICE_ENDPOINT_PORT=${DOCKER_PARTITIONER_SERVICE_CONTAINER_PORT:-3014} - PARTITIONER_SERVICE_CLIENT_SSL_DIR=${DOCKER_REQUESTS_CONTAINER_PARTITIONERSERVICE_CLIENT_SSL_DIR:-/ssl/partitionerservice} @@ -114,7 +117,9 @@ services: ports: - ${DOCKER_SUBSET_API_PORT:-5001}:${DOCKER_SUBSET_CONTAINER_PORT:-5001} volumes: - - ${HYDROFABRIC_DATA_DIR:?Please set HYDROFABRIC_DATA_DIR for Docker environment in .env config file}:/hydrofabric_data + # TODO: even if this works, need to make it configurable + - hydrofabric:/hydrofabric_data/${DMOD_HYDROFABRIC_VOLUME:?} + #- ${HYDROFABRIC_DATA_DIR:?Please set HYDROFABRIC_DATA_DIR for Docker environment in .env config file}:/hydrofabric_data # This typically needs to be commented out; intended for development use (see related 'environment' config above) #- updated_packages:${UPDATED_PACKAGES_CONTAINER_DIR:?Check if updated packages directory should be used} deploy: @@ -128,6 +133,9 @@ services: - LISTEN_PORT=${DOCKER_SUBSET_CONTAINER_PORT:-5000} - SERVICE_PACKAGE_NAME=${PYTHON_PACKAGE_NAME_SUBSET_SERVICE:?} - FILES_DIRECTORY=/hydrofabric_data + - PYCHARM_REMOTE_DEBUG_ACTIVE=${PYCHARM_REMOTE_DEBUG_SUBSET_SERVICE_ACTIVE:-false} + - PYCHARM_REMOTE_DEBUG_SERVER_HOST=${PYCHARM_REMOTE_DEBUG_SERVER_HOST:-host.docker.internal} + - PYCHARM_REMOTE_DEBUG_SERVER_PORT=${PYCHARM_REMOTE_DEBUG_SERVER_PORT_SUBSET_SERVICE:-55874} # This typically needs to be commented out; intended for development use (see related 'volume' config below) #- UPDATED_PACKAGES_DIR=${UPDATED_PACKAGES_CONTAINER_DIR:?Updated packages directory not set, make sure this should be active} entrypoint: ["python3", "-m", 
"${PYTHON_PACKAGE_NAME_SUBSET_SERVICE:?}"] @@ -226,6 +234,9 @@ volumes: # configs above for several services) updated_packages: external: true + hydrofabric: + name: ${DMOD_HYDROFABRIC_VOLUME:?} + external: true secrets: myredis_pass: diff --git a/docker/main/ngen-calibration/Dockerfile b/docker/main/ngen-calibration/Dockerfile new file mode 100644 index 000000000..e84be87f6 --- /dev/null +++ b/docker/main/ngen-calibration/Dockerfile @@ -0,0 +1,38 @@ +ARG DOCKER_INTERNAL_REGISTRY + +FROM ${DOCKER_INTERNAL_REGISTRY}/ngen:latest + +ARG WORKDIR=/ngen +ARG USER=mpi +ARG NGEN_CAL_BRANCH=master +ARG NGEN_CAL_COMMIT + +ENV USER=${USER} USER_HOME=/home/${USER} + +WORKDIR ${WORKDIR} +USER ${USER} + +# try NGEN_CAL_COMMIT, if not set or empty, use NGEN_CAL_BRANCH +RUN pip install "git+https://github.com/noaa-owp/ngen-cal@${NGEN_CAL_COMMIT:-${NGEN_CAL_BRANCH}}#egg=ngen_cal&subdirectory=python/ngen_cal" + +COPY --chown=${USER} entrypoint.sh ${WORKDIR} + +# Change permissions for entrypoint and make sure dataset volume mount parent directories exists +RUN chmod +x ${WORKDIR}/entrypoint.sh \ + && for d in ${DATASET_DIRECTORIES}; do mkdir -p /dmod/datasets/${d}; done \ + && for d in noah-owp-modular topmodel cfe sloth 'evapotranspiration/evapotranspiration'; do \ + if [ -d ${WORKDIR}/ngen/extern/${d}/cmake_build ]; then \ + cp -a ${WORKDIR}/ngen/extern/${d}/cmake_build/*.so* /dmod/shared_libs/.; \ + fi; \ + done \ + && ( cp -a ${WORKDIR}/ngen/cmake_build_parallel/ngen /dmod/bin/ngen-parallel || true ) \ + && ( cp -a ${WORKDIR}/ngen/cmake_build_serial/ngen /dmod/bin/ngen-serial || true ) \ + && ( cp -a ${WORKDIR}/ngen/cmake_build/partitionGenerator /dmod/bin/partitionGenerator || true ) \ + && pushd /dmod/bin \ + # NOTE use of `ln -sf`. 
\ + && ( ( stat ngen-parallel && ln -sf ngen-parallel ngen ) || ( stat ngen-serial && ln -sf ngen-serial ngen ) ) \ + && popd + +ENV PATH=${WORKDIR}:$PATH +ENTRYPOINT ["entrypoint.sh"] +CMD [""] diff --git a/docker/main/ngen-calibration/entrypoint.sh b/docker/main/ngen-calibration/entrypoint.sh new file mode 100755 index 000000000..c4d054610 --- /dev/null +++ b/docker/main/ngen-calibration/entrypoint.sh @@ -0,0 +1,119 @@ +#!/bin/sh +# Managed by the _generate_docker_cmd_args function in scheduler.py of dmod.scheduler +# +# $1 will have the number of nodes associated with this run +# $2 will have comma-delimited host strings in MPI form; e.g., hostname:N,hostname:M +# $3 will have the unique job id +# $4 is the worker index +# $5 will be the name of the output dataset (which will imply a directory location) +# $6 will be the name of the hydrofabric dataset (which will imply a directory location) +# $7 will be the name of the realization configuration dataset (which will imply a directory location) +# $8 will be the name of the BMI configuration dataset (which will imply a directory location) +# $9 will be the name of the partition configuration dataset (which will imply a directory location) +# TODO: wire up $10 +# $10 will be the name of the calibration configuration dataset (which will imply a directory location) + +# Not yet supported +# no-op +MPI_NODE_COUNT="${1:?No MPI node count given}" +# no-op +MPI_HOST_STRING="${2:?No MPI host string given}" +# no-op +PARTITION_DATASET_NAME="${9:?}" + +JOB_ID=${3:?No Job id given} +WORKER_INDEX=${4:?No worker index given} + +OUTPUT_DATASET_NAME="${5:?}" +HYDROFABRIC_DATASET_NAME="${6:?}" +REALIZATION_CONFIG_DATASET_NAME="${7:?}" +BMI_CONFIG_DATASET_NAME="${8:?}" +CALIBRATION_CONFIG_DATASET_NAME="${10:?}" + +ACCESS_KEY_SECRET="object_store_exec_user_name" +SECRET_KEY_SECRET="object_store_exec_user_passwd" +DOCKER_SECRETS_DIR="/run/secrets" +ACCESS_KEY_FILE="${DOCKER_SECRETS_DIR}/${ACCESS_KEY_SECRET}" 
+SECRET_KEY_FILE="${DOCKER_SECRETS_DIR}/${SECRET_KEY_SECRET}" + +NGEN_EXECUTABLE="/ngen/ngen/cmake_build/ngen" + +ALL_DATASET_DIR="/dmod/datasets" +OUTPUT_DATASET_DIR="${ALL_DATASET_DIR}/output/${OUTPUT_DATASET_NAME}" +HYDROFABRIC_DATASET_DIR="${ALL_DATASET_DIR}/hydrofabric/${HYDROFABRIC_DATASET_NAME}" +REALIZATION_CONFIG_DATASET_DIR="${ALL_DATASET_DIR}/config/${REALIZATION_CONFIG_DATASET_NAME}" +BMI_CONFIG_DATASET_DIR="${ALL_DATASET_DIR}/config/${BMI_CONFIG_DATASET_NAME}" +PARTITION_DATASET_DIR="${ALL_DATASET_DIR}/config/${PARTITION_DATASET_NAME}" +CALIBRATION_CONFIG_DATASET_DIR="${ALL_DATASET_DIR}/config/${CALIBRATION_CONFIG_DATASET_NAME}" + +print_date() { + date "+%Y-%m-%d,%H:%M:%S" +} + +check_for_dataset_dir() { + # Dataset dir is $1 + _CATEG="$(echo "${1}" | sed "s|${ALL_DATASET_DIR}/\([^/]*\)/.*|\1|" | awk '{print toupper($0)}')" + if [ ! -d "${1}" ]; then + echo "Error: expected ${_CATEG} dataset directory ${1} not found." >&2 + exit 1 + fi +} + +load_object_store_keys_from_docker_secrets() { + # Read Docker Secrets files for Object Store access, if they exist + if [ -z "${ACCESS_KEY_FILE:-}" ]; then + echo "WARN: Cannot load object store access key when Docker secret file name not set" + elif [ -e "${ACCESS_KEY_FILE}" ]; then + ACCESS_KEY="$(cat "${ACCESS_KEY_FILE}")" + else + echo "WARN: Cannot load object store access key when Docker secret file does not exist" + fi + + if [ -z "${SECRET_KEY_FILE:-}" ]; then + echo "WARN: Cannot load object store secret key when Docker secret file name not set" + elif [ -e "${SECRET_KEY_FILE}" ]; then + SECRET_KEY="$(cat "${SECRET_KEY_FILE}")" + else + echo "WARN: Cannot load object store secret key when Docker secret file does not exist" + fi + + test -n "${ACCESS_KEY:-}" && test -n "${SECRET_KEY:-}" +} + +start_calibration() { + # Start ngen calibration + echo "$(print_date) Starting serial ngen calibration" + # CALIBRATION_CONFIG_FILE=${CALIBRATION_CONFIG_DATASET_DIR}/$(basename $(find 
${CALIBRATION_CONFIG_DATASET_DIR} -maxdepth 1 -name "*.yaml" | head -1)) + + # TODO: move this to CALIBRATION_CONFIG_DATASET_DIR + # NOTE: assumes that calibration dataset will be in realization config dataset AND that it is + # the only yaml file at the top level of that dataset. + CALIBRATION_CONFIG_FILE=${REALIZATION_CONFIG_DATASET_DIR}/$(basename $(find ${REALIZATION_CONFIG_DATASET_DIR} -maxdepth 1 -name "*.yaml" | head -1)) + + if [ ! -f "${CALIBRATION_CONFIG_FILE}" ]; then + echo "Error: NGEN calibration yaml file not found" >&2 + exit 1 + fi + python3 -m ngen.cal "${CALIBRATION_CONFIG_FILE}" + + #Capture the return value to use as service exit code + NGEN_RETURN=$? + + echo "$(print_date) ngen calibration finished with return value: ${NGEN_RETURN}" + + # Exit with the model's exit code + return ${NGEN_RETURN} +} + +# Sanity check that the output, hydrofabric, and config datasets are available (i.e., their directories are in place) +check_for_dataset_dir "${REALIZATION_CONFIG_DATASET_DIR}" +check_for_dataset_dir "${BMI_CONFIG_DATASET_DIR}" +check_for_dataset_dir "${PARTITION_DATASET_DIR}" +check_for_dataset_dir "${HYDROFABRIC_DATASET_DIR}" +check_for_dataset_dir "${OUTPUT_DATASET_DIR}" +# check_for_dataset_dir "${CALIBRATION_CONFIG_DATASET_DIR}" + +# Move to the output dataset mounted directory +cd ${OUTPUT_DATASET_DIR} + +start_calibration diff --git a/docker/main/ngen/entrypoint.sh b/docker/main/ngen/entrypoint.sh index 239b7fd88..b9f568935 100755 --- a/docker/main/ngen/entrypoint.sh +++ b/docker/main/ngen/entrypoint.sh @@ -19,7 +19,10 @@ OUTPUT_DATASET_NAME="${5:?}" HYDROFABRIC_DATASET_NAME="${6:?}" REALIZATION_CONFIG_DATASET_NAME="${7:?}" BMI_CONFIG_DATASET_NAME="${8:?}" -PARTITION_DATASET_NAME="${9:?}" +# Don't require a partitioning config when only using a single node +if [ ${MPI_NODE_COUNT:?} -gt 1 ]; then + PARTITION_DATASET_NAME="${9:?No argument for partition config dataset when expecting one for MPI-based job}" +fi 
ACCESS_KEY_SECRET="object_store_exec_user_name" SECRET_KEY_SECRET="object_store_exec_user_passwd" @@ -36,6 +39,9 @@ fi MPI_RUN="mpirun" #NGEN_EXECUTABLE="ngen" +NGEN_SERIAL_EXECUTABLE="/ngen/ngen/cmake_build_serial/ngen" +NGEN_PARALLEL_EXECUTABLE="/ngen/ngen/cmake_build_parallel/ngen" +# This will be symlinked to the parallel one currently NGEN_EXECUTABLE="/ngen/ngen/cmake_build/ngen" ALL_DATASET_DIR="/dmod/datasets" @@ -43,7 +49,10 @@ OUTPUT_DATASET_DIR="${ALL_DATASET_DIR}/output/${OUTPUT_DATASET_NAME}" HYDROFABRIC_DATASET_DIR="${ALL_DATASET_DIR}/hydrofabric/${HYDROFABRIC_DATASET_NAME}" REALIZATION_CONFIG_DATASET_DIR="${ALL_DATASET_DIR}/config/${REALIZATION_CONFIG_DATASET_NAME}" BMI_CONFIG_DATASET_DIR="${ALL_DATASET_DIR}/config/${BMI_CONFIG_DATASET_NAME}" -PARTITION_DATASET_DIR="${ALL_DATASET_DIR}/config/${PARTITION_DATASET_NAME}" +# Don't require a partitioning dataset when only using a single node +if [ ${MPI_NODE_COUNT:?} -gt 1 ]; then + PARTITION_DATASET_DIR="${ALL_DATASET_DIR}/config/${PARTITION_DATASET_NAME:?No partition config dataset name for directory}" +fi RUN_SENTINEL="/home/${MPI_USER}/.run_sentinel" @@ -127,10 +136,32 @@ exec_main_worker_ngen_run() return ${NGEN_RETURN} } +exec_serial_ngen_run() +{ + echo "$(print_date) Skipping host checks since job uses ${MPI_NODE_COUNT} worker hosts and framework will run serially" + + # Execute the model on the linked data + echo "$(print_date) Executing serial build of ngen" + ${NGEN_SERIAL_EXECUTABLE:?} ${HYDROFABRIC_DATASET_DIR}/catchment_data.geojson "" \ + ${HYDROFABRIC_DATASET_DIR}/nexus_data.geojson "" \ + ${REALIZATION_CONFIG_DATASET_DIR}/realization_config.json + + #Capture the return value to use as service exit code + NGEN_RETURN=$? 
+ + echo "$(print_date) serial ngen command finished with return value: ${NGEN_RETURN}" + + # Exit with the model's exit code + return ${NGEN_RETURN} +} + # Sanity check that the output, hydrofabric, and config datasets are available (i.e., their directories are in place) check_for_dataset_dir "${REALIZATION_CONFIG_DATASET_DIR}" check_for_dataset_dir "${BMI_CONFIG_DATASET_DIR}" -check_for_dataset_dir "${PARTITION_DATASET_DIR}" +# Don't require a partitioning dataset when only using a single node +if [ ${MPI_NODE_COUNT:?} -gt 1 ]; then + check_for_dataset_dir "${PARTITION_DATASET_DIR:?No partition dataset directory defined}" +fi check_for_dataset_dir "${HYDROFABRIC_DATASET_DIR}" check_for_dataset_dir "${OUTPUT_DATASET_DIR}" @@ -139,7 +170,11 @@ cd ${OUTPUT_DATASET_DIR} if [ "${WORKER_INDEX}" = "0" ]; then if [ "$(whoami)" = "${MPI_USER}" ]; then - exec_main_worker_ngen_run + if [ ${MPI_NODE_COUNT:-1} -gt 1 ]; then + exec_main_worker_ngen_run + else + exec_serial_ngen_run + fi else echo "$(print_date) Starting SSH daemon on main worker" /usr/sbin/sshd -D & diff --git a/docker/main/requestservice/entrypoint.sh b/docker/main/requestservice/entrypoint.sh index da38e9ec7..2dcae593a 100755 --- a/docker/main/requestservice/entrypoint.sh +++ b/docker/main/requestservice/entrypoint.sh @@ -54,6 +54,9 @@ python -m ${SERVICE_PACKAGE_NAME:?} \ --data-service-host ${DATA_SERVICE_ENDPOINT_HOST:?} \ --data-service-port ${DATA_SERVICE_ENDPOINT_PORT:?} \ --data-service-ssl-dir ${DATA_SERVICE_CLIENT_SSL_DIR:?} \ + --evaluation-service-host ${EVALUATION_SERVICE_ENDPOINT_HOST:?} \ + --evaluation-service-port ${EVALUATION_SERVICE_ENDPOINT_PORT:?} \ + --evaluation-service-ssl-dir ${EVALUATION_SERVICE_CLIENT_SSL_DIR:?} \ --partitioner-service-host ${PARTITIONER_SERVICE_ENDPOINT_HOST:?} \ --partitioner-service-port ${PARTITIONER_SERVICE_ENDPOINT_PORT:?} \ --partitioner-service-ssl-dir ${PARTITIONER_SERVICE_CLIENT_SSL_DIR:?} diff --git a/docker/main/s3fs-volume-helper/Dockerfile 
b/docker/main/s3fs-volume-helper/Dockerfile index b8a261aeb..39100f924 100644 --- a/docker/main/s3fs-volume-helper/Dockerfile +++ b/docker/main/s3fs-volume-helper/Dockerfile @@ -1,8 +1,8 @@ FROM alpine:3.15 -RUN apk update && apk upgrade && apk add docker bash && mkdir -p /dmod/scripts +RUN apk update && apk upgrade && apk add docker bash && mkdir -p /dmod -COPY ./scripts/* /dmod/scripts/. +COPY ./scripts /dmod/scripts ENV OUT_OF_GIT_REPO='true' diff --git a/docker/main/subsetservice/entrypoint.sh b/docker/main/subsetservice/entrypoint.sh index cfc3728de..65eae6335 100755 --- a/docker/main/subsetservice/entrypoint.sh +++ b/docker/main/subsetservice/entrypoint.sh @@ -12,6 +12,23 @@ if [ -n "${VENV_DIR:-}" ]; then pip install --update -r /code/requirements.txt fi +# Install for debugging when appropriate +if [ "$(echo "${PYCHARM_REMOTE_DEBUG_ACTIVE:-false}" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]')" = "true" ]; then + _DEBUG_ARG="--pycharm-remote-debug" +fi + +# Handle some things in any cases when there is debugging +if [ -n "${_DEBUG_ARG:-}" ]; then + # Append these as well if appropriate, though defaults are coded (and they are somewhat agnostic to the debug setup) + if [ -n "${PYCHARM_REMOTE_DEBUG_SERVER_HOST:-}" ]; then + _DEBUG_ARG="${_DEBUG_ARG:-} --remote-debug-host ${PYCHARM_REMOTE_DEBUG_SERVER_HOST}" + fi + + if [ -n "${PYCHARM_REMOTE_DEBUG_SERVER_PORT:-}" ]; then + _DEBUG_ARG="${_DEBUG_ARG:-} --remote-debug-port ${PYCHARM_REMOTE_DEBUG_SERVER_PORT}" + fi +fi + # If we find this directory, and if there are wheels in it, then install those if [ -d ${UPDATED_PACKAGES_DIR:=/updated_packages} ]; then if [ $(ls ${UPDATED_PACKAGES_DIR}/*.whl | wc -l) -gt 0 ]; then @@ -44,4 +61,4 @@ fi #set +e #export PYTHONASYNCIODEBUG=1 -python3 -m ${SERVICE_PACKAGE_NAME:?} ${args} \ No newline at end of file +python3 -m ${SERVICE_PACKAGE_NAME:?} ${_DEBUG_ARG:-} ${args} \ No newline at end of file diff --git a/docker/nwm_gui/app_server/Dockerfile 
b/docker/nwm_gui/app_server/Dockerfile index 09720c9e0..070a0e28f 100644 --- a/docker/nwm_gui/app_server/Dockerfile +++ b/docker/nwm_gui/app_server/Dockerfile @@ -20,12 +20,14 @@ RUN pip install -r dependencies.txt ENV PYTHONUNBUFFERED 1 # Slurp (or set default) wheel package names ... +ARG core_package_name=dmod-core ARG comms_package_name=dmod-communication ARG client_package_name=dmod-client # Copy custom built packages from external sources image COPY --from=sources /DIST /DIST -RUN pip install --upgrade --find-links=/DIST ${comms_package_name} \ +RUN pip install --upgrade --find-links=/DIST ${core_package_name} \ + && pip install --upgrade --find-links=/DIST ${comms_package_name} \ && pip install --upgrade --find-links=/DIST ${client_package_name} \ # After eventually installing all dist files like this, clean up ... \ && rm -rf /DIST diff --git a/docker/nwm_gui/app_server/entrypoint.sh b/docker/nwm_gui/app_server/entrypoint.sh index a2d8a531c..62c90eac2 100755 --- a/docker/nwm_gui/app_server/entrypoint.sh +++ b/docker/nwm_gui/app_server/entrypoint.sh @@ -34,6 +34,10 @@ echo "Starting dmod app" #Extract the DB secrets into correct ENV variables POSTGRES_SECRET_FILE="/run/secrets/${DOCKER_SECRET_POSTGRES_PASS:?}" export SQL_PASSWORD="$(cat ${POSTGRES_SECRET_FILE})" +export DMOD_SU_PASSWORD="$(cat ${POSTGRES_SECRET_FILE})" + +# Execute the migration scripts on the designated database +python manage.py migrate # Handle for debugging when appropriate if [ "$(echo "${PYCHARM_REMOTE_DEBUG_ACTIVE:-false}" | tr '[:upper:]' '[:lower:]' | tr -d '[:space:]')" == "true" ]; then diff --git a/docker/nwm_gui/docker-compose.yml b/docker/nwm_gui/docker-compose.yml index 509441649..403c6a92b 100644 --- a/docker/nwm_gui/docker-compose.yml +++ b/docker/nwm_gui/docker-compose.yml @@ -34,6 +34,7 @@ services: args: docker_internal_registry: ${DOCKER_INTERNAL_REGISTRY:?Missing DOCKER_INTERNAL_REGISTRY value (see 'Private Docker Registry ' section in example.env)} comms_package_name: 
${PYTHON_PACKAGE_DIST_NAME_COMMS:?} + client_package_name: ${PYTHON_PACKAGE_DIST_NAME_CLIENT:?} networks: - request-listener-net # Call this when starting the container @@ -42,6 +43,8 @@ services: restart: on-failure secrets: - postgres_password + - object_store_exec_user_name + - object_store_exec_user_passwd environment: #- VENV_DIR=${DOCKER_GUI_CONTAINER_VENV_DIR:-} #- CERT_PATH # used by dispatch.py @@ -50,18 +53,26 @@ services: - MAAS_ENDPOINT_PORT=${DOCKER_REQUESTS_HOST_PORT:-3012} - MAAS_PORTAL_DEBUG_HOST=${PYCHARM_REMOTE_DEBUG_SERVER_HOST:-host.docker.internal} - MAAS_PORTAL_DEBUG_PORT=${PYCHARM_REMOTE_DEBUG_SERVER_PORT_GUI:-55875} + - GUI_SUBSET_SERVICE_API_URL=http://${DMOD_GUI_SUBSET_SERVICE_API_HOST:?}:${DOCKER_SUBSET_API_PORT:-5001} - PYCHARM_REMOTE_DEBUG_ACTIVE=${PYCHARM_REMOTE_DEBUG_GUI_ACTIVE:-false} - PYCHARM_REMOTE_DEBUG_VERSION=${PYCHARM_REMOTE_DEBUG_VERSION:-~=211.7628.24} - SQL_ENGINE=django.db.backends.postgresql - SQL_DATABASE=${DMOD_GUI_POSTGRES_DB:-dmod_dev} - SQL_USER=${DMOD_GUI_POSTGRES_USER:?} - SQL_HOST=db - SQL_PORT=5432 + - DMOD_SU_NAME=dmod_super_user + - DMOD_SU_EMAIL=none@noaa.gov - DATABASE=postgres - DOCKER_SECRET_POSTGRES_PASS=postgres_password + - OBJECT_STORE_HOSTNAME=${EXTERNAL_OBJECT_STORE_HOSTNAME:?} + - OBJECT_STORE_PORT=${EXTERNAL_OBJECT_STORE_PORT:?} + - DMOD_GUI_CSRF_TRUSTED_ORIGINS=${DMOD_GUI_CSRF_TRUSTED_ORIGINS:?No CSRF trusted origins configured (provide '' at least)} volumes: - ${DMOD_APP_STATIC:?}:/usr/maas_portal/static - ${DMOD_SSL_DIR}/requestservice:/usr/maas_portal/ssl + # Needed only for speeding debugging + #- ${DOCKER_GUI_HOST_SRC:?GUI sources path not configured in environment}/MaaS:/usr/maas_portal/MaaS #- ${DOCKER_GUI_HOST_VENV_DIR:-/tmp/blah}:${DOCKER_GUI_CONTAINER_VENV_DIR:-/tmp/blah} # Expose Django's port to the internal network so that the web server may access it expose: @@ -76,8 
+90,10 @@ services: networks: - request-listener-net volumes: - #- ${DMOD_GUI_POSTGRES_DATA:?}:/var/lib/postgresql/data - - dmod_db_volume:/var/lib/postgresql/data + # TODO: look back later at why this was done during dev work, and whether it is still needed + # TODO: for now, implement by just defaulting to same Docker volume name (i.e., without bind mount) as before + - ${DMOD_GUI_POSTGRES_DATA_VOLUME_HOST_DIR:-dmod_db_volume}:/var/lib/postgresql/data + #- ${DMOD_GUI_POSTGRES_DATA_VOLUME_HOST_DIR:?}:/var/lib/postgresql/data secrets: - postgres_password environment: @@ -98,6 +114,10 @@ networks: secrets: postgres_password: file: ../secrets/postgres_password.txt + object_store_exec_user_passwd: + file: ${DMOD_OBJECT_STORE_EXEC_USER_PASSWD_SECRET_FILE:?} + object_store_exec_user_name: + file: ${DMOD_OBJECT_STORE_EXEC_USER_NAME_SECRET_FILE:?} # Define persistent volumes that may be shared and persisted between containers volumes: dmod_db_volume: diff --git a/docker/nwm_gui/web_server/nginx/default.conf b/docker/nwm_gui/web_server/nginx/default.conf index 33b1f88ce..7d71e51c1 100644 --- a/docker/nwm_gui/web_server/nginx/default.conf +++ b/docker/nwm_gui/web_server/nginx/default.conf @@ -4,6 +4,7 @@ upstream wresgui { server { listen 80; + client_max_body_size 0; # Restrict verbs to GET, HEAD, and POST if ($request_method !~ ^(GET|HEAD|POST)$ ) diff --git a/docker/py-sources/py-deps.Dockerfile b/docker/py-sources/py-deps.Dockerfile index 9a0ff75fe..da6d7e35b 100644 --- a/docker/py-sources/py-deps.Dockerfile +++ b/docker/py-sources/py-deps.Dockerfile @@ -1,4 +1,4 @@ -ARG REQUIRE="gcc g++ musl-dev gdal-dev libffi-dev openssl-dev rust cargo git proj proj-dev proj-util openblas openblas-dev lapack lapack-dev" +ARG REQUIRE="gcc g++ musl-dev gdal-dev libffi-dev openssl-dev rust cargo git proj proj-dev proj-util openblas openblas-dev lapack lapack-dev geos-dev" ################################################################################################################ 
################################################################################################################ ##### Create foundational level build stage with initial structure diff --git a/docker/py-sources/py-sources.Dockerfile b/docker/py-sources/py-sources.Dockerfile index 9e015ad6e..3d190d092 100644 --- a/docker/py-sources/py-sources.Dockerfile +++ b/docker/py-sources/py-sources.Dockerfile @@ -1,5 +1,5 @@ ARG docker_internal_registry -FROM ${docker_internal_registry}/dmod-py-deps as basis +FROM ${docker_internal_registry}/dmod-py-deps:latest as basis # Copy these needed for sourced functions used by build scripts in later stages RUN mkdir -p /dmod/scripts/shared COPY ./scripts/dist_package.sh /dmod/scripts diff --git a/example.env b/example.env index 637e9e5ce..bafbc8e63 100644 --- a/example.env +++ b/example.env @@ -108,6 +108,11 @@ TROUTE_BRANCH=ngen ## Python Packages Settings ## ######################################################################## +## The "name" of the built client Python distribution package, for purposes of installing (e.g., via pip) +PYTHON_PACKAGE_DIST_NAME_CLIENT=dmod-client +## The name of the actual Python communication package (i.e., for importing or specifying as a module on the command line) +PYTHON_PACKAGE_NAME_CLIENT=dmod.client + ## The "name" of the built communication Python distribution package, for purposes of installing (e.g., via pip) PYTHON_PACKAGE_DIST_NAME_COMMS=dmod-communication ## The name of the actual Python communication package (i.e., for importing or specifying as a module on the command line) diff --git a/python/gui/MaaS/cbv/AbstractDatasetView.py b/python/gui/MaaS/cbv/AbstractDatasetView.py new file mode 100644 index 000000000..5917d5035 --- /dev/null +++ b/python/gui/MaaS/cbv/AbstractDatasetView.py @@ -0,0 +1,47 @@ +from abc import ABC +from django.views.generic.base import View +from dmod.client.request_clients import DatasetExternalClient +import logging +logger = logging.getLogger("gui_log") +from 
.DMODProxy import DMODMixin, GUI_STATIC_SSL_DIR +from typing import Dict, Optional +from pathlib import Path +from django.conf import settings +import minio + +MINIO_HOST_STRING = settings.MINIO_HOST_STRING +MINIO_ACCESS = Path(settings.MINIO_ACCESS_FILE).read_text().strip() +MINIO_SECRET = Path(settings.MINIO_SECRET_FILE).read_text().strip() +MINIO_SECURE_CONNECT = settings.MINIO_SECURE_CONNECT + + +class AbstractDatasetView(View, DMODMixin, ABC): + + @classmethod + def factory_minio_client(cls, endpoint: Optional[str] = None, access: Optional[str] = None, + secret: Optional[str] = None, is_secure: Optional[bool] = None) -> minio.Minio: + client = minio.Minio(endpoint=MINIO_HOST_STRING if endpoint is None else endpoint, + access_key=MINIO_ACCESS if access is None else access, + secret_key=MINIO_SECRET if secret is None else secret, + secure=MINIO_SECURE_CONNECT if is_secure is None else is_secure) + + return client + + def __init__(self, *args, **kwargs): + super(AbstractDatasetView, self).__init__(*args, **kwargs) + self._dataset_client = None + + async def get_dataset(self, dataset_name: str) -> Dict[str, dict]: + serial_dataset = await self.dataset_client.get_serialized_datasets(dataset_name=dataset_name) + return serial_dataset + + async def get_datasets(self) -> Dict[str, dict]: + serial_datasets = await self.dataset_client.get_serialized_datasets() + return serial_datasets + + @property + def dataset_client(self) -> DatasetExternalClient: + if self._dataset_client is None: + self._dataset_client = DatasetExternalClient(endpoint_uri=self.maas_endpoint_uri, + ssl_directory=GUI_STATIC_SSL_DIR) + return self._dataset_client diff --git a/python/gui/MaaS/cbv/DMODProxy.py b/python/gui/MaaS/cbv/DMODProxy.py index fc0fcb7a1..99bb725dd 100644 --- a/python/gui/MaaS/cbv/DMODProxy.py +++ b/python/gui/MaaS/cbv/DMODProxy.py @@ -7,6 +7,9 @@ from django.http import HttpRequest, HttpResponse from django.views.generic.base import View from django.shortcuts import render +from 
django.conf import settings + +DEFAULT_MAAS_URI = settings.DEFAULT_MAAS_ENDPOINT_URI import logging logger = logging.getLogger("gui_log") @@ -16,6 +19,8 @@ from pathlib import Path from typing import List, Optional, Tuple, Type +GUI_STATIC_SSL_DIR = Path(settings.GUI_SSL_DIR) + class RequestFormProcessor(ABC): @@ -209,7 +214,7 @@ class PostFormRequestClient(ModelExecRequestClient): def _bootstrap_ssl_dir(cls, ssl_dir: Optional[Path] = None): if ssl_dir is None: ssl_dir = Path(__file__).resolve().parent.parent.parent.joinpath('ssl') - ssl_dir = Path('/usr/maas_portal/ssl') #Fixme + ssl_dir = GUI_STATIC_SSL_DIR #Fixme return ssl_dir def __init__(self, endpoint_uri: str, http_request: HttpRequest, ssl_dir: Optional[Path] = None): @@ -289,8 +294,7 @@ class DMODMixin: @property def maas_endpoint_uri(self): if not hasattr(self, '_maas_endpoint_uri') or self._maas_endpoint_uri is None: - self._maas_endpoint_uri = 'wss://' + os.environ.get('MAAS_ENDPOINT_HOST') + ':' - self._maas_endpoint_uri += os.environ.get('MAAS_ENDPOINT_PORT') + self._maas_endpoint_uri = DEFAULT_MAAS_URI return self._maas_endpoint_uri def forward_request(self, request: HttpRequest, event_type: MessageEventType) -> Tuple[ @@ -315,6 +319,7 @@ def forward_request(self, request: HttpRequest, event_type: MessageEventType) -> client = PostFormRequestClient(endpoint_uri=self.maas_endpoint_uri, http_request=request) if event_type == MessageEventType.MODEL_EXEC_REQUEST: form_processor_type = ModelExecRequestFormProcessor + # TODO: need a new type of form processor here (or 3 more, for management, uploading, and downloading) else: raise RuntimeError("{} got unsupported event type: {}".format(self.__class__.__name__, str(event_type))) diff --git a/python/gui/MaaS/cbv/DatasetApiView.py b/python/gui/MaaS/cbv/DatasetApiView.py new file mode 100644 index 000000000..45b689b6d --- /dev/null +++ b/python/gui/MaaS/cbv/DatasetApiView.py @@ -0,0 +1,163 @@ +import asyncio +import zipfile + +from django.http import 
JsonResponse +from .AbstractDatasetView import AbstractDatasetView +from pathlib import Path +from django.conf import settings +from typing import Optional, Set +import logging + +logger = logging.getLogger("gui_log") + +CACHE_DIR: Path = Path(settings.DATA_CACHE_DIR) +DOWNLOADS_DIR: Path = Path(settings.DATA_DOWNLOADS_DIR) +UPLOADS_DIR: Path = Path(settings.DATA_UPLOADS_DIR) + + +class DatasetApiView(AbstractDatasetView): + + @classmethod + def _cleanup_dir(cls, dir_path: Path) -> bool: + """ + Cleanup contents and remove a given directory, returning whether this was done or nothing exists at the path. + + Parameters + ---------- + dir_path : Path + Path to an expected directory. + + Returns + ------- + bool + ``True`` if nothing exists at this path, either because a directory was deleted or because nothing was + there; or ``False`` if there is an existing non-directory file at this path. + """ + # TODO: implement and then use in caching method and after zip file is created + if not dir_path.exists(): + return True + elif not dir_path.is_dir(): + return False + else: + results = True + for p in dir_path.glob('*'): + if p.is_dir(): + results = results and cls._cleanup_dir(p) + else: + p.unlink() + dir_path.rmdir() + return results + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def _cache_dataset_downloads(self, dataset_name: str, files: Optional[Set[str]] = None) -> Path: + """ + Cache contents (files) of the dataset to files in the local downloads cache. + + Parameters + ---------- + dataset_name : str + The name of the dataset of interest. + files : Optional[Set[str]] + An optional subset of the files in the dataset to be cached locally, with the default of ``None`` implying + all files within the dataset. + + Returns + ---------- + Path + The cache directory path containing the downloaded dataset data. 
+ """ + #returned_json = asyncio.get_event_loop().run_until_complete(self.get_dataset(dataset_name=dataset_name)) + #dataset_json = returned_json[dataset_name] + # TODO: maybe check to make sure dataset exists? + local_copy_dir = CACHE_DIR.joinpath(dataset_name) + if local_copy_dir.is_dir(): + self._cleanup_dir(local_copy_dir) + elif local_copy_dir.exists(): + local_copy_dir.unlink() + local_copy_dir.mkdir(parents=True) + # TODO: later devise something better for dealing with prefixes for emulated directory structure + #for minio_object in self.minio_client.list_objects(dataset_name): + logger.info("Retrieving a list of dataset files for {}".format(dataset_name)) + minio_client = self.factory_minio_client() + file_list = [obj.object_name for obj in minio_client.list_objects(dataset_name) if files is None or obj.object_name in files] + logger.info("Downloading {} dataset files to GUI app server".format(len(file_list))) + for filename in file_list: + minio_client.fget_object(bucket_name=dataset_name, object_name=filename, + file_path=str(local_copy_dir.joinpath(filename))) + logger.info("Dataset {} locally cached".format(dataset_name)) + return local_copy_dir + + def _get_dataset_content_details(self, dataset_name: str): + result = asyncio.get_event_loop().run_until_complete(self.dataset_client.get_dataset_content_details(name=dataset_name)) + logger.info(result) + return JsonResponse({"contents": result}, status=200) + + def _delete_dataset(self, dataset_name: str) -> JsonResponse: + result = asyncio.get_event_loop().run_until_complete(self.dataset_client.delete_dataset(name=dataset_name)) + return JsonResponse({"successful": result}, status=200) + + def _get_dataset_download(self, request, *args, **kwargs): + dataset_name = request.GET.get("dataset_name", None) + local_dir = self._cache_dataset_downloads(dataset_name).resolve(strict=True) + logger.info("Caching data to {}".format(local_dir)) + zip_path = DOWNLOADS_DIR.joinpath('{}.zip'.format(dataset_name)) + if not DOWNLOADS_DIR.is_dir(): + 
DOWNLOADS_DIR.mkdir(parents=True) + logger.info("Creating zip file for dataset contents at {}".format(zip_path)) + with zipfile.ZipFile(zip_path, mode='w', compression=zipfile.ZIP_STORED) as zip_file: + for file in local_dir.glob('*'): + logger.info("Writing {} to zip file {}".format(file, zip_path)) + zip_file.write(file, file.relative_to(local_dir.parent)) + + logger.info("Dataset zip file {} fully created".format(zip_path)) + self._cleanup_dir(local_dir) + + # TODO: make sure downloading actually works + + #response = HttpResponse(zip_path.open(), mimetype='application/zip') + #return response + # TODO: later, figure out something to clean up these zip files + return JsonResponse({"zip_file": str(zip_path.relative_to(DOWNLOADS_DIR))}, status=200) + + def _get_datasets_json(self) -> JsonResponse: + serial_dataset_map = asyncio.get_event_loop().run_until_complete(self.get_datasets()) + return JsonResponse({"datasets": serial_dataset_map}, status=200) + + def _get_dataset_json(self, dataset_name: str) -> JsonResponse: + serial_dataset = asyncio.get_event_loop().run_until_complete(self.get_dataset(dataset_name=dataset_name)) + return JsonResponse({"dataset": serial_dataset[dataset_name]}, status=200) + + def _get_download(self, request, *args, **kwargs): + dataset_name = request.GET.get("dataset_name", None) + item_name = request.GET.get("item_name", None) + local_dir = self._cache_dataset_downloads(dataset_name=dataset_name, files={item_name}).resolve(strict=True) + download_subdir = DOWNLOADS_DIR.joinpath(dataset_name) + if not DOWNLOADS_DIR.is_dir(): + DOWNLOADS_DIR.mkdir(parents=True) + elif download_subdir.is_dir(): + self._cleanup_dir(download_subdir) + elif download_subdir.exists(): + download_subdir.unlink() + local_dir.rename(download_subdir) + + return JsonResponse({"dataset": dataset_name}, status=200) + + def get(self, request, *args, **kwargs): + request_type = request.GET.get("request_type", None) + if request_type == 'download_dataset': + return 
self._get_dataset_download(request) + if request_type == 'download_file': + return self._get_download(request) + elif request_type == 'datasets': + return self._get_datasets_json() + elif request_type == 'dataset': + return self._get_dataset_json(dataset_name=request.GET.get("name", None)) + elif request_type == 'contents': + return self._get_dataset_content_details(dataset_name=request.GET.get("name", None)) + if request_type == 'delete': + return self._delete_dataset(dataset_name=request.GET.get("name", None)) + + # TODO: finish + return JsonResponse({}, status=400) diff --git a/python/gui/MaaS/cbv/DatasetFileWebsocketFilelike.py b/python/gui/MaaS/cbv/DatasetFileWebsocketFilelike.py new file mode 100644 index 000000000..9e69409ad --- /dev/null +++ b/python/gui/MaaS/cbv/DatasetFileWebsocketFilelike.py @@ -0,0 +1,20 @@ +import asyncio +from typing import AnyStr +from dmod.client.request_clients import DatasetExternalClient + + +class DatasetFileWebsocketFilelike: + + def __init__(self, client: DatasetExternalClient, dataset_name: str, file_name: str): + self._client = client + self._dataset_name = dataset_name + self._file_name = file_name + self._read_index: int = 0 + + def read(self, blksize: int) -> AnyStr: + + result = asyncio.get_event_loop().run_until_complete( + self._client.download_item_block(dataset_name=self._dataset_name, item_name=self._file_name, + blk_start=self._read_index, blk_size=blksize)) + self._read_index += blksize + return result diff --git a/python/gui/MaaS/cbv/DatasetManagementForms.py b/python/gui/MaaS/cbv/DatasetManagementForms.py new file mode 100644 index 000000000..7566d0bee --- /dev/null +++ b/python/gui/MaaS/cbv/DatasetManagementForms.py @@ -0,0 +1,242 @@ +from django import forms +from enum import Enum +from functools import partial + +from dmod.core.meta_data import DataCategory, DataFormat +from django.conf import settings + +from .js_utils import start_end_time_validation + +# typing imports +from typing import Optional + +# 
# form field type aliases
# correspond to `dmod.core.meta_data.StandardDatasetIndex`
def _time(start_time_id: str, end_time_id: str):
    """
    Build a partially-applied ``DateTimeField`` whose widget cross-validates the paired start/end inputs.

    Parameters
    ----------
    start_time_id : str
        DOM id of the paired start-time input element.
    end_time_id : str
        DOM id of the paired end-time input element.
    """
    return partial(
        forms.DateTimeField,
        widget=forms.DateTimeInput(
            attrs={
                "type": "datetime-local",
                "onchange": start_end_time_validation(start_time_id, end_time_id),
            }
        ),
        # TODO: this should be removed once we upgrade django versions >= 3.1 (tracked by #209)
        input_formats=[settings.DATE_TIME_FORMAT],
    )


_Unknown = forms.CharField
_CatchmentId = forms.CharField
_DataId = forms.CharField
_HydrofabricId = forms.CharField
_Length = forms.IntegerField
_GlobalChecksum = forms.CharField
_ElementId = forms.CharField
_Files = partial(
    forms.FileField,
    widget=forms.ClearableFileInput(
        attrs={
            'multiple': True,
            # filename cannot contain underscore (_)
            "oninput": """((el) => {
                const files = el.files;

                for (let {name} of files){
                    // filenames cannot include _'s.
                    //if (name.includes('_')){

                    // see constraint validation API for more detail (https://developer.mozilla.org/en-US/docs/Web/API/Constraint_validation)
                    // el.setCustomValidity('Filename cannot contain underscores \"_\"');
                    // return;
                    //}

                    // valid input
                    el.setCustomValidity('');
                }

            })(this)"""
        }
    ),
)


class FormNameMixIn:
    def form_name(self) -> str:
        """returns class name of form"""
        return type(self).__name__


class DynamicFormMixIn:
    """
    Mix-in that namespaces each visible field's id/class by form name and renders it hidden and
    disabled; fields are revealed client-side when their DataFormat is selected (see DatasetForm).
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        for visible in self.visible_fields():
            # input field have id's of form: `id_{{field instance var name}}_{{form name}}`
            visible.field.widget.attrs["id"] = f"{visible.auto_id}_{self.form_name()}"
            visible.field.widget.attrs["class"] = self.form_name()
            visible.field.widget.attrs["style"] = "display: none;"
            visible.field.widget.attrs["disabled"] = "true"


class DatasetForm(FormNameMixIn, forms.Form):
    """Top-level form for creating a dataset: name, category, and data format selector."""

    name = forms.CharField(max_length=100, label="Dataset Name")
    category = forms.ChoiceField(
        choices=[(f.name, f.name.title()) for f in DataCategory],
        label="Dataset Category",
    )
    data_format = forms.ChoiceField(
        choices=[("---", "---")] + [(f.name, f.name) for f in DataFormat],
        label="Data Format",
        widget=forms.Select(
            attrs={
                # when selection changes, unhide and enable the form fields and labels for the
                # corresponding DataFormat. form fields and labels have an html class name of their
                # DataFormat. i.e.
                "onchange": """((name) => {
                    // remove previously active fields, if any
                    const active_fields = document.querySelectorAll('.active_field')
                    active_fields.forEach(el => {

                        // disable field, hide it, and remove flag class, 'active_field'
                        el.setAttribute('disabled', true)
                        el.style.display = 'none'
                        el.classList.remove('active_field')
                    })

                    const els_with_class = document.querySelectorAll(`.${name}`)
                    els_with_class.forEach(el => {

                        // enable field, hide it, and remove flag class, 'active_field'
                        el.removeAttribute('disabled')
                        el.style.display = 'block'
                        el.classList.add('active_field')
                    })
                })(this.value)"""
            }
        ),
    )


class AORC_CSV(DynamicFormMixIn, FormNameMixIn, forms.Form):
    catchment_id = _CatchmentId()
    start_time = _time("id_start_time_AORC_CSV", "id_end_time_AORC_CSV")(
        label="Start Datetime"
    )
    end_time = _time("id_start_time_AORC_CSV", "id_end_time_AORC_CSV")(
        label="End Datetime"
    )
    # TODO: note if end times are inclusive.
    # TODO: note that all datetimes are naive UTC time.
    #     help_text="",
    # )
    files = _Files()


class NETCDF_FORCING_CANONICAL(DynamicFormMixIn, FormNameMixIn, forms.Form):
    catchment_id = _CatchmentId()
    start_time = _time(
        "id_start_time_NETCDF_FORCING_CANONICAL", "id_end_time_NETCDF_FORCING_CANONICAL"
    )(label="Start Datetime")
    end_time = _time(
        "id_start_time_NETCDF_FORCING_CANONICAL", "id_end_time_NETCDF_FORCING_CANONICAL"
    )(label="End Datetime")
    files = _Files()


class NETCDF_AORC_DEFAULT(DynamicFormMixIn, FormNameMixIn, forms.Form):
    catchment_id = _CatchmentId()
    start_time = _time(
        "id_start_time_NETCDF_AORC_DEFAULT", "id_end_time_NETCDF_AORC_DEFAULT"
    )(label="Start Datetime")
    end_time = _time(
        "id_start_time_NETCDF_AORC_DEFAULT", "id_end_time_NETCDF_AORC_DEFAULT"
    )(label="End Datetime")
    files = _Files()


class NGEN_OUTPUT(DynamicFormMixIn, FormNameMixIn, forms.Form):
    catchment_id = _CatchmentId()
    start_time = _time("id_start_time_NGEN_OUTPUT", "id_end_time_NGEN_OUTPUT")(
        label="Start Datetime"
    )
    end_time = _time("id_start_time_NGEN_OUTPUT", "id_end_time_NGEN_OUTPUT")(
        label="End Datetime"
    )
    data_id = _DataId()
    files = _Files()


class NGEN_REALIZATION_CONFIG(DynamicFormMixIn, FormNameMixIn, forms.Form):
    catchment_id = _CatchmentId()
    start_time = _time(
        "id_start_time_NGEN_REALIZATION_CONFIG", "id_end_time_NGEN_REALIZATION_CONFIG"
    )(label="Start Datetime")
    end_time = _time(
        "id_start_time_NGEN_REALIZATION_CONFIG", "id_end_time_NGEN_REALIZATION_CONFIG"
    )(label="End Datetime")
    data_id = _DataId()
    files = _Files()


class NGEN_GEOJSON_HYDROFABRIC(DynamicFormMixIn, FormNameMixIn, forms.Form):
    catchment_id = _CatchmentId()
    hydrofabric_id = _HydrofabricId()
    data_id = _DataId()
    files = _Files()


class NGEN_PARTITION_CONFIG(DynamicFormMixIn, FormNameMixIn, forms.Form):
    data_id = _DataId()
    # Fix: was `_HydrofabricId` (the field class itself, not an instance), so no bound form field
    # was ever created for hydrofabric_id on this form.
    hydrofabric_id = _HydrofabricId()
    length = _Length()
    files = _Files()


class BMI_CONFIG(DynamicFormMixIn, FormNameMixIn, forms.Form):
    global_checksum = _GlobalChecksum()
    data_id = _DataId()
    files = _Files()


class NWM_OUTPUT(DynamicFormMixIn, FormNameMixIn, forms.Form):
    catchment_id = _CatchmentId()
    start_time = _time("id_start_time_NWM_OUTPUT", "id_end_time_NWM_OUTPUT")(
        label="Start Datetime"
    )
    end_time = _time("id_start_time_NWM_OUTPUT", "id_end_time_NWM_OUTPUT")(
        label="End Datetime"
    )
    data_id = _DataId()
    files = _Files()


class NWM_CONFIG(DynamicFormMixIn, FormNameMixIn, forms.Form):
    element_id = _ElementId()
    start_time = _time("id_start_time_NWM_CONFIG", "id_end_time_NWM_CONFIG")(
        label="Start Datetime"
    )
    end_time = _time("id_start_time_NWM_CONFIG", "id_end_time_NWM_CONFIG")(
        label="End Datetime"
    )
    data_id = _DataId()
    files = _Files()


class DatasetFormatForm(Enum):
    """Maps each DataFormat name to its dynamic form class."""

    AORC_CSV = AORC_CSV
    NETCDF_FORCING_CANONICAL = NETCDF_FORCING_CANONICAL
    NETCDF_AORC_DEFAULT = NETCDF_AORC_DEFAULT
    NGEN_OUTPUT = NGEN_OUTPUT
    NGEN_REALIZATION_CONFIG = NGEN_REALIZATION_CONFIG
    NGEN_GEOJSON_HYDROFABRIC = NGEN_GEOJSON_HYDROFABRIC
    NGEN_PARTITION_CONFIG = NGEN_PARTITION_CONFIG
    BMI_CONFIG = BMI_CONFIG
    NWM_OUTPUT = NWM_OUTPUT
    NWM_CONFIG = NWM_CONFIG

    @staticmethod
    def get_form_from_name(name: str) -> Optional[type]:
        """
        Return the form *class* for the given format name, or ``None`` if unrecognized.

        Note: annotation corrected — the original claimed ``Optional[forms.Form]`` but the value
        returned is the form class, not an instance.
        """
        try:
            return DatasetFormatForm[name].value
        except KeyError:
            return None


"""
Defines a view that may be used to configure a MaaS request
"""
import asyncio
import os.path

from django.http import HttpRequest, HttpResponse
from django.shortcuts import render
from django.core.files.uploadedfile import UploadedFile, InMemoryUploadedFile, TemporaryUploadedFile
from django.conf import settings
from datetime import datetime

import dmod.communication as communication
from dmod.core.meta_data import DataCategory, DataDomain, DataFormat, Serializable

import logging
logger = logging.getLogger("gui_log")

from .utils import extract_log_data
from .AbstractDatasetView import AbstractDatasetView
from .DatasetManagementForms import DatasetForm, DatasetFormatForm
from typing import List

DT_FORMAT = settings.DATE_TIME_FORMAT


class DatasetManagementView(AbstractDatasetView):
    """
    A view used to configure a dataset management request or requests for transmitting dataset data.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def _process_event_type(self, http_request: HttpRequest) -> communication.MessageEventType:
        """
        Determine and return whether this request is for a ``DATASET_MANAGEMENT`` or ``DATA_TRANSMISSION`` event.

        Parameters
        ----------
        http_request : HttpRequest
            The raw HTTP request in question.

        Returns
        -------
        communication.MessageEventType
            Either ``communication.MessageEventType.DATASET_MANAGEMENT`` or
            ``communication.MessageEventType.DATA_TRANSMISSION``.
        """
        # TODO:
        raise NotImplementedError("{}._process_event_type not implemented".format(self.__class__.__name__))

    def _create_dataset(self, name: str, category: str, data_format: str, *args, **kwargs) -> bool:
        """
        Create a new dataset with the given name, category, and data format.

        Remaining keyword args are treated as restriction values used to build the dataset's data domain.

        Returns
        -------
        bool
            Whether dataset creation succeeded (``False`` immediately if ``data_format`` is not a
            recognized format name).

        Raises
        ------
        RuntimeError
            If building the data domain from the given restriction values fails.
        """
        d_format = DataFormat.get_for_name(data_format)
        # Fix: original tested the builtin ``format`` (never None) instead of ``d_format``, so an
        # unrecognized format name was never rejected here and failed later, more confusingly.
        if d_format is None:
            return False
        try:
            domain = DataDomain.factory_init_from_restriction_collections(d_format, **kwargs)
        except Exception as e:
            msg = 'Failed to create dataset {}: {} creating domain ({})'.format(name, e.__class__.__name__, str(e))
            logger.error(msg)
            raise RuntimeError(msg)
        return asyncio.get_event_loop().run_until_complete(
            self.dataset_client.create_dataset(name=name, category=DataCategory.get_for_name(category),
                                               domain=domain))

    def _upload_files_to_dataset(self, dataset_name: str, files: List[UploadedFile]) -> bool:
        """
        Upload the given files into the object store bucket backing the named dataset.

        Returns
        -------
        bool
            Whether every file was stored under its own name in the dataset's bucket.
        """
        # TODO: (later) consider modifying files to account for DMOD-specific characteristics (e.g., file paths for
        #  inside worker containers)
        minio_client = self.factory_minio_client()
        result = True
        for f in files:
            # Large uploads are spooled to disk by Django; size those via the filesystem instead of a buffer.
            if isinstance(f, TemporaryUploadedFile):
                length = os.path.getsize(f.file.name)
            else:
                length = f.file.getbuffer().nbytes
            result_obj = minio_client.put_object(bucket_name=dataset_name, object_name=f.name, data=f.file,
                                                 length=length)
            # TODO: (later) try to do something based on result_obj.last_modified
            result = result and result_obj.bucket_name == dataset_name and result_obj.object_name == f.name
        return result

    def get(self, http_request: HttpRequest, *args, **kwargs) -> HttpResponse:
        """
        The handler for 'get' requests.

        This will render the 'maas/dataset_management.html' template after retrieving necessary information to
        initially populate the forms it displays.

        Parameters
        ----------
        http_request : HttpRequest
            The request asking to render this page.
        args
        kwargs

        Returns
        -------
        A rendered page.
        """
        errors, warnings, info = extract_log_data(kwargs)

        # Gather map of serialized datasets, keyed by dataset name
        serial_dataset_map = asyncio.get_event_loop().run_until_complete(self.get_datasets())
        serial_dataset_list = [serial_dataset_map[d] for d in serial_dataset_map]

        dataset_categories = [c.name.title() for c in DataCategory]
        dataset_formats = [f.name for f in DataFormat]

        form = DatasetForm()

        payload = {
            'form': form,
            'dynamic_forms': [f.value() for f in DatasetFormatForm],
            'datasets': serial_dataset_list,
            'dataset_categories': dataset_categories,
            'dataset_formats': dataset_formats,
            'errors': errors,
            'info': info,
            'warnings': warnings
        }

        return render(http_request, 'maas/dataset_management.html', payload)

    def post(self, http_request: HttpRequest, *args, **kwargs) -> HttpResponse:
        """
        The handler for 'post' requests.

        This will attempt to submit the request and rerender the page like a 'get' request.

        Parameters
        ----------
        http_request : HttpRequest
            The request asking to render this page.
        args
        kwargs

        Returns
        -------
        A rendered page.
        """
        # Should get a list of file-type objects, with a ``name`` property and a ``file`` BytesIO property
        files = http_request.FILES.getlist('files')

        csrf_token_key = 'csrfmiddlewaretoken'

        # name (dataset name), category, data_format, and any other applicable dynamic form items
        # e.g., catchment_id, hydrofabric_id, data_id, etc.
        dataset_details = {k: v for k, v in http_request.POST.items() if k != csrf_token_key}
        dataset_name = dataset_details['name']

        # TODO: consider reading files to validate/replace domain details from form

        # If present, parse catchment ids string to list of individual ids
        if 'catchment_id' in dataset_details:
            dataset_details['catchment_id'] = [s.strip() for s in dataset_details.pop('catchment_id').split(',')]

        # Fix keys for start and end times
        if 'start_time' in dataset_details and 'end_time' in dataset_details:
            start = datetime.strptime(dataset_details.pop('start_time'), settings.DATE_TIME_FORMAT)
            end = datetime.strptime(dataset_details.pop('end_time'), settings.DATE_TIME_FORMAT)
            dataset_details['time'] = {'start': start.strftime(Serializable.get_datetime_str_format()),
                                       'end': end.strftime(Serializable.get_datetime_str_format())}
        elif 'start_time' in dataset_details or 'end_time' in dataset_details:
            # TODO: figure out best way to handle this; for now ...
            raise RuntimeError('Cannot create a dataset of this format unless both a start and end time are given')

        was_created = self._create_dataset(**dataset_details)

        if not was_created:
            # Fix: corrected grammar of user-facing error message ('Could not created' -> 'Could not create').
            err_msg = 'Could not create dataset {}'.format(dataset_name)
            logger.error(err_msg)
            http_response = self.get(http_request=http_request, errors=[err_msg], *args, **kwargs)
        elif not files:
            info_msg = 'Created empty dataset {}'.format(dataset_name)
            logger.info(info_msg)
            # NOTE(review): failure path uses kwarg 'errors' but success paths use 'info_msg';
            # confirm which keys extract_log_data expects before unifying.
            http_response = self.get(http_request=http_request, info_msg=[info_msg], *args, **kwargs)
        # With this condition test (if we get here), put files in dataset
        elif not self._upload_files_to_dataset(dataset_name=dataset_name, files=files):
            err_msg = 'Could not upload requested files to dataset {}'.format(dataset_name)
            logger.error(err_msg)
            http_response = self.get(http_request=http_request, errors=[err_msg], *args, **kwargs)
        else:
            info_msg = 'Created dataset {} with {} files uploaded'.format(dataset_name, len(files))
            logger.info(info_msg)
            http_response = self.get(http_request=http_request, info_msg=[info_msg], *args, **kwargs)

        #for k, v in session_data.items():
        #    http_response.set_cookie(k, v)

        return http_response
import configuration +import requests import logging logger = logging.getLogger("gui_log") _resolution_regex = re.compile("(.+) \((.+)\)") -def _build_fabric_path(fabric, type): + +def _build_fabric_path(fabric, fabric_type): """ build a qualified path from the hydrofabric name and type """ @@ -36,28 +40,46 @@ def _build_fabric_path(fabric, type): name = fabric resolution='' - path = Path(PROJECT_ROOT, 'static', 'ngen', 'hydrofabric', name, resolution, type+'_data.geojson') + path = Path(HYDROFABRICS_DIR, name, resolution, fabric_type + '_data.geojson') + #path = Path(HYDROFABRICS_DIR, name, fabric_type + '_data.geojson') return path + class Fabrics(APIView): + + def _get_geojson_in_bounds(self, fabric_name: str, feature_type:str, min_x: float, min_y: float, max_x: float, + max_y: float) -> dict: + url_path = '{}/subset/bounds'.format(SUBSET_SERVICE_URL) + request_data = {'fabric_name': fabric_name, 'feature_type': feature_type, 'min_x': min_x, 'min_y': min_y, + 'max_x': max_x, 'max_y': max_y} + subset_response = requests.post(url=url_path, data=request_data) + return subset_response.json() + def get(self, request: HttpRequest, fabric: str = None) -> typing.Optional[JsonResponse]: if fabric is None: fabric = 'example' - type = request.GET.get('fabric_type', 'catchment') - if not type: - type="catchment" + + fabric_type = request.GET.get('fabric_type', 'catchment') + min_x = request.GET.get('min_x', None) + min_y = request.GET.get('min_y', None) + max_x = request.GET.get('max_x', None) + max_y = request.GET.get('max_y', None) + + if not fabric_type: + fabric_type = "catchment" - path = _build_fabric_path(fabric, type) + path = _build_fabric_path(fabric, fabric_type) if path is None: - return None - - with open(path) as fp: - data = json.load(fp) - return JsonResponse(data) + return JsonResponse(self._get_geojson_in_bounds(fabric_name=fabric, feature_type=fabric_type, min_x=min_x, + min_y=min_y, max_x=max_x, max_y=max_y)) + else: + with open(path) as fp: + data = 
json.load(fp) + return JsonResponse(data) class FabricNames(APIView): - _fabric_dir = Path(PROJECT_ROOT, 'static', 'ngen', 'hydrofabric') + _fabric_dir = Path(HYDROFABRICS_DIR) def get(self, request: HttpRequest) -> JsonResponse: names = [] @@ -93,6 +115,9 @@ def get(self, request: HttpRequest) -> JsonResponse: class MapView(View): + # TODO: update view/template to only do things for low enough zoom levels. + # TODO: update view/template to get features inside bounding box + """ A view used to render the map """ diff --git a/python/gui/MaaS/cbv/crosswalk.py b/python/gui/MaaS/cbv/crosswalk.py index 272cffbf5..51acf2da9 100644 --- a/python/gui/MaaS/cbv/crosswalk.py +++ b/python/gui/MaaS/cbv/crosswalk.py @@ -15,6 +15,7 @@ import json import re from pathlib import Path +HYDROFABRICS_DIR = settings.HYDROFABRIC_ROOT from .. import datapane from .. import configuration @@ -39,6 +40,10 @@ def _build_fabric_path(fabric, type=""): logger.debug("fabric path:", fabric, name, resolution) path = Path(PROJECT_ROOT, 'static', 'ngen', 'hydrofabric', name, resolution, type+'crosswalk.json') + + #path = Path(HYDROFABRICS_DIR, name, resolution, type+'crosswalk.json') + #path = Path(HYDROFABRICS_DIR, name, 'crosswalk.json') + if (path == None): return JsonResponse({}) return path @@ -46,11 +51,11 @@ def _build_fabric_path(fabric, type=""): class Crosswalk(APIView): def get(self, request: HttpRequest, crosswalk: str = None) -> typing.Optional[JsonResponse]: - logger.debug("crosswalk path:", crosswalk) + #logger.debug("crosswalk path:", crosswalk) if crosswalk is None: return JsonResponse({}) - logger.debug("crosswalk path:", crosswalk) + #logger.debug("crosswalk path:", crosswalk) path = _build_fabric_path(crosswalk) if path is None: diff --git a/python/gui/MaaS/cbv/execution.py b/python/gui/MaaS/cbv/execution.py index 49293e0dd..2968e789a 100644 --- a/python/gui/MaaS/cbv/execution.py +++ b/python/gui/MaaS/cbv/execution.py @@ -10,7 +10,7 @@ from django.conf import settings -from 
django.http import HttpRequest +from django.http import HttpRequest, QueryDict from django.http import JsonResponse from rest_framework.views import APIView @@ -19,6 +19,8 @@ from .. import processors from ..client import JobRequestClient +from datetime import datetime +import re LOGGER = logging.getLogger("gui_log") @@ -64,6 +66,77 @@ class Execute(APIView): """ API view for executing a configured model on a specified framework """ + + def _parse_post_keys(self, data: dict, feature_key: str, formulation_key: str) -> dict: + applicable = dict() + + applicable['formulation-type'] = formulation_key + applicable['forcing-pattern'] = data['{}-forcing-pattern'.format(feature_key)] + + properties_to_retype = dict() + + #property_key_pattern = re.compile(r'(' + feature_key + '):::([^:].+[^:]):::([^:].+[^:])(::([^:].+))?') + property_key_pattern = re.compile(r'(' + feature_key + '):::(.+?):::(.+)') + meta_property_subpattern = re.compile(r'(.+):::(.+)') + + for k, value in data.items(): + if value == '': + continue + + match_obj = property_key_pattern.match(k) + if match_obj is None: + continue + + matched_feature = match_obj.group(1) + matched_form = match_obj.group(2) + + # Skip if no match, or if either the matched feature or formulation is not of interest + if matched_feature != feature_key or matched_form != formulation_key: + continue + + prop_meta_match_obj = meta_property_subpattern.match(match_obj.group(3)) + if prop_meta_match_obj is None: + applicable[match_obj.group(3)] = value + elif prop_meta_match_obj.group(2) == 'config-type': + properties_to_retype[prop_meta_match_obj.group(1)] = value + + for prop_key, type_str in properties_to_retype.items(): + if type_str.lower() == 'text' or prop_key not in applicable: + continue + if type_str.lower() == 'number': + applicable[prop_key] = float(applicable[prop_key]) + elif type_str.lower() == 'numbers': + applicable[prop_key] = [float(s.strip()) for s in applicable[prop_key].split(',')] + elif type_str.lower() == 
'list': + applicable[prop_key] = [s.strip() for s in applicable[prop_key].split(',')] + + return applicable + + def _parse_config_request(self, post_data: QueryDict) -> dict: + features = post_data['features'].split('|') + formulations_map = json.loads(post_data['formulations']) + + global_formulation_key = formulations_map[post_data['global-formulation-type']] + + # TODO: add other properties besides formulations configs (e.g., list of features) + config_properties = dict() + config_properties['features'] = features + config_properties['cpu_count'] = post_data['requested-cpu-count'] + config_properties['start'] = datetime.strptime(post_data['start-time'], settings.DATE_TIME_FORMAT) + config_properties['end'] = datetime.strptime(post_data['end-time'], settings.DATE_TIME_FORMAT) + feature_configs = dict() + feature_configs['global'] = self._parse_post_keys(data=post_data, feature_key='global', formulation_key=global_formulation_key) + + for feature in features: + formulation_type_key = post_data['{}-formulation-type'.format(feature)] + if formulation_type_key == 'global': + continue + formulation_type = formulations_map[formulation_type_key] + feature_configs[feature] = self._parse_post_keys(data=post_data, feature_key=feature, formulation_key=formulation_type) + + config_properties['formulations'] = feature_configs + return config_properties + def post(self, request: HttpRequest): """ The post handler @@ -100,7 +173,17 @@ def post(self, request: HttpRequest): # Allow the caller to determine whether or not a new session should be created force_new_session = request.POST.get("force_new_session", False) + framework_name = request.POST.get('framework', None) + + if framework_name == 'ngen': + parsed_config = self._parse_config_request(request.POST) + # TODO: implement to process config details from GUI, creating any necessary realization config datasets + required_datasets_names = self._process_ngen_configuration_into_datasets(parsed_config) + else: + raise 
RuntimeError('Unsupported framework {}'.format(None)) + # Issue the request + # TODO: modify the way client makes requires to be regular client response: ExternalRequestResponse = client.make_maas_request(request, force_new_session) # Throw an error if the request could not be successfully issued @@ -113,6 +196,7 @@ def post(self, request: HttpRequest): # Set a cookie if a job was started and we have the id (rely on client to manage multiple job ids) if response is not None and 'job_id' in response.data: + # TODO: make sure that the client displays this job id somehow http_response.set_cookie('new_job_id', response.data['job_id']) # Set cookies if a new session was acquired diff --git a/python/gui/MaaS/cbv/js_utils.py b/python/gui/MaaS/cbv/js_utils.py new file mode 100644 index 000000000..27ece4f7b --- /dev/null +++ b/python/gui/MaaS/cbv/js_utils.py @@ -0,0 +1,36 @@ +def start_end_time_validation(start_time_id: str, end_time_id: str) -> str: + """Applies validity testing to start and end time input DOM elements of type`datetime-local`. If + start time is after end time or end time is prior to start, an input validity message is tagged + on the `start_time_id` element. 
+ """ + return f"""((start_time_id, end_time_id) => {{ + + let start_time_el = document.getElementById(start_time_id); + let end_time_el = document.getElementById(end_time_id); + + if (start_time_el == null){{ + console.error(`invalid start_time_id: ${{start_time_id}}`) + return; + }} + + if (end_time_el == null){{ + console.error(`invalid end_time_id: ${{end_time_id}}`) + return; + }} + + if (start_time_el.value === '' || end_time_el.value === ''){{ + // missing time value + return; + }} + + const start_time = new Date(start_time_el.value); + const end_time = new Date(end_time_el.value); + + if (start_time.getTime() > end_time.getTime()){{ + start_time_el.setCustomValidity('Start time after end time'); + return; + }} + + // reset + start_time_el.setCustomValidity(''); + }})('{start_time_id}', '{end_time_id}')""" diff --git a/python/gui/MaaS/migrations/0001_initial.py b/python/gui/MaaS/migrations/0001_initial.py index a78db615f..2d20a8a93 100644 --- a/python/gui/MaaS/migrations/0001_initial.py +++ b/python/gui/MaaS/migrations/0001_initial.py @@ -10,9 +10,9 @@ class Migration(migrations.Migration): def create_superuser(apps, schema_editor): from django.contrib.auth.models import User - SU_NAME = os.environ.get('DMOD_SU_NAME') - SU_EMAIL = os.environ.get('DMOD_SU_EMAIL') - SU_PASSWORD = os.environ.get('DMOD_SU_PASSWORD') + SU_NAME = os.environ.get('DMOD_SU_NAME').strip() + SU_EMAIL = os.environ.get('DMOD_SU_EMAIL').strip() + SU_PASSWORD = os.environ.get('DMOD_SU_PASSWORD').strip() superuser = User.objects.create_superuser( username=SU_NAME, diff --git a/python/gui/MaaS/migrations/0002_formulation_formulationparameter.py b/python/gui/MaaS/migrations/0002_formulation_formulationparameter.py index d9fe516b6..b795729a1 100644 --- a/python/gui/MaaS/migrations/0002_formulation_formulationparameter.py +++ b/python/gui/MaaS/migrations/0002_formulation_formulationparameter.py @@ -28,6 +28,7 @@ class Migration(migrations.Migration): ('name', models.CharField(help_text='The 
name of the parameter for the formulation', max_length=50)), ('description', models.CharField(blank=True, help_text='How this parameter affects the formulation', max_length=200, null=True)), ('value_type', models.CharField(choices=[('number', 'Number'), ('text', 'Text'), ('date', 'Date'), ('datetime-local', 'Date and Time')], help_text='The type of the ', max_length=50)), + ('config_type', models.CharField(choices=[('number', 'Number'), ('text', 'Text'), ('date', 'Date'), ('datetime-local', 'Date and Time'), ('numbers', 'Numbers'), ('list', 'List'), ('dataset', 'Dataset')], help_text='The type in realization configurations of the ', max_length=50)), ('group', models.CharField(blank=True, help_text='A sub-group to which this parameter belongs', max_length=50, null=True)), ('is_list', models.BooleanField(default=False, help_text='Whether or not this variable should be a list')), ('minimum', models.FloatField(blank=True, help_text='The minimum possible numerical value for this parameter', null=True)), diff --git a/python/gui/MaaS/migrations/0003_formulation_records.py b/python/gui/MaaS/migrations/0003_formulation_records.py new file mode 100644 index 000000000..bcf936907 --- /dev/null +++ b/python/gui/MaaS/migrations/0003_formulation_records.py @@ -0,0 +1,82 @@ +from django.db import migrations + + +def create_premade_formulations(apps, schema_editor): + + Formulation = apps.get_model('MaaS', 'Formulation') + FormulationParameter = apps.get_model('MaaS', 'FormulationParameter') + + raw_form_details = [ + ('CFE', 'External BMI module implementation of CFE.'), + ('Multi::Noah_OWP::CFE', 'Combination of external Noah OWP Modular and CFE BMI modules.'), + ('Multi::Noah_OWP::PET::CFE', 'Combination of external Noah OWP Modular, PET and CFE BMI modules.') + ] + formulations = dict([(n, Formulation.objects.create(name=n, description=d)) for n, d in raw_form_details]) + + opt_param_desc = 'Optional value to use for {} module {} parameter' + + 
FormulationParameter.objects.bulk_create([ + #FormulationParameter(name='surface_partitioning_scheme', description='Scheme for surface runoff partitioning', value_type='text', default_value='Schaake', formulation=formulations['CFE']), + + # CFE params + FormulationParameter(name='BMI Config Dataset', group='CFE', description='Name of dataset containing required BMI initialization files', value_type='text', config_type='dataset', formulation=formulations['CFE']), + FormulationParameter(name='BMI Init File Pattern', group='CFE', description='The name or pattern for BMI initialization files', value_type='text', config_type='text', default_value='{{id}}_config.ini', formulation=formulations['CFE']), + FormulationParameter(name='CFE::satdk', group='CFE', description=opt_param_desc.format('CFE', 'satdk'), value_type='number', config_type='number', formulation=formulations['CFE']), + FormulationParameter(name='CFE::maxsmc', group='CFE', description=opt_param_desc.format('CFE', 'maxsmc'), value_type='number', config_type='number', formulation=formulations['CFE']), + FormulationParameter(name='CFE::slope', group='CFE', description=opt_param_desc.format('CFE', 'slope'), value_type='number', config_type='number', formulation=formulations['CFE']), + FormulationParameter(name='CFE::b', group='CFE', description=opt_param_desc.format('CFE', 'b'), value_type='number', config_type='number', formulation=formulations['CFE']), + FormulationParameter(name='CFE::multiplier', group='CFE', description=opt_param_desc.format('CFE', 'multiplier'), value_type='number', config_type='number', formulation=formulations['CFE']), + FormulationParameter(name='CFE::Klf', group='CFE', description=opt_param_desc.format('CFE', 'Klf'), value_type='number', config_type='number', formulation=formulations['CFE']), + FormulationParameter(name='CFE::Kn', group='CFE', description=opt_param_desc.format('CFE', 'Kn'), value_type='number', config_type='number', formulation=formulations['CFE']), + 
FormulationParameter(name='CFE::Cgw', group='CFE', description=opt_param_desc.format('CFE', 'Cgw'), value_type='number', config_type='number', formulation=formulations['CFE']), + FormulationParameter(name='CFE::expon', group='CFE', description=opt_param_desc.format('CFE', 'expon'), value_type='number', config_type='number', formulation=formulations['CFE']), + FormulationParameter(name='CFE::max_gw_storage', group='CFE', description=opt_param_desc.format('CFE', 'max_gw_storage'), value_type='number', config_type='number', formulation=formulations['CFE']), + + # Multi::Noah_OWP::CFE params + FormulationParameter(name='Noah_OWP::BMI Config Dataset', group='Noah_OWP', description='Name of dataset containing required BMI initialization files for Noah OWP', value_type='text', config_type='dataset', formulation=formulations['Multi::Noah_OWP::CFE']), + FormulationParameter(name='Noah_OWP::BMI Init File Pattern', group='Noah_OWP', description='The name or pattern for Noah OWP BMI initialization files', value_type='text', config_type='text', default_value='noah-owp-modular-init-{{id}}.namelist.input', formulation=formulations['Multi::Noah_OWP::CFE']), + FormulationParameter(name='CFE::BMI Config Dataset', group='CFE', description='Name of dataset containing required BMI initialization files for CFE', value_type='text', config_type='dataset', formulation=formulations['Multi::Noah_OWP::CFE']), + FormulationParameter(name='CFE::BMI Init File Pattern', group='CFE', description='The name or pattern for CFE BMI initialization files', value_type='text', config_type='text', default_value='{{id}}_config.ini', formulation=formulations['Multi::Noah_OWP::CFE']), + FormulationParameter(name='CFE::satdk', group='CFE', description=opt_param_desc.format('CFE', 'satdk'), value_type='number', config_type='number', formulation=formulations['Multi::Noah_OWP::CFE']), + FormulationParameter(name='CFE::maxsmc', group='CFE', description=opt_param_desc.format('CFE', 'maxsmc'), value_type='number', 
config_type='number', formulation=formulations['Multi::Noah_OWP::CFE']), + FormulationParameter(name='CFE::slope', group='CFE', description=opt_param_desc.format('CFE', 'slope'), value_type='number', config_type='number', formulation=formulations['Multi::Noah_OWP::CFE']), + FormulationParameter(name='CFE::b', group='CFE', description=opt_param_desc.format('CFE', 'b'), value_type='number', config_type='number', formulation=formulations['Multi::Noah_OWP::CFE']), + FormulationParameter(name='CFE::multiplier', group='CFE', description=opt_param_desc.format('CFE', 'multiplier'), value_type='number', config_type='number', formulation=formulations['Multi::Noah_OWP::CFE']), + FormulationParameter(name='CFE::Klf', group='CFE', description=opt_param_desc.format('CFE', 'Klf'), value_type='number', config_type='number', formulation=formulations['Multi::Noah_OWP::CFE']), + FormulationParameter(name='CFE::Kn', group='CFE', description=opt_param_desc.format('CFE', 'Kn'), value_type='number', config_type='number', formulation=formulations['Multi::Noah_OWP::CFE']), + FormulationParameter(name='CFE::Cgw', group='CFE', description=opt_param_desc.format('CFE', 'Cgw'), value_type='number', config_type='number', formulation=formulations['Multi::Noah_OWP::CFE']), + FormulationParameter(name='CFE::expon', group='CFE', description=opt_param_desc.format('CFE', 'expon'), value_type='number', config_type='number', formulation=formulations['Multi::Noah_OWP::CFE']), + FormulationParameter(name='CFE::max_gw_storage', group='CFE', description=opt_param_desc.format('CFE', 'max_gw_storage'), value_type='number', config_type='number', formulation=formulations['Multi::Noah_OWP::CFE']), + + # Multi::Noah_OWP::PET::CFE params + FormulationParameter(name='Noah_OWP::BMI Config Dataset', group='Noah_OWP', description='Name of dataset containing required BMI initialization files for Noah OWP', value_type='text', config_type='dataset', formulation=formulations['Multi::Noah_OWP::PET::CFE']), + 
FormulationParameter(name='Noah_OWP::BMI Init File Pattern', group='Noah_OWP', description='The name or pattern for Noah OWP BMI initialization files', value_type='text', config_type='text', default_value='noah-owp-modular-init-{{id}}.namelist.input', formulation=formulations['Multi::Noah_OWP::PET::CFE']), + FormulationParameter(name='PET::BMI Config Dataset', group='PET', description='Name of dataset containing required BMI initialization files for PET', value_type='text', config_type='dataset', formulation=formulations['Multi::Noah_OWP::PET::CFE']), + FormulationParameter(name='PET::BMI Init File Pattern', group='PET', description='The name or pattern for PET BMI initialization files', value_type='text', config_type='text', default_value='{{id}}_bmi_config.ini', formulation=formulations['Multi::Noah_OWP::PET::CFE']), + FormulationParameter(name='CFE::BMI Config Dataset', group='CFE', description='Name of dataset containing required BMI initialization files for CFE', value_type='text', config_type='dataset', formulation=formulations['Multi::Noah_OWP::PET::CFE']), + FormulationParameter(name='CFE::BMI Init File Pattern', group='CFE', description='The name or pattern for CFE BMI initialization files', value_type='text', config_type='text', default_value='{{id}}_bmi_config.ini', formulation=formulations['Multi::Noah_OWP::PET::CFE']), + FormulationParameter(name='CFE::satdk', group='CFE', description=opt_param_desc.format('CFE', 'satdk'), value_type='number', config_type='number', formulation=formulations['Multi::Noah_OWP::PET::CFE']), + FormulationParameter(name='CFE::maxsmc', group='CFE', description=opt_param_desc.format('CFE', 'maxsmc'), value_type='number', config_type='number', formulation=formulations['Multi::Noah_OWP::PET::CFE']), + FormulationParameter(name='CFE::slope', group='CFE', description=opt_param_desc.format('CFE', 'slope'), value_type='number', config_type='number', formulation=formulations['Multi::Noah_OWP::PET::CFE']), + 
FormulationParameter(name='CFE::b', group='CFE', description=opt_param_desc.format('CFE', 'b'), value_type='number', config_type='number', formulation=formulations['Multi::Noah_OWP::PET::CFE']), + FormulationParameter(name='CFE::multiplier', group='CFE', description=opt_param_desc.format('CFE', 'multiplier'), value_type='number', config_type='number', formulation=formulations['Multi::Noah_OWP::PET::CFE']), + FormulationParameter(name='CFE::Klf', group='CFE', description=opt_param_desc.format('CFE', 'Klf'), value_type='number', config_type='number', formulation=formulations['Multi::Noah_OWP::PET::CFE']), + FormulationParameter(name='CFE::Kn', group='CFE', description=opt_param_desc.format('CFE', 'Kn'), value_type='number', config_type='number', formulation=formulations['Multi::Noah_OWP::PET::CFE']), + FormulationParameter(name='CFE::Cgw', group='CFE', description=opt_param_desc.format('CFE', 'Cgw'), value_type='number', config_type='number', formulation=formulations['Multi::Noah_OWP::PET::CFE']), + FormulationParameter(name='CFE::expon', group='CFE', description=opt_param_desc.format('CFE', 'expon'), value_type='number', config_type='number', formulation=formulations['Multi::Noah_OWP::PET::CFE']), + FormulationParameter(name='CFE::max_gw_storage', group='CFE', description=opt_param_desc.format('CFE', 'max_gw_storage'), value_type='number', config_type='number', formulation=formulations['Multi::Noah_OWP::PET::CFE']) + + ]) + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ('MaaS', '0002_formulation_formulationparameter'), + ] + + operations = [ + migrations.RunPython(create_premade_formulations) + ] diff --git a/python/gui/MaaS/migrations/0004_formulation_records_2.py b/python/gui/MaaS/migrations/0004_formulation_records_2.py new file mode 100644 index 000000000..753bb9a4e --- /dev/null +++ b/python/gui/MaaS/migrations/0004_formulation_records_2.py @@ -0,0 +1,48 @@ +from django.db import migrations + + +def 
create_premade_formulations(apps, schema_editor): + + Formulation = apps.get_model('MaaS', 'Formulation') + FormulationParameter = apps.get_model('MaaS', 'FormulationParameter') + + raw_form_details = [ + ('Multi::PET::CFE', 'Combination of external PET and CFE BMI modules.') + ] + formulations = dict([(n, Formulation.objects.create(name=n, description=d)) for n, d in raw_form_details]) + + opt_param_desc = 'Optional value to use for {} module {} parameter' + + FormulationParameter.objects.bulk_create([ + #FormulationParameter(name='surface_partitioning_scheme', description='Scheme for surface runoff partitioning', value_type='text', default_value='Schaake', formulation=formulations['CFE']), + + # Multi::PET::CFE params + FormulationParameter(name='PET::BMI Config Dataset', group='PET', description='Name of dataset containing required BMI initialization files for PET', value_type='text', config_type='dataset', formulation=formulations['Multi::PET::CFE']), + FormulationParameter(name='PET::BMI Init File Pattern', group='PET', description='The name or pattern for PET BMI initialization files', value_type='text', config_type='text', default_value='{{id}}_bmi_config.ini', formulation=formulations['Multi::PET::CFE']), + FormulationParameter(name='CFE::BMI Config Dataset', group='CFE', description='Name of dataset containing required BMI initialization files for CFE', value_type='text', config_type='dataset', formulation=formulations['Multi::PET::CFE']), + FormulationParameter(name='CFE::BMI Init File Pattern', group='CFE', description='The name or pattern for CFE BMI initialization files', value_type='text', config_type='text', default_value='{{id}}_bmi_config.ini', formulation=formulations['Multi::PET::CFE']), + FormulationParameter(name='CFE::satdk', group='CFE', description=opt_param_desc.format('CFE', 'satdk'), value_type='number', config_type='number', formulation=formulations['Multi::PET::CFE']), + FormulationParameter(name='CFE::maxsmc', group='CFE', 
description=opt_param_desc.format('CFE', 'maxsmc'), value_type='number', config_type='number', formulation=formulations['Multi::PET::CFE']), + FormulationParameter(name='CFE::slope', group='CFE', description=opt_param_desc.format('CFE', 'slope'), value_type='number', config_type='number', formulation=formulations['Multi::PET::CFE']), + FormulationParameter(name='CFE::b', group='CFE', description=opt_param_desc.format('CFE', 'b'), value_type='number', config_type='number', formulation=formulations['Multi::PET::CFE']), + FormulationParameter(name='CFE::multiplier', group='CFE', description=opt_param_desc.format('CFE', 'multiplier'), value_type='number', config_type='number', formulation=formulations['Multi::PET::CFE']), + FormulationParameter(name='CFE::Klf', group='CFE', description=opt_param_desc.format('CFE', 'Klf'), value_type='number', config_type='number', formulation=formulations['Multi::PET::CFE']), + FormulationParameter(name='CFE::Kn', group='CFE', description=opt_param_desc.format('CFE', 'Kn'), value_type='number', config_type='number', formulation=formulations['Multi::PET::CFE']), + FormulationParameter(name='CFE::Cgw', group='CFE', description=opt_param_desc.format('CFE', 'Cgw'), value_type='number', config_type='number', formulation=formulations['Multi::PET::CFE']), + FormulationParameter(name='CFE::expon', group='CFE', description=opt_param_desc.format('CFE', 'expon'), value_type='number', config_type='number', formulation=formulations['Multi::PET::CFE']), + FormulationParameter(name='CFE::max_gw_storage', group='CFE', description=opt_param_desc.format('CFE', 'max_gw_storage'), value_type='number', config_type='number', formulation=formulations['Multi::PET::CFE']) + + ]) + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ('MaaS', '0003_formulation_records'), + ] + + operations = [ + migrations.RunPython(create_premade_formulations) + ] diff --git a/python/gui/MaaS/static/common/css/base.css 
b/python/gui/MaaS/static/common/css/base.css index f8e2fff1f..b4311fd95 100644 --- a/python/gui/MaaS/static/common/css/base.css +++ b/python/gui/MaaS/static/common/css/base.css @@ -4,7 +4,7 @@ background-size: 100px 100px; background-position-x: 10px; height: fit-content; - background-color: #33ade0; + background-color: #0a5190; } #base-banner h1 { @@ -38,7 +38,7 @@ input[type="checkbox"] + label { .MaaS-btn { margin: 10px 0px; color: white; - background-color: #E06633; + background-color: #900f0f; border: none; font-weight: bold; height: 40px; @@ -47,7 +47,7 @@ input[type="checkbox"] + label { } .MaaS-btn:hover { - background-color: #e7875f; + background-color: #c92e2e; } fieldset { @@ -58,7 +58,7 @@ fieldset { legend { padding: 3px; color: white; - background-color: #33ade0; + background-color: #0a5190; font-weight: bold; border: none; width: fit-content; @@ -67,12 +67,12 @@ legend { details summary { font-weight: bold; padding: 10px; - background-color: #33ade0; + background-color: #0a5190; color: white; border: none; cursor: pointer; } summary:hover, summary.summary-hover { - background-color: #E06633; + background-color: #900f0f; } diff --git a/python/gui/MaaS/static/common/js/map.js b/python/gui/MaaS/static/common/js/map.js index 1bb3e0508..0bc1f1eab 100644 --- a/python/gui/MaaS/static/common/js/map.js +++ b/python/gui/MaaS/static/common/js/map.js @@ -51,6 +51,10 @@ startup_scripts.push( function(){ mymap = L.map('mapid').setView(centerLine, zoom); + //mymap.on('zoomend', function() { + // loadFabric('zoomend'); + //}); + L.tileLayer(mapUrl, { maxZoom: maxZoom, attribution: attribution, @@ -152,6 +156,11 @@ function plotMapLayers(featureDocuments, map) { function propertiesToHTML(geojson, xwalk) { var properties = geojson.properties; var markup = ""; + + if (!("id" in geojson) && "id" in properties) { + geojson.id = properties.id; + } + if ("Name" in properties) { markup += "

" + properties.Name + "

"; } @@ -180,7 +189,8 @@ function propertiesToHTML(geojson, xwalk) { var propertyIsNotName = property.toLowerCase() != "name"; var propertyIsNotBlank = properties[property] != null && properties[property] != ""; var propertyIsNotAnObject = typeof properties[property] != 'object'; - if (propertyIsNotName && propertyIsNotBlank && propertyIsNotAnObject) { + var propertyIsNotId = property.toLowerCase() != "id"; + if (propertyIsNotName && propertyIsNotBlank && propertyIsNotAnObject && propertyIsNotId) { propertyKeys.push(property); } } @@ -413,7 +423,25 @@ function loadFabricTypes() { ); } +function isZoomCloseEnoughForFabricLookup() { + return mymap.getZoom() > 10; +} + + function loadFabric(event) { + // TODO: test this out, both to see if it works and to see if it is the right amount + /* + if (!isZoomCloseEnoughForFabricLookup()) { + return + } + + var bounding_box = mymap.getBounds(); + var min_x = bounding_box.getWest(); + var min_y = bounding_box.getSouth(); + var max_x = bounding_box.getEast(); + var max_y = bounding_box.getNorth(); + */ + var name = $("#fabric-selector").val(); var type = $("#fabric-type-selector").val(); @@ -451,6 +479,28 @@ function loadFabric(event) { mymap.fitBounds(activeLayer.getBounds()); } + /* + if (activeLayer) { + Object.values(selectedLayers).forEach(layer => removeFeature(layer.feature.id)); + activeLayer.remove(); + } + var url = "fabric/" + name; + $.ajax( + { + url: url, + type: "GET", + data: {"fabric_name": name, "feature_type": type, "min_x": min_x, "min_y": min_y, "max_x": max_x, "max_y": max_y}, + error: function(xhr, status, error) { + console.error(error); + }, + success: function(result, status, xhr) { + if (result) { + addDocument(result); + } + } + } + ) + */ var name_type = name+"|"+type; if (name && type && (name_type != activeLayerName || activeLayer == null)) { activeLayerName = name_type; diff --git a/python/gui/MaaS/static/maas/css/configuration/ngen.css b/python/gui/MaaS/static/maas/css/configuration/ngen.css 
index 61650bc4f..c8e01c318 100644 --- a/python/gui/MaaS/static/maas/css/configuration/ngen.css +++ b/python/gui/MaaS/static/maas/css/configuration/ngen.css @@ -17,7 +17,7 @@ } .summary-selected { - background-color: #E06633 !important; + background-color: #900f0f !important; color: white; } diff --git a/python/gui/MaaS/static/maas/css/configuration/nwm.css b/python/gui/MaaS/static/maas/css/configuration/nwm.css index 61650bc4f..c8e01c318 100644 --- a/python/gui/MaaS/static/maas/css/configuration/nwm.css +++ b/python/gui/MaaS/static/maas/css/configuration/nwm.css @@ -17,7 +17,7 @@ } .summary-selected { - background-color: #E06633 !important; + background-color: #900f0f !important; color: white; } diff --git a/python/gui/MaaS/static/maas/js/components/confirmDialog.js b/python/gui/MaaS/static/maas/js/components/confirmDialog.js new file mode 100644 index 000000000..a09710327 --- /dev/null +++ b/python/gui/MaaS/static/maas/js/components/confirmDialog.js @@ -0,0 +1,108 @@ +class ConfirmDialog { + constructor(parentDivName, id, styleClass, onConfirmFunc) { + this.parentDivName = parentDivName; + this.id = id; + this.styleClass = styleClass; + this.onConfirmFunc = onConfirmFunc; + + this.outer_div = null; + this.content_div = null; + } + + get parentDiv() { + return document.getElementById(this.parentDivName); + } +} + +class ConfirmDeleteDatasetDialog extends ConfirmDialog { + constructor(dataset_name, parentDivName, id, styleClass, onConfirmFunc) { + super(parentDivName, id, styleClass, onConfirmFunc); + this.dataset_name = dataset_name; + this.buttons_div = null; + } + + _style_outer_div() { + this.outer_div.style.position = 'fixed'; + this.outer_div.style.zIndex = '1'; + this.outer_div.style.left = '35%'; + this.outer_div.style.top = '5%'; + this.outer_div.style.width = '25%'; + this.outer_div.style.height = '25%'; + this.outer_div.style.overflow = 'clip'; + this.outer_div.style.backgroundColor = '#B7B5B5FF'; + this.outer_div.style.border = '1px solid #888'; + 
this.outer_div.style.padding = '15px'; + //this.outer_div.style.paddingTop = '0px'; + this.outer_div.style.margin = '15% auto'; + } + + _init_outer_div() { + if (this.outer_div == null) { + this.outer_div = document.createElement('div'); + this.outer_div.id = this.id; + this.outer_div.class = this.styleClass; + this._style_outer_div(); + this.parentDiv.appendChild(this.outer_div); + } + } + + _init_content() { + if (this.content_div == null) { + this.content_div = document.createElement('div'); + this.content_div.style.height = '70%'; + //this.content_div.style.overflow = 'fixed'; + this.content_div.style.padding = '10px'; + this.content_div.appendChild(document.createTextNode("This will permanently delete dataset: ")); + this.content_div.appendChild(document.createElement('br')); + this.content_div.appendChild(document.createElement('br')); + this.content_div.appendChild(document.createTextNode(this.dataset_name)); + this.content_div.appendChild(document.createElement('br')); + this.content_div.appendChild(document.createElement('br')); + this.content_div.appendChild(document.createTextNode("Proceed?")); + + if (this.outer_div == null) { + this._init_outer_div(); + } + this.outer_div.appendChild(this.content_div); + } + } + + _init_buttons() { + if (this.buttons_div == null) { + this.buttons_div = document.createElement('div'); + this.outer_div.appendChild(this.buttons_div); + this.buttons_div.style.padding = '10px'; + + let cancel_button = document.createElement('button'); + cancel_button.onclick = () => { + this.remove(); + }; + cancel_button.textContent = "Cancel"; + cancel_button.style.marginRight = '10px'; + this.buttons_div.appendChild(cancel_button); + + let confirm_button = document.createElement('button'); + confirm_button.onclick = this.onConfirmFunc; + confirm_button.textContent = "Confirm"; + this.buttons_div.appendChild(confirm_button); + } + } + + append() { + this._init_outer_div(); + this._init_content(); + this._init_buttons(); + } + + remove() { 
+ this.buttons_div.remove(); + this.buttons_div = null; + + this.content_div.remove(); + this.content_div = null; + + this.outer_div.remove(); + this.outer_div = null; + } + +} \ No newline at end of file diff --git a/python/gui/MaaS/static/maas/js/components/createDatasetForm.js b/python/gui/MaaS/static/maas/js/components/createDatasetForm.js new file mode 100644 index 000000000..ce9fffb7d --- /dev/null +++ b/python/gui/MaaS/static/maas/js/components/createDatasetForm.js @@ -0,0 +1,43 @@ +class CreateDatasetForm { + constructor(parentDivId) { + this.parentDivId = parentDivId; + this.formElementId = this.parentDivId + "-form"; + this.formContentDivId = this.formElementId + "-div-universal-inputs"; + this.dynamicVarsDivId = this.formElementId + "-div-dynamic-inputs"; + + } + + updateFormatChange(selection) { + let dy_div = document.getElementById(this.dynamicVarsDivId); + while (dy_div.firstChild){ + dy_div.removeChild(dy_div) + } + + let addUploadSelection = false; + if (selection == "NETCDF_FORCING_CANONICAL") { + addUploadSelection = true; + } + + if (addUploadSelection) { + let upload_select_label = document.createElement('label'); + let selectId = this.parentDivId + '-inputs-upload'; + upload_select_label.appendChild(document.createTextNode('Data Files:')); + upload_select_label.htmlFor = selectId + dy_div.appendChild(upload_select_label); + + let upload_select = document.createElement('input'); + upload_select.type = 'file'; + upload_select.name = 'create-dataset-upload'; + upload_select.id = selectId + upload_select.style.float = 'right'; + upload_select.style.textAlign = 'right'; + dy_div.appendChild(upload_select); + } + } + + dynamicInputUpdate(formInput, selection) { + if (formInput.id == this.parentDivId + '-form-input-format') { + this.updateFormatChange(selection); + } + } +} \ No newline at end of file diff --git a/python/gui/MaaS/static/maas/js/components/datasetOverview.js b/python/gui/MaaS/static/maas/js/components/datasetOverview.js new file mode 
100644 index 000000000..2e156c946 --- /dev/null +++ b/python/gui/MaaS/static/maas/js/components/datasetOverview.js @@ -0,0 +1,167 @@ +class DatasetOverviewTableRow { + constructor(parentTableId, serializedDataset, detailsOnClickFunc, filesOnClickFunc, downloadOnClickFunc, + uploadOnClickFunc, deleteOnClickFunc) { + this.parentTableId = parentTableId; + this.serializedDataset = serializedDataset; + + this.rowClassName = "mgr-tbl-content"; + + this.detailsOnClickFunc = detailsOnClickFunc; + this.filesOnClickFunc = filesOnClickFunc; + this.downloadOnClickFunc = downloadOnClickFunc; + this.uploadOnClickFunc = uploadOnClickFunc; + this.deleteOnClickFunc = deleteOnClickFunc; + + this.row = document.getElementById(this.rowId); + } + + get datasetName() { + return this.serializedDataset["name"]; + } + + get category() { + return this.serializedDataset["data_category"]; + } + + get rowId() { + return this.parentTableId + "-row-" + this.datasetName; + } + + get parentTable() { + return document.getElementById(this.parentTableId); + } + + _createLinks(is_anchor, text, onClickFunc) { + let cell = document.createElement('th'); + let content; + if (is_anchor) { + content = document.createElement('a'); + content.href = "javascript:void(0);"; + } + else { + content = document.createElement('button'); + } + + const ds_name = this.datasetName; + + let onclick; + switch (text) { + case 'Details': + onclick = this.detailsOnClickFunc; + break; + case 'Files': + onclick = this.filesOnClickFunc; + break; + case 'Download': + onclick = this.downloadOnClickFunc; + break; + case 'Upload Files': + onclick = this.uploadOnClickFunc; + break; + case 'Delete': + onclick = this.deleteOnClickFunc; + break; + } + + content.onclick = function() { onclick(ds_name); }; + content.appendChild(document.createTextNode(text)); + cell.appendChild(content); + this.row.appendChild(cell); + } + + build() { + if (this.row != null) { + this.row.remove(); + } + this.row = document.createElement('tr'); + 
this.row.id = this.rowId; + this.row.className = this.rowClassName; + + let colCell = document.createElement('th'); + colCell.appendChild(document.createTextNode(this.datasetName)); + this.row.appendChild(colCell); + + colCell = document.createElement('th'); + colCell.appendChild(document.createTextNode(this.category)); + this.row.appendChild(colCell); + + this._createLinks(true, "Details", this.datasetName, this.detailsOnClickFunc); + this._createLinks(true, "Files", this.datasetName, this.filesOnClickFunc); + this._createLinks(true, "Download", this.datasetName, this.downloadOnClickFunc); + // TODO: put this back in later + //this._createLinks(true, "Upload Files", this.datasetName, this.uploadOnClickFunc); + this._createLinks(true, "Delete", this.datasetName, this.deleteOnClickFunc); + } +} + +class DatasetOverviewTable { + constructor(parentDivId, tableClass, detailsOnClickFunc, filesOnClickFunc, downloadOnClickFunc, + uploadOnClickFunc, deleteOnClickFunc) { + this.parentDivId = parentDivId; + this.tableClass = tableClass; + this.tableId = this.parentDivId + "-overview-table"; + + this.detailsOnClickFunc = detailsOnClickFunc; + this.filesOnClickFunc = filesOnClickFunc; + this.downloadOnClickFunc = downloadOnClickFunc; + this.uploadOnClickFunc = uploadOnClickFunc; + this.deleteOnClickFunc = deleteOnClickFunc; + + this.table = document.getElementById(this.tableId); + } + + get parentDiv() { + return document.getElementById(this.parentDivId); + } + + get tableHeader() { + let thead = document.createElement('thead'); + let header = document.createElement('tr'); + thead.appendChild(header); + + let colCell = document.createElement('th'); + colCell.className = "mgr-tbl-dataset-header"; + colCell.appendChild(document.createTextNode('Dataset Name')); + header.appendChild(colCell); + + colCell = document.createElement('th'); + colCell.className = "mgr-tbl-category-header"; + colCell.appendChild(document.createTextNode('Category')); + header.appendChild(colCell); + + 
header.appendChild(document.createElement('th')); + + colCell = document.createElement('th'); + colCell.appendChild(document.createTextNode('Actions')); + header.appendChild(colCell); + + header.appendChild(document.createElement('th')); + header.appendChild(document.createElement('th')); + + return thead; + } + + buildAndAddRow(serializedDataset) { + let row = new DatasetOverviewTableRow(this.tableId, serializedDataset, this.detailsOnClickFunc, + this.filesOnClickFunc, this.downloadOnClickFunc, this.uploadOnClickFunc, this.deleteOnClickFunc); + row.build(); + this.table.appendChild(row.row); + } + + buildTable(contentResponse) { + if (this.table != null) { + this.table.remove(); + } + this.table = document.createElement('table'); + this.table.id = this.tableId; + this.table.className = this.tableClass; + + this.table.appendChild(this.tableHeader); + + for (const ds_name in contentResponse["datasets"]) { + this.buildAndAddRow(contentResponse["datasets"][ds_name]); + } + + this.parentDiv.appendChild(this.table); + } +} \ No newline at end of file diff --git a/python/gui/MaaS/templates/maas/configuration/ngen.html b/python/gui/MaaS/templates/maas/configuration/ngen.html index 53b1140eb..148d94f7d 100644 --- a/python/gui/MaaS/templates/maas/configuration/ngen.html +++ b/python/gui/MaaS/templates/maas/configuration/ngen.html @@ -34,6 +34,11 @@ +
+ Resources + + +
@@ -89,8 +97,10 @@
+
+ + + + + + + + + + +
+ + + {% if errors %} +
+
    + {% for error in errors %} +
  • {{ error }}
  • + {% endfor %} +
+
+ {% endif %} + + {% if warnings %} +
+
    + {% for warning in warnings %} +
  • {{ warning }}
  • + {% endfor %} +
+
+ {% endif %} + + {% if info %} +
+
    + {% for message in info %} +
  • {{ message }}
  • + {% endfor %} +
+
+ {% endif %} + + {# Cache jQuery scripts for UI scripting and styling #} +
+

Dataset Management

+
+ Manage + Create +
+
+
+
+

Create New Dataset:

+
+ {# Add the token to provide cross site request forgery protection #} + {% csrf_token %} + {{ form }} + + {% comment %} + Below are dynamic forms, meaning depending on the above form's input, + one of the below forms is shown. To accomplish this, each of the below forms are + wrapped in an element with a unique class. The class name is used _by_ the above + form's fields using event listeners to toggle on the correct form. When a form is + "toggled on", meaning its wrapper is displayed, a flag class is added to it's + wrapper class list. + {% endcomment %} + {% for format_form in dynamic_forms %} + {% for field in format_form %} + {% comment %} + input field have id's of form: `id_{{field instance var name}}_{{form name}}. + a little song and dance is required to get an input tag's label `for` property + to align properly. + {% endcomment %} + + {{ field }} + {% endfor %} + {% endfor %} + + + + + + + + + + + + + +
+ + + + +
+
+
+ + diff --git a/python/gui/MaaS/urls.py b/python/gui/MaaS/urls.py index a6496fada..0e844b5c3 100644 --- a/python/gui/MaaS/urls.py +++ b/python/gui/MaaS/urls.py @@ -1,15 +1,25 @@ from django.urls import re_path from .cbv.EditView import EditView +from .cbv.DatasetManagementView import DatasetManagementView +from .cbv.DatasetApiView import DatasetApiView from .cbv.MapView import MapView, Fabrics, FabricNames, FabricTypes, ConnectedFeatures from .cbv.configuration import CreateConfiguration from .cbv.execution import Execute from .cbv.crosswalk import Crosswalk +from .views import download_dataset, download_dataset_file + app_name = 'MaaS' urlpatterns = [ re_path(r'^$', EditView.as_view()), + # TODO: add this later + #re_path(r'ngen$', NgenWorkflowView.as_view(), name="ngen-workflow"), + re_path(r'datasets', DatasetManagementView.as_view(), name="dataset-management"), + re_path(r'dataset-api', DatasetApiView.as_view(), name="dataset-api"), + re_path(r'download_dataset/(?P.*)$', download_dataset, name='download_dataset'), + re_path(r'download_dataset_file/(?P.*)/(?P.*)$', download_dataset_file, name='download_dataset_file'), re_path(r'map$', MapView.as_view(), name="map"), re_path(r'map/connections$', ConnectedFeatures.as_view(), name="connections"), re_path(r'fabric/names$', FabricNames.as_view(), name='fabric-names'), diff --git a/python/gui/MaaS/views.py b/python/gui/MaaS/views.py index 5234f69ad..d365137d8 100644 --- a/python/gui/MaaS/views.py +++ b/python/gui/MaaS/views.py @@ -1,5 +1,24 @@ -from django.http import HttpRequest -from django.shortcuts import render +import os +from django.conf import settings +from django.http import HttpResponse, Http404 -# Create your views here. 
+def download_dataset(request, path): + file_path = os.path.join(settings.DATA_DOWNLOADS_DIR, path) + if os.path.exists(file_path): + with open(file_path, 'rb') as fh: + response = HttpResponse(fh.read(), content_type="application/zip") + response['Content-Disposition'] = 'inline; filename=' + os.path.basename(file_path) + return response + raise Http404 + + +def download_dataset_file(request, dataset_name, file_name): + file_path = os.path.join(os.path.join(settings.DATA_DOWNLOADS_DIR, dataset_name), file_name) + if os.path.exists(file_path): + with open(file_path, 'rb') as fh: + response = HttpResponse(fh.read(), content_type="application/octet-stream") + response['Content-Disposition'] = 'inline; filename=' + os.path.basename(file_path) + return response + raise Http404 + diff --git a/python/gui/dependencies.txt b/python/gui/dependencies.txt index 5a8bba36e..c2ca2e833 100644 --- a/python/gui/dependencies.txt +++ b/python/gui/dependencies.txt @@ -17,3 +17,5 @@ channels channels-redis djangorestframework psycopg2-binary # TODO: get source package in future. Note that psycopg2 cannot be used on Mac; psycopg2-binary must be used +numpy +minio \ No newline at end of file diff --git a/python/gui/maas_experiment/settings.py b/python/gui/maas_experiment/settings.py index 154465798..72bf9af32 100644 --- a/python/gui/maas_experiment/settings.py +++ b/python/gui/maas_experiment/settings.py @@ -26,7 +26,8 @@ ALLOWED_HOSTS = ['*'] # The default is false; if it's not true, it will leave a user logged in indefinitely -SESSION_EXPIRE_AT_BROWSER_CLOSE = True +# TODO: get browser sessions working correctly +#SESSION_EXPIRE_AT_BROWSER_CLOSE = True # This is the absolute age; navigating won't necessarily tell the system that anything is happening # and sessions will absolutely end after this time, regardless of what is going on. @@ -40,18 +41,25 @@ # “secure”, which means browsers may ensure that the cookie is only sent under an HTTPS connection. 
# Leaving this setting off isn’t a good idea because an attacker could capture an unencrypted session cookie with a # packet sniffer and use the cookie to hijack the user’s session. -SESSION_COOKIE_SECURE = not DEBUG +# TODO: get browser sessions (and cookies) working correctly +#SESSION_COOKIE_SECURE = not DEBUG # Whether to use a secure cookie for the CSRF cookie. If this is set to True, the cookie will be marked as “secure”, # which means browsers may ensure that the cookie is only sent with an HTTPS connection. -CSRF_COOKIE_SECURE = not DEBUG +# TODO: get CSRF security working correctly +#CSRF_COOKIE_SECURE = not DEBUG +CSRF_COOKIE_SECURE = False # Whether to store the CSRF token in the user’s session instead of in a cookie. # It requires the use of django.contrib.sessions. # # Storing the CSRF token in a cookie (Django’s default) is safe, but storing it in the session is common practice # in other web frameworks and therefore sometimes demanded by security auditors. -CSRF_USE_SESSIONS = not DEBUG +# TODO: get CSRF security working correctly +#CSRF_USE_SESSIONS = not DEBUG +CSRF_USE_SESSIONS = False +# A list of trusted origins for unsafe requests (e.g. POST). 
+CSRF_TRUSTED_ORIGINS = [url.strip() for url in os.environ.get('DMOD_GUI_CSRF_TRUSTED_ORIGINS', '').split(',') if url] # security.W019: Unless we start serving data in a frame, set to 'DENY' X_FRAME_OPTIONS = 'DENY' @@ -74,7 +82,8 @@ 'django.middleware.security.SecurityMiddleware', 'django.contrib.sessions.middleware.SessionMiddleware', 'django.middleware.common.CommonMiddleware', - 'django.middleware.csrf.CsrfViewMiddleware', + # Turning this off for now, until things get straightened out; it is breaking the dataset management create form + #'django.middleware.csrf.CsrfViewMiddleware', 'django.contrib.auth.middleware.AuthenticationMiddleware', 'django.contrib.messages.middleware.MessageMiddleware', 'django.middleware.clickjacking.XFrameOptionsMiddleware', @@ -133,13 +142,158 @@ USE_TZ = True +DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M" # Static files (CSS, JavaScript, Images) # https://docs.djangoproject.com/en/2.2/howto/static-files/ STATIC_URL = '/static/' STATIC_ROOT = os.path.join(BASE_DIR, "static/") +NGEN_STATIC_ROOT = os.path.join(STATIC_ROOT, "ngen/") +HYDROFABRIC_ROOT = os.path.join(NGEN_STATIC_ROOT, "hydrofabric/") +DATA_CACHE_DIR = os.path.join(STATIC_ROOT, "cache/") +DATA_DOWNLOADS_DIR = os.path.join(DATA_CACHE_DIR, "downloads/") +DATA_UPLOADS_DIR = os.path.join(DATA_CACHE_DIR, "uploads/") +SECRETS_ROOT = '/run/secrets/' + +SUBSET_SERVICE_URL = os.environ.get('GUI_SUBSET_SERVICE_API_URL') + +GUI_SSL_DIR = os.path.join(BASE_DIR, 'ssl') +DEFAULT_MAAS_ENDPOINT_URI = 'wss://' + os.environ.get('MAAS_ENDPOINT_HOST') + ':' + os.environ.get('MAAS_ENDPOINT_PORT') + +MINIO_HOSTNAME = os.environ.get("OBJECT_STORE_HOSTNAME") +MINIO_PORT = os.environ.get("OBJECT_STORE_PORT") +MINIO_HOST_STRING = "{}:{}".format(MINIO_HOSTNAME, MINIO_PORT) + +MINIO_ACCESS_DOCKER_SECRET_NAME = 'object_store_exec_user_name' +MINIO_SECRET_DOCKER_SECRET_NAME: str = 'object_store_exec_user_passwd' + +MINIO_ACCESS_FILE = os.path.join(SECRETS_ROOT, MINIO_ACCESS_DOCKER_SECRET_NAME) 
+MINIO_SECRET_FILE = os.path.join(SECRETS_ROOT, MINIO_SECRET_DOCKER_SECRET_NAME) + +# TODO adjust this to be configurable +MINIO_SECURE_CONNECT = False + +""" +LOGGING = { + 'version': 1, + 'disable_existing_loggers': False, + 'filters': { + 'require_debug_false': { + '()': 'django.utils.log.RequireDebugFalse' + } + }, + 'handlers': { + 'console': { + 'level': 'DEBUG', + 'class': 'logging.StreamHandler' + } + }, + 'loggers': { + 'gui_log': { + 'handlers': ['console', ], + 'level': 'DEBUG' + } + } +} +""" + +LOGGING = { + 'version': 1, + 'disable_existing_loggers': False, + 'filters': { + 'require_debug_false': { + '()': 'django.utils.log.RequireDebugFalse' + } + }, + 'formatters': { + 'console': { + # exact format is not important, this is the minimum information + 'format': '%(asctime)s %(name)-12s %(levelname)-8s %(message)s', + }, + }, + 'handlers': { + 'guilogFile': { + 'level': 'DEBUG', + 'class': 'logging.handlers.RotatingFileHandler', + 'filename': os.environ.get('APPLICATION_LOG_PATH', os.path.join(BASE_DIR, 'gui.log')), + 'maxBytes': 1024*1024*50, # 50MB + 'backupCount': 5, + 'formatter': 'console' + }, + 'stdout': { + 'level': 'DEBUG', + 'class': 'logging.StreamHandler', + 'formatter': 'console' + } + }, + 'loggers': { + 'gui_log': { + 'handlers': ['guilogFile', 'stdout'], + 'level': 'DEBUG' + } + } +} + + +CRITICAL_LEVEL = logging.CRITICAL +"""Logging level used to indicate critical messages""" + +ERROR_LEVEL = logging.ERROR +"""Logging level used to indicate errors""" + +WARNING_LEVEL = logging.WARNING +"""Logging level used to indicate warnings""" + +INFO_LEVEL = logging.INFO +"""Logging level used to indicate basic information""" + +DEBUG_LEVEL = logging.DEBUG +"""Logging level used to indicate messages useful for debugging""" + +UNKNOWN_LEVEL = logging.NOTSET +"""Logging level to use when a proper logging level is not found""" + +DEFAULT_MESSAGE_LEVEL = INFO_LEVEL +"""The logging level used when none is given""" + + +def log(message: str, level: int 
= DEFAULT_MESSAGE_LEVEL, logger_name: str = "gui_log"): + """ + Logs the given message at the given level in the given log. + + These are all available as module level variables + + :param str message: The message to log + :param str level: The logging level to write as. + :param str logger_name: The name of the logger to write to + """ + + # We want to make sure that we're using a valid level; if it isn't in our approved list, we use the level + # equivalent to "uh...I dunno..." since we just want to write + if level is None or level not in [CRITICAL_LEVEL, ERROR_LEVEL, WARNING_LEVEL, INFO_LEVEL, DEBUG_LEVEL]: + level = UNKNOWN_LEVEL + + # We want to log messages in the form of '[2019-06-04 12:27:14-0500] Something happened...', + # so we need to determine the time and append it to the message + timestamp = datetime.now(tz=reference.LocalTimezone()).strftime("%Y-%m-%d %I:%M:%S%z") + log_message = "[{}] {}".format(timestamp, message) + + # Log the newly formatted message at the given level + logging.getLogger(logger_name).log(level, log_message) + + +REQUIRED_ENVIRONMENT_VARIABLES = [ + { + "name": "MAAS_ENDPOINT_HOST", + "purpose": "The default host address for MaaS" + }, + { + "name": "MAAS_ENDPOINT_PORT", + "purpose": "The port for the default MaaS endpoint" + } +] def ensure_required_environment_variables(): missing_variables = [ diff --git a/python/lib/client/dmod/client/__main__.py b/python/lib/client/dmod/client/__main__.py index deb1c8542..4e4c701bb 100644 --- a/python/lib/client/dmod/client/__main__.py +++ b/python/lib/client/dmod/client/__main__.py @@ -54,6 +54,17 @@ def _handle_exec_command_args(parent_subparsers_container): # Nested parser for the 'ngen' action parser_ngen = workflow_subparsers.add_parser('ngen') + parser_ngen_cal = workflow_subparsers.add_parser('ngen_cal') + # parser_ngen_cal.add_argument('--allocation-paradigm', + # dest='allocation_paradigm', + # type=AllocationParadigm.get_from_name, + # choices=[val.name.lower() for val in 
AllocationParadigm], + # default=AllocationParadigm.get_default_selection(), + # help='Specify job resource allocation paradigm to use.') + parser_ngen_cal.add_argument('realization_cfg_data_id', help='Identifier of dataset of required realization config') + # parser_ngen_cal.add_argument('cpu_count', type=int, help='Provide the desired number of processes for the execution') + + parser_ngen.add_argument('--partition-config-data-id', dest='partition_cfg_data_id', default=None, help='Provide data_id for desired partition config dataset.') parser_ngen.add_argument('--allocation-paradigm', @@ -449,9 +460,13 @@ def execute_jobs_command(args, client: DmodClient): def execute_workflow_command(args, client: DmodClient): async_loop = get_or_create_eventloop() + # TODO: aaraney if args.workflow == 'ngen': result = async_loop.run_until_complete(client.submit_ngen_request(**(vars(args)))) print(result) + elif args.workflow == "ngen_cal": + result = async_loop.run_until_complete(client.submit_ngen_cal_request(realization_cfg_data_id=args.realization_cfg_data_id)) + print(result) else: print("ERROR: Unsupported execution workflow {}".format(args.workflow)) exit(1) diff --git a/python/lib/client/dmod/client/_version.py b/python/lib/client/dmod/client/_version.py index df9144c54..0404d8103 100644 --- a/python/lib/client/dmod/client/_version.py +++ b/python/lib/client/dmod/client/_version.py @@ -1 +1 @@ -__version__ = '0.1.1' +__version__ = '0.3.0' diff --git a/python/lib/client/dmod/client/dmod_client.py b/python/lib/client/dmod/client/dmod_client.py index 82ab83274..4d9ec7f3b 100644 --- a/python/lib/client/dmod/client/dmod_client.py +++ b/python/lib/client/dmod/client/dmod_client.py @@ -1,6 +1,6 @@ from dmod.core.execution import AllocationParadigm from dmod.core.meta_data import DataCategory, DataDomain, DataFormat, DiscreteRestriction -from .request_clients import DatasetClient, DatasetExternalClient, DatasetInternalClient, NgenRequestClient +from .request_clients import 
DatasetClient, DatasetExternalClient, DatasetInternalClient, NgenRequestClient, NgenCalRequestClient from .client_config import YamlClientConfig from datetime import datetime from pathlib import Path @@ -13,6 +13,7 @@ def __init__(self, client_config: YamlClientConfig, bypass_request_service: bool self._client_config = client_config self._dataset_client = None self._ngen_client = None + self._ngen_cal_client = None self._bypass_request_service = bypass_request_service @property @@ -91,18 +92,24 @@ def dataset_client(self) -> DatasetClient: if self._bypass_request_service: if self.client_config.dataservice_endpoint_uri is None: raise RuntimeError("Cannot bypass request service without data service config details") - self._dataset_client = DatasetInternalClient(self.client_config.dataservice_endpoint_uri, - self.client_config.dataservice_ssl_dir) + self._dataset_client = DatasetInternalClient(endpoint_uri=self.client_config.dataservice_endpoint_uri, + ssl_directory=self.client_config.dataservice_ssl_dir) else: - self._dataset_client = DatasetExternalClient(self.requests_endpoint_uri, self.requests_ssl_dir) + self._dataset_client = DatasetExternalClient(endpoint_uri=self.requests_endpoint_uri, ssl_directory=self.requests_ssl_dir) return self._dataset_client @property def ngen_request_client(self) -> NgenRequestClient: if self._ngen_client is None: - self._ngen_client = NgenRequestClient(self.requests_endpoint_uri, self.requests_ssl_dir) + self._ngen_client = NgenRequestClient(endpoint_uri=self.requests_endpoint_uri, ssl_directory=self.requests_ssl_dir) return self._ngen_client + @property + def ngen_cal_request_client(self) -> NgenCalRequestClient: + if self._ngen_cal_client is None: + self._ngen_cal_client = NgenCalRequestClient(self.requests_endpoint_uri, self.requests_ssl_dir) + return self._ngen_cal_client + async def delete_dataset(self, dataset_name: str, **kwargs): return await self.dataset_client.delete_dataset(dataset_name, **kwargs) @@ -237,6 +244,18 @@ 
async def submit_ngen_request(self, start: datetime, end: datetime, hydrofabric_ cpu_count, realization_cfg_data_id, bmi_cfg_data_id, partition_cfg_data_id, cat_ids, allocation_paradigm) + # async def submit_ngen_cal_request(self, start: datetime, end: datetime, hydrofabric_data_id: str, hydrofabric_uid: str, + # cpu_count: int, realization_cfg_data_id: str, bmi_cfg_data_id: str, + # partition_cfg_data_id: Optional[str] = None, cat_ids: Optional[List[str]] = None, + # allocation_paradigm: Optional[AllocationParadigm] = None, *args, **kwargs): + async def submit_ngen_cal_request(self, realization_cfg_data_id: str): + + return await self.ngen_cal_request_client.request_exec(realization_cfg_data_id=realization_cfg_data_id) + + # return await self.ngen_cal_request_client.request_exec(start, end, hydrofabric_data_id, hydrofabric_uid, + # cpu_count, realization_cfg_data_id, bmi_cfg_data_id, + # partition_cfg_data_id, cat_ids, allocation_paradigm) + def print_config(self): print(self.client_config.print_config()) diff --git a/python/lib/client/dmod/client/request_clients.py b/python/lib/client/dmod/client/request_clients.py index 09251c01b..b802beb7c 100644 --- a/python/lib/client/dmod/client/request_clients.py +++ b/python/lib/client/dmod/client/request_clients.py @@ -2,20 +2,22 @@ from datetime import datetime from dmod.core.execution import AllocationParadigm from dmod.communication import DataServiceClient, ExternalRequestClient, ManagementAction, ModelExecRequestClient, \ - NGENRequest, NGENRequestResponse + NGENRequest, NGENRequestResponse, \ + NgenCalibrationRequest, NgenCalibrationResponse from dmod.communication.client import R from dmod.communication.dataset_management_message import DatasetManagementMessage, DatasetManagementResponse, \ MaaSDatasetManagementMessage, MaaSDatasetManagementResponse, QueryType, DatasetQuery from dmod.communication.data_transmit_message import DataTransmitMessage, DataTransmitResponse from dmod.core.meta_data import DataCategory, 
DataDomain, TimeRange +from dmod.core.dataset import Dataset from pathlib import Path -from typing import List, Optional, Tuple, Type, Union +from typing import AnyStr, Dict, List, Optional, Tuple, Type, Union import json import websockets -#import logging -#logger = logging.getLogger("gui_log") +import logging +logger = logging.getLogger("client_log") class NgenRequestClient(ModelExecRequestClient[NGENRequest, NGENRequestResponse]): @@ -42,6 +44,67 @@ async def request_exec(self, start: datetime, end: datetime, hydrofabric_data_id catchments=cat_ids) return await self.async_make_request(request) +# TODO: aaraney add NgenCalRequestClient +class NgenCalRequestClient(ModelExecRequestClient[NgenCalibrationRequest, NgenCalibrationResponse]): + + # In particular needs - endpoint_uri: str, ssl_directory: Path + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._cached_session_file = Path.home().joinpath('.dmod_client_session') + + async def request_exec(self, + # start: datetime, + # end: datetime, + # hydrofabric_data_id: str, + # hydrofabric_uid: str, + # cpu_count: int, + realization_cfg_data_id: str, + # bmi_cfg_data_id: str, + # partition_cfg_data_id: Optional[str] = None, + # cat_ids: Optional[List[str]] = None, + # allocation_paradigm: Optional[AllocationParadigm] = None + ) -> NgenCalibrationResponse: + await self._async_acquire_session_info() + + start = "2022-01-01 01:00:00" + end = "2022-01-02 01:00:00" + + hydrofabric_uid = "" + bmi_cfg_data_id = "bmi-config" + hydrofabric_data_id = "hydrofabric" + config_data_id = "ngen-cal-config" + + # NOTE: aaraney this will likely have to change + request = NgenCalibrationRequest( + evaluation_time_range=TimeRange(begin=start, end=end), + # model_cal_params={"fake": (-1, 1, 0)}, # TODO: remove this + model_cal_params=dict(), # TODO: remove this + iterations=2, # TODO: remove this + # config_data_id=realization_cfg_data_id, + config_data_id=config_data_id, + + 
time_range=TimeRange(begin=start, end=end), + hydrofabric_uid=hydrofabric_uid, + hydrofabric_data_id=hydrofabric_data_id, + bmi_cfg_data_id=bmi_cfg_data_id, + + # job_name= None, + # cal_strategy_type= 'estimation', + # cal_strategy_algorithm= 'dds', + # cal_strategy_objective_func= 'nnse', + # is_objective_func_minimized= True, + # model_strategy= 'uniform', + # is_restart= False, + session_secret=self.session_secret, + # cpu_count=cpu_count, + # allocation_paradigm=allocation_paradigm, + + # config_data_id=realization_cfg_data_id, + # partition_cfg_data_id=partition_cfg_data_id, + # catchments=cat_ids + ) + return await self.async_make_request(request) + class DatasetClient(ABC): @@ -89,6 +152,25 @@ async def create_dataset(self, name: str, category: DataCategory, domain: DataDo async def delete_dataset(self, name: str, **kwargs) -> bool: pass + @abstractmethod + async def download_item_block(self, dataset_name: str, item_name: str, blk_start: int, blk_size: int) -> AnyStr: + """ + Download a block/chunk of a given size and start point from a specified dataset file. + + Parameters + ---------- + dataset_name + item_name + blk_start + blk_size + + Returns + ------- + AnyStr + The downloaded block/chunk. 
+ """ + pass + @abstractmethod async def download_dataset(self, dataset_name: str, dest_dir: Path) -> bool: """ @@ -130,6 +212,14 @@ async def download_from_dataset(self, dataset_name: str, item_name: str, dest: P """ pass + @abstractmethod + async def get_dataset_content_details(self, name: str, **kwargs) -> bool: + pass + + @abstractmethod + async def get_item_size(self, dataset_name: str, item_name: str) -> int: + pass + @abstractmethod async def list_datasets(self, category: Optional[DataCategory] = None) -> List[str]: pass @@ -175,6 +265,50 @@ async def delete_dataset(self, name: str, **kwargs) -> bool: self.last_response = await self.async_make_request(request) return self.last_response is not None and self.last_response.success + async def get_dataset_content_details(self, name: str, **kwargs) -> dict: + # TODO: later add things like created and last updated perhaps + query = DatasetQuery(query_type=QueryType.GET_DATASET_ITEMS) + request = DatasetManagementMessage(action=ManagementAction.QUERY, query=query, dataset_name=name) + self.last_response: DatasetManagementResponse = await self.async_make_request(request) + if self.last_response.success: + return self.last_response.data + else: + return {} + + async def get_item_size(self, dataset_name: str, item_name: str) -> int: + query = DatasetQuery(query_type=QueryType.GET_ITEM_SIZE, item_name=item_name) + request = DatasetManagementMessage(action=ManagementAction.QUERY, query=query, dataset_name=dataset_name, + data_location=item_name) + self.last_response: DatasetManagementResponse = await self.async_make_request(request) + if self.last_response.success: + return self.last_response.data + else: + return -1 + + async def download_item_block(self, dataset_name: str, item_name: str, blk_start: int, blk_size: int) -> AnyStr: + """ + Download a block/chunk of a given size and start point from a specified dataset file. 
+ + Parameters + ---------- + dataset_name + item_name + blk_start + blk_size + + Returns + ------- + AnyStr + The downloaded block/chunk. + """ + request = DatasetManagementMessage(action=ManagementAction.REQUEST_DATA, dataset_name=dataset_name, + data_location=item_name, blk_start=blk_start, blk_size=blk_size) + self.last_response: DatasetManagementResponse = await self.async_make_request(request) + if self.last_response.success: + return self.last_response.data + else: + return '' + async def download_dataset(self, dataset_name: str, dest_dir: Path) -> bool: """ Download an entire dataset to a local directory. @@ -460,7 +594,56 @@ async def delete_dataset(self, name: str, **kwargs) -> bool: self.last_response = await self.async_make_request(request) return self.last_response is not None and self.last_response.success - async def download_dataset(self, dataset_name: str, dest_dir: Path) -> bool: + async def download_item_block(self, dataset_name: str, item_name: str, blk_start: int, blk_size: int) -> AnyStr: + """ + Download a block/chunk of a given size and start point from a specified dataset file. + + Parameters + ---------- + dataset_name + item_name + blk_start + blk_size + + Returns + ------- + AnyStr + The downloaded block/chunk. 
+ """ + await self._async_acquire_session_info() + request = MaaSDatasetManagementMessage(action=ManagementAction.REQUEST_DATA, dataset_name=dataset_name, + session_secret=self.session_secret, data_location=item_name, + blk_start=blk_start, blk_size=blk_size) + self.last_response: DatasetManagementResponse = await self.async_make_request(request) + if self.last_response.success: + return self.last_response.data + else: + return '' + + async def get_item_size(self, dataset_name: str, item_name: str) -> int: + await self._async_acquire_session_info() + query = DatasetQuery(query_type=QueryType.GET_ITEM_SIZE, item_name=item_name) + request = MaaSDatasetManagementMessage(action=ManagementAction.QUERY, query=query, dataset_name=dataset_name, + session_secret=self._session_secret, data_location=item_name) + self.last_response: DatasetManagementResponse = await self.async_make_request(request) + if self.last_response.success: + return self.last_response.data + else: + return -1 + + async def get_dataset_content_details(self, name: str, **kwargs) -> List: + # TODO: later add things like created and last updated perhaps + await self._async_acquire_session_info() + query = DatasetQuery(query_type=QueryType.GET_DATASET_ITEMS) + request = MaaSDatasetManagementMessage(session_secret=self.session_secret, action=ManagementAction.QUERY, + query=query, dataset_name=name) + self.last_response: DatasetManagementResponse = await self.async_make_request(request) + if self.last_response.success: + return self.last_response.data[DatasetManagementResponse._DATA_KEY_QUERY_RESULTS] + else: + return [] + + async def download_dataset(self, dataset_name: str, dest_dir: Path, overwrite: bool = False) -> bool: await self._async_acquire_session_info() try: dest_dir.mkdir(parents=True, exist_ok=True) @@ -473,14 +656,18 @@ async def download_dataset(self, dataset_name: str, dest_dir: Path) -> bool: self.last_response: MaaSDatasetManagementResponse = await self.async_make_request(request) for item, 
dest in [(filename, dest_dir.joinpath(filename)) for filename in self.last_response.query_results]: dest.parent.mkdir(exist_ok=True) - success = success and await self.download_from_dataset(dataset_name=dataset_name, item_name=item, dest=dest) + success = success and await self.download_from_dataset(dataset_name=dataset_name, item_name=item, dest=dest, + overwrite=overwrite) return success - async def download_from_dataset(self, dataset_name: str, item_name: str, dest: Path) -> bool: + async def download_from_dataset(self, dataset_name: str, item_name: str, dest: Path, overwrite: bool = False) -> bool: await self._async_acquire_session_info() - if dest.exists(): - return False try: + if dest.exists(): + if overwrite: + dest.unlink() + else: + return False dest.parent.mkdir(parents=True, exist_ok=True) except: return False @@ -505,6 +692,67 @@ async def download_from_dataset(self, dataset_name: str, item_name: str, dest: P if not has_data: return message_object + async def get_datasets(self, dataset_name: Optional[str] = None) -> Dict[str, Dataset]: + """ + Get datasets as ::class:`Dataset` objects, either for all datasets or for the one with the provided name. + + Essentially, this is a convenience wrapper around ::method:`get_serialized_datasets` that inflates the + ::class:`Dataset`. + + Parameters + ---------- + dataset_name : Optional[str] + The name of a specific dataset to get serialized details of, if only one should be obtained. + + Returns + ------- + Dict[str, Dataset] + A dictionary, keyed by dataset name, of ::class:`Dataset` objects. 
+ + See Also + ------- + get_serialized_datasets + """ + serial_jsons: Dict[str, dict] = await self.get_serialized_datasets(dataset_name=dataset_name) + return dict([(k, Dataset.factory_init_from_deserialized_json(ds_json)) for k, ds_json in serial_jsons.items()]) + + async def get_serialized_datasets(self, dataset_name: Optional[str] = None) -> Dict[str, dict]: + """ + Get dataset objects in serialized form, either for all datasets or for the one with the provided name. + + Parameters + ---------- + dataset_name : Optional[str] + The name of a specific dataset to get serialized details of, if only one should be obtained. + + Returns + ------- + Dict[str, dict] + A dictionary, keyed by dataset name, of serialized dataset objects. + """ + # TODO: may need to generalize this and add to super class + if dataset_name is None: + datasets = await self.list_datasets() + else: + # TODO: improve how this is use so that it can be safely, efficiently put everywhere it **may** be needed + await self._async_acquire_session_info() + datasets = [dataset_name] + serialized = dict() + action = ManagementAction.QUERY + query = DatasetQuery(query_type=QueryType.GET_SERIALIZED_FORM) + try: + for d in datasets: + request = MaaSDatasetManagementMessage(action=action, query=query, dataset_name=d, + session_secret=self.session_secret) + self.last_response: DatasetManagementResponse = await self.async_make_request(request) + if self.last_response.success: + serialized[d] = self.last_response.data[DatasetManagementResponse._DATA_KEY_QUERY_RESULTS] + # TODO: what to do if any are not successful + return serialized + except Exception as e: + logger.error(e) + raise e + async def list_datasets(self, category: Optional[DataCategory] = None) -> List[str]: await self._async_acquire_session_info() action = ManagementAction.LIST_ALL if category is None else ManagementAction.SEARCH diff --git a/python/lib/client/dmod/test/test_dataset_client.py b/python/lib/client/dmod/test/test_dataset_client.py 
index b266658c7..37ff6e386 100644 --- a/python/lib/client/dmod/test/test_dataset_client.py +++ b/python/lib/client/dmod/test/test_dataset_client.py @@ -1,7 +1,7 @@ import unittest from ..client.request_clients import DataCategory, DatasetClient, DatasetManagementResponse, MaaSDatasetManagementResponse from pathlib import Path -from typing import List, Optional +from typing import List, Optional, AnyStr class SimpleMockDatasetClient(DatasetClient): @@ -28,6 +28,24 @@ async def download_from_dataset(self, dataset_name: str, item_name: str, dest: P """ Mock implementation, always returning ``False``. """ return False + async def download_item_block(self, dataset_name: str, item_name: str, blk_start: int, blk_size: int) -> AnyStr: + """ + Mock implementation, always returning empty string. + """ + return '' + + async def get_dataset_content_details(self, name: str, **kwargs) -> bool: + """ + Mock implementation, always returning ``False``. + """ + return False + + async def get_item_size(self, dataset_name: str, item_name: str) -> int: + """ + Mock implementation always returning ``1``. + """ + return 1 + async def list_datasets(self, category: Optional[DataCategory] = None) -> List[str]: """ Mock implementation, always returning an empty list. 
""" return [] diff --git a/python/lib/client/setup.py b/python/lib/client/setup.py index 779e156ba..3052639b1 100644 --- a/python/lib/client/setup.py +++ b/python/lib/client/setup.py @@ -22,6 +22,6 @@ license='', include_package_data=True, #install_requires=['websockets', 'jsonschema'],vi - install_requires=['dmod-core>=0.1.0', 'websockets>=8.1', 'pyyaml', 'dmod-communication>=0.7.0', 'dmod-externalrequests>=0.3.0'], + install_requires=['dmod-core>=0.1.0', 'websockets>=8.1', 'pyyaml', 'dmod-communication>=0.11.0', 'dmod-externalrequests>=0.3.0'], packages=find_namespace_packages(include=['dmod.*'], exclude=['dmod.test']) ) diff --git a/python/lib/communication/dmod/communication/__init__.py b/python/lib/communication/dmod/communication/__init__.py index 528c6575f..2712a4a5a 100644 --- a/python/lib/communication/dmod/communication/__init__.py +++ b/python/lib/communication/dmod/communication/__init__.py @@ -3,7 +3,8 @@ PartitionerServiceClient, SchedulerClient from .maas_request import get_available_models, get_available_outputs, get_distribution_types, get_parameters, \ get_request, Distribution, ExternalRequest, ExternalRequestResponse, ModelExecRequest, ModelExecRequestResponse, \ - NWMRequest, NWMRequestResponse, Scalar, NGENRequest, NGENRequestResponse + NWMRequest, NWMRequestResponse, Scalar, NGENRequest, NGENRequestResponse, NgenCalibrationRequest, \ + NgenCalibrationResponse from .message import AbstractInitRequest, MessageEventType, Message, Response, InvalidMessage, InvalidMessageResponse, \ InitRequestResponseReason from .metadata_message import MetadataPurpose, MetadataMessage, MetadataResponse diff --git a/python/lib/communication/dmod/communication/_version.py b/python/lib/communication/dmod/communication/_version.py index e754a834e..2c7bffbf8 100644 --- a/python/lib/communication/dmod/communication/_version.py +++ b/python/lib/communication/dmod/communication/_version.py @@ -1 +1 @@ -__version__ = '0.10.1' +__version__ = '0.12.0' diff --git 
a/python/lib/communication/dmod/communication/client.py b/python/lib/communication/dmod/communication/client.py index 571f0f97d..5b5dbf68c 100644 --- a/python/lib/communication/dmod/communication/client.py +++ b/python/lib/communication/dmod/communication/client.py @@ -800,10 +800,8 @@ def _update_after_valid_response(self, response: EXTERN_REQ_R): # TODO: this can probably be taken out, as the superclass implementation should suffice async def async_make_request(self, request: EXTERN_REQ_M) -> EXTERN_REQ_R: - async with websockets.connect(self.endpoint_uri, ssl=self.client_ssl_context) as websocket: - await websocket.send(request.to_json()) - response = await websocket.recv() - return request.__class__.factory_init_correct_response_subtype(json_obj=json.loads(response)) + response = await self.async_send(request.to_json(), await_response=True) + return request.__class__.factory_init_correct_response_subtype(json_obj=json.loads(response)) @property def errors(self): diff --git a/python/lib/communication/dmod/communication/dataset_management_message.py b/python/lib/communication/dmod/communication/dataset_management_message.py index d148b6d0e..d21192906 100644 --- a/python/lib/communication/dmod/communication/dataset_management_message.py +++ b/python/lib/communication/dmod/communication/dataset_management_message.py @@ -16,6 +16,11 @@ class QueryType(Enum): GET_VALUES = 6 GET_MIN_VALUE = 7 GET_MAX_VALUE = 8 + GET_SERIALIZED_FORM = 9 + GET_LAST_UPDATED = 10 + GET_SIZE = 11 + GET_ITEM_SIZE = 12 + GET_DATASET_ITEMS = 13 @classmethod def get_for_name(cls, name_str: str) -> 'QueryType': @@ -42,26 +47,32 @@ def get_for_name(cls, name_str: str) -> 'QueryType': class DatasetQuery(Serializable): _KEY_QUERY_TYPE = 'query_type' + _KEY_ITEM_NAME = 'item_name' @classmethod def factory_init_from_deserialized_json(cls, json_obj: dict) -> Optional['DatasetQuery']: try: - return cls(query_type=QueryType.get_for_name(json_obj[cls._KEY_QUERY_TYPE])) + return 
cls(query_type=QueryType.get_for_name(json_obj[cls._KEY_QUERY_TYPE]), + item_name=json_obj.get(cls._KEY_ITEM_NAME)) except Exception as e: return None def __hash__(self): - return hash(self.query_type) + return hash('{}{}'.format(self.query_type.name, self.item_name if self.item_name is not None else '')) def __eq__(self, other): - return isinstance(other, DatasetQuery) and self.query_type == other.query_type + return isinstance(other, DatasetQuery) and self.query_type == other.query_type \ + and self.item_name == other.item_name - def __init__(self, query_type: QueryType): + def __init__(self, query_type: QueryType, item_name: Optional[str] = None): self.query_type = query_type + self.item_name = item_name def to_dict(self) -> Dict[str, Union[str, Number, dict, list]]: serial = dict() serial[self._KEY_QUERY_TYPE] = self.query_type.name + if self.item_name is not None: + serial[self._KEY_ITEM_NAME] = self.item_name return serial @@ -181,6 +192,8 @@ class DatasetManagementMessage(AbstractInitRequest): _SERIAL_KEY_CATEGORY = 'category' _SERIAL_KEY_DATA_DOMAIN = 'data_domain' _SERIAL_KEY_DATA_LOCATION = 'data_location' + _SERIAL_KEY_DATA_BLK_START = 'data_blk_start' + _SERIAL_KEY_DATA_BLK_SIZE = 'data_blk_size' _SERIAL_KEY_DATASET_NAME = 'dataset_name' _SERIAL_KEY_IS_PENDING_DATA = 'pending_data' _SERIAL_KEY_QUERY = 'query' @@ -217,6 +230,8 @@ def factory_init_from_deserialized_json(cls, json_obj: dict) -> Optional['Datase category_str = json_obj.get(cls._SERIAL_KEY_CATEGORY) category = None if category_str is None else DataCategory.get_for_name(category_str) data_loc = json_obj.get(cls._SERIAL_KEY_DATA_LOCATION) + data_blk_start = json_obj.get(cls._SERIAL_KEY_DATA_BLK_START) + data_blk_size = json_obj.get(cls._SERIAL_KEY_DATA_BLK_SIZE) #page = json_obj[cls._SERIAL_KEY_PAGE] if cls._SERIAL_KEY_PAGE in json_obj else None if cls._SERIAL_KEY_QUERY in json_obj: query = DatasetQuery.factory_init_from_deserialized_json(json_obj[cls._SERIAL_KEY_QUERY]) @@ -229,7 +244,7 @@ 
def factory_init_from_deserialized_json(cls, json_obj: dict) -> Optional['Datase return deserialized_class(action=action, dataset_name=dataset_name, category=category, is_read_only_dataset=json_obj[cls._SERIAL_KEY_IS_READ_ONLY], domain=domain, - data_location=data_loc, + data_location=data_loc, blk_start=data_blk_start, blk_size=data_blk_size, is_pending_data=json_obj.get(cls._SERIAL_KEY_IS_PENDING_DATA), #page=page, query=query, **deserialized_class_kwargs) except Exception as e: @@ -261,8 +276,8 @@ def __hash__(self): def __init__(self, action: ManagementAction, dataset_name: Optional[str] = None, is_read_only_dataset: bool = False, category: Optional[DataCategory] = None, domain: Optional[DataDomain] = None, - data_location: Optional[str] = None, is_pending_data: bool = False, - query: Optional[DatasetQuery] = None, *args, **kwargs): + data_location: Optional[str] = None, blk_start: Optional[int] = None, blk_size: Optional[int] = None, + is_pending_data: bool = False, query: Optional[DatasetQuery] = None, *args, **kwargs): """ Initialize this instance. @@ -278,6 +293,10 @@ def __init__(self, action: ManagementAction, dataset_name: Optional[str] = None, The optional category of the involved dataset or datasets, when applicable; defaults to ``None``. data_location : Optional[str] Optional location/file/object/etc. for acted-upon data. + blk_start : Optional[int] + Optional starting point for when acting upon a block/chunk of data. + blk_size : Optional[int] + Optional block size for when acting upon a block/chunk of data. is_pending_data : bool Whether the sender has data pending transmission after this message (default: ``False``). 
query : Optional[DatasetQuery] @@ -302,9 +321,19 @@ def __init__(self, action: ManagementAction, dataset_name: Optional[str] = None, self._category = category self._domain = domain self._data_location = data_location + self._blk_start = blk_start + self._blk_size = blk_size self._query = query self._is_pending_data = is_pending_data + @property + def blk_size(self) -> Optional[int]: + return self._blk_size + + @property + def blk_start(self) -> Optional[int]: + return self._blk_start + @property def data_location(self) -> Optional[str]: """ @@ -406,6 +435,10 @@ def to_dict(self) -> Dict[str, Union[str, Number, dict, list]]: serial[self._SERIAL_KEY_CATEGORY] = self.data_category.name if self.data_location is not None: serial[self._SERIAL_KEY_DATA_LOCATION] = self.data_location + if self._blk_start is not None: + serial[self._SERIAL_KEY_DATA_BLK_START] = self._blk_start + if self._blk_size is not None: + serial[self._SERIAL_KEY_DATA_BLK_SIZE] = self._blk_size if self.data_domain is not None: serial[self._SERIAL_KEY_DATA_DOMAIN] = self.data_domain.to_dict() if self.query is not None: @@ -602,6 +635,10 @@ def __init__(self, session_secret: str, *args, **kwargs): is_read_only_dataset : bool category : Optional[DataCategory] data_location : Optional[str] + blk_start : Optional[int] + Optional starting point for when acting upon a block/chunk of data. + blk_size : Optional[int] + Optional block size for when acting upon a block/chunk of data. 
is_pending_data : bool query : Optional[DataQuery] """ diff --git a/python/lib/communication/dmod/communication/maas_request/__init__.py b/python/lib/communication/dmod/communication/maas_request/__init__.py index cad8e026e..0932d2261 100644 --- a/python/lib/communication/dmod/communication/maas_request/__init__.py +++ b/python/lib/communication/dmod/communication/maas_request/__init__.py @@ -11,4 +11,4 @@ from .model_exec_request import ModelExecRequest, get_available_models from .model_exec_request_response import ModelExecRequestResponse from .nwm import NWMRequest, NWMRequestResponse -from .ngen import NGENRequest, NGENRequestResponse +from .ngen import NGENRequest, NGENRequestResponse, NgenCalibrationRequest, NgenCalibrationResponse diff --git a/python/lib/communication/dmod/communication/maas_request/dmod_job_request.py b/python/lib/communication/dmod/communication/maas_request/dmod_job_request.py index 5288e7941..e5a9d22e7 100644 --- a/python/lib/communication/dmod/communication/maas_request/dmod_job_request.py +++ b/python/lib/communication/dmod/communication/maas_request/dmod_job_request.py @@ -11,8 +11,10 @@ class DmodJobRequest(AbstractInitRequest, ABC): The base class underlying all types of messages requesting execution of some kind of workflow job. 
""" - def __int__(self, *args, **kwargs): - super(DmodJobRequest, self).__int__(*args, **kwargs) + # TODO: this also needs to include cpu count, memory, allocation paradigm, and perhaps other things + + def __init__(self, *args, **kwargs): + super(DmodJobRequest, self).__init__(*args, **kwargs) @property @abstractmethod diff --git a/python/lib/communication/dmod/communication/maas_request/model_exec_request.py b/python/lib/communication/dmod/communication/maas_request/model_exec_request.py index de537091b..f956ffeb4 100644 --- a/python/lib/communication/dmod/communication/maas_request/model_exec_request.py +++ b/python/lib/communication/dmod/communication/maas_request/model_exec_request.py @@ -12,12 +12,25 @@ def get_available_models() -> dict: """ :return: The names of all models mapped to their class """ - available_models = dict() + # TODO: the previous implementation; confirm reason this change was needed + # available_models = dict() + # + # for subclass in ModelExecRequest.__subclasses__(): # type: ModelExecRequest + # available_models[subclass.model_name] = subclass + # + # return available_models - for subclass in ModelExecRequest.__subclasses__(): # type: ModelExecRequest - available_models[subclass.model_name] = subclass + def recursively_get_all_model_subclasses(model_exec_request: "ModelExecRequest") -> dict: + available_models = dict() - return available_models + for subclass in model_exec_request.__subclasses__(): # type: ModelExecRequest + available_models[subclass.model_name] = subclass + # TODO: what to do if descendant subclass "overwrites" ancestor subclass? 
+ available_models.update(recursively_get_all_model_subclasses(subclass)) + + return available_models + + return recursively_get_all_model_subclasses(ModelExecRequest) class ModelExecRequest(ExternalRequest, DmodJobRequest, ABC): diff --git a/python/lib/communication/dmod/communication/maas_request/ngen/__init__.py b/python/lib/communication/dmod/communication/maas_request/ngen/__init__.py index c53e6e3ee..2dcc80cc6 100644 --- a/python/lib/communication/dmod/communication/maas_request/ngen/__init__.py +++ b/python/lib/communication/dmod/communication/maas_request/ngen/__init__.py @@ -1 +1,2 @@ from .ngen_request import NGENRequest, NGENRequestResponse +from .ngen_calibration_request import NgenCalibrationRequest, NgenCalibrationResponse diff --git a/python/lib/communication/dmod/communication/maas_request/ngen/ngen_calibration_request.py b/python/lib/communication/dmod/communication/maas_request/ngen/ngen_calibration_request.py new file mode 100644 index 000000000..7b2d0fd19 --- /dev/null +++ b/python/lib/communication/dmod/communication/maas_request/ngen/ngen_calibration_request.py @@ -0,0 +1,180 @@ +from numbers import Number +from dmod.core.meta_data import TimeRange +from typing import Dict, List, Optional, Set, Tuple, Union + +from ...message import MessageEventType +from ...maas_request import ExternalRequestResponse, ModelExecRequestResponse +from .ngen_request import NGENRequest + + +class NgenCalibrationRequest(NGENRequest): + """ + An extension of ::class:`NGENRequest` for requesting ngen-cal calibration jobs. + """ + + event_type: MessageEventType = MessageEventType.CALIBRATION_REQUEST + model_name = 'ngen-cal' #FIXME case sentitivity + + # TODO: probably will need to re-examine this + _DEFAULT_CPU_COUNT = 1 + """ The default number of CPUs to assume are being requested for the job, when not explicitly provided. 
""" + + _KEY_CAL_STRATEGY_ALGO = 'strategy_algorithm' + _KEY_CAL_STRATEGY_OBJ_FUNC = 'strategy_objective_function' + _KEY_CAL_STRATEGY_TYPE = 'strategy_type' + _KEY_EVALUTATION_TIME = 'evaluation_time_range' + _KEY_IS_OBJ_FUNC_MIN = 'is_obj_func_min' + _KEY_IS_RESTART = 'is_restart' + _KEY_ITERATIONS = 'iterations' + _KEY_JOB_NAME = 'job_name' + _KEY_MODEL_CAL_PARAMS = 'model_cal_params' + _KEY_MODEL_STRATEGY = 'model_strategy' + + @classmethod + def _additional_deserialized_args(cls, json_obj: dict) -> dict: + """ + Parse any additional, (sub)class-specific deserialization params. + + Parameters + ---------- + json_obj + + Returns + ------- + dict + """ + additional_kw_args = dict() + additional_kw_args['cal_strategy_algorithm'] = json_obj[cls._KEY_CAL_STRATEGY_ALGO] + additional_kw_args['cal_strategy_objective_func'] = json_obj[cls._KEY_CAL_STRATEGY_OBJ_FUNC] + additional_kw_args['cal_strategy_type'] = json_obj[cls._KEY_CAL_STRATEGY_TYPE] + additional_kw_args['evaluation_time_range'] = TimeRange.factory_init_from_deserialized_json(json_obj[cls._KEY_EVALUTATION_TIME]) + additional_kw_args['is_objective_func_minimized'] = json_obj[cls._KEY_IS_OBJ_FUNC_MIN] + additional_kw_args['is_restart'] = json_obj[cls._KEY_IS_RESTART] + additional_kw_args['iterations'] = json_obj[cls._KEY_ITERATIONS] + additional_kw_args['job_name'] = json_obj[cls._KEY_JOB_NAME] + additional_kw_args['model_cal_params'] = json_obj[cls._KEY_MODEL_CAL_PARAMS] + additional_kw_args['model_strategy'] = json_obj[cls._KEY_MODEL_STRATEGY] + return additional_kw_args + + @classmethod + def factory_init_correct_response_subtype(cls, json_obj: dict) -> 'NgenCalibrationResponse': + """ + Init a :obj:`Response` instance of the appropriate subtype for this class from the provided JSON object. + + Parameters + ---------- + json_obj + + Returns + ------- + CalibrationJobResponse + A response of the correct type, with state details from the provided JSON. 
+ """ + return NgenCalibrationResponse.factory_init_from_deserialized_json(json_obj=json_obj) + + def __init__(self, evaluation_time_range: TimeRange, model_cal_params: Dict[str, Tuple[float, float, float]], + iterations: int, cal_strategy_type: str = 'estimation', cal_strategy_algorithm: str = 'dds', + cal_strategy_objective_func: str = 'nnse', is_objective_func_minimized: bool = True, + model_strategy: str = 'uniform', job_name: Optional[str] = None, is_restart: bool = False, *args, + **kwargs): + """ + Initialize an instance. + + Parameters + ---------- + evaluation_time_range : TimeRange + The time range for calibration for use within the ngen-cal config for the job. + model_cal_params : Dict[str, Tuple[float, float, float]] + A collection of the calibratable params, keyed by name, with a tuple of the min, max, and initial values. + iterations : int + The total number of search iterations to run. + cal_strategy_type : str + The ngen-cal general strategy type for the calibration config (default: ``estimation``). + cal_strategy_algorithm : str + Calibration strategy algorithm ("dds" by default). + cal_strategy_objective_func : str + The standard name ("kling_gupta", "nnse", "custom", "single_peak", "volume") or full ngen_cal package module + name for the objective function to use ("nnse" by default). + is_objective_func_minimized : bool + Whether to minimize the objective function (implies maximize when ``False``; default value: ``True``). 
+ model_strategy : str + The ngen-cal model calibration strategy; one of : + 'uniform' : Each catchment shares the same parameter space, evaluates at one observable nexus + 'independent' : Each catchment upstream of observable nexus gets its own permutated parameter space, + evaluates at one observable nexus + 'explicit' : Only calibrates basins in the realization_config with a "calibration" definition and an + observable nexus + job_name : Optional[str] + Optional job name for the calibration run, which can be used by ngen-cal when generating files. + is_restart : bool + Whether this represents restarting a previous job; ``False`` by default. + + Keyword Args + ----------- + time_range : TimeRange + A definition of the time range for the configured execution of the ngen framework. + hydrofabric_uid : str + The unique ID of the applicable hydrofabric for modeling, which provides the outermost geospatial domain. + hydrofabric_data_id : str + A data identifier for the hydrofabric, for distinguishing between different hydrofabrics that cover the same + set of catchments and nexuses (i.e., the same sets of catchment and nexus ids). + catchments : Optional[Union[Set[str], List[str]]] + An optional collection of the catchment ids to narrow the geospatial domain, where the default of ``None`` + or an empty collection implies all catchments in the hydrofabric. + bmi_cfg_data_id : Optional[str] + The optional BMI init config ``data_id`` index, for identifying the particular BMI init config datasets + applicable to this request. + config_data_id : str + The config data id index, for identifying the particular configuration datasets applicable to this request. 
+ session_secret : str + The session secret for the right session when communicating with the MaaS request handler + """ + super(NgenCalibrationRequest, self).__init__(*args, **kwargs) + self.evaluation_time_range = evaluation_time_range + self.model_cal_params = model_cal_params + self.iterations = iterations + self.cal_strategy_type = cal_strategy_type + self.cal_strategy_algorithm = cal_strategy_algorithm + self.cal_strategy_objective_function = cal_strategy_objective_func + self.is_objective_func_minimized = is_objective_func_minimized + self.model_strategy = model_strategy + self.job_name = job_name + + self.is_restart = is_restart + + # TODO: may need to modify this to have (realization) config dataset start empty (at least optionally) and apply + + def to_dict(self) -> Dict[str, Union[str, Number, dict, list]]: + serial = super(NgenCalibrationRequest, self).to_dict() + serial[self._KEY_EVALUTATION_TIME] = self.evaluation_time_range.to_dict() + serial[self._KEY_MODEL_CAL_PARAMS] = self.model_cal_params + serial[self._KEY_CAL_STRATEGY_TYPE] = self.cal_strategy_type + serial[self._KEY_CAL_STRATEGY_ALGO] = self.cal_strategy_algorithm + serial[self._KEY_CAL_STRATEGY_OBJ_FUNC] = self.cal_strategy_objective_function + serial[self._KEY_IS_OBJ_FUNC_MIN] = self.is_objective_func_minimized + serial[self._KEY_ITERATIONS] = self.iterations + serial[self._KEY_JOB_NAME] = self.job_name + serial[self._KEY_MODEL_STRATEGY] = self.model_strategy + serial[self._KEY_IS_RESTART] = self.is_restart + return serial + + # TODO: This should likely be created or determined if it already exsits on the fly + # @property + # def data_requirements(self) -> List[DataRequirement]: + # """ + # List of all the explicit and implied data requirements for this request, as needed fo r creating a job object. + + # Returns + # ------- + # List[DataRequirement] + # List of all the explicit and implied data requirements for this request. 
+ # """ + # data_requirements = super().data_requirements + # return [self.calibration_cfg_data_requirement ,*data_requirements] + + +# TODO: aaraney. this looks unfinished +# class NgenCalibrationResponse(ExternalRequestResponse): +class NgenCalibrationResponse(ModelExecRequestResponse): + + response_to_type = NgenCalibrationRequest diff --git a/python/lib/communication/dmod/communication/maas_request/ngen/ngen_request.py b/python/lib/communication/dmod/communication/maas_request/ngen/ngen_request.py index 074045370..2ff4426b7 100644 --- a/python/lib/communication/dmod/communication/maas_request/ngen/ngen_request.py +++ b/python/lib/communication/dmod/communication/maas_request/ngen/ngen_request.py @@ -23,6 +23,23 @@ class NGENRequest(ModelExecRequest): model_name = "ngen" # FIXME case sentitivity """(:class:`str`) The name of the model to be used""" + @classmethod + def _additional_deserialized_args(cls, json_obj: dict) -> dict: + """ + Parse any additional, (sub)class-specific deserialization params. + + For the base type, any empty dict is returned. 
+ + Parameters + ---------- + json_obj + + Returns + ------- + dict + """ + return dict() + @classmethod def factory_init_from_deserialized_json( cls, json_obj: dict @@ -62,6 +79,11 @@ def factory_init_from_deserialized_json( "model" ]["partition_config_data_id"] + additional_kw_args = cls._additional_deserialized_args(json_obj) + + for key, val in optional_kwargs_w_defaults.items(): + additional_kw_args[key] = val + return cls( time_range=TimeRange.factory_init_from_deserialized_json( json_obj["model"]["time_range"] @@ -71,7 +93,7 @@ def factory_init_from_deserialized_json( config_data_id=json_obj["model"]["config_data_id"], bmi_cfg_data_id=json_obj["model"]["bmi_config_data_id"], session_secret=json_obj["session-secret"], - **optional_kwargs_w_defaults + **additional_kw_args ) except Exception as e: return None @@ -219,13 +241,15 @@ def data_requirements(self) -> List[DataRequirement]: List[DataRequirement] List of all the explicit and implied data requirements for this request. """ - return [ + requirements = [ self.bmi_cfg_data_requirement, self.forcing_data_requirement, self.hydrofabric_data_requirement, - self.partition_cfg_data_requirement, self.realization_cfg_data_requirement, ] + if self.use_parallel_ngen: + requirements.append(self.partition_cfg_data_requirement) + return requirements @property def bmi_config_data_id(self) -> str: @@ -291,6 +315,7 @@ def forcing_data_requirement(self) -> DataRequirement: if self._forcing_data_requirement is None: # TODO: going to need to address the CSV usage later forcing_domain = DataDomain( + # TODO: come back to this to change to other type data_format=DataFormat.AORC_CSV, continuous_restrictions=[self._time_range], discrete_restrictions=[self._gen_catchments_domain_restriction()], @@ -357,6 +382,54 @@ def hydrofabric_uid(self) -> str: """ return self._hydrofabric_uid + @property + def use_parallel_ngen(self) -> bool: + """ + Whether this request specifies to use the variant of the NextGen framework compiled for 
parallel execution. + + NextGen may be compiled to execute either serially or using parallelization. DMOD and its NextGen job workers + can now support either. This property indicates whether this request indicates that parallel execution should + be used. + + In the current implementation, this property is ``True`` IFF ::method:`use_serial_ngen` is ``False``. Note that + this will result in CPU counts of ``0`` or negative numbers, if they were to occur, also resulting in this + returning ``True``. + + Returns + ------- + bool + Whether this request specifies parallel NextGen execution for the job. + + See Also + ------- + use_serial_ngen + """ + return not self.use_serial_ngen + + @property + def use_serial_ngen(self) -> bool: + """ + Whether this request specifies to use the variant of the NextGen framework compiled for serial execution. + + NextGen may be compiled to execute either serially or using parallelization. DMOD and its NextGen job workers + can now support either. This property indicates whether this request indicates that serially execution should + be used. + + In the current implementation, this property is ``True`` IFF the request required a CPU count of exactly ``1``. + + Returns + ------- + bool + Whether this request specifies serial NextGen execution for the job. + + See Also + ------- + use_parallel_ngen + """ + return self.cpu_count == 1 + + + @property def output_formats(self) -> List[DataFormat]: """ @@ -387,16 +460,16 @@ def partition_cfg_data_id(self) -> Optional[str]: return self._part_config_data_id @property - def partition_cfg_data_requirement(self) -> DataRequirement: + def partition_cfg_data_requirement(self) -> Optional[DataRequirement]: """ A requirement object defining of the partitioning configuration data needed to execute this request. Returns ------- - DataRequirement - A requirement object defining of the partitioning configuration data needed to execute this request. 
+ Optional[DataRequirement] + Requirement object defining of the partitioning configuration data needed to execute this request. """ - if self._partition_cfg_data_requirement is None: + if self._partition_cfg_data_requirement is None and self.use_parallel_ngen: d_restricts = [] # Add restriction on hydrofabric diff --git a/python/lib/communication/dmod/communication/message.py b/python/lib/communication/dmod/communication/message.py index bad2e4869..88dcc2cdc 100644 --- a/python/lib/communication/dmod/communication/message.py +++ b/python/lib/communication/dmod/communication/message.py @@ -92,8 +92,8 @@ class AbstractInitRequest(Message, ABC): interactions. """ - def __int__(self, *args, **kwargs): - super(AbstractInitRequest, self).__int__(*args, **kwargs) + def __init__(self, *args, **kwargs): + super(AbstractInitRequest, self).__init__(*args, **kwargs) class Response(ResultIndicator, Message, ABC): diff --git a/python/lib/communication/dmod/communication/scheduler_request.py b/python/lib/communication/dmod/communication/scheduler_request.py index 790534475..9901c2015 100644 --- a/python/lib/communication/dmod/communication/scheduler_request.py +++ b/python/lib/communication/dmod/communication/scheduler_request.py @@ -1,10 +1,13 @@ from dmod.core.execution import AllocationParadigm from .maas_request import ModelExecRequest, ModelExecRequestResponse -from .message import AbstractInitRequest, MessageEventType, Response -from typing import Optional, Union +from .maas_request.dmod_job_request import DmodJobRequest +from .message import MessageEventType, Response +from typing import Optional, Union, List +from dmod.core.meta_data import DataRequirement, DataFormat -class SchedulerRequestMessage(AbstractInitRequest): + +class SchedulerRequestMessage(DmodJobRequest): event_type: MessageEventType = MessageEventType.SCHEDULER_REQUEST """ :class:`MessageEventType`: the event type for this message implementation """ @@ -60,7 +63,8 @@ def 
factory_init_from_deserialized_json(cls, json_obj: dict): # TODO: may need to generalize the underlying request to support, say, scheduling evaluation jobs def __init__(self, model_request: ModelExecRequest, user_id: str, cpus: Optional[int] = None, mem: Optional[int] = None, - allocation_paradigm: Optional[Union[str, AllocationParadigm]] = None): + allocation_paradigm: Optional[Union[str, AllocationParadigm]] = None, *args, **kwargs): + super(SchedulerRequestMessage, self).__init__(*args, *kwargs) self._model_request = model_request self._user_id = user_id self._cpus = cpus @@ -113,6 +117,10 @@ def cpus(self) -> int: """ return self.model_request.cpu_count if self._cpus is None else self._cpus + @property + def data_requirements(self) -> List[DataRequirement]: + return self.model_request.data_requirements + @property def memory(self) -> int: """ @@ -161,6 +169,10 @@ def nested_event(self) -> MessageEventType: """ return self.model_request.get_message_event_type() + @property + def output_formats(self) -> List[DataFormat]: + return self.model_request.output_formats + @property def user_id(self) -> str: """ diff --git a/python/lib/communication/dmod/test/test_ngen_request.py b/python/lib/communication/dmod/test/test_ngen_request.py index b26aee074..53528e086 100644 --- a/python/lib/communication/dmod/test/test_ngen_request.py +++ b/python/lib/communication/dmod/test/test_ngen_request.py @@ -2,7 +2,7 @@ import unittest from ..communication.maas_request import NGENRequest, NGENRequestResponse from ..test.test_ngen_request_response import TestNGENRequestResponse -from dmod.core.meta_data import TimeRange +from dmod.core.meta_data import DataFormat, TimeRange class TestNGENRequest(unittest.TestCase): @@ -95,6 +95,39 @@ def create_time_range(begin, end, var=None) -> TimeRange: bmi_cfg_data_id='02468', catchments=cat_ids_list)) + # Example 2 - like example 0, but with a CPU count of 1 (which should not require partitioning) + time_range = create_time_range('2022-01-01 
00:00:00', '2022-03-01 00:00:00') + cpu_count_ex_2 = 1 + self.time_ranges.append(time_range) + self.request_strings.append( + '{"model": {"allocation_paradigm": "SINGLE_NODE", "bmi_config_data_id": "02468", "config_data_id": "02468", ' + '"cpu_count": ' + str(cpu_count_ex_2) + ', "hydrofabric_data_id": "9876543210", ' + '"hydrofabric_uid": "0123456789", "name": "ngen", "time_range": ' + time_range.to_json() + '}, ' + '"session-secret": "f21f27ac3d443c0948aab924bddefc64891c455a756ca77a4d86ec2f697cd13c"}' + ) + self.request_jsons.append({ + 'model': { + 'name': 'ngen', + 'allocation_paradigm': 'SINGLE_NODE', + 'cpu_count': cpu_count_ex_2, + 'time_range': time_range.to_dict(), + 'hydrofabric_data_id': '9876543210', + 'hydrofabric_uid': '0123456789', + 'bmi_config_data_id': '02468', + 'config_data_id': '02468' + }, + 'session-secret': 'f21f27ac3d443c0948aab924bddefc64891c455a756ca77a4d86ec2f697cd13c' + }) + self.request_objs.append( + NGENRequest(session_secret='f21f27ac3d443c0948aab924bddefc64891c455a756ca77a4d86ec2f697cd13c', + cpu_count=cpu_count_ex_2, + allocation_paradigm='SINGLE_NODE', + time_range=time_range, + hydrofabric_uid="0123456789", + hydrofabric_data_id='9876543210', + bmi_cfg_data_id='02468', + config_data_id='02468')) + def test_factory_init_from_deserialized_json_0_a(self): """ Assert that :meth:`NGENRequest.factory_init_from_deserialized_json` produces an equal object to the @@ -142,6 +175,28 @@ def test_factory_init_correct_response_subtype_1_a(self): obj = NGENRequest.factory_init_correct_response_subtype(json_obj) self.assertEqual(obj.__class__, NGENRequestResponse) + def test_data_requirements_0_a(self): + example_index = 0 + obj = self.request_objs[example_index] + self.assertIsNotNone(obj.partition_cfg_data_requirement) + + def test_data_requirements_0_b(self): + example_index = 0 + obj = self.request_objs[example_index] + partition_reqs = [r for r in obj.data_requirements if r.domain.data_format == DataFormat.NGEN_PARTITION_CONFIG] + 
self.assertEqual(len(partition_reqs), 1) + + def test_data_requirements_2_a(self): + example_index = 2 + obj = self.request_objs[example_index] + self.assertIsNone(obj.partition_cfg_data_requirement) + + def test_data_requirements_2_b(self): + example_index = 2 + obj = self.request_objs[example_index] + partition_reqs = [r for r in obj.data_requirements if r.domain.data_format == DataFormat.NGEN_PARTITION_CONFIG] + self.assertEqual(len(partition_reqs), 0) + def test_to_dict_0_a(self): """ Assert that the example object at the 0th index serializes to a dict as expected by comparing to the pre-set diff --git a/python/lib/core/dmod/core/_version.py b/python/lib/core/dmod/core/_version.py index b703f5c96..9bdd4d277 100644 --- a/python/lib/core/dmod/core/_version.py +++ b/python/lib/core/dmod/core/_version.py @@ -1 +1 @@ -__version__ = '0.4.1' \ No newline at end of file +__version__ = '0.5.0' \ No newline at end of file diff --git a/python/lib/core/dmod/core/dataset.py b/python/lib/core/dmod/core/dataset.py index b1e5ca2aa..bbb9701c5 100644 --- a/python/lib/core/dmod/core/dataset.py +++ b/python/lib/core/dmod/core/dataset.py @@ -819,7 +819,8 @@ def filter(self, base_dataset: Dataset, restrictions: List[Union[ContinuousRestr pass @abstractmethod - def get_data(self, dataset_name: str, item_name: str, **kwargs) -> Union[bytes, Any]: + def get_data(self, dataset_name: str, item_name: str, offset: Optional[int] = None, length: Optional[int] = None, + **kwargs) -> Union[bytes, Any]: """ Get data from this dataset. @@ -832,6 +833,10 @@ def get_data(self, dataset_name: str, item_name: str, **kwargs) -> Union[bytes, The dataset from which to get data. item_name : str The name of the object from which to get data. + offset : Optional[int] + Optional start byte position of object data. + length : Optional[int] + Optional number of bytes of object data from offset. kwargs Implementation-specific params for representing what data to get and how to get and deliver it. 
@@ -881,6 +886,26 @@ def link_user(self, user: DatasetUser, dataset: Dataset) -> bool: self._dataset_users[dataset.name].add(user.uuid) return True + @abstractmethod + def get_file_stat(self, dataset_name: str, file_name, **kwargs) -> Dict[str, Any]: + """ + Get the meta information about the given file. + + Parameters + ---------- + dataset_name : str + The name of the dataset containing the file of interest. + file_name : str + The name of the file of interest. + kwargs + + Returns + ------- + dict + Meta information about the given file, in dictionary form. + """ + pass + @abstractmethod def list_files(self, dataset_name: str, **kwargs) -> List[str]: """ diff --git a/python/lib/core/dmod/core/meta_data.py b/python/lib/core/dmod/core/meta_data.py index 9c74c9fe0..8a4e4173e 100644 --- a/python/lib/core/dmod/core/meta_data.py +++ b/python/lib/core/dmod/core/meta_data.py @@ -128,6 +128,52 @@ class DataFormat(Enum): # TODO: consider whether something to indicate the time step size is necessary # TODO: need format specifically for Nextgen model output (i.e., for evaluations) + @classmethod + def can_format_fulfill(cls, needed: 'DataFormat', alternate: 'DataFormat') -> bool: + """ + Test whether data in an alternate format is capable of satisfying requirements of some other format. + + This function indicates whether data in one format (the alternate format) is compatible with requirements + specified using a different format (the needed format). It is an indication of whether data is **potentially** + capable of satisfying a requirement - even if the data formats of the two are not the same - due to the two + formats being sufficiently similar. + + For example, the NextGen framework can support forcings in either CSV or NetCDF formats, represented as + ``AORC_CSV`` and ``NETCDF_FORCING_CANONICAL`` respectively. 
A job to execute NextGen would include a forcing + ::class:`DataRequirement` associated (albeit indirectly) with a particular format, with that being one of the + aforementioned values. However, even if the ``AORC_CSV`` data format was in the requirement, data in the + ``NETCDF_FORCING_CANONICAL`` format would be perfectly satisfactory (assuming it otherwise provided what the + job needed). + + Note that the following **is not guaranteed** for all values of ``f_1`` and ``f_2`` (though it will often be the + case): + + ``can_format_fulfill(needed=f_1, alternate=f_2) == can_format_fulfill(needed=f_2, alternate=f_1)`` + + It is guaranteed that ``can_format_fulfill(needed=f_1, alternate=f_1)`` is ``True``. + + Parameters + ---------- + needed : DataFormat + The format defined by some requirement. + alternate : DataFormat + An alternate format for data. + + Returns + ------- + bool + Whether the alternate format is compatible with the needed format. + """ + # Always return True for when the params are the same format + if needed == alternate: + return True + # For these forcing formats, they will all be compatible with each other + compatible_forcing_formats = {cls.AORC_CSV, cls.NETCDF_FORCING_CANONICAL, cls.NETCDF_AORC_DEFAULT} + if needed in compatible_forcing_formats and alternate in compatible_forcing_formats: + return True + # Anything else, they are not compatible + return False + @classmethod + def get_for_name(cls, name_str: str) -> Optional['DataFormat']: + cleaned_up_str = name_str.strip().upper() @@ -601,7 +647,7 @@ def contains(self, other: Union[ContinuousRestriction, DiscreteRestriction, 'Dat return self._extends_continuous_restriction(other) elif isinstance(other, DiscreteRestriction): return self._extends_discrete_restriction(other) - elif self.data_format != other.data_format: + elif not DataFormat.can_format_fulfill(needed=other.data_format, alternate=self.data_format): return False else: for index in other.continuous_restrictions: diff --git 
a/python/lib/externalrequests/dmod/externalrequests/__init__.py b/python/lib/externalrequests/dmod/externalrequests/__init__.py index fea23d035..9778e48c0 100644 --- a/python/lib/externalrequests/dmod/externalrequests/__init__.py +++ b/python/lib/externalrequests/dmod/externalrequests/__init__.py @@ -1,8 +1,8 @@ from .auth_handler import AuthHandler from .maas_request_handlers import DatasetRequestHandler, MaaSRequestHandler, PartitionRequestHandler -from .model_exec_request_handler import ModelExecRequestHandler +from .model_exec_request_handler import ModelExecRequestHandler, NgenCalibrationRequestHandler from .evaluation_request_handler import EvaluationRequestHandler from .evaluation_request_handler import LaunchEvaluationMessage from .evaluation_request_handler import OpenEvaluationMessage -name = 'externalrequests' \ No newline at end of file +name = 'externalrequests' diff --git a/python/lib/externalrequests/dmod/externalrequests/maas_request_handlers.py b/python/lib/externalrequests/dmod/externalrequests/maas_request_handlers.py index dff0b45ac..0235dae39 100644 --- a/python/lib/externalrequests/dmod/externalrequests/maas_request_handlers.py +++ b/python/lib/externalrequests/dmod/externalrequests/maas_request_handlers.py @@ -31,7 +31,7 @@ class MaaSRequestHandler(AbstractRequestHandler, ABC): """ def __init__(self, session_manager: SessionManager, authorizer: Authorizer, service_host: str, service_port: int, - service_ssl_dir: Path): + service_ssl_dir: Path, *args, **kwargs): self._session_manager = session_manager self._authorizer = authorizer self._service_host = service_host @@ -155,13 +155,23 @@ def service_url(self) -> str: class PartitionRequestHandler(MaaSRequestHandler): - def __init__(self, session_manager: SessionManager, authorizer: Authorizer, partition_service_host: str, - partition_service_port: int, partition_service_ssl_dir: Path): - super(PartitionRequestHandler, self).__init__(session_manager=session_manager, - authorizer=authorizer, - 
service_host=partition_service_host, - service_port=partition_service_port, - service_ssl_dir=partition_service_ssl_dir) + def __init__(self, *args, **kwargs): + """ + + Parameters + ---------- + args + kwargs + + Other Parameters + ---------- + session_manager + authorizer + service_host + service_port + service_ssl_dir + """ + super(PartitionRequestHandler, self).__init__(*args, **kwargs) # TODO: implement properly self._default_required_access_type = None @@ -196,7 +206,7 @@ def service_client(self) -> PartitionerServiceClient: return self._service_client async def handle_request(self, request: PartitionRequest, **kwargs) -> PartitionResponse: - session, is_authorized, reason, msg = self.get_authorized_session(request) + session, is_authorized, reason, msg = await self.get_authorized_session(request) if not is_authorized: return PartitionResponse(success=False, reason=reason.name, message=msg) # In this case, we actually can pass the request as-is straight through (i.e., after confirming authorization) @@ -209,13 +219,24 @@ async def handle_request(self, request: PartitionRequest, **kwargs) -> Partition class DatasetRequestHandler(MaaSRequestHandler): - def __init__(self, session_manager: SessionManager, authorizer: Authorizer, data_service_host: str, - data_service_port: int, data_service_ssl_dir: Path): - super(DatasetRequestHandler, self).__init__(session_manager=session_manager, - authorizer=authorizer, - service_host=data_service_host, - service_port=data_service_port, - service_ssl_dir=data_service_ssl_dir) + def __init__(self, *args, **kwargs): + """ + + Parameters + ---------- + args + kwargs + + Other Parameters + ---------- + session_manager + authorizer + service_host + service_port + service_ssl_dir + + """ + super(DatasetRequestHandler, self).__init__(*args, **kwargs) # TODO: implement properly self._default_required_access_type = None @@ -291,25 +312,28 @@ async def handle_request(self, request: MaaSDatasetManagementMessage, **kwargs) session, 
is_authorized, reason, msg = await self.get_authorized_session(request) if not is_authorized: return MaaSDatasetManagementResponse(success=False, reason=reason.name, message=msg) - # In this case, we actually can pass the request as-is straight through (i.e., after confirming authorization) - async with self.service_client as client: - # Have to handle these two slightly differently, since multiple message will be going over the websocket - if request.management_action == ManagementAction.REQUEST_DATA: - await client.connection.send(str(request)) - mgmt_response = await self._handle_data_download(client_websocket=kwargs['upstream_websocket'], - service_websocket=client.connection) - elif request.management_action == ManagementAction.ADD_DATA: - await client.connection.send(str(request)) - mgmt_response = await self._handle_data_upload(client_websocket=kwargs['upstream_websocket'], - service_websocket=client.connection) - else: - mgmt_response = await client.async_make_request(request) - logging.debug("************* {} received response:\n{}".format(self.__class__.__name__, str(mgmt_response))) - # Likewise, can just send back the response from the internal service client - return MaaSDatasetManagementResponse.factory_create(mgmt_response) + try: + # In this case, we actually can pass the request as-is straight through (i.e., after confirming authorization) + async with self.service_client as client: + # Have to handle these two slightly differently, since multiple message will be going over the websocket + if request.management_action == ManagementAction.REQUEST_DATA: + await client.connection.send(str(request)) + mgmt_response = await self._handle_data_download(client_websocket=kwargs['upstream_websocket'], + service_websocket=client.connection) + elif request.management_action == ManagementAction.ADD_DATA: + await client.connection.send(str(request)) + mgmt_response = await self._handle_data_upload(client_websocket=kwargs['upstream_websocket'], + 
service_websocket=client.connection) + else: + mgmt_response = await client.async_make_request(request) + logging.debug("************* {} received response:\n{}".format(self.__class__.__name__, str(mgmt_response))) + # Likewise, can just send back the response from the internal service client + return MaaSDatasetManagementResponse.factory_create(mgmt_response) + except Exception as e: + raise e @property def service_client(self) -> DataServiceClient: if self._service_client is None: - self._service_client = DataServiceClient(self.service_url, self.service_ssl_dir) + self._service_client = DataServiceClient(endpoint_uri=self.service_url, ssl_directory=self.service_ssl_dir) return self._service_client diff --git a/python/lib/externalrequests/dmod/externalrequests/model_exec_request_handler.py b/python/lib/externalrequests/dmod/externalrequests/model_exec_request_handler.py index b827ca9c5..bc0a641b3 100644 --- a/python/lib/externalrequests/dmod/externalrequests/model_exec_request_handler.py +++ b/python/lib/externalrequests/dmod/externalrequests/model_exec_request_handler.py @@ -3,8 +3,8 @@ from pathlib import Path from dmod.access import Authorizer from dmod.communication import FullAuthSession, InitRequestResponseReason, ModelExecRequest, ModelExecRequestResponse, \ - NGENRequest, NGENRequestResponse, NWMRequest, NWMRequestResponse, SchedulerClient, SchedulerRequestMessage, \ - SchedulerRequestResponse, SessionManager + NGENRequest, NGENRequestResponse, NgenCalibrationRequest, NgenCalibrationResponse, NWMRequest, NWMRequestResponse, \ + SchedulerClient, SchedulerRequestMessage, SchedulerRequestResponse, SessionManager from .maas_request_handlers import MaaSRequestHandler from typing import Optional @@ -17,9 +17,24 @@ class ModelExecRequestHandler(MaaSRequestHandler): - def __init__(self, session_manager: SessionManager, authorizer: Authorizer, scheduler_host: str, - scheduler_port: int, scheduler_ssl_dir: Path): - super().__init__(session_manager, authorizer, 
scheduler_host, scheduler_port, scheduler_ssl_dir) + def __init__(self, *args, **kwargs): + """ + + Parameters + ---------- + args + kwargs + + Other Parameters + ---------- + session_manager + authorizer + service_host + service_port + service_ssl_dir + + """ + super().__init__(*args, **kwargs) # TODO: implement properly self._default_required_access_type = None @@ -104,25 +119,47 @@ async def determine_required_access_types(self, request: ModelExecRequest, user) # FIXME: for now, just use the default type (which happens to be "everything") return self._default_required_access_type, + async def _preprocess_request(self, request: ModelExecRequest): + """ + Execute any appropriate preprocessing steps for this request before passing it to the scheduler. + + The default implementation does not perform any actions. + + Parameters + ---------- + request + + Raises + ------- + RuntimeError + """ + pass + async def handle_request(self, request: ModelExecRequest, **kwargs) -> ModelExecRequestResponse: """ - Handle the given request for a new NWM job execution and return the resulting response. + Handle the given request for a new job execution and return the resulting response. Parameters ---------- request: ModelExecRequest - A ``ModelExecRequest`` message instance with details of the job being requested. + A ::class:`ModelExecRequest` (or subclass) instance with details of the job being requested. Returns ------- response: ModelExecRequestResponse - An appropriate ``NWMRequestResponse`` object. + An appropriate response object derived from ::class:`ModelExecRequestResponse`. 
""" session, is_authorized, reason, msg = await self.get_authorized_session(request) if not is_authorized: return self._generate_request_response(exec_request=request, success=False, reason=reason.name, message=msg, scheduler_response=None) + try: + await self._preprocess_request(request=request) + except RuntimeError as e: + return self._generate_request_response(exec_request=request, success=False, reason='Preprocessing Failure', + message=str(e), scheduler_response=None) + # The context manager manages a SINGLE connection to the scheduler server # Adhoc calls to the scheduler can be made for this connection via the scheduler_client # These adhoc calls will use the SAME connection the context was initialized with @@ -146,5 +183,83 @@ async def handle_request(self, request: ModelExecRequest, **kwargs) -> ModelExec @property def service_client(self) -> SchedulerClient: if self._scheduler_client is None: - self._scheduler_client = SchedulerClient(self.service_url, self.service_ssl_dir) + self._scheduler_client = SchedulerClient(ssl_directory=self.service_ssl_dir, endpoint_uri=self.service_url) return self._scheduler_client + + +class NgenCalibrationRequestHandler(ModelExecRequestHandler): + """ + An extension of ::class:`ModelExecRequestHandler` specifically for Nextgen calibration requests. + """ + + def __init__(self, *args, **kwargs): + """ + + Parameters + ---------- + args + kwargs + + Other Parameters + ---------- + session_manager + authorizer + service_host + service_port + service_ssl_dir + + """ + super().__init__(*args, **kwargs) + + # TODO: implement properly (yes, manually doing this again here) + self._default_cal_required_access_type = None + + def _generate_request_response(self, exec_request: NgenCalibrationRequest, success: bool, reason: str, message: str, + scheduler_response: Optional[SchedulerRequestResponse]) -> NgenCalibrationResponse: + """ + Generate a response message of the appropriate type for the given model exec request message. 
+ + Parameters + ---------- + exec_request : NgenCalibrationRequest + The originating ::class:`NgenCalibrationRequest` message requiring a response. + success : bool + Whether the request was successful. + reason : string + A summary of why the request was successful or not. + message : string + A more detailed description of why the request was successful or not. + scheduler_response : Optional[SchedulerRequestResponse] + Response message from the scheduler when processing the exec request resulted in a scheduler request. + Returns + ------- + NgenCalibrationResponse + A generated calibration response object. + """ + if not isinstance(exec_request, NgenCalibrationRequest): + msg = "{} cannot generate calibration response to unexpected {}" + raise RuntimeError(msg.format(self.__class__.__name__, exec_request.__class__.__name__)) + else: + return NgenCalibrationResponse(success=success, reason=reason, message=message, + scheduler_response=scheduler_response) + + async def determine_required_access_types(self, request: NgenCalibrationRequest, user) -> tuple: + """ + Determine what access is required for this request from this user to be accepted. + + Determine the necessary access types for which the given user needs to be authorized in order for the user to + be allowed to submit this request, in the context of the current state of the system. + + Parameters + ---------- + request + user + + Returns + ------- + A tuple of required access types required for authorization for the given request at this time. + """ + # TODO: implement; in particular, consider things like current job count for user, and whether different access + # types are required at different counts. 
+ # FIXME: for now, just use the default type (which happens to be "everything") + return self._default_cal_required_access_type, diff --git a/python/lib/externalrequests/dmod/test/it_model_exec_request_handler.py b/python/lib/externalrequests/dmod/test/it_model_exec_request_handler.py index 532d8f1c4..b5994b202 100644 --- a/python/lib/externalrequests/dmod/test/it_model_exec_request_handler.py +++ b/python/lib/externalrequests/dmod/test/it_model_exec_request_handler.py @@ -212,9 +212,9 @@ def setUp(self) -> None: #self._handler = None self.handler = ModelExecRequestHandler(session_manager=self.session_manager, authorizer=self.success_authorizer, - scheduler_host=self.scheduler_host, - scheduler_port=self.scheduler_port, - scheduler_ssl_dir=self.scheduler_ssl_dir) + service_host=self.scheduler_host, + service_port=self.scheduler_port, + service_ssl_dir=self.scheduler_ssl_dir) def tearDown(self) -> None: pass diff --git a/python/lib/modeldata/dmod/modeldata/data/object_store_manager.py b/python/lib/modeldata/dmod/modeldata/data/object_store_manager.py index dffb5a8cf..551cb2aa1 100644 --- a/python/lib/modeldata/dmod/modeldata/data/object_store_manager.py +++ b/python/lib/modeldata/dmod/modeldata/data/object_store_manager.py @@ -10,7 +10,7 @@ from minio.api import ObjectWriteResult from minio.deleteobjects import DeleteObject from pathlib import Path -from typing import Dict, List, Optional, Set, Tuple +from typing import Any, Dict, List, Optional, Set, Tuple from uuid import UUID @@ -59,8 +59,13 @@ def __init__(self, obj_store_host_str: str, access_key: Optional[str] = None, se # For any buckets that have the standard serialized object (i.e., were for datasets previously), reload them for bucket_name in self.list_buckets(): serialized_item = self._gen_dataset_serial_obj_name(bucket_name) - if serialized_item in [o.object_name for o in self._client.list_objects(bucket_name)]: + try: self.reload(reload_from=bucket_name, serialized_item=serialized_item) + except 
minio.error.S3Error as e: + # Continue with looping through buckets and initializing if we get this particular exception and + # error code, but otherwise pass through the exception + if e.code != "NoSuchKey": + raise e except Exception as e: self._errors.append(e) # TODO: consider if we should not re-throw this (which would likely force us to ensure users checked this) @@ -418,7 +423,32 @@ def delete_data(self, dataset_name: str, **kwargs) -> bool: self._errors.extend(error_list) return False - def get_data(self, dataset_name: str, item_name: str, **kwargs) -> bytes: + def get_file_stat(self, dataset_name: str, file_name, **kwargs) -> Dict[str, Any]: + """ + Get the meta information about the given file. + + Parameters + ---------- + dataset_name : str + The name of the dataset containing the file of interest. + file_name : str + The name of the file of interest. + kwargs + + Returns + ------- + dict + Meta information about the given file, in dictionary form. + """ + obj_stat = self._client.stat_object(dataset_name, file_name) + as_dict = dict() + as_dict["name"] = obj_stat.object_name + as_dict["size"] = obj_stat.size + # TODO: get more of this if worth it + return as_dict + + def get_data(self, dataset_name: str, item_name: str, offset: Optional[int] = None, length: Optional[int] = None, + **kwargs) -> bytes: """ Get data from this dataset. @@ -432,15 +462,12 @@ def get_data(self, dataset_name: str, item_name: str, **kwargs) -> bytes: The name of the dataset (i.e., bucket) from which to get data. item_name : str The name of the object from which to get data. - kwargs - Implementation-specific params for representing what data to get and how to get and deliver it. - - Keyword Args - ------- - offset : int + offset : Optional[int] Optional start byte position of object data. - length : int + length : Optional[int] Optional number of bytes of object data from offset. 
+ kwargs + Implementation-specific params for representing what data to get and how to get and deliver it. Returns ------- @@ -450,8 +477,10 @@ def get_data(self, dataset_name: str, item_name: str, **kwargs) -> bytes: if item_name not in self.list_files(dataset_name): raise RuntimeError('Cannot get data for non-existing {} file in {} dataset'.format(item_name, dataset_name)) optional_params = dict() - for key in [k for k in self.data_chunking_params if k in kwargs]: - optional_params[key] = kwargs[key] + if offset is not None: + optional_params['offset'] = offset + if length is not None: + optional_params['length'] = length response_object = self._client.get_object(bucket_name=dataset_name, object_name=item_name, **optional_params) return response_object.data @@ -552,12 +581,14 @@ def reload(self, reload_from: str, serialized_item: Optional[str] = None) -> Dat if serialized_item is None: serialized_item = self._gen_dataset_serial_obj_name(reload_from) + response_obj = None try: response_obj = self._client.get_object(bucket_name=reload_from, object_name=serialized_item) response_data = json.loads(response_obj.data.decode()) finally: - response_obj.close() - response_obj.release_conn() + if response_obj is not None: + response_obj.close() + response_obj.release_conn() # If we can safely infer it, make sure the "type" key is set in cases when it is missing if len(self.supported_dataset_types) == 1 and Dataset._KEY_TYPE not in response_data: diff --git a/python/lib/modeldata/dmod/modeldata/hydrofabric/hydrofabric.py b/python/lib/modeldata/dmod/modeldata/hydrofabric/hydrofabric.py index 5f086684b..fc33fc612 100644 --- a/python/lib/modeldata/dmod/modeldata/hydrofabric/hydrofabric.py +++ b/python/lib/modeldata/dmod/modeldata/hydrofabric/hydrofabric.py @@ -7,6 +7,7 @@ from pathlib import Path from typing import Any, Callable, Dict, FrozenSet, List, Optional, Set, Tuple, Union from ..subset import SubsetDefinition +from dmod.core.meta_data import DataDomain, DataFormat, 
DiscreteRestriction, StandardDatasetIndex class Hydrofabric(ABC): @@ -222,6 +223,23 @@ def _get_string_for_hashing(self) -> str: ",".join(sorted_nex_ids), ",".join(self._get_link_representations())) + @abstractmethod + def generate_domain(self, dataset_name: str) -> DataDomain: + """ + Automatically assemble an appropriate ::class:`DataDomain` for a ::class:`Dataset` containing this hydrofabric. + + Parameters + ---------- + dataset_name : str + A (potentially hypothetical) name for the dataset, which is used as the domain's ``DATA_ID``. + + Returns + ------- + DataDomain + An appropriate ::class:`DataDomain` for a ::class:`Dataset` containing this hydrofabric. + """ + pass + @abstractmethod def get_all_catchment_ids(self) -> Tuple[str, ...]: """ @@ -847,7 +865,7 @@ def __init__(self, base: GeoJsonHydrofabricReader, subset: SubsetDefinition): self._subset = subset -class MappedGraphHydrofabric(Hydrofabric): +class MappedGraphHydrofabric(Hydrofabric, ABC): """ Subtype of ::class:`Hydrofabric` created from an object graph stored as a dictionary. """ @@ -1171,6 +1189,27 @@ def __init__(self, geojson_reader: GeoJsonHydrofabricReader): """ super(GeoJsonHydrofabric, self).__init__(geojson_reader.hydrofabric_graph, geojson_reader.roots, geojson_reader) + def generate_domain(self, dataset_name: str) -> DataDomain: + """ + Automatically assemble an appropriate ::class:`DataDomain` for a ::class:`Dataset` containing this hydrofabric. + + Parameters + ---------- + dataset_name : str + A (potentially hypothetical) name for the dataset, which is used as the domain's ``DATA_ID``. + + Returns + ------- + DataDomain + An appropriate ::class:`DataDomain` for a ::class:`Dataset` containing this hydrofabric. 
+ """ + d_restricts = [DiscreteRestriction(variable=StandardDatasetIndex.CATCHMENT_ID, + values=list(self.get_all_catchment_ids())), + DiscreteRestriction(variable=StandardDatasetIndex.DATA_ID, values=[dataset_name]), + DiscreteRestriction(variable=StandardDatasetIndex.HYDROFABRIC_ID, values=[self.uid])] + + return DataDomain(data_format=DataFormat.NGEN_GEOJSON_HYDROFABRIC, discrete_restrictions=d_restricts) + def get_subset_hydrofabric(self, subset: SubsetDefinition) -> 'GeoJsonHydrofabric': """ Derive a hydrofabric object from this one with only entities included in a given subset. diff --git a/python/lib/modeldata/dmod/modeldata/subset/subset_handler.py b/python/lib/modeldata/dmod/modeldata/subset/subset_handler.py index a751768f2..6f82e9ead 100644 --- a/python/lib/modeldata/dmod/modeldata/subset/subset_handler.py +++ b/python/lib/modeldata/dmod/modeldata/subset/subset_handler.py @@ -1,8 +1,11 @@ from abc import ABC, abstractmethod +import geopandas as gpd +import pandas as pd from hypy import Catchment, Nexus from queue import Queue from typing import Collection, Optional, Set, Tuple, Union from .subset_definition import SubsetDefinition +from pathlib import Path from ..hydrofabric import Hydrofabric, GeoJsonHydrofabricReader, GeoJsonHydrofabric @@ -137,10 +140,14 @@ def invalid_reason(self, subset: SubsetDefinition) -> Optional[str]: class SubsetHandler: @classmethod - def factory_create_from_geojson(cls, catchment_data, nexus_data, cross_walk, - validator: Optional[SubsetValidator] = None) -> 'SubsetHandler': - hydrofabric = GeoJsonHydrofabric(GeoJsonHydrofabricReader(catchment_data, nexus_data, cross_walk)) - return cls(hydrofabric=hydrofabric, validator=validator) + def factory_create_from_geojson(cls, catchment_data: Union[str, Path, gpd.GeoDataFrame], + nexus_data: Union[str, Path, gpd.GeoDataFrame], + cross_walk: Union[str, Path, pd.DataFrame], + validator: Optional[SubsetValidator] = None) -> 'GeoJsonBackedSubsetHandler': + #subtype = [sc for sc in 
cls.__subclasses__() if sc.__name__ == 'GeoJsonBackedSubsetHandler'][0] + #return subtype(catchment_data=catchment_data, nexus_data=nexus_data, cross_walk=cross_walk, validator=validator) + return GeoJsonBackedSubsetHandler(catchment_data=catchment_data, nexus_data=nexus_data, cross_walk=cross_walk, + validator=validator) def __init__(self, hydrofabric: Hydrofabric, validator: Optional[SubsetValidator] = None): """ @@ -324,3 +331,46 @@ def validate(self, subset: SubsetDefinition) -> Tuple[bool, Optional[str]]: description = self._validator.invalid_reason(subset) return description is None, description + +class GeoJsonBackedSubsetHandler(SubsetHandler): + def __init__(self, + catchment_data: Union[str, Path, gpd.GeoDataFrame], + nexus_data: Union[str, Path, gpd.GeoDataFrame], + cross_walk: Union[str, Path, pd.DataFrame], + validator: Optional[SubsetValidator] = None): + self._reader = GeoJsonHydrofabricReader(catchment_data, nexus_data, cross_walk) + super(GeoJsonBackedSubsetHandler, self).__init__(hydrofabric=GeoJsonHydrofabric(self._reader), + validator=validator) + + def get_geojson_for_subset(self, feature_type: str, subset: SubsetDefinition) -> dict: + feature_type = feature_type.strip().lower() + #if feature_type not in ('catchment', 'nexus', 'flowpath'): + if feature_type not in ('catchment', 'nexus'): + feature_type = 'catchment' + + if feature_type == 'catchment': + return self._reader.catchment_geodataframe.loc[subset.catchment_ids]._to_geo() + elif feature_type == 'nexus': + return self._reader.nexus_geodataframe.loc[subset.nexus_ids]._to_geo() + else: + msg = 'Unsupported geojson subset feature type {} passed to {}' + raise RuntimeError(msg.format(feature_type, self.__class__.__name__)) + + def get_geodataframe_for_bounds(self, feature_type: str, min_x: float, min_y: float, max_x: float, + max_y: float) -> gpd.GeoDataFrame: + if feature_type not in ('catchment', 'nexus'): + feature_type = 'catchment' + + if feature_type == 'catchment': + src_features = 
self._reader.catchment_geodataframe + elif feature_type == 'nexus': + src_features = self._reader.nexus_geodataframe + else: + msg = 'Unsupported geojson subset feature type {} passed to {}' + raise RuntimeError(msg.format(feature_type, self.__class__.__name__)) + + return src_features.cx[min_x:max_x, min_y:max_y] + + def get_geojson_for_bounds(self, feature_type: str, min_x: float, min_y: float, max_x: float, + max_y: float) -> dict: + return self.get_geodataframe_for_bounds(feature_type, min_x, min_y, max_x, max_y)._to_geo() diff --git a/python/lib/modeldata/dmod/test/test_simple_hydrofabric_subset.py b/python/lib/modeldata/dmod/test/test_simple_hydrofabric_subset.py index dfedce0c7..0e516600a 100644 --- a/python/lib/modeldata/dmod/test/test_simple_hydrofabric_subset.py +++ b/python/lib/modeldata/dmod/test/test_simple_hydrofabric_subset.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import Optional, Union from ..modeldata.subset import SimpleHydrofabricSubset, SubsetHandler -from ..modeldata.hydrofabric import GeoJsonHydrofabricReader, MappedGraphHydrofabric +from ..modeldata.hydrofabric import GeoJsonHydrofabricReader, GeoJsonHydrofabric class TestSimpleHydrofabricSubset(unittest.TestCase): @@ -53,7 +53,7 @@ def setUp(self) -> None: geojson_reader = GeoJsonHydrofabricReader(catchment_data=catchment_geojson, nexus_data=nexus_geojson, cross_walk=crosswalk_json) - self.hydrofabric = MappedGraphHydrofabric(geojson_reader.hydrofabric_graph, geojson_reader.roots, geojson_reader) + self.hydrofabric = GeoJsonHydrofabric(geojson_reader) self.subset_handler = SubsetHandler(self.hydrofabric) self.subset_examples = list() diff --git a/python/lib/modeldata/dmod/test/test_subset_handler.py b/python/lib/modeldata/dmod/test/test_subset_handler.py index d2e87e05b..67caa52b1 100644 --- a/python/lib/modeldata/dmod/test/test_subset_handler.py +++ b/python/lib/modeldata/dmod/test/test_subset_handler.py @@ -4,6 +4,7 @@ from pathlib import Path from typing import Dict, 
Optional, Union from ..modeldata.subset import SubsetHandler +from ..modeldata.subset.subset_handler import GeoJsonBackedSubsetHandler class TestSubsetHandler(unittest.TestCase): @@ -73,13 +74,13 @@ def tearDown(self) -> None: pass # Test that the function can initialize a new subset handler via the GeoJSON factory method - def test_factory_create_from_geojson_1_a(self): + def test_geojson_init_1_a(self): ex_ind = 1 cf = str(self.hf_examples[ex_ind][self.CAT_KEY]) nf = str(self.hf_examples[ex_ind][self.NEX_KEY]) xf = str(self.hf_examples[ex_ind][self.CROSS_KEY]) - handler = SubsetHandler.factory_create_from_geojson(catchment_data=cf, nexus_data=nf, cross_walk=xf) + handler = GeoJsonBackedSubsetHandler(catchment_data=cf, nexus_data=nf, cross_walk=xf) self.assertIsInstance(handler, SubsetHandler) # Test that catchment can be retrieved by id @@ -91,7 +92,7 @@ def test_get_catchment_by_id_1_a(self): ex_cat_id = 'cat-67' - handler = SubsetHandler.factory_create_from_geojson(catchment_data=cf, nexus_data=nf, cross_walk=xf) + handler = GeoJsonBackedSubsetHandler(catchment_data=cf, nexus_data=nf, cross_walk=xf) catchment = handler.get_catchment_by_id(ex_cat_id) self.assertIsInstance(catchment, Catchment) @@ -105,7 +106,7 @@ def test_get_catchment_by_id_1_b(self): ex_cat_id = 'cat-67' - handler = SubsetHandler.factory_create_from_geojson(catchment_data=cf, nexus_data=nf, cross_walk=xf) + handler = GeoJsonBackedSubsetHandler(catchment_data=cf, nexus_data=nf, cross_walk=xf) catchment = handler.get_catchment_by_id(ex_cat_id) self.assertEqual(catchment.id, ex_cat_id) @@ -119,7 +120,7 @@ def test_get_catchment_by_id_2_a(self): ex_cat_id = 'cat-27' - handler = SubsetHandler.factory_create_from_geojson(catchment_data=cf, nexus_data=nf, cross_walk=xf) + handler = GeoJsonBackedSubsetHandler(catchment_data=cf, nexus_data=nf, cross_walk=xf) catchment = handler.get_catchment_by_id(ex_cat_id) self.assertIsInstance(catchment, Catchment) @@ -133,7 +134,7 @@ def 
test_get_catchment_by_id_2_b(self): ex_cat_id = 'cat-27' - handler = SubsetHandler.factory_create_from_geojson(catchment_data=cf, nexus_data=nf, cross_walk=xf) + handler = GeoJsonBackedSubsetHandler(catchment_data=cf, nexus_data=nf, cross_walk=xf) catchment = handler.get_catchment_by_id(ex_cat_id) self.assertEqual(catchment.id, ex_cat_id) diff --git a/python/lib/modeldata/setup.py b/python/lib/modeldata/setup.py index 7c389a0e7..0a3e64ac4 100644 --- a/python/lib/modeldata/setup.py +++ b/python/lib/modeldata/setup.py @@ -20,7 +20,7 @@ author_email='', url='', license='', - install_requires=['numpy>=1.20.1', 'pandas', 'geopandas', 'dmod-communication>=0.4.2', 'dmod-core>=0.3.0', 'minio', + install_requires=['numpy>=1.20.1', 'pandas', 'geopandas', 'dmod-communication>=0.9.1', 'dmod-core>=0.3.0', 'minio', 'aiohttp<=3.7.4', 'hypy@git+https://github.com/NOAA-OWP/hypy@master#egg=hypy&subdirectory=python'], packages=find_namespace_packages(exclude=['dmod.test', 'schemas', 'ssl', 'src']) ) diff --git a/python/lib/scheduler/dmod/scheduler/_version.py b/python/lib/scheduler/dmod/scheduler/_version.py index 1f0478037..f323a57be 100644 --- a/python/lib/scheduler/dmod/scheduler/_version.py +++ b/python/lib/scheduler/dmod/scheduler/_version.py @@ -1 +1 @@ -__version__ = '0.9.2' +__version__ = '0.11.0' diff --git a/python/lib/scheduler/dmod/scheduler/job/job.py b/python/lib/scheduler/dmod/scheduler/job/job.py index e7cf9b912..4d409dc54 100644 --- a/python/lib/scheduler/dmod/scheduler/job/job.py +++ b/python/lib/scheduler/dmod/scheduler/job/job.py @@ -946,7 +946,7 @@ def parse_serialized_job_id(cls, serialized_value: Optional[str], **kwargs): raise RuntimeError(msg) def __init__(self, cpu_count: int, memory_size: int, model_request: ExternalRequest, - allocation_paradigm: Union[str, AllocationParadigm], alloc_priority: int = 0): + allocation_paradigm: Union[str, AllocationParadigm], alloc_priority: int = 0, *args, **kwargs): self._cpu_count = cpu_count self._memory_size = 
memory_size self._model_request = model_request @@ -1329,17 +1329,18 @@ def factory_init_from_deserialized_json(cls, json_obj: dict): return None # Create the object initially from the request - new_obj = cls(job_request=request) + new_obj = cls(job_request=request, cpu_count=cpus, memory_size=memory, allocation_paradigm=paradigm, + alloc_priority=priority) # Then update its properties based on the deserialized values, as those are considered most correct # Use property setter for job id to handle string or UUID new_obj.job_id = job_id - new_obj._cpu_count = cpus - new_obj._memory_size = memory - new_obj._allocation_paradigm = paradigm - new_obj._allocation_priority = priority + #new_obj._cpu_count = cpus + #new_obj._memory_size = memory + #new_obj._allocation_paradigm = paradigm + #new_obj._allocation_priority = priority new_obj._rsa_key_pair = rsa_key_pair new_obj._status = status new_obj._allocations = allocations @@ -1351,12 +1352,43 @@ def factory_init_from_deserialized_json(cls, json_obj: dict): return new_obj - def __init__(self, job_request: SchedulerRequestMessage): + @classmethod + def factory_init_from_request(cls, job_request: SchedulerRequestMessage) -> 'RequestedJob': + """ + Factory init function to create an object from the parameters implied by the job request. + + Parameters + ---------- + job_request + + Returns + ------- + + """ + return cls(job_request=job_request, cpu_count=job_request.cpus, memory_size=job_request.memory, + allocation_paradigm=job_request.allocation_paradigm) + + def __init__(self, job_request: SchedulerRequestMessage, *args, **kwargs): + """ + Initialize this instance. 
+ + Parameters + ---------- + job_request + args + kwargs + + Other Parameters + ---------- + cpu_count + memory_size + model_request + allocation_paradigm + alloc_priority + """ + super(RequestedJob, self).__init__(model_request=job_request.model_request, *args, **kwargs) self._originating_request = job_request - super().__init__(cpu_count=job_request.cpus, memory_size=job_request.memory, - model_request=job_request.model_request, - allocation_paradigm=job_request.allocation_paradigm) - self.data_requirements = self.model_request.data_requirements + self.data_requirements = job_request.model_request.data_requirements @property def model_request(self) -> ExternalRequest: diff --git a/python/lib/scheduler/dmod/scheduler/job/job_manager.py b/python/lib/scheduler/dmod/scheduler/job/job_manager.py index 34a76835f..91552885c 100644 --- a/python/lib/scheduler/dmod/scheduler/job/job_manager.py +++ b/python/lib/scheduler/dmod/scheduler/job/job_manager.py @@ -4,6 +4,7 @@ from typing import Dict, List, Optional, Tuple, Union from uuid import UUID, uuid4 as random_uuid from dmod.core.execution import AllocationParadigm +from dmod.communication.maas_request.dmod_job_request import DmodJobRequest from .job import Job, JobExecPhase, JobExecStep, JobStatus, RequestedJob from .job_util import JobUtil, RedisBackedJobUtil from ..resources.resource_allocation import ResourceAllocation @@ -110,7 +111,7 @@ def build_prioritized_pending_allocation_queues(cls, jobs_eligible_for_allocate: pass @abstractmethod - def create_job(self, **kwargs) -> Job: + def create_job(self, request: DmodJobRequest, *args, **kwargs) -> Job: """ Create and return a new job object. @@ -123,8 +124,12 @@ def create_job(self, **kwargs) -> Job: Parameters ---------- + request : DmodJobRequest + The originating request for the job. + args + Other optional positional arguments kwargs - Other appropriate, implementation-specific keyed parameters supported for creating the job object. 
+ Other optional keyword arguments. Returns ------- @@ -412,7 +417,7 @@ def _request_allocations_for_queue(self, jobs_priority_queue: List[Tuple[int, Re self.save_job(j) return allocated_successfully - def create_job(self, **kwargs) -> RequestedJob: + def create_job(self, request: SchedulerRequestMessage, *args, **kwargs) -> RequestedJob: """ Create and return a new job object that has been saved to the backend store. @@ -420,15 +425,17 @@ def create_job(self, **kwargs) -> RequestedJob: ::class:`SchedulerRequestMessage` as a parameter. This is in the ``request`` keyword arg. Parameters - ---------- - kwargs - Implementation-specific keyed parameters for creating appropriate job objects (see *Keyword Args* section). - - Keyword Args ------------ request : SchedulerRequestMessage The originating request for the job. - job_id : str, UUID, None + args + Other optional positional arguments + kwargs + Other optional keyword arguments. + + Other Parameters + ------------ + job_id : Union[str, UUID] Optional value to try use for the job's id, falling back to random if not present, invalid, or already used. Returns @@ -436,16 +443,19 @@ def create_job(self, **kwargs) -> RequestedJob: RequestedJob The newly created job object. 
""" - job_obj = RequestedJob(job_request=kwargs['request']) + job_obj = RequestedJob.factory_init_from_request(job_request=request) + # TODO: do some processing here or in the object init to build restrictions and constraints that make sense globally; + # i.e., make sure the requirements for forcings satisfy the necessary hydrofabric if the config doesn't include a specific subset explicitly + uuid_param = kwargs.get('job_id', random_uuid()) try: - job_uuid = kwargs['job_id'] if isinstance(kwargs['job_id'], UUID) else UUID(str(kwargs['job_id'])) - if not self._does_redis_key_exist(self._get_job_key_for_id(job_uuid)): - job_obj.job_id = job_uuid - else: - job_obj.job_id = random_uuid() + job_uuid = uuid_param if isinstance(uuid_param, UUID) else UUID(str(uuid_param)) except: - job_obj.job_id = random_uuid() + job_uuid = random_uuid() + + while self._does_redis_key_exist(self._get_job_key_for_id(job_uuid)): + job_uuid = random_uuid() + job_obj.job_id = job_uuid self.save_job(job_obj) return job_obj diff --git a/python/lib/scheduler/dmod/scheduler/scheduler.py b/python/lib/scheduler/dmod/scheduler/scheduler.py index 87f771bb3..94b24cfe8 100644 --- a/python/lib/scheduler/dmod/scheduler/scheduler.py +++ b/python/lib/scheduler/dmod/scheduler/scheduler.py @@ -2,7 +2,7 @@ import logging from requests.exceptions import ReadTimeout -from dmod.communication import MessageEventType, NGENRequest, NWMRequest +from dmod.communication import MessageEventType, NGENRequest, NWMRequest, NgenCalibrationRequest from dmod.core.exception import DmodRuntimeError from dmod.core.meta_data import DataCategory, DataFormat from os import getenv @@ -380,13 +380,13 @@ def _generate_docker_cmd_args(self, job: 'Job', worker_index: int) -> List[str]: https://docs.docker.com/engine/reference/builder/#understand-how-cmd-and-entrypoint-interact """ # TODO (later): handle non-model-exec jobs in the future - if job.model_request.event_type != MessageEventType.MODEL_EXEC_REQUEST: + if 
job.model_request.event_type != MessageEventType.MODEL_EXEC_REQUEST and job.model_request.event_type != MessageEventType.CALIBRATION_REQUEST: raise RuntimeError("Unsupported requested job event type {}; cannot generate Docker CMD arg values".format( job.model_request.get_message_event_type())) # TODO (later): have something more intelligent than class type to determine right entrypoint format and # values, but for now assume/require a "standard" image - if not (isinstance(job.model_request, NWMRequest) or isinstance(job.model_request, NGENRequest)): + if not (isinstance(job.model_request, NWMRequest) or isinstance(job.model_request, NGENRequest) or isinstance(job.model_request, NgenCalibrationRequest)): raise RuntimeError("Unexpected request type {}: cannot build Docker CMD arg list".format( job.model_request.__class__.__name__)) @@ -418,9 +418,20 @@ def _generate_docker_cmd_args(self, job: 'Job', worker_index: int) -> List[str]: docker_cmd_args.append(bmi_config_dataset_names[0]) # $9 is the name of the partition config dataset (which will imply a directory location) - partition_config_dataset_names = self._ds_names_helper(job, worker_index, DataCategory.CONFIG, max_count=1, - data_format=DataFormat.NGEN_PARTITION_CONFIG) - docker_cmd_args.append(partition_config_dataset_names[0]) + # TODO: this probably will eventually break things if $10 is added for calibration config dataset + # TODO: need to overhaul entrypoint for ngen and ngen-calibration images with flag-based args + if job.cpu_count > 1: + partition_config_dataset_names = self._ds_names_helper(job, worker_index, DataCategory.CONFIG, + max_count=1, + data_format=DataFormat.NGEN_PARTITION_CONFIG) + docker_cmd_args.append(partition_config_dataset_names[0]) + + # $10 is the name of the calibration config dataset (which will imply a directory location) + # TODO: this *might* need to be added depending on how we decide to handle calibration + # configs. meaning if they are datasets or not. 
+ # calibration_config_dataset_names = self._ds_names_helper(job, worker_index, DataCategory.CONFIG, max_count=1, + # data_format=DataFormat.NGEN_CAL_CONFIG) + # docker_cmd_args.append(calibration_config_dataset_names[0]) # Also do a sanity check here to ensure there is at least one forcing dataset self._ds_names_helper(job, worker_index, DataCategory.FORCING) @@ -490,9 +501,17 @@ def determine_image_for_job(self, job: 'Job') -> str: str String name, including tag, of the appropriate Docker image for this job. """ + # For now, these are the only two requests supported + # NOTE: NgenCalibrationRequest needs to come first, because it is a subclass of NGENRequest. + # In the future, we should refactor this so this method doesn't need to know about this + # subclass relationship. + + # TODO: move registry name into environment variable or other more appropriate place + if isinstance(job.model_request, NgenCalibrationRequest): + return "127.0.0.1:5000/ngen-cal:latest" + if isinstance(job.model_request, NGENRequest): return "127.0.0.1:5000/ngen:latest" - # For now, this is the only thing supported else: msg = "Unable to determine correct scheduler image for job {} with request of {} type" raise DmodRuntimeError(msg.format(job.job_id, job.model_request.__class__.__name__)) diff --git a/python/services/dataservice/dmod/dataservice/__main__.py b/python/services/dataservice/dmod/dataservice/__main__.py index 282d2a00d..2d2d9a05a 100644 --- a/python/services/dataservice/dmod/dataservice/__main__.py +++ b/python/services/dataservice/dmod/dataservice/__main__.py @@ -157,6 +157,7 @@ def main(): # Setup other required async tasks service_manager.add_async_task(service_manager.manage_required_data_checks()) service_manager.add_async_task(service_manager.manage_data_provision()) + service_manager.add_async_task(service_manager.manage_hydrofabric_availability()) service_manager.run() diff --git a/python/services/dataservice/dmod/dataservice/_version.py 
b/python/services/dataservice/dmod/dataservice/_version.py index b703f5c96..83e147c62 100644 --- a/python/services/dataservice/dmod/dataservice/_version.py +++ b/python/services/dataservice/dmod/dataservice/_version.py @@ -1 +1 @@ -__version__ = '0.4.1' \ No newline at end of file +__version__ = '0.6.0' \ No newline at end of file diff --git a/python/services/dataservice/dmod/dataservice/service.py b/python/services/dataservice/dmod/dataservice/service.py index 04e1fa0af..4a53ba177 100644 --- a/python/services/dataservice/dmod/dataservice/service.py +++ b/python/services/dataservice/dmod/dataservice/service.py @@ -8,7 +8,8 @@ ManagementAction, WebSocketInterface from dmod.communication.dataset_management_message import DatasetQuery, QueryType from dmod.communication.data_transmit_message import DataTransmitMessage, DataTransmitResponse -from dmod.core.meta_data import DataCategory, DataDomain, DataRequirement, DiscreteRestriction, StandardDatasetIndex +from dmod.core.meta_data import DataCategory, DataDomain, DataFormat, DataRequirement, DiscreteRestriction, \ + StandardDatasetIndex from dmod.core.serializable import ResultIndicator, BasicResultIndicator from dmod.core.exception import DmodRuntimeError from dmod.modeldata.data.object_store_manager import Dataset, DatasetManager, DatasetType, ObjectStoreDatasetManager @@ -78,29 +79,74 @@ def _get_worker_required_datasets(self, job: Job) -> Set[str]: worker_required_datasets.add(fulfilled_by) return worker_required_datasets - def init_volumes(self, job: Job): + def _pre_vol_service_ds_name_processing(self, dataset_names: Set[str]) -> Set[str]: """ - Primary function for this type, creating needed dataset volumes on all hosts through a global Swarm service. + Run sanity checks and filtering of dataset names before creating service to create S3FS volumes for them. - Function creates a ``global`` Docker service using the appropriate image, where the image name and tag was - provided to the instance when it was created. 
It is expected that this image contains a script that can expect - standardized args and environment variables, and initialize the appropriate Docker volumes for the needed - datasets on each host. + Parameters + ---------- + dataset_names + + Returns + ------- + Set[str] + The dataset names from the initial params that don't already have S3FS volumes, and thus for which volumes + should be created. + """ + # Return immediately if the set is empty (nothing to create volumes for) + if len(dataset_names) == 0: + return dataset_names + + # Also, make sure these dataset names are valid for this kind of volume creation + known_datasets: Dict[str, Dataset] = self._service_manager.get_known_datasets() + + # First, bail if any dataset names don't correspond to a known dataset + unrecognized = [ds_name for ds_name in dataset_names if ds_name not in known_datasets.keys()] + if len(unrecognized) > 0: + msg = "Can't create {} volumes for unrecognized dataset names: ({})" + raise DmodRuntimeError(msg.format(self._docker_plugin_alias, ','.join(unrecognized))) + + # Also bail if any (outstanding) dataset names correspond to known dataset of type other than OBJECT_STORE + non_obj_store_dataset_names = [n for n, d in known_datasets.items() if + n in dataset_names and d.dataset_type != DatasetType.OBJECT_STORE] + if len(non_obj_store_dataset_names) > 0: + types = ['{}:{}'.format(n, known_datasets[n].dataset_type.name) for n in non_obj_store_dataset_names] + raise DmodRuntimeError('Attempting to pass non-object-store datasets to S3FS volume util: {}'.format(types)) + + return dataset_names + + def init_volume_create_service(self, dataset_names: Set[str], helper_service_name: str): + """ + Initialize and execute a Docker service that creates S3FS-based volumes for appropriate datasets. + + Function creates a ``global`` Docker service of the given name.
The service containers initialize an associated + Docker volume for each object store dataset, using a custom S3FS storage driver. Because the service is + ``global``, it will run on each Swarm node, thereby creating the same set of desired volumes on each node. + + The image used for the created service containers is determined from "name" and "tag" values provided to this + instance when it was initialized. It is expected and assumed the image runs the appropriate entrypoint script + for initializing volumes as described above. Parameters ---------- - job : Job - The job for which volumes should be created, where each such volume correspond to an object store dataset - required by one of the job's workers. + dataset_names + helper_service_name """ - worker_required_datasets = self._get_worker_required_datasets(job) - if len(worker_required_datasets) == 0: + # Run sanity checks and filter out names for which there are already S3FS driver volumes + dataset_names = self._pre_vol_service_ds_name_processing(dataset_names=dataset_names) + + # Exit early if there are no actual names that need volumes created + if len(dataset_names) == 0: return secrets = [self.get_secret_reference(sn) for sn in self._obj_store_docker_secret_names] docker_cmd_args = ['--sentinel', self.sentinel_file, '--service-mode'] - docker_cmd_args.extend(worker_required_datasets) + docker_cmd_args.extend(dataset_names) env_vars = ['PLUGIN_ALIAS={}'.format(self._docker_plugin_alias)] if self._obj_store_url is not None: @@ -108,13 +154,18 @@ def init_volumes(self, job: Job): env_vars.append('S3FS_ACCESS_KEY={}'.format(self._obj_store_access)) env_vars.append('S3FS_SECRET_KEY={}'.format(self._obj_store_secret)) + # TODO: might need to add logic here to make sure there isn't an existing service with this name + # TODO: might also need to parameterize what happens if there is an existing service with this name + + service = None + try: service = self.docker_client.services.create(image=self.image, 
mode=ServiceMode(mode='global'), args=docker_cmd_args, cap_add=['SYS_ADMIN'], env=env_vars, - name='{}-{}'.format(self.DOCKER_SERVICE_NAME, job.job_id), + name=helper_service_name, # Make sure to re-mount the Docker socket inside the helper # service container that gets started mounts=['/var/run/docker.sock:/var/run/docker.sock:rw'], @@ -122,19 +173,64 @@ restart_policy=RestartPolicy(condition='none'), healthcheck=self.service_healthcheck, secrets=secrets) - time_sleep(5) - for tries in range(5): + time_sleep(1) + service_running = False + # TODO: still don't think this works exactly correctly, but should only be an issue when we create new vols + for tries in range(120): + # Note that this just reloads the true state of the service from Docker (it's not a service restart) service.reload() - if all([task['Status']['State'] == task['DesiredState'] for task in service.tasks()]): + # Keys: ['ID', 'Version', 'CreatedAt', 'UpdatedAt', 'Labels', 'Spec', 'ServiceID', 'NodeID', 'Status', + if all([t['Status']['State'] == t['DesiredState'] for t in service.tasks()]): + service_running = True + break + time_sleep(1) + if not service_running: + msg = 'Unable to get all service tasks to desired state for volume helper service {}' + raise RuntimeError(msg.format(helper_service_name)) + time_sleep(5) + service.reload() + for tries in range(10): + # NOTE: Docker Engine API reports task states in lowercase (e.g., 'complete'), so compare lowercase + if any([t['Status']['State'] != 'complete' for t in service.tasks()]): + time_sleep(2) + service.reload() + else: break - time_sleep(3) - service.remove() except KeyError as e: logging.error('Failure checking service status: {}'.format(str(e))) - service.remove() except Exception as e: + # TODO: might need to expand the use of service.remove() to here (while also making sure an object exists) logging.error(e) raise e + finally: + if service is not None: + service.remove() + + def init_volumes(self, job: Job): + """ + Primary function for this type, creating needed dataset
volumes on all hosts through a global Swarm service. + + Function creates a ``global`` Docker service using the appropriate image, where the image name and tag was + provided to the instance when it was created. It is expected that this image contains a script that can expect + standardized args and environment variables, and initialize the appropriate Docker volumes for the needed + datasets on each host. + + Parameters + ---------- + job : Job + The job for which volumes should be created, where each such volume correspond to an object store dataset + required by one of the job's workers. + """ + worker_required_datasets = self._get_worker_required_datasets(job) + if len(worker_required_datasets) > 0: + self.init_volume_create_service(dataset_names=worker_required_datasets, + helper_service_name='{}-{}'.format(self.DOCKER_SERVICE_NAME, job.job_id)) + + def remove_existing_service(self, service_name: str) -> bool: + for srv in self.docker_client.services.list(): + if srv.name == service_name: + return srv.remove() + return False @property def sentinel_file(self) -> str: @@ -213,6 +309,11 @@ def __init__(self, job_util: JobUtil, *args, **kwargs): """ Map of dataset class type (key), to service's dataset manager (value) for handling that dataset type. """ self._managers_by_uuid: Dict[UUID, DatasetManager] = {} """ Map of dataset managers keyed by the UUID of each. 
""" + + # For now at least, this is on by default + # TODO: (later) properly account for whether Docker is actually being used + self._is_docker_swarm_active = bool(kwargs.get('docker_swarm_active', True)) + self._obj_store_data_mgr = None self._obj_store_access_key = None self._obj_store_secret_key = None @@ -684,9 +785,22 @@ def _process_query(self, message: DatasetManagementMessage) -> DatasetManagement dataset_name = message.dataset_name list_of_files = self.get_known_datasets()[dataset_name].manager.list_files(dataset_name) return DatasetManagementResponse(action=message.management_action, success=True, dataset_name=dataset_name, - reason='Obtained {} Items List', + reason='Obtained {} Items List'.format(dataset_name), data={DatasetManagementResponse._DATA_KEY_QUERY_RESULTS: list_of_files}) - # TODO: (later) add support for messages with other query types also + elif query_type == QueryType.GET_SERIALIZED_FORM: + dataset_name = message.dataset_name + serialized_form = self.get_known_datasets()[dataset_name].to_dict() + return DatasetManagementResponse(action=message.management_action, success=True, dataset_name=dataset_name, + reason='Obtained serialized {} dataset'.format(dataset_name), + data={DatasetManagementResponse._DATA_KEY_QUERY_RESULTS: serialized_form}) + if query_type == QueryType.GET_DATASET_ITEMS: + dataset = self.get_known_datasets()[message.dataset_name] + mgr = dataset.manager + item_details: List[dict] = [mgr.get_file_stat(dataset.name, f) for f in mgr.list_files(dataset.name)] + return DatasetManagementResponse(action=message.management_action, success=True, dataset_name=dataset.name, + reason='Obtained file details for {} dataset'.format(dataset.name), + data={DatasetManagementResponse._DATA_KEY_QUERY_RESULTS: item_details}) + # TODO: (later) add support for messages with other query types also else: reason = 'Unsupported {} Query Type - {}'.format(DatasetQuery.__class__.__name__, query_type.name) return 
DatasetManagementResponse(action=message.management_action, success=False, reason=reason) @@ -751,28 +865,46 @@ def find_dataset_for_requirement(self, requirement: DataRequirement) -> Optional # Keep track of a few things for logging purposes datasets_count_match_category = 0 datasets_count_match_format = 0 + # Keep those of the right category but wrong format, in case one is needed and satisfactory + potentially_compatible_alternates: List[Dataset] = [] for name, dataset in self.get_known_datasets().items(): # Skip anything with the wrong category if dataset.category != requirement.category: continue - else: - datasets_count_match_category += 1 - # ... or a different format + # Keep track of how many of the right category there were for error purposes + datasets_count_match_category += 1 + + # Skip (for now at least) anything with a different format (though set aside if potentially compatible) if dataset.data_format != requirement.domain.data_format: + # Check if this format could fulfill + if DataFormat.can_format_fulfill(needed=requirement.domain.data_format, alternate=dataset.data_format): + # We will return to examine these if no dataset qualifies that has the exact format in requirement + potentially_compatible_alternates.append(dataset) continue - else: - datasets_count_match_format += 1 + # When a dataset matches, keep track for error counts, and then test to see if it qualifies + datasets_count_match_format += 1 + # TODO: need additional test of some kind for cases when the requirement specifies "any" (e.g., "any" + # catchment (from hydrofabric) in realization config, for finding a forcing dataset) + if dataset.data_domain.contains(requirement.domain): + return dataset + + # At this point, no datasets qualify against the exact domain (including format) of the requirement + # However, before failing, check if any have different, but compatible format, and otherwise qualify + for dataset in potentially_compatible_alternates: if 
dataset.data_domain.contains(requirement.domain): return dataset + # Before failing, treat the count of alternates as being of the same format, for error messaging purposes + datasets_count_match_format += len(potentially_compatible_alternates) + if datasets_count_match_category == 0: msg = "Could not fill requirement for '{}': no datasets for this category" logging.error(msg.format(requirement.category.name)) elif datasets_count_match_format == 0: - msg = "Could not fill requirement with '{}' format domain: no datasets found this format" + msg = "Could not fill requirement with '{}' format domain: no datasets found this (or compatible) format" logging.error(msg.format(requirement.domain.data_format.name)) else: msg = "Could not find dataset meeting all restrictions of requirement: {}" @@ -815,25 +947,29 @@ def init_object_store_dataset_manager(self, obj_store_host: str, access_key: str self._obj_store_access_key = access_key self._obj_store_secret_key = secret_key - s3fs_helper_networks = ['host'] + if self._is_docker_swarm_active: - s3fs_url_proto = os.getenv('S3FS_URL_PROTOCOL', 'http') - s3fs_url_host = os.getenv('S3FS_URL_HOST') - s3fs_url_port = os.getenv('S3FS_URL_PORT', '9000') - if s3fs_url_host is not None: - s3fs_helper_url = '{}://{}:{}/'.format(s3fs_url_proto, s3fs_url_host, s3fs_url_port) - else: - s3fs_helper_url = None - - self._docker_s3fs_helper = DockerS3FSPluginHelper(service_manager=self, - obj_store_access=self._obj_store_access_key, - obj_store_secret=self._obj_store_secret_key, - docker_image_name=os.getenv('S3FS_VOL_IMAGE_NAME', '127.0.0.1:5000/s3fs-volume-helper'), - docker_image_tag=os.getenv('S3FS_VOL_IMAGE_TAG', 'latest'), - docker_networks=s3fs_helper_networks, - docker_plugin_alias=os.getenv('S3FS_PLUGIN_ALIAS', 's3fs'), - obj_store_url=s3fs_helper_url, - *args, **kwargs) + s3fs_helper_networks = ['host'] + + s3fs_url_proto = os.getenv('S3FS_URL_PROTOCOL', 'http') + s3fs_url_host = os.getenv('S3FS_URL_HOST') + s3fs_url_port = 
os.getenv('S3FS_URL_PORT', '9000') + if s3fs_url_host is not None: + s3fs_helper_url = '{}://{}:{}/'.format(s3fs_url_proto, s3fs_url_host, s3fs_url_port) + else: + s3fs_helper_url = None + + self._docker_s3fs_helper = DockerS3FSPluginHelper(service_manager=self, + obj_store_access=self._obj_store_access_key, + obj_store_secret=self._obj_store_secret_key, + docker_image_name=os.getenv('S3FS_VOL_IMAGE_NAME', + '127.0.0.1:5000/s3fs-volume-helper'), + docker_image_tag=os.getenv('S3FS_VOL_IMAGE_TAG', + 'latest'), + docker_networks=s3fs_helper_networks, + docker_plugin_alias=os.getenv('S3FS_PLUGIN_ALIAS', + 's3fs'), + obj_store_url=s3fs_helper_url, *args, **kwargs) async def listener(self, websocket: WebSocketServerProtocol, path): """ @@ -864,7 +1000,9 @@ async def listener(self, websocket: WebSocketServerProtocol, path): response = await self._async_process_add_data(dataset_name=dest_dataset_name, dest_item_name=partial_item_name, message=inbound_message, - is_temp=True, + # if True, causes: S3 operation failed; code: InvalidRequest, message: Bucket is missing ObjectLockConfiguration, + # is_temp=True, + is_temp=False, manager=dataset_manager) partial_indx += 1 if inbound_message.is_last and response.success: @@ -890,6 +1028,14 @@ async def listener(self, websocket: WebSocketServerProtocol, path): partial_indx = 0 elif inbound_message.management_action == ManagementAction.CREATE: response = await self._async_process_dataset_create(message=inbound_message) + elif inbound_message.management_action == ManagementAction.REQUEST_DATA and inbound_message.blk_start is not None: + manager = self.get_known_datasets()[inbound_message.dataset_name].manager + raw_data = manager.get_data(dataset_name=inbound_message.dataset_name, + item_name=inbound_message.data_location, + offset=inbound_message.blk_start, length=inbound_message.blk_size) + response = DatasetManagementResponse(success=raw_data is not None, + action=inbound_message.management_action, + data=raw_data, reason="Data 
Block Retrieve Complete") elif inbound_message.management_action == ManagementAction.REQUEST_DATA: response = await self._async_process_data_request(message=inbound_message, websocket=websocket) elif inbound_message.management_action == ManagementAction.ADD_DATA: @@ -953,7 +1099,7 @@ async def manage_required_data_checks(self): logging.info("All required data for {} is available.".format(job.job_id)) # Before moving to next successful step, also create output datasets and requirement entries self._create_output_datasets(job) - job.status_step = JobExecStep.AWAITING_PARTITIONING + job.status_step = JobExecStep.AWAITING_PARTITIONING if job.cpu_count > 1 else JobExecStep.AWAITING_ALLOCATION else: logging.error("Some or all required data for {} is unprovideable.".format(job.job_id)) job.status_step = JobExecStep.DATA_UNPROVIDEABLE @@ -994,6 +1140,36 @@ async def manage_data_provision(self): self._job_util.unlock_active_jobs(lock_id) await asyncio.sleep(5) + async def manage_hydrofabric_availability(self): + """ + Async task method to make sure hydrofabric datasets are available to GUI. 
+ """ + logging.debug("Starting task loop for managing hydrofabric dataset availability") + while True: + # TODO: (later) also filter out any already-subdivided hydrofabric datasets + datasets = [d for n, d in self.get_known_datasets().items() if d.category == DataCategory.HYDROFABRIC] + + if self._is_docker_swarm_active: + service_name = 'hydrofabric_avail_task' + self._docker_s3fs_helper.remove_existing_service(service_name=service_name) + + # TODO: (later) support doing this or similar for non-object-store datasets + obj_store_ds_names = set([d.name for d in datasets if d.dataset_type == DatasetType.OBJECT_STORE]) + # TODO: (later) add something to make sure the volumes get removed if/when a dataset is deleted + + # We may re-do even for volumes that already exist; this will ensure any new swarm nodes also get it + self._docker_s3fs_helper.init_volume_create_service(dataset_names=obj_store_ds_names, + helper_service_name=service_name) + # Right no others are supported, so warn + non_obj_store_names = [d.name for d in datasets if d.dataset_type != DatasetType.OBJECT_STORE] + if len(non_obj_store_names) > 0: + msg = "Unexpected Hydrofabric datasets that cannot be made available to services: {}" + logging.error(msg.format(non_obj_store_names)) + + # TODO: (later) support other availability paradigms + + await asyncio.sleep(60) + async def perform_checks_for_job(self, job: Job) -> bool: """ Check whether all requirements for this job can be fulfilled, setting the fulfillment associations. 
diff --git a/python/services/dataservice/setup.py b/python/services/dataservice/setup.py index b839e55c9..9b2879cb6 100644 --- a/python/services/dataservice/setup.py +++ b/python/services/dataservice/setup.py @@ -17,7 +17,7 @@ author_email='', url='', license='', - install_requires=['dmod-core>=0.3.0', 'dmod-communication>=0.7.1', 'dmod-scheduler>=0.7.0', 'dmod-modeldata>=0.9.0', + install_requires=['dmod-core>=0.5.0', 'dmod-communication>=0.12.0', 'dmod-scheduler>=0.11.0', 'dmod-modeldata>=0.9.0', 'redis'], packages=find_namespace_packages(exclude=['dmod.test', 'deprecated', 'conf', 'schemas', 'ssl', 'src']) ) diff --git a/python/services/evaluationservice/dmod/evaluationservice/evaluation_service/static/evaluation_service/css/evaluation_listing.css b/python/services/evaluationservice/dmod/evaluationservice/evaluation_service/static/evaluation_service/css/evaluation_listing.css index 5a593a31c..394a40f6e 100644 --- a/python/services/evaluationservice/dmod/evaluationservice/evaluation_service/static/evaluation_service/css/evaluation_listing.css +++ b/python/services/evaluationservice/dmod/evaluationservice/evaluation_service/static/evaluation_service/css/evaluation_listing.css @@ -16,7 +16,7 @@ tr:hover { } th { - background-color: #33ade0; + background-color: #828a8f; color: white; padding: 10px; } diff --git a/python/services/evaluationservice/dmod/evaluationservice/evaluation_service/static/evaluation_service/css/ready_evaluation_async.css b/python/services/evaluationservice/dmod/evaluationservice/evaluation_service/static/evaluation_service/css/ready_evaluation_async.css index 78ce4dd3d..ba7b4841a 100644 --- a/python/services/evaluationservice/dmod/evaluationservice/evaluation_service/static/evaluation_service/css/ready_evaluation_async.css +++ b/python/services/evaluationservice/dmod/evaluationservice/evaluation_service/static/evaluation_service/css/ready_evaluation_async.css @@ -137,7 +137,7 @@ button:hover { } .popup-header { - background-color: #33ade0; + 
background-color: #828a8f; color: white; padding: 15px; } @@ -170,7 +170,7 @@ button:hover { } #search-header-row { - background-color: #33ade0; + background-color: #828a8f; color: white; } diff --git a/python/services/evaluationservice/dmod/evaluationservice/templates/base.html b/python/services/evaluationservice/dmod/evaluationservice/templates/base.html index bf1812da1..add2aa01b 100644 --- a/python/services/evaluationservice/dmod/evaluationservice/templates/base.html +++ b/python/services/evaluationservice/dmod/evaluationservice/templates/base.html @@ -15,7 +15,7 @@ background-size: 100px 100px; background-position-x: 10px; height: 125px; - background-color: #33ade0; + background-color: #828a8f; } #base-banner h1 { diff --git a/python/services/partitionerservice/dmod/partitionerservice/_version.py b/python/services/partitionerservice/dmod/partitionerservice/_version.py index fb13a3556..9dd16a345 100644 --- a/python/services/partitionerservice/dmod/partitionerservice/_version.py +++ b/python/services/partitionerservice/dmod/partitionerservice/_version.py @@ -1 +1 @@ -__version__ = '0.2.1' \ No newline at end of file +__version__ = '0.2.2' \ No newline at end of file diff --git a/python/services/partitionerservice/dmod/partitionerservice/service.py b/python/services/partitionerservice/dmod/partitionerservice/service.py index e57dd81cf..1fb244122 100644 --- a/python/services/partitionerservice/dmod/partitionerservice/service.py +++ b/python/services/partitionerservice/dmod/partitionerservice/service.py @@ -454,6 +454,12 @@ async def manage_job_partitioning(self): for job in [j for j in self._job_util.get_all_active_jobs() if j.status_step == JobExecStep.AWAITING_PARTITIONING]: + + if job.cpu_count == 1: + logging.warning("No need to partition job {} with only 1 CPU allocated".format(job.job_id)) + job.status_step = JobExecStep.AWAITING_ALLOCATION + continue + logging.info("Processing partitioning for active job {}".format(job.job_id)) try: # See if there is 
already an existing dataset to use for this diff --git a/python/services/requestservice/dmod/requestservice/__main__.py b/python/services/requestservice/dmod/requestservice/__main__.py index 08b9186b4..918ca7dea 100644 --- a/python/services/requestservice/dmod/requestservice/__main__.py +++ b/python/services/requestservice/dmod/requestservice/__main__.py @@ -69,16 +69,16 @@ def _handle_args(): default=None) parser.add_argument('--evaluation-service-host', help='Set the appropriate hostname for the evaluation service to connect with', - dest='data_service_host', - default='localhost') + dest='evaluation_service_host', + default='evaluation-service') parser.add_argument('--evaluation-service-port', help='Set the appropriate port value for the evaluation service to connect with', - dest='data_service_port', - default='3014') + dest='evaluation_service_port', + default='3016') parser.add_argument('--evaluation-service-ssl-dir', help='Set the ssl directory for evaluation service certs, ' 'if not the same as for the request handler', - dest='data_service_ssl_dir', + dest='evaluation_service_ssl_dir', default=None) parser.add_argument('--pycharm-remote-debug', help='Activate Pycharm remote debugging support', @@ -161,7 +161,8 @@ def main(): partitioner_port=args.partitioner_service_port, partitioner_ssl_dir=args.partitioner_service_ssl_dir, evaluation_service_host=args.evaluation_service_host, - evaluation_service_port=args.evaluation_service_port) + evaluation_service_port=args.evaluation_service_port, + evaluation_service_ssl_dir=args.evaluation_service_ssl_dir) handler.run() diff --git a/python/services/requestservice/dmod/requestservice/_version.py b/python/services/requestservice/dmod/requestservice/_version.py index f93e0653b..ccf9e6286 100644 --- a/python/services/requestservice/dmod/requestservice/_version.py +++ b/python/services/requestservice/dmod/requestservice/_version.py @@ -1 +1 @@ -__version__ = '0.5.2' \ No newline at end of file +__version__ = '0.8.0' \ 
No newline at end of file diff --git a/python/services/requestservice/dmod/requestservice/service.py b/python/services/requestservice/dmod/requestservice/service.py index f10a160f5..7e74d71f2 100755 --- a/python/services/requestservice/dmod/requestservice/service.py +++ b/python/services/requestservice/dmod/requestservice/service.py @@ -12,12 +12,13 @@ from dmod.access import DummyAuthUtil, RedisBackendSessionManager from dmod.communication import AbstractInitRequest, InvalidMessageResponse, MessageEventType, NGENRequest, NWMRequest, \ - PartitionRequest, WebSocketSessionsInterface, SessionInitMessage, SchedulerClient, UnsupportedMessageTypeResponse + NgenCalibrationRequest, PartitionRequest, WebSocketSessionsInterface, SessionInitMessage, SchedulerClient, \ + UnsupportedMessageTypeResponse from dmod.communication.dataset_management_message import MaaSDatasetManagementMessage -from dmod.externalrequests import AuthHandler, DatasetRequestHandler, ModelExecRequestHandler, PartitionRequestHandler -from dmod.externalrequests import EvaluationRequestHandler +from dmod.externalrequests import AuthHandler, DatasetRequestHandler, ModelExecRequestHandler, \ + NgenCalibrationRequestHandler, PartitionRequestHandler, EvaluationRequestHandler -from .alternate_service import EvaluationMessage +from .alternate_service import LaunchEvaluationMessage, OpenEvaluationMessage logging.basicConfig( level=logging.DEBUG, @@ -45,11 +46,13 @@ class RequestService(WebSocketSessionsInterface): """ _PARSEABLE_REQUEST_TYPES = [ SessionInitMessage, + NgenCalibrationRequest, NWMRequest, NGENRequest, MaaSDatasetManagementMessage, PartitionRequest, - EvaluationMessage + LaunchEvaluationMessage, + OpenEvaluationMessage ] """ Parseable request types, which are all authenticated ::class:`ExternalRequest` subtypes for this implementation. 
""" @@ -71,19 +74,19 @@ def __init__(self, listen_host='', scheduler_port: Union[str, int] = 3013, partitioner_host: str = 'partitioner-service', data_service_host: str = 'data-service', - evaluation_service_host: str = 'evaluation-service', + evaluation_service_host: str = 'evaluation-service', partitioner_port: Union[str, int] = 3014, data_service_port: Union[str, int] = 3015, - evaluation_service_port: Union[str, int] = 3016, + evaluation_service_port: Union[str, int] = 3016, ssl_dir=None, cert_pem=None, priv_key_pem=None, scheduler_ssl_dir=None, partitioner_ssl_dir=None, data_service_ssl_dir=None, - evaluation_service_ssl_dir=None, - **kwargs - ): + evaluation_service_ssl_dir=None, + **kwargs + ): super().__init__(listen_host=listen_host, port=port, ssl_dir=ssl_dir, cert_pem=cert_pem, priv_key_pem=priv_key_pem) self._session_manager: RedisBackendSessionManager = RedisBackendSessionManager() @@ -120,23 +123,30 @@ def __init__(self, listen_host='', self._model_exec_request_handler = ModelExecRequestHandler(session_manager=self._session_manager, authorizer=self.authorizer, - scheduler_host=scheduler_host, - scheduler_port=int(scheduler_port), - scheduler_ssl_dir=self.scheduler_client_ssl_dir) + service_host=scheduler_host, + service_port=int(scheduler_port), + service_ssl_dir=self.scheduler_client_ssl_dir) + + self._calibration_request_handler = NgenCalibrationRequestHandler(session_manager=self._session_manager, + authorizer=self.authorizer, + service_host=scheduler_host, + service_port=int(scheduler_port), + service_ssl_dir=self.scheduler_client_ssl_dir) self._partition_request_handler = PartitionRequestHandler(session_manager=self._session_manager, authorizer=self.authorizer, - partition_service_host=partitioner_host, - partition_service_port=int(partitioner_port), - partition_service_ssl_dir=self.partitioner_ssl_dir) + service_host=partitioner_host, + service_port=int(partitioner_port), + service_ssl_dir=self.partitioner_ssl_dir) self._data_service_handler = 
DatasetRequestHandler(session_manager=self._session_manager, authorizer=self.authorizer, - data_service_host=data_service_host, - data_service_port=int(data_service_port), - data_service_ssl_dir=self.data_service_ssl_dir) + service_host=data_service_host, + service_port=int(data_service_port), + service_ssl_dir=self.data_service_ssl_dir) self._evaluation_service_handler = EvaluationRequestHandler( + target_service='evaluation-service', service_host=evaluation_service_host, service_port=evaluation_service_port, ssl_directory=evaluation_service_ssl_dir @@ -159,7 +169,7 @@ async def listener(self, websocket: WebSocketServerProtocol, path): req_message = await self.deserialized_message(message_data=data) event_type = MessageEventType.INVALID if req_message is None else req_message.get_message_event_type() - if isinstance(req_message, EvaluationMessage): + if isinstance(req_message, LaunchEvaluationMessage) or isinstance(req_message, OpenEvaluationMessage): response = await self._evaluation_service_handler.handle_request( request=req_message, socket=websocket, @@ -195,6 +205,11 @@ async def listener(self, websocket: WebSocketServerProtocol, path): response = await self._partition_request_handler.handle_request(request=req_message) logging.debug('************************* Handled request response: {}'.format(str(response))) await websocket.send(str(response)) + elif event_type == MessageEventType.CALIBRATION_REQUEST: + logging.debug('Handled calibration request') + response = await self._calibration_request_handler.handle_request(request=req_message) + logging.debug('Processed calibration request; response was: {}'.format(str(response))) + await websocket.send(str(response)) # FIXME: add another message type for closing a session else: msg = 'Received valid ' + event_type.name + ' request, but listener does not currently support' diff --git a/python/services/requestservice/setup.py b/python/services/requestservice/setup.py index fafbc703d..9c8b2bec5 100644 --- 
a/python/services/requestservice/setup.py +++ b/python/services/requestservice/setup.py @@ -17,7 +17,7 @@ author_email='', url='', license='', - install_requires=['websockets', 'dmod-core>=0.1.0', 'dmod-communication>=0.7.0', 'dmod-access>=0.2.0', - 'dmod-externalrequests>=0.3.0'], + install_requires=['websockets', 'dmod-core>=0.3.0', 'dmod-communication>=0.12.0', 'dmod-access>=0.2.0', + 'dmod-externalrequests>=0.4.0'], packages=find_namespace_packages(exclude=['dmod.test', 'schemas', 'ssl', 'src']) ) diff --git a/python/services/schedulerservice/dmod/schedulerservice/_version.py b/python/services/schedulerservice/dmod/schedulerservice/_version.py index 7320e64e1..1658609d0 100644 --- a/python/services/schedulerservice/dmod/schedulerservice/_version.py +++ b/python/services/schedulerservice/dmod/schedulerservice/_version.py @@ -1 +1 @@ -__version__ = '0.7.1' \ No newline at end of file +__version__ = '0.9.0' \ No newline at end of file diff --git a/python/services/schedulerservice/setup.py b/python/services/schedulerservice/setup.py index df2b72527..c2b4f22f6 100644 --- a/python/services/schedulerservice/setup.py +++ b/python/services/schedulerservice/setup.py @@ -17,6 +17,6 @@ author_email='', url='', license='', - install_requires=['dmod-core>=0.2.0', 'dmod-communication>=0.8.0', 'dmod-scheduler>=0.9.0'], + install_requires=['dmod-core>=0.2.0', 'dmod-communication>=0.12.0', 'dmod-scheduler>=0.11.0'], packages=find_namespace_packages(exclude=['dmod.test', 'deprecated', 'conf', 'schemas', 'ssl', 'src']) ) diff --git a/python/services/subsetservice/dmod/subsetservice/__main__.py b/python/services/subsetservice/dmod/subsetservice/__main__.py index dd058dde3..7454fcd58 100644 --- a/python/services/subsetservice/dmod/subsetservice/__main__.py +++ b/python/services/subsetservice/dmod/subsetservice/__main__.py @@ -1,16 +1,24 @@ import argparse import flask import json -from dmod.modeldata import SubsetDefinition, SubsetHandler +from dmod.modeldata import SubsetDefinition 
+from dmod.modeldata.subset.subset_handler import GeoJsonBackedSubsetHandler from pathlib import Path -from typing import Optional +from typing import Dict from .cli import Cli from . import name as package_name +import logging +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s,%(msecs)d %(levelname)s: %(message)s", + datefmt="%H:%M:%S" +) + app = flask.Flask(__name__) app.config["DEBUG"] = True -subset_handler: SubsetHandler = None +subset_handlers: Dict[str, GeoJsonBackedSubsetHandler] = dict() @app.route('/', methods=['GET']) @@ -22,7 +30,8 @@ def home(): @app.route('/subset/cat_id_valid', methods=['POST']) def is_catchment_id_valid(): record = json.loads(flask.request.data) - # Expect JSON with 'id' key and then a single string id + # Expect JSON with 'id' key, 'fabric_name' key for the hydrofabric, and then a single string id + subset_handler = subset_handlers[record['fabric_name']] is_recognized = subset_handler.is_catchment_recognized(record['id']) return flask.jsonify({'catchment_id': record['id'], 'valid': is_recognized}) @@ -31,16 +40,28 @@ def is_catchment_id_valid(): @app.route('/subset/for_cat_id', methods=['POST']) def get_subset_for_catchment_id(): record = json.loads(flask.request.data) - # Expect JSON with 'ids' key and then a list of ids + subset_handler = subset_handlers[record['fabric_name']] + # Expect JSON with 'ids' key, 'fabric_name' key for the hydrofabric, and then a list of ids subset = subset_handler.get_subset_for(record['ids']) return flask.jsonify(subset.to_dict()) +@app.route('/subset/bounds', methods=['POST']) +def get_subset_hydrofabric_for_bounds(): + # min_x, min_y, max_x, max_y + record = json.loads(flask.request.data) + subset_handler = subset_handlers[record['fabric_name']] + features = subset_handler.get_geojson_for_bounds(record['feature_type'], record['min_x'], record['min_y'], + record['max_x'], record['max_y']) + return flask.jsonify(features) + + # A route to get a subset the goes upstream from one or more 
catchments specified by their ids @app.route('/subset/upstream', methods=['POST']) def get_upstream_subset(): record = json.loads(flask.request.data) - # Expect JSON with 'ids' key and then a list of ids + # Expect JSON with 'ids' key, 'fabric_name' key for the hydrofabric, and then a list of ids + subset_handler = subset_handlers[record['fabric_name']] # Potentially a limit key can also be provided limit = None if 'limit' in record: @@ -53,7 +74,7 @@ def get_upstream_subset(): return flask.jsonify(subset.to_dict()) -def _validate_subset(json_data): +def _validate_subset(subset_handler, json_data): subset = SubsetDefinition.factory_init_from_deserialized_json(json_data) if subset is None: return flask.jsonify({'valid': False, 'reason': 'Could not deserialize to subset definition object'}) @@ -64,15 +85,17 @@ def _validate_subset(json_data): @app.route('/subset/validate', methods=['POST']) def validate_subset(): record = json.loads(flask.request.data) - return _validate_subset(json_data=record['subset'] if 'subset' in record else record) + return _validate_subset(subset_handler=subset_handlers[record['fabric_name']], + json_data=record['subset'] if 'subset' in record else record) @app.route('/subset/validate_file', methods=['POST']) def validate_subset_file(): uploaded_file = flask.request.files['file'] + record = json.loads(flask.request.data) if uploaded_file.filename == '': return flask.jsonify({'valid': False, 'reason': 'Invalid file or filename provided to validation routine'}) - return _validate_subset(json_data=json.load(uploaded_file)) + return _validate_subset(subset_handler=subset_handlers[record['fabric_name']], json_data=json.load(uploaded_file)) def _handle_args(): @@ -141,6 +164,23 @@ def _handle_args(): '-I', help="When running CLI operation to create subdivided hydrofabric, only write files for this partition index.", dest='partition_index') + parser.add_argument('--pycharm-remote-debug', + help='Activate Pycharm remote debugging support', + 
dest='pycharm_debug', + action='store_true') + parser.add_argument('--pycharm-remote-debug-egg', + help='Set path to .egg file for Python remote debugger util', + dest='remote_debug_egg_path', + default='/pydevd-pycharm.egg') + parser.add_argument('--remote-debug-host', + help='Set remote debug host to connect back to debugger', + dest='remote_debug_host', + default='host.docker.internal') + parser.add_argument('--remote-debug-port', + help='Set remote debug port to connect back to debugger', + dest='remote_debug_port', + type=int, + default=55874) parser.prog = package_name return parser.parse_args() @@ -153,31 +193,117 @@ def exec_cli_op(cli, args) -> bool: output_file_name=args.output_file) +def _is_hydrofabric_dir(directory: Path, cat_file_name: str, nexus_file_name: str) -> bool: + catchment_data_path = directory.joinpath(cat_file_name) + nexus_data_path = directory.joinpath(nexus_file_name) + return directory.is_dir() and catchment_data_path.is_file() and nexus_data_path.is_file() + + +def _determine_xwalk(hy_dir: Path, first_name: str) -> Path: + # Do some extra checking for crosswalk for alternative path/suffix + # TODO: this needs to be improved somehow + crosswalk_path: Path = hy_dir.joinpath(first_name) + if crosswalk_path.exists(): + return crosswalk_path + else: + alt_xwalk = crosswalk_path.with_suffix('.csv') + if not alt_xwalk.exists(): + msg = "Required crosswalk file to load hydrofabric not found at {} or {}" + logging.error(msg.format(crosswalk_path, alt_xwalk)) + raise RuntimeError(msg.format(crosswalk_path, alt_xwalk)) + else: + return alt_xwalk + + +def _load_subset_handler(hy_dir: Path, catchment_file_name: str, nexus_file_name: str, crosswalk_name: str) -> GeoJsonBackedSubsetHandler: + # TODO: add more intelligence to file base names and detection + #logging.info("Loading hydrofabric files from {}".format(args.files_directory)) + logging.info("Loading hydrofabric files from {}".format(hy_dir)) + + #catchment_data_path = 
args.files_directory.joinpath(args.catchment_data_file) + #nexus_data_path = args.files_directory.joinpath(args.nexus_data_file) + catchment_data_path = hy_dir.joinpath(catchment_file_name) + nexus_data_path = hy_dir.joinpath(nexus_file_name) + + try: + crosswalk_path: Path = _determine_xwalk(hy_dir=hy_dir, first_name=crosswalk_name) + except RuntimeError as e: + msg = "Subset service start failed: {}".format(str(e)) + logging.error(msg) + raise RuntimeError(msg) + + subset_handler = GeoJsonBackedSubsetHandler(catchment_data=catchment_data_path, nexus_data=nexus_data_path, + cross_walk=crosswalk_path) + logging.info("{} hydrofabric loaded into subset handler".format(hy_dir.name)) + return subset_handler + + def main(): - global subset_handler + global subset_handlers args = _handle_args() + if args.pycharm_debug: + logging.info("Preparing remote debugging connection for subset service.") + if args.remote_debug_egg_path == '': + print('Error: set to debug with Pycharm, but no path to remote debugger egg file provided') + exit(1) + if not Path(args.remote_debug_egg_path).exists(): + print('Error: no file at given path to remote debugger egg file "{}"'.format(args.remote_debug_egg_path)) + exit(1) + import sys + sys.path.append(args.remote_debug_egg_path) + import pydevd_pycharm + try: + pydevd_pycharm.settrace(args.remote_debug_host, port=args.remote_debug_port, stdoutToServer=True, + stderrToServer=True) + except Exception as error: + msg = 'Warning: could not set debugging trace to {} on {} due to {} - {}' + print(msg.format(args.remote_debug_host, args.remote_debug_port, error.__class__.__name__, str(error))) + else: + logging.info("Skipping subset service remote debugging setup.") + # TODO: put warning in about not trying multiple CLI operations at once # TODO: try to split off functionality so that Flask stuff (though declared globally) isn't started for CLI ops if not args.files_directory.is_dir(): - print("Error: given param '{}' for files directory is not an 
existing directory".format(args.files_directory)) + logging.error("Given param '{}' for files directory is not an existing directory".format(args.files_directory)) - catchment_data_path = args.files_directory.joinpath(args.catchment_data_file) - nexus_data_path = args.files_directory.joinpath(args.nexus_data_file) - crosswalk_path = args.files_directory.joinpath(args.crosswalk_file) + running_cli = args.partition_file or args.do_simple_subset or args.do_upstream_subset - subset_handler = SubsetHandler.factory_create_from_geojson(catchment_data=catchment_data_path, - nexus_data=nexus_data_path, - cross_walk=crosswalk_path) + subdirs = [d for d in args.files_directory.glob('*') if d.is_dir()] - if args.partition_file or args.do_simple_subset or args.do_upstream_subset: - cli = Cli(catchment_geojson=catchment_data_path, nexus_geojson=nexus_data_path, crosswalk_json=crosswalk_path, - partition_file_str=args.partition_file, subset_handler=subset_handler) - result = exec_cli_op(cli, args) + if len(subdirs) == 0 and _is_hydrofabric_dir(directory=args.files_directory, cat_file_name=args.catchment_data_file, + nexus_file_name=args.nexus_data_file): + sub_handler = _load_subset_handler(hy_dir=args.files_directory, catchment_file_name=args.catchment_data_file, + nexus_file_name=args.nexus_data_file, crosswalk_name=args.crosswalk_file) + subset_handlers[args.files_directory.name] = sub_handler + if running_cli: + cli = Cli(catchment_geojson=args.files_directory.joinpath(args.catchment_data_file), + nexus_geojson=args.files_directory.joinpath(args.catchment_data_file), + crosswalk_json=_determine_xwalk(hy_dir=args.files_directory, first_name=args.crosswalk_file), + partition_file_str=args.partition_file, subset_handler=sub_handler) + else: + cli = None + else: + cli = None + for hy_dir in subdirs: + if _is_hydrofabric_dir(hy_dir, args.catchment_data_file, args.nexus_data_file): + subset_handlers[hy_dir.name] = _load_subset_handler(hy_dir=hy_dir, + 
catchment_file_name=args.catchment_data_file, + nexus_file_name=args.nexus_data_file, + crosswalk_name=args.crosswalk_file) + else: + logging.info("Skipping subdirectory {} without hydrofabric data files from".format(hy_dir)) + + if running_cli and cli is None: + logging.error('Cannot run subset CLI functionality without valid directory containing a single hydrofabric') + exit(1) + elif running_cli and cli is not None: + result = exec_cli_op(cli, args) else: + logging.info("Starting app API service on port {}".format(args.port)) app.run(host=args.host, port=args.port) result = True diff --git a/python/services/subsetservice/dmod/subsetservice/cli.py b/python/services/subsetservice/dmod/subsetservice/cli.py index 5c4434125..7216feb2a 100644 --- a/python/services/subsetservice/dmod/subsetservice/cli.py +++ b/python/services/subsetservice/dmod/subsetservice/cli.py @@ -1,6 +1,7 @@ import geopandas as gpd import json from dmod.modeldata import SubsetHandler +from dmod.modeldata.subset.subset_handler import GeoJsonBackedSubsetHandler from pathlib import Path from typing import List, Optional, Union @@ -30,9 +31,8 @@ def __init__(self, self._partitions_file_str = partition_file_str self._partitions_file = None if subset_handler is None: - self.handler = SubsetHandler.factory_create_from_geojson(catchment_data=catchment_geojson, - nexus_data=nexus_geojson, - cross_walk=crosswalk_json) + self.handler = GeoJsonBackedSubsetHandler(catchment_data=catchment_geojson, nexus_data=nexus_geojson, + cross_walk=crosswalk_json) else: self.handler = subset_handler diff --git a/python/services/subsetservice/setup.py b/python/services/subsetservice/setup.py index 405a22ab7..523afaa32 100644 --- a/python/services/subsetservice/setup.py +++ b/python/services/subsetservice/setup.py @@ -17,6 +17,6 @@ author_email='', url='', license='', - install_requires=['flask', 'dmod-core>=0.1.0', 'dmod-modeldata>=0.5.0'], + install_requires=['flask', 'dmod-core>=0.3.0', 'dmod-modeldata>=0.9.1'], 
packages=find_namespace_packages(exclude=['dmod.test', 'deprecated', 'conf', 'schemas', 'ssl', 'src']) ) diff --git a/scripts/init_object_store_dataset_volumes.sh b/scripts/init_object_store_dataset_volumes.sh index 156a53646..284bdea50 100755 --- a/scripts/init_object_store_dataset_volumes.sh +++ b/scripts/init_object_store_dataset_volumes.sh @@ -167,7 +167,7 @@ while [ ${#} -gt 0 ]; do DO_REMOVE_ALL='true' ;; --sentinel|-S) - SENTINEL="/tmp/${2:?}" + SENTINEL="${2:?}" shift ;; --service-mode)