diff --git a/.gitignore b/.gitignore
index 564452318..33fe087ef 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,9 +5,17 @@
 .vscode
 .venv
 .ruff_cache
+.mypy_cache
 venv
 **__pycache__
 **/venv
+*.pyc
+.pyc
+
+# Ignore setuptools metadata
+*.egg-info/
+*.egg-info
+**/*.egg-info/
 
 # keys and certificates
 *.pem
@@ -37,6 +45,7 @@ security/templates/**
 docs/build/*
 
 # Ignore all .env files at any level
+.env
 *.env
 **/*.env
 !*.env.template
diff --git a/deploy/database.env b/deploy/database.env
index d4ad0d7fc..478e13c66 100644
--- a/deploy/database.env
+++ b/deploy/database.env
@@ -1,9 +1,13 @@
-# production db name
-POSTGRES_DATABANK_DB=cogstack
-
 POSTGRES_DB_MAX_CONNECTIONS=100
 
 # Prefix of file names to load the DB schema for in /services/cogstack-db/(pgsql/mssql)/schemas/ folder
 POSTGRES_DB_SCHEMA_PREFIX="cogstack_db"
 
-POSTGRES_SHM_SIZE="1g"
+# production db name
+DATABASE_DB_NAME=cogstack
+
+DATABASE_DOCKER_SHM_SIZE=1g
+
+DATABASE_DOCKER_CPU_MIN=1
+DATABASE_DOCKER_CPU_MAX=1
+DATABASE_DOCKER_RAM=1g
diff --git a/deploy/elasticsearch.env b/deploy/elasticsearch.env
index b084dba38..387409be5 100644
--- a/deploy/elasticsearch.env
+++ b/deploy/elasticsearch.env
@@ -9,10 +9,10 @@ ELASTICSEARCH_VERSION=opensearch
 # possible values :
 # - elasticsearch : docker.elastic.co/elasticsearch/elasticsearch:8.18.2
 # - elasticsearch (custom cogstack image) : cogstacksystems/cogstack-elasticsearch:latest
-# - opensearch : opensearchproject/opensearch:3.2.0
+# - opensearch : opensearchproject/opensearch:3.3.0
 # the custom cogstack image is always based on the last image of ES native
-ELASTICSEARCH_DOCKER_IMAGE=opensearchproject/opensearch:3.2.0
+ELASTICSEARCH_DOCKER_IMAGE=opensearchproject/opensearch:3.3.0
 
 ELASTICSEARCH_LOG_LEVEL=INFO
 
@@ -88,9 +88,14 @@ ELASTICSEARCH_BACKUP_PARTITION_CONFIG=../data/es_snapshot_backups/config_backup
 ELASTICSEARCH_SECURITY_DIR=../security/certificates/elastic/
 
 # MEMORY CONFIG
-ELASTICSEARCH_JAVA_OPTS="-Xms2048m -Xmx2048m -Des.failure_store_feature_flag_enabled=true"
+ELASTICSEARCH_JAVA_OPTS="-Xms512m -Xmx512m -Des.failure_store_feature_flag_enabled=true"
+
+ELASTICSEARCH_DOCKER_CPU_MIN=1
+ELASTICSEARCH_DOCKER_CPU_MAX=1
+ELASTICSEARCH_DOCKER_RAM=1g
+
+ELASTICSEARCH_DOCKER_SHM_SIZE=512m
 
-ELASTICSEARCH_SHM_SIZE="1g"
 ELASTICSEARCH_DOCKER_LOG_SIZE_PER_FILE="1000m"
 ELASTICSEARCH_DOCKER_LOG_NUM_FILES=10
 
@@ -140,9 +145,6 @@ ELASTICSEARCH_HOSTS='["https://elasticsearch-1:9200","https://elasticsearch-2:92
 
 KIBANA_HOST="https://kibana:5601"
 
-KIBANA_SERVER_NAME="cogstack-kibana"
-
-
 ########################################################################## KIBANA Env vars ###########################################################################
 # NOTE: some variables from the Elasticsearch section are used
 # - ${ELASTICSEARCH_VERSION} is used for certificate paths, as well as kibana.yml config path.
@@ -158,15 +160,15 @@ KIBANA_VERSION=opensearch-dashboards
 # - kibana
 # - opensearch_dashboards
 # make note of the underscore...
-KIBANA_CONFIG_FILE_VERSION=opensearch_dashboards
+KIBANA_CONFIG_FILE_VERSION=opensearch_dashboards
 
 # possible values:
 # - elasticsearch : docker.elastic.co/kibana/kibana:8.18.2
 # - elasticsearch (custom cogstack image) : cogstacksystems/cogstack-kibana:latest
-# - opensearch : opensearchproject/opensearch-dashboards:3.2.0
+# - opensearch : opensearchproject/opensearch-dashboards:3.3.0
 # the custom cogstack image is always based on the last image of ES native
-ELASTICSEARCH_KIBANA_DOCKER_IMAGE=opensearchproject/opensearch-dashboards:3.2.0
+ELASTICSEARCH_KIBANA_DOCKER_IMAGE=opensearchproject/opensearch-dashboards:3.3.0
 
 KIBANA_SERVER_NAME="cogstack-kibana"
 KIBANA_PUBLIC_BASE_URL="https://elasticsearch-1:5601"
@@ -174,7 +176,11 @@ KIBANA_PUBLIC_BASE_URL="https://elasticsearch-1:5601"
 KIBANA_SERVER_HOST="0.0.0.0"
 KIBANA_SERVER_OUTPUT_PORT=5601
 
-KIBANA_SHM_SIZE="1g"
+KIBANA_DOCKER_SHM_SIZE=512m
+KIBANA_DOCKER_CPU_MIN=1
+KIBANA_DOCKER_CPU_MAX=1
+KIBANA_DOCKER_RAM=1g
+
 
 # this is used in Kibana
 # it needs to be generated via the API
@@ -201,6 +207,10 @@ ELASTICSEARCH_XPACK_SECURITY_REPORTING_ENCRYPTION_KEY="e0Y1gTxHWOopIWMTtpjQsDS6K
 
 METRICBEAT_IMAGE="docker.elastic.co/beats/metricbeat:8.18.2"
 
+METRICBEAT_DOCKER_SHM=512m
+METRICBEAT_DOCKER_CPU_MIN=1
+METRICBEAT_DOCKER_CPU_MAX=1
+METRICBEAT_DOCKER_RAM=1g
 
 ########################################################################## FILEBEAT Env vars ###########################################################################
@@ -213,3 +223,9 @@ FILEBEAT_STARTUP_COMMAND="-e --strict.perms=false"
 
 FILEBEAT_HOST="https://elasticsearch-1:9200"
 
 FILEBEAT_IMAGE="docker.elastic.co/beats/filebeat:8.18.2"
+
+
+FILEBEAT_DOCKER_SHM=512m
+FILEBEAT_DOCKER_CPU_MIN=1
+FILEBEAT_DOCKER_CPU_MAX=1
+FILEBEAT_DOCKER_RAM=1g
diff --git a/deploy/export_env_vars.sh b/deploy/export_env_vars.sh
index ea8266095..58b446543 100755
--- a/deploy/export_env_vars.sh
+++ b/deploy/export_env_vars.sh
@@ -3,12 +3,15 @@
 # Enable strict mode (without -e to avoid exit-on-error)
 set -uo pipefail
 
+# Support being sourced in shells where BASH_SOURCE is unset (e.g. zsh)
+SCRIPT_SOURCE="${BASH_SOURCE[0]-$0}"
+SCRIPT_DIR="$(cd "$(dirname "$SCRIPT_SOURCE")" && pwd)"
+SCRIPT_NAME="$(basename "$SCRIPT_SOURCE")"
-echo "🔧 Running $(basename "${BASH_SOURCE[0]}")..."
+echo "🔧 Running $SCRIPT_NAME..."
 
 set -a
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 DEPLOY_DIR="$SCRIPT_DIR"
 SECURITY_DIR="$SCRIPT_DIR/../security/env"
 SERVICES_DIR="$SCRIPT_DIR/../services"
@@ -38,6 +41,18 @@ env_files=(
   "$SERVICES_DIR/cogstack-nlp/medcat-service/env/medcat.env"
 )
 
+LINT_SCRIPT="$SCRIPT_DIR/../nifi/user_scripts/utils/lint_env.py"
+
+if [ -x "$LINT_SCRIPT" ]; then
+  echo "🔍 Validating env files..."
+  if ! python3 "$LINT_SCRIPT" "${env_files[@]}"; then
+    echo "❌ Env validation failed. Fix the errors above before continuing."
+    return 1 2>/dev/null || exit 1  # return when sourced, exit when executed directly
+  fi
+else
+  echo "⚠️ Skipping env validation; $LINT_SCRIPT not found or not executable."
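+  # a missing linter is not fatal: the env files below are still sourced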
+fi
+
 for env_file in "${env_files[@]}"; do
   if [ -f "$env_file" ]; then
     echo "✅ Sourcing $env_file"
@@ -56,4 +71,4 @@ set +a
 
 # Restore safe defaults for interactive/dev shell
 set +u
-set +o pipefail
\ No newline at end of file
+set +o pipefail
diff --git a/deploy/gitea.env b/deploy/gitea.env
index 0009d5759..e2ef85779 100644
--- a/deploy/gitea.env
+++ b/deploy/gitea.env
@@ -23,3 +23,8 @@ GITEA_LOCAL_PUB_KEY_PATH="$GITEA_LOCAL_KEY_PATH.pub"
 GITEA_LOCAL_KEY_TITLE="gitea-cogstack-$(hostname)-$(date +%s)"
 
 GITEA_DEFAULT_MAIN_REMOTE_NAME="cogstack-gitea"
+
+GITEA_DOCKER_SHM_SIZE=512m
+GITEA_DOCKER_CPU_MIN=1
+GITEA_DOCKER_CPU_MAX=1
+GITEA_DOCKER_RAM=1g
diff --git a/deploy/nginx.env b/deploy/nginx.env
index aae2c825d..a08762ca2 100644
--- a/deploy/nginx.env
+++ b/deploy/nginx.env
@@ -1,3 +1,9 @@
 NGINX_KIBANA_HOST=kibana
 NGINX_KIBANA_PROXY_PORT=5601
 NGINX_ES_NODE_SOURCE_INSTANCE_NAME="elasticsearch-1"
+
+
+NGINX_DOCKER_SHM_SIZE=1g
+NGINX_DOCKER_CPU_MIN=1
+NGINX_DOCKER_CPU_MAX=1
+NGINX_DOCKER_RAM=1g
diff --git a/deploy/nifi.env b/deploy/nifi.env
index 35826b8da..fd7e83062 100644
--- a/deploy/nifi.env
+++ b/deploy/nifi.env
@@ -1,3 +1,29 @@
+
+
+##############################################################################################################################
+# IMPORTANT SETTINGS FOR RESOURCE-SCOPED DEPLOYMENTS
+##############################################################################################################################
+NIFI_JVM_OPTS="-XX:+UseG1GC -XX:MaxGCPauseMillis=250 -XX:+ParallelRefProcEnabled -Djava.security.egd=file:/dev/./urandom"
+NIFI_JVM_HEAP_INIT=768m
+NIFI_JVM_HEAP_MAX=1g
+
+
+NIFI_DOCKER_SHM_SIZE=1g
+NIFI_DOCKER_REGISTRY_SHM_SIZE=1g
+
+NIFI_DOCKER_CPU_MIN=1
+NIFI_DOCKER_CPU_MAX=1
+NIFI_DOCKER_RAM=1g
+
+NIFI_REGISTRY_DOCKER_CPU_MIN=1
+NIFI_REGISTRY_DOCKER_CPU_MAX=1
+NIFI_REGISTRY_DOCKER_RAM=1g
+
+NIFI_DOCKER_LOG_SIZE_PER_FILE="250m"
+NIFI_DOCKER_LOG_NUM_FILES=10
+
+##############################################################################################################################
+
 # NiFi
 NIFI_ENV_FILE="./nifi.env"
 NIFI_SECURITY_DIR="../security/certificates/nifi/"
@@ -6,11 +32,6 @@ NIFI_DATA_PATH="../data/"
 NIFI_VERSION="2.6.0"
 NIFI_TOOLKIT_VERSION=$NIFI_VERSION
 
-NIFI_SHM_SIZE="1g"
-NIFI_REGISTRY_SHM_SIZE="1g"
-NIFI_DOCKER_LOG_SIZE_PER_FILE="250m"
-NIFI_DOCKER_LOG_NUM_FILES=10
-
 #### Port and network settings
 NIFI_WEB_PROXY_CONTEXT_PATH="/nifi"
diff --git a/deploy/services-dev.yml b/deploy/services-dev.yml
index 24d944bd3..2496c103a 100644
--- a/deploy/services-dev.yml
+++ b/deploy/services-dev.yml
@@ -1,3 +1,67 @@
+#---------------------------------------------------------------------------#
+# Common snippets / anchors                                                  #
+#---------------------------------------------------------------------------#
+x-nifi-logging-common: &nifi-logging-common
+  driver: "json-file"
+  options:
+    max-size: ${NIFI_DOCKER_LOG_SIZE_PER_FILE:-250m}
+    max-file: ${NIFI_DOCKER_LOG_NUM_FILES:-10}
+
+x-logging-common: &logging-common
+  driver: "json-file"
+  options:
+    max-size: ${DOCKER_LOG_SIZE_PER_FILE:-100m}
+    max-file: ${DOCKER_LOG_NUM_FILES:-10}
+
+x-all-env: &all-env
+  - ./project.env
+  - ./general.env
+  - ./nifi.env
+  - ./gitea.env
+  - ./nginx.env
+  - ./database.env
+  - ./elasticsearch.env
+  - ./network_settings.env
+  - ../security/env/users_nifi.env
+  - ../security/env/users_database.env
+  - ../security/env/users_nginx.env
+  - ../security/env/users_elasticsearch.env
+  - ../security/env/certificates_general.env
+  - ../security/env/certificates_elasticsearch.env
+  - ../security/env/certificates_nifi.env
+
+x-es-env: &es-env
+  - ./network_settings.env
+  - ./elasticsearch.env
+  - ../security/env/users_elasticsearch.env
+  - ../security/env/certificates_elasticsearch.env
+
+x-common-hosts: &common-hosts
+  - ${ELASTICSEARCH_1_HOST_NAME:-test-1:0.0.0.0}
+  - ${ELASTICSEARCH_2_HOST_NAME:-test-2:0.0.0.0}
+  - ${ELASTICSEARCH_3_HOST_NAME:-test-3:0.0.0.0}
+  - ${KIBANA_HOST_NAME:-test-4:0.0.0.0}
+  - ${NIFI_HOST_NAME:-test-5:0.0.0.0}
+  - ${NIFI_REGISTRY_HOST_NAME:-test-6:0.0.0.0}
+
+x-common-ulimits: &common-ulimits
+  ulimits:
+    nofile:
+      soft: 65535
+      hard: 65535
+    nproc: 65535
+    memlock:
+      soft: -1
+      hard: -1
+
+x-nifi-common: &nifi-common
+  <<: *common-ulimits
+  restart: always
+  env_file: *all-env
+  extra_hosts: *common-hosts
+  networks:
+    - cognet
+
 #---------------------------------------------------------------------------#
 # Used services                                                              #
 #---------------------------------------------------------------------------#
@@ -7,7 +71,7 @@ services:
   # NiFi webapp                                                              #
   #---------------------------------------------------------------------------#
   nifi:
-    # image: cogstacksystems/cogstack-nifi:latest
+    <<: *nifi-common
     build:
       context: ../nifi/
       args:
        HTTP_PROXY: $HTTP_PROXY
        HTTPS_PROXY: $HTTPS_PROXY
        no_proxy: $no_proxy
     container_name: cogstack-nifi
     hostname: nifi
-    restart: always
-    env_file:
-      - ./general.env
-      - ./project.env
-      - ./nifi.env
-      - ./elasticsearch.env
-      - ./network_settings.env
-      - ../security/users_nifi.env
-      - ../security/users_elasticsearch.env
-      - ../security/certificates_general.env
-      - ../security/certificates_elasticsearch.env
-      - ../security/certificates_nifi.env
-    shm_size: 1024mb
+    shm_size: ${NIFI_DOCKER_SHM_SIZE:-1g}
     environment:
       - USER_ID=${NIFI_UID:-1000}
       - GROUP_ID=${NIFI_GID:-1000}
@@ -37,12 +89,10 @@
       - NIFI_INTERNAL_PORT=${NIFI_INTERNAL_PORT:-8443}
       - NIFI_OUTPUT_PORT=${NIFI_OUTPUT_PORT:-8082}
       - NIFI_INPUT_SOCKET_PORT=${NIFI_INPUT_SOCKET_PORT:-10000}
-      - NIFI_SECURITY_DIR=${NIFI_SECURITY_DIR:-../security/nifi_certificates/}
-      - ELASTICSEARCH_SECURITY_DIR=${ELASTICSEARCH_SECURITY_DIR:-../security/es_certificates/}
     volumes:
       # INFO: drivers folder
       - ../nifi/drivers:/opt/nifi/drivers
-
+
       # INFO: if there are local changes, map these content from local host to container
       # (normally, these 3 directories below are bundled with our NiFi image)
       # N.B. The container user may not have the permission to read these directories/files.
@@ -50,17 +100,14 @@
       - ../nifi/user-scripts:/opt/nifi/user-scripts:rw
       - ../nifi/user-schemas:/opt/nifi/user-schemas:rw
 
-      # this is a direct mapping to where we store the NiFi python processors as of NiFi 2.0.x
+      # this is a direct mapping to where we store the NiFi python processors as of NiFi 2.x.x
       - ../nifi/user-python-extensions:/opt/nifi/nifi-current/python_extensions:rw
 
       # INFO: uncomment below to map security certificates if need to secure NiFi endpoints
-      - ./${NIFI_SECURITY_DIR:-../security/nifi_certificates/}:/opt/nifi/nifi-current/nifi_certificates:ro
-      - ./${ELASTICSEARCH_SECURITY_DIR:-../security/es_certificates/}:/opt/nifi/nifi-current/es_certificates:ro
-      - ./${NIFI_SECURITY_DIR:-../security/nifi_certificates/}nifi-keystore.jks:/opt/nifi/nifi-current/conf/keystore.jks
-      - ./${NIFI_SECURITY_DIR:-../security/nifi_certificates/}nifi-truststore.jks:/opt/nifi/nifi-current/conf/truststore.jks
+      - ../security:/security:ro
 
       # Security credentials scripts
-      - ../security/nifi_create_single_user_auth.sh:/opt/nifi/nifi-current/security_scripts/nifi_create_single_user_auth.sh:ro
+      - ../security/scripts/nifi_create_single_user_auth.sh:/opt/nifi/nifi-current/security_scripts/nifi_create_single_user_auth.sh:ro
 
       # # Nifi properties file:
       - ../nifi/conf/:/opt/nifi/nifi-current/conf/:rw
@@ -72,7 +119,7 @@
       - ../services/cogstack-db/:/opt/cogstack-db/:rw
 
       # medcat models
-      - ./${RES_MEDCAT_SERVICE_MODEL_PRODUCTION_PATH:-../services/nlp-services/medcat-service/models/}:/opt/models:rw
+      - ./${RES_MEDCAT_SERVICE_MODEL_PRODUCTION_PATH:-../services/cogstack-nlp/medcat-service/models/}:/opt/models:rw
 
       # rest of volumes to persist the state
       - nifi-vol-logs:/opt/nifi/nifi-current/logs
@@ -85,51 +132,23 @@
       # errors generated during data processing
       - nifi-vol-errors:/opt/nifi/pipeline/flowfile-errors
 
-    extra_hosts:
-      - ${ELASTICSEARCH_1_HOST_NAME:-test-1:0.0.0.0}
-      - ${ELASTICSEARCH_2_HOST_NAME:-test-2:0.0.0.0}
-      - ${ELASTICSEARCH_3_HOST_NAME:-test-3:0.0.0.0}
-      - ${KIBANA_HOST_NAME:-test-4:0.0.0.0}
-      - ${NIFI_HOST_NAME:-test-5:0.0.0.0}
-      - ${NIFI_REGISTRY_HOST_NAME:-test-6:0.0.0.0}
-
-    # user: "${NIFI_UID:-1000}:${NIFI_GID:-1000}"
-    ulimits:
-      memlock:
-        soft: -1
-        hard: -1
-      nofile:
-        soft: 65536
-        hard: 262144
-
     # INFO : Uncomment the below line to generate your own USERNAME and PASSWORD,
     # a bit messy this way as you will need to copy the credentials back
     # to the "login-identity-providers.xml" section.
     # entrypoint: bash -c "/opt/nifi/nifi-current/bin/nifi.sh set-single-user-credentials admin admincogstacknifi"
-
     tty: true
     ports:
       - "${NIFI_OUTPUT_PORT:-8082}:${NIFI_INTERNAL_PORT:-8443}"
       - "${NIFI_INPUT_SOCKET_PORT:-10000}"
-    networks:
-      - cognet
-
+    logging: *nifi-logging-common
+
   nifi-registry-flow:
+    <<: *nifi-common
     image: apache/nifi-registry:${NIFI_REGISTRY_VERSION:-2.6.0}
     hostname: nifi-registry
     container_name: cogstack-nifi-registry-flow
-    restart: always
+    shm_size: ${NIFI_DOCKER_REGISTRY_SHM_SIZE:-1g}
     user: root
-    env_file:
-      - ./general.env
-      - ./network_settings.env
-      - ./nifi.env
-      - ./project.env
-      - ../security/users_nifi.env
-      - ../security/users_elasticsearch.env
-      - ../security/certificates_general.env
-      - ../security/certificates_elasticsearch.env
-      - ../security/certificates_nifi.env
     environment:
       - http_proxy=$HTTP_PROXY
       - https_proxy=$HTTPS_PROXY
@@ -143,31 +162,20 @@
       - TRUSTSTORE_PATH=${NIFI_REGISTRY_TRUSTSTORE_PATH:-./conf/truststore.jks}
       - TRUSTSTORE_TYPE=${NIFI_TRUSTSTORE_TYPE:-jks}
 
-      - INITIAL_ADMIN_IDENTITY=${NIFI_INITIAL_ADMIN_IDENTITY:-"CN=admin, OU=nifi"}
+      - INITIAL_ADMIN_IDENTITY=${NIFI_INITIAL_ADMIN_IDENTITY:-"cogstack"}
       - AUTH=${NIFI_AUTH:-"tls"}
       - NIFI_REGISTRY_DB_DIR=${NIFI_REGISTRY_DB_DIR:-/opt/nifi-registry/nifi-registry-current/database}
       #- NIFI_REGISTRY_FLOW_PROVIDER=${NIFI_REGISTRY_FLOW_PROVIDER:-file}
       - NIFI_REGISTRY_FLOW_STORAGE_DIR=${NIFI_REGISTRY_FLOW_STORAGE_DIR:-/opt/nifi-registry/nifi-registry-current/flow_storage}
     volumes:
       - ../nifi/nifi-registry/:/opt/nifi-registry/nifi-registry-current/conf/:rw
-      - ./${NIFI_SECURITY_DIR:-../security/nifi_certificates/}nifi-keystore.jks:/opt/nifi-registry/nifi-registry-current/conf/keystore.jks:ro
-      - ./${NIFI_SECURITY_DIR:-../security/nifi_certificates/}nifi-truststore.jks://opt/nifi-registry/nifi-registry-current/conf/truststore.jks:ro
+      - ./${NIFI_SECURITY_DIR:-../security/certificates/nifi/}nifi-keystore.jks:/opt/nifi-registry/nifi-registry-current/conf/keystore.jks:ro
+      - ./${NIFI_SECURITY_DIR:-../security/certificates/nifi/}nifi-truststore.jks:/opt/nifi-registry/nifi-registry-current/conf/truststore.jks:ro
       - nifi-registry-vol-database:/opt/nifi-registry/nifi-registry-current/database
       - nifi-registry-vol-flow-storage:/opt/nifi-registry/nifi-registry-current/flow_storage
       - nifi-registry-vol-work:/opt/nifi-registry/nifi-registry-current/work
       - nifi-registry-vol-logs:/opt/nifi-registry/nifi-registry-current/logs
-    extra_hosts:
-      - ${NIFI_HOST_NAME:-test-5:0.0.0.0}
-      - ${NIFI_REGISTRY_HOST_NAME:-test-6:0.0.0.0}
-
-    ulimits:
-      memlock:
-        soft: -1
-        hard: -1
-      nofile:
-        soft: 65536
-        hard: 262144
-
+    extra_hosts: *common-hosts
     tty: true
     ports:
       - "${NIFI_REGISTRY_FLOW_OUTPUT_PORT:-8083}:${NIFI_REGISTRY_FLOW_INPUT_PORT:-18443}"
@@ -177,43 +185,27 @@
       chown -R nifi:nifi /opt/nifi-registry/nifi-registry-current/work && \
       chown -R nifi:nifi /opt/nifi-registry/nifi-registry-current/logs && \
       bash /opt/nifi-registry/scripts/start.sh"
-
-    networks:
-      - cognet
-
+    logging: *nifi-logging-common
+
   nifi-nginx:
-    # image: cogstacksystems/nifi-nginx:latest
-    build:
-      context: ../services/nginx/
-      args:
-        HTTP_PROXY: $HTTP_PROXY
-        HTTPS_PROXY: $HTTPS_PROXY
-        no_proxy: $no_proxy
+    image: cogstacksystems/nifi-nginx:latest
     container_name: cogstack-nifi-nginx
     restart: always
-    env_file:
-      - ./network_settings.env
-      - ./nginx.env
-      - ./nifi.env
-      - ./elasticsearch.env
-      - ./project.env
-      - ./nlp_service.env
+    shm_size: 512mb
+    env_file: *all-env
     volumes:
       - ../services/nginx/sites-enabled:/etc/nginx/sites-enabled:ro
       - ../services/nginx/config/nginx.conf.template:/etc/nginx/config/nginx.conf.template:rw
       - ../services/nginx/config/nginx.conf:/etc/nginx/nginx.conf:rw
-      - ../security/root_certificates:/etc/nginx/root_certificates:ro
-      - ../security/nifi_certificates:/etc/nginx/nifi_certificates:ro
-
-      - ../security/es_certificates/${ELASTICSEARCH_VERSION:-opensearch}/elastic-stack-ca.crt.pem:/etc/nginx/es_certificates/elastic-stack-ca.crt.pem:ro
-      - ../security/es_certificates/${ELASTICSEARCH_VERSION:-opensearch}/elastic-stack-ca.key.pem:/etc/nginx/es_certificates/elastic-stack-ca.key.pem:ro
-      # - ../security/es_certificates/:/etc/nginx/es_certificates/:ro
+      - ../security/certificates:/certificates:ro
     ports:
      - "${NIFI_EXTERNAL_PORT_NGINX:-8443}:${NIFI_INTERNAL_PORT_NGINX:-8443}"
      - "${NIFI_REGISTRY_EXTERNAL_PORT_NGINX:-18443}:${NIFI_REGISTRY_INTERNAL_PORT_NGINX:-18443}"
     networks:
       - cognet
     command: /bin/bash -c "envsubst < /etc/nginx/config/nginx.conf.template > /etc/nginx/config/nginx.conf && nginx -g 'daemon off;'"
+    extra_hosts: *common-hosts
+    logging: *nifi-logging-common
 
 #---------------------------------------------------------------------------#
 # Docker named volumes                                                       #
 #---------------------------------------------------------------------------#
@@ -249,7 +241,6 @@ volumes:
     driver: local
   nifi-registry-vol-logs:
     driver: local
-
 #---------------------------------------------------------------------------#
 # Docker networks.                                                           #
 #---------------------------------------------------------------------------#
diff --git a/deploy/services.yml b/deploy/services.yml
index 362308a57..8e6bf1c85 100644
--- a/deploy/services.yml
+++ b/deploy/services.yml
@@ -42,6 +42,10 @@ x-es-env: &es-env
   - ../security/env/users_elasticsearch.env
   - ../security/env/certificates_elasticsearch.env
 
+x-db-env: &db-env
+  - ./database.env
+  - ../security/env/users_database.env
+
 x-common-hosts: &common-hosts
   - ${ELASTICSEARCH_1_HOST_NAME:-test-1:0.0.0.0}
   - ${ELASTICSEARCH_2_HOST_NAME:-test-2:0.0.0.0}
   - ${ELASTICSEARCH_3_HOST_NAME:-test-3:0.0.0.0}
@@ -68,6 +72,20 @@ x-nifi-common: &nifi-common
   networks:
     - cognet
 
+x-db-common: &db-common
+  <<: *common-ulimits
+  shm_size: ${DATABASE_DOCKER_SHM_SIZE:-"1g"}
+  restart: unless-stopped
+  env_file: *db-env
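+  # assumes Compose v2: deploy.resources limits are applied on plain "docker compose up" (no Swarm required)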
+  deploy:
+    resources:
+      limits:
+        cpus: "${DATABASE_DOCKER_CPU_MAX}"
+        memory: "${DATABASE_DOCKER_RAM}"
+      reservations:
+        cpus: "${DATABASE_DOCKER_CPU_MIN}"
+        memory: "${DATABASE_DOCKER_RAM}"
+
 x-es-common-volumes: &es-common-volumes
   # Shared configs
   - ../services/elasticsearch/config/${ELASTICSEARCH_VERSION:-opensearch}.yml:/usr/share/${ELASTICSEARCH_VERSION:-opensearch}/config/${ELASTICSEARCH_VERSION:-opensearch}.yml:ro
@@ -93,9 +111,9 @@ x-es-common: &es-common
   <<: *common-ulimits
-  image: ${ELASTICSEARCH_DOCKER_IMAGE:-opensearchproject/opensearch:3.2.0}
-  shm_size: ${ELASTICSEARCH_SHM_SIZE:-"1g"}
-  restart: always
+  image: ${ELASTICSEARCH_DOCKER_IMAGE:-opensearchproject/opensearch:3.3.0}
+  shm_size: ${ELASTICSEARCH_DOCKER_SHM_SIZE:-1g}
+  restart: unless-stopped
   env_file: *es-env
   networks:
     - cognet
@@ -108,12 +126,21 @@
     OPENSEARCH_INITIAL_ADMIN_PASSWORD: ${OPENSEARCH_INITIAL_ADMIN_PASSWORD:-kibanaserver}
     ELASTICSEARCH_VERSION: ${ELASTICSEARCH_VERSION:-opensearch}
   logging: *es-logging-common
+  deploy:
+    resources:
+      limits:
+        cpus: "${ELASTICSEARCH_DOCKER_CPU_MAX}"
+        memory: "${ELASTICSEARCH_DOCKER_RAM}"
+      reservations:
+        cpus: "${ELASTICSEARCH_DOCKER_CPU_MIN}"
+        memory: "${ELASTICSEARCH_DOCKER_RAM}"
 
 x-metricbeat-common: &metricbeat-common
   <<: *common-ulimits
   image: ${METRICBEAT_IMAGE:-docker.elastic.co/beats/metricbeat:8.18.2}
   command: -e --strict.perms=false
   restart: unless-stopped
+  shm_size: ${METRICBEAT_DOCKER_SHM:-1g}
   env_file:
     - ./elasticsearch.env
     - ../security/env/users_elasticsearch.env
@@ -122,6 +149,14 @@
     - METRICBEAT_USER=${METRICBEAT_USER:-elastic}
     - METRICBEAT_PASSWORD=${METRICBEAT_PASSWORD:-kibanaserver}
     - KIBANA_HOST=${KIBANA_HOST:-"https://kibana:5601"}
+  deploy:
+    resources:
+      limits:
+        cpus: "${METRICBEAT_DOCKER_CPU_MAX}"
+        memory: "${METRICBEAT_DOCKER_RAM}"
+      reservations:
+        cpus: "${METRICBEAT_DOCKER_CPU_MIN}"
+        memory: "${METRICBEAT_DOCKER_RAM}"
   volumes:
     - ../services/metricbeat/metricbeat.yml:/usr/share/metricbeat/metricbeat.yml:ro
     - ../security/certificates/elastic/elasticsearch/elastic-stack-ca.crt.pem:/usr/share/metricbeat/root-ca.crt:ro
@@ -136,6 +171,7 @@
 x-filebeat-common: &filebeat-common
   image: ${FILEBEAT_IMAGE:-docker.elastic.co/beats/filebeat:8.18.2}
   command: ${FILEBEAT_STARTUP_COMMAND:-'-e --strict.perms=false'}
   restart: unless-stopped
+  shm_size: ${FILEBEAT_DOCKER_SHM:-1g}
   env_file:
     - ./elasticsearch.env
     - ../security/env/users_elasticsearch.env
@@ -144,6 +180,14 @@
     - FILEBEAT_USER=${FILEBEAT_USER:-elastic}
     - FILEBEAT_PASSWORD=${FILEBEAT_PASSWORD:-kibanaserver}
     - KIBANA_HOST=${KIBANA_HOST:-"https://kibana:5601"}
+  deploy:
+    resources:
+      limits:
+        cpus: "${FILEBEAT_DOCKER_CPU_MAX}"
+        memory: "${FILEBEAT_DOCKER_RAM}"
+      reservations:
+        cpus: "${FILEBEAT_DOCKER_CPU_MIN}"
+        memory: "${FILEBEAT_DOCKER_RAM}"
   volumes:
     - ../services/filebeat/filebeat.yml:/usr/share/filebeat/filebeat.yml:rw
     - ../security/certificates/elastic/elasticsearch/elastic-stack-ca.crt.pem:/etc/pki/root/root-ca.crt:ro
@@ -162,15 +206,10 @@ services:
   #---------------------------------------------------------------------------#
   # Postgres container with sample data                                       #
   #---------------------------------------------------------------------------#
   samples-db:
-    <<: *common-ulimits
+    <<: *db-common
     image: postgres:17.5-alpine
     container_name: cogstack-samples-db
-    shm_size: ${POSTGRES_SHM_SIZE:-"1g"}
-    restart: always
     platform: linux/amd64
-    env_file:
-      - ./database.env
-      - ../security/env/users_database.env
     environment:
       # PG env vars
       - POSTGRES_USER=${POSTGRES_USER_SAMPLES:-test}
@@ -194,19 +233,14 @@
   #---------------------------------------------------------------------------#
   # CogStack Databank / Cogstack-DB, production database                      #
   #---------------------------------------------------------------------------#
   cogstack-databank-db:
-    <<: *common-ulimits
+    <<: *db-common
     image: postgres:17.5-alpine
     container_name: cogstack-production-databank-db
-    shm_size: ${POSTGRES_SHM_SIZE:-"1g"}
-    restart: always
     platform: linux/amd64
-    env_file:
-      - ./database.env
-      - ../security/env/users_database.env
     environment:
-      - POSTGRES_USER=${POSTGRES_USER:-admin}
-      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-admin}
-      - POSTGRES_DATABANK_DB=${POSTGRES_DATABANK_DB:-cogstack}
+      - POSTGRES_USER=${DATABASE_USER:-admin}
+      - POSTGRES_PASSWORD=${DATABASE_PASSWORD:-admin}
+      - POSTGRES_DATABANK_DB=${DATABASE_DB_NAME:-cogstack}
     volumes:
       # mapping postgres data dump and initialization
       - ../services/cogstack-db/pgsql/schemas:/data/:ro
@@ -222,14 +256,9 @@
       - cognet
 
   cogstack-databank-db-mssql:
-    <<: *common-ulimits
+    <<: *db-common
     image: mcr.microsoft.com/mssql/server:2019-latest
     container_name: cogstack-production-databank-db-mssql
-    shm_size: ${POSTGRES_SHM_SIZE:-"1g"}
-    restart: always
-    env_file:
-      - ./database.env
-      - ../security/env/users_database.env
     environment:
       - ACCEPT_EULA=y
       - MSSQL_SA_USER=${MSSQL_SA_USER:-sa}
@@ -252,7 +281,7 @@
   es_native_create_certs:
     container_name: es_create_certs
     image: docker.elastic.co/elasticsearch/elasticsearch:8.18.2
-    shm_size: ${ELASTICSEARCH_SHM_SIZE:-"1g"}
+    shm_size: ${ELASTICSEARCH_DOCKER_SHM_SIZE:-1g}
    env_file: *es-env
     restart: "no"
     command: bash -c "bash /usr/share/elasticsearch/es_native_cert_generator.sh"
@@ -287,7 +316,7 @@
     ports:
       - "${ELASTICSEARCH_NODE_1_OUTPUT_PORT:-9200}:9200"
       - "${ELASTICSEARCH_NODE_1_COMM_OUTPUT_PORT:-9300}:9300"
-      - "${ELASTICSEARCH_NODE_1_ANALYZER_OUTPUT_PORT:-9600}:9600" # required for Performance Analyzer
+      - "${ELASTICSEARCH_NODE_1_ANALYZER_OUTPUT_PORT:-9600}:9600"
 
   elasticsearch-2:
     extends:
@@ -306,7 +335,7 @@
     ports:
       - "${ELASTICSEARCH_NODE_2_OUTPUT_PORT:-9201}:9200"
       - "${ELASTICSEARCH_NODE_2_COMM_OUTPUT_PORT:-9301}:9300"
-      - "${ELASTICSEARCH_NODE_2_ANALYZER_OUTPUT_PORT:-9601}:9600" # required for Performance Analyzer
+      - "${ELASTICSEARCH_NODE_2_ANALYZER_OUTPUT_PORT:-9601}:9600"
 
   elasticsearch-3:
     extends:
@@ -325,7 +354,7 @@
     ports:
       - "${ELASTICSEARCH_NODE_3_OUTPUT_PORT:-9202}:9200"
       - "${ELASTICSEARCH_NODE_3_COMM_OUTPUT_PORT:-9302}:9300"
-      - "${ELASTICSEARCH_NODE_3_ANALYZER_OUTPUT_PORT:-9602}:9600" # required for Performance Analyzer
+      - "${ELASTICSEARCH_NODE_3_ANALYZER_OUTPUT_PORT:-9602}:9600"
 
   metricbeat-1:
     <<: *metricbeat-common
@@ -389,9 +418,9 @@
   #---------------------------------------------------------------------------#
   kibana:
     <<: *common-ulimits
-    image: ${ELASTICSEARCH_KIBANA_DOCKER_IMAGE:-opensearchproject/opensearch-dashboards:3.2.0}
+    image: ${ELASTICSEARCH_KIBANA_DOCKER_IMAGE:-opensearchproject/opensearch-dashboards:3.3.0}
     container_name: cogstack-kibana
-    shm_size: ${KIBANA_SHM_SIZE:-"1g"}
+    shm_size: ${KIBANA_DOCKER_SHM_SIZE:-1g}
     restart: always
     env_file: *es-env
     environment:
@@ -401,7 +430,14 @@
       # INFO: uncomment below to enable SSL keys
       SERVER_SSL_ENABLED: ${ELASTICSEARCH_SSL_ENABLED:-"true"}
       OPENSEARCH_INITIAL_ADMIN_PASSWORD: ${OPENSEARCH_INITIAL_ADMIN_PASSWORD:-kibanaserver}
-
+    deploy:
+      resources:
+        limits:
+          cpus: "${KIBANA_DOCKER_CPU_MAX}"
+          memory: "${KIBANA_DOCKER_RAM}"
+        reservations:
+          cpus: "${KIBANA_DOCKER_CPU_MIN}"
+          memory: "${KIBANA_DOCKER_RAM}"
     volumes:
       # INFO: Kibana configuration mapped via volume (make sure to comment this and uncomment the next line if you are using NATIVE kibana deployment)
       - ../services/kibana/config/${ELASTICSEARCH_VERSION:-opensearch}.yml:/usr/share/${KIBANA_VERSION:-opensearch-dashboards}/config/${KIBANA_CONFIG_FILE_VERSION:-opensearch_dashboards}.yml:ro
@@ -434,7 +470,7 @@
     image: cogstacksystems/cogstack-nifi:latest
     container_name: cogstack-nifi
     hostname: nifi
-    shm_size: ${NIFI_SHM_SIZE:-"1g"}
+    shm_size: ${NIFI_DOCKER_SHM_SIZE:-"1g"}
     environment:
       - USER_ID=${NIFI_UID:-1000}
       - GROUP_ID=${NIFI_GID:-1000}
@@ -443,6 +479,15 @@
       - NIFI_INTERNAL_PORT=${NIFI_INTERNAL_PORT:-8443}
       - NIFI_OUTPUT_PORT=${NIFI_OUTPUT_PORT:-8082}
       - NIFI_INPUT_SOCKET_PORT=${NIFI_INPUT_SOCKET_PORT:-10000}
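+      # GC tuning only; heap sizing is expected to come from NIFI_JVM_HEAP_INIT/_MAX in nifi.env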
+      - JVM_OPTS="${NIFI_JVM_OPTS:--XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:+ParallelRefProcEnabled -Djava.security.egd=file:/dev/./urandom}"
+    deploy:
+      resources:
+        limits:
+          cpus: "${NIFI_DOCKER_CPU_MAX}"
+          memory: "${NIFI_DOCKER_RAM}"
+        reservations:
+          cpus: "${NIFI_DOCKER_CPU_MIN}"
+          memory: "${NIFI_DOCKER_RAM}"
     volumes:
       # INFO: drivers folder
       - ../nifi/drivers:/opt/nifi/drivers
 
@@ -450,12 +495,12 @@
       # INFO: if there are local changes, map these content from local host to container
       # (normally, these 3 directories below are bundled with our NiFi image)
       # N.B. The container user may not have the permission to read these directories/files.
-      - ../nifi/user-templates:/opt/nifi/nifi-current/conf/templates:rw
-      - ../nifi/user-scripts:/opt/nifi/user-scripts:rw
-      - ../nifi/user-schemas:/opt/nifi/user-schemas:rw
+      - ../nifi/user_templates:/opt/nifi/nifi-current/conf/templates:rw
+      - ../nifi/user_scripts:/opt/nifi/user_scripts:rw
+      - ../nifi/user_schemas:/opt/nifi/user_schemas:rw
 
       # this is a direct mapping to where we store the NiFi python processors as of NiFi 2.x.x
-      - ../nifi/user-python-extensions:/opt/nifi/nifi-current/python_extensions:rw
+      - ../nifi/user_python_extensions:/opt/nifi/nifi-current/python_extensions:rw
 
       # INFO: uncomment below to map security certificates if need to secure NiFi endpoints
       - ../security:/security:ro
@@ -501,7 +546,7 @@
     image: apache/nifi-registry:${NIFI_REGISTRY_VERSION:-2.6.0}
     hostname: nifi-registry
     container_name: cogstack-nifi-registry-flow
-    shm_size: ${NIFI_REGISTRY_SHM_SIZE:-"1g"}
+    shm_size: ${NIFI_DOCKER_REGISTRY_SHM_SIZE:-1g}
     user: root
     environment:
       - http_proxy=$HTTP_PROXY
@@ -521,6 +566,14 @@
       - NIFI_REGISTRY_DB_DIR=${NIFI_REGISTRY_DB_DIR:-/opt/nifi-registry/nifi-registry-current/database}
       #- NIFI_REGISTRY_FLOW_PROVIDER=${NIFI_REGISTRY_FLOW_PROVIDER:-file}
       - NIFI_REGISTRY_FLOW_STORAGE_DIR=${NIFI_REGISTRY_FLOW_STORAGE_DIR:-/opt/nifi-registry/nifi-registry-current/flow_storage}
+    deploy:
+      resources:
+        limits:
+          cpus: "${NIFI_REGISTRY_DOCKER_CPU_MAX}"
+          memory: "${NIFI_REGISTRY_DOCKER_RAM}"
+        reservations:
+          cpus: "${NIFI_REGISTRY_DOCKER_CPU_MIN}"
+          memory: "${NIFI_REGISTRY_DOCKER_RAM}"
     volumes:
       - ../nifi/nifi-registry/:/opt/nifi-registry/nifi-registry-current/conf/:rw
       - ./${NIFI_SECURITY_DIR:-../security/certificates/nifi/}nifi-keystore.jks:/opt/nifi-registry/nifi-registry-current/conf/keystore.jks:ro
@@ -545,8 +598,16 @@
     image: cogstacksystems/nifi-nginx:latest
     container_name: cogstack-nifi-nginx
     restart: always
-    shm_size: 512mb
+    shm_size: ${NGINX_DOCKER_SHM_SIZE:-1g}
     env_file: *all-env
+    deploy:
+      resources:
+        limits:
+          cpus: "${NGINX_DOCKER_CPU_MAX}"
+          memory: "${NGINX_DOCKER_RAM}"
+        reservations:
+          cpus: "${NGINX_DOCKER_CPU_MIN}"
+          memory: "${NGINX_DOCKER_RAM}"
     volumes:
       - ../services/nginx/sites-enabled:/etc/nginx/sites-enabled:ro
       - ../services/nginx/config/nginx.conf.template:/etc/nginx/config/nginx.conf.template:rw
@@ -587,12 +648,20 @@
     <<: *common-ulimits
     container_name: cogstack-gitea
     image: gitea/gitea:1.23-rootless
-    shm_size: ${DOCKER_SHM_SIZE:-"1g"}
+    shm_size: ${GITEA_DOCKER_SHM_SIZE:-"1g"}
     restart: always
     environment:
       - http_proxy=$HTTP_PROXY
       - https_proxy=$HTTPS_PROXY
       - no_proxy=$no_proxy
+    deploy:
+      resources:
+        limits:
+          cpus: "${GITEA_DOCKER_CPU_MAX}"
+          memory: "${GITEA_DOCKER_RAM}"
+        reservations:
+          cpus: "${GITEA_DOCKER_CPU_MIN}"
+          memory: "${GITEA_DOCKER_RAM}"
     volumes:
       # app config
       - ../services/gitea/app.ini:/etc/gitea/app.ini:rw
diff --git a/docs/nifi/main.md b/docs/nifi/main.md
index 42b1ae1fc..219465aaa 100644
--- a/docs/nifi/main.md
+++ b/docs/nifi/main.md
@@ -25,9 +25,9 @@ Avro Schema:[official documentation](https://avro.apache.org/docs/1.11.1/)
 ├── devel - custom folder that is mounted on the NiFi container where you may place your own scripts, again, read & write permissions required
 ├── drivers - drivers used for DB connections, currently PostgreSQL and MSSQL
 ├── nifi-app.log - log file mounted directly from the container for easy log checking
-├── user-schemas - Avro schemas used within workflows, it can also contain other schemas used in specific custom processors
-├── user-scripts - custom scripts used in workflows, you can put them here
-└── user-templates - here we store the fully exported templates of the workflows within NiFi
+├── user_schemas - Avro schemas used within workflows; may also contain other schemas used by specific custom processors
+├── user_scripts - custom scripts used in workflows; place your own here
+└── user_templates - here we store the fully exported templates of the workflows within NiFi
 ```
 
 ## Custom Docker image
diff --git a/nifi/Dockerfile b/nifi/Dockerfile
index 66ad8fc89..63df1e84f 100644
--- a/nifi/Dockerfile
+++ b/nifi/Dockerfile
@@ -2,6 +2,8 @@
 ARG NIFI_VERSION=2.6.0
 FROM apache/nifi:${NIFI_VERSION}
 
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
 ARG HTTP_PROXY=""
 ARG HTTPS_PROXY=""
 ARG no_proxy=""
@@ -25,34 +27,65 @@ ENV NIFI_PYTHON_WORKING_DIRECTORY=${NIFI_PYTHON_WORKING_DIRECTORY}
 ENV PIP_PREFER_BINARY=1
 ENV PIP_DISABLE_PIP_VERSION_CHECK=1
 ENV PIP_NO_CACHE_DIR=1
+# Enables Python to generate .pyc files in the container
+ENV PYTHONDONTWRITEBYTECODE=0
+# Turns off buffering for easier container logging
+ENV PYTHONUNBUFFERED=1
 
 # default env vars to prevent NiFi from running on HTTP
 ENV NIFI_WEB_HTTP_PORT=""
 ENV NIFI_WEB_HTTP_HOST=""
 
-RUN echo "GID=${GID}"
-RUN echo "UID=${UID}"
-
 USER root
 
-# run updates and install some base utility packages along with python support
-RUN apt-get update && apt-get upgrade -y --no-install-recommends && apt-get install -y --no-install-recommends iputils-ping libssl-dev openssl apt-transport-https apt-utils curl software-properties-common wget git build-essential make cmake ca-certificates zip unzip tzdata jq
-
-RUN echo "deb http://deb.debian.org/debian/ bookworm main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources
-RUN echo "deb http://deb.debian.org/debian/ bookworm-updates main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources
-RUN echo "deb http://deb.debian.org/debian/ bookworm-backports main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources
-RUN echo "deb http://security.debian.org/debian-security/ bookworm-security main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources
-
-RUN echo "deb-src http://deb.debian.org/debian/ bookworm main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources
-RUN echo "deb-src http://deb.debian.org/debian/ bookworm-updates main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources
-RUN echo "deb-src http://deb.debian.org/debian/ bookworm-backports main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources
-RUN echo "deb-src http://security.debian.org/debian-security/ bookworm-security main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources
-
-# Microsoft repos
-RUN wget -q -O- https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor -o /etc/apt/trusted.gpg.d/packages.microsoft.gpg
-RUN echo "deb [arch=amd64,armhf,arm64] https://packages.microsoft.com/ubuntu/22.04/prod jammy main" | tee -a /etc/apt/sources.list
-
-RUN apt-get update && apt-get install --no-install-recommends -y ssl-cert libsqlite3-dev python3-dev python3-pip python3.11 python3.11-dev python3-venv sqlite3 postgresql-server-dev-all
+# add repositories, install tooling, and clean up apt metadata in one layer
+RUN set -eux; \
+    apt-get update -y; \
+    apt-get install -y --no-install-recommends \
+        apt-transport-https \
+        ca-certificates \
+        curl \
+        gnupg \
+        wget; \
+    printf '%s\n' \
+        "deb http://deb.debian.org/debian/ bookworm main contrib non-free non-free-firmware" \
+        "deb http://deb.debian.org/debian/ bookworm-updates main contrib non-free non-free-firmware" \
+        "deb http://deb.debian.org/debian/ bookworm-backports main contrib non-free non-free-firmware" \
+        "deb http://security.debian.org/debian-security/ bookworm-security main contrib non-free non-free-firmware" \
+        "deb-src http://deb.debian.org/debian/ bookworm main contrib non-free non-free-firmware" \
+        "deb-src http://deb.debian.org/debian/ bookworm-updates main contrib non-free non-free-firmware" \
+        "deb-src http://deb.debian.org/debian/ bookworm-backports main contrib non-free non-free-firmware" \
+        "deb-src http://security.debian.org/debian-security/ bookworm-security main contrib non-free non-free-firmware" \
+        > /etc/apt/sources.list.d/debian.list; \
+    wget -q -O- https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor -o /etc/apt/trusted.gpg.d/packages.microsoft.gpg; \
+    echo "deb [arch=amd64,armhf,arm64] https://packages.microsoft.com/ubuntu/22.04/prod jammy main" > /etc/apt/sources.list.d/microsoft.list; \
+    apt-get update -y; \
+    apt-get upgrade -y --no-install-recommends; \
+    apt-get install -y --no-install-recommends \
+        apt-utils \
+        build-essential \
+        cmake \
+        git \
+        iputils-ping \
+        jq \
+        libsqlite3-dev \
+        libssl-dev \
+        make \
+        openssl \
+        postgresql-server-dev-all \
+        python3.11 \
+        python3.11-dev \
+        python3-dev \
+        python3-pip \
+        python3-venv \
+        software-properties-common \
+        sqlite3 \
+        ssl-cert \
+        tzdata \
+        unzip \
+        zip; \
+    apt-get clean; \
+    rm -rf /var/lib/apt/lists/*
 
 # bust cache
 ENV UV_VERSION=latest
@@ -60,36 +93,31 @@
 # install rust, medcat requirement, install UV
 ENV HOME=/root
 ENV PATH="/root/.cargo/bin:${PATH}"
+ENV UV_INSTALL_DIR=/usr/local/bin
 
-RUN curl -sSf https://sh.rustup.rs -o /tmp/rustup-init.sh \
-    && chmod +x /tmp/rustup-init.sh \
-    && /tmp/rustup-init.sh -y \
-    && rm /tmp/rustup-init.sh
-
-RUN curl -Ls https://astral.sh/uv/install.sh -o /tmp/install_uv.sh \
-    && bash /tmp/install_uv.sh
-
-RUN UV_PATH=$(find / -name uv -type f | head -n1) && \
-    ln -s "$UV_PATH" /usr/local/bin/uv
+RUN set -eux; \
+    curl -sSf https://sh.rustup.rs -o /tmp/rustup-init.sh; \
+    chmod +x /tmp/rustup-init.sh; \
+    /tmp/rustup-init.sh -y; \
+    rm /tmp/rustup-init.sh
 
-# clean up apt
-RUN apt-get clean autoclean && apt-get autoremove --purge -y
+RUN set -eux; \
+    curl -Ls https://astral.sh/uv/install.sh -o /tmp/install_uv.sh; \
+    bash /tmp/install_uv.sh; \
+    rm /tmp/install_uv.sh
 
 ######################################## Python / PIP SECTION ########################################
 
 RUN uv pip install --no-cache-dir --break-system-packages --system --upgrade pip setuptools wheel
 
-# install util packages used in NiFi scripts (such as MedCAT, avro, nifyapi, etc.)
+# install util packages used in NiFi scripts (such as avro, nipyapi, etc.)
 COPY ./requirements.txt ./requirements.txt
-RUN uv pip install --no-cache-dir --break-system-packages --target=${NIFI_PYTHON_FRAMEWORK_SOURCE_DIRECTORY} -r "./requirements.txt"
+RUN uv pip install --no-cache-dir --break-system-packages --target=${NIFI_PYTHON_FRAMEWORK_SOURCE_DIRECTORY} -r "./requirements.txt" --index-url https://pypi.org/simple
 
-# install MedCAT from the working with cogstack repo (as it is assumed to be stable and the latest version)
-RUN uv pip install --no-cache-dir --break-system-packages --target=${NIFI_PYTHON_FRAMEWORK_SOURCE_DIRECTORY} -r https://raw.githubusercontent.com/CogStack/working_with_cogstack/refs/heads/main/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu/
-
 #######################################################################################################
 
 # solve groovy grape proxy issues, grape ignores the current environment's proxy settings
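+# ENV rather than RUN export: an exported shell variable would not survive beyond its own RUN layer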
-RUN export JAVA_OPTS="-Dhttp.proxyHost=$HTTP_PROXY -Dhttps.proxyHost=$HTTPS_PROXY -Dhttp.nonProxyHosts=$no_proxy"
+ENV JAVA_OPTS="-Dhttp.proxyHost=${HTTP_PROXY} -Dhttps.proxyHost=${HTTPS_PROXY} -Dhttp.nonProxyHosts=${no_proxy}"
 
 # INSTALL NAR extensions
 WORKDIR /opt/nifi/nifi-current/lib/
diff --git a/nifi/user-schemas/avro/.keep b/nifi/__init__.py
similarity index 100%
rename from nifi/user-schemas/avro/.keep
rename to nifi/__init__.py
diff --git a/nifi/conf/nifi.properties b/nifi/conf/nifi.properties
index 1363e7deb..1b8f10550 100644
--- a/nifi/conf/nifi.properties
+++ b/nifi/conf/nifi.properties
@@ -49,8 +49,8 @@
 nifi.python.command=python3.11
 nifi.python.framework.source.directory=/opt/nifi/nifi-current/python/framework
 nifi.python.extensions.source.directory.default=/opt/nifi/nifi-current/python_extensions
 nifi.python.working.directory=/opt/nifi/user-scripts
-nifi.python.logs.directory=./logs 
-nifi.python.max.processes.per.extension.type=10 
+nifi.python.logs.directory=./logs
+nifi.python.max.processes.per.extension.type=10
 nifi.python.max.processes=100
 
 ####################
@@ -362,4 +362,3 @@
 # The diagnostics folder's maximum permitted size in bytes. If the limit is exceeded, the oldest files are deleted.
 nifi.diagnostics.on.shutdown.max.directory.size=10 MB
-
diff --git a/nifi/requirements-dev.txt b/nifi/requirements-dev.txt
new file mode 100644
index 000000000..e4219ea3b
--- /dev/null
+++ b/nifi/requirements-dev.txt
@@ -0,0 +1,8 @@
+
+ruff==0.12.12
+mypy==1.17.0
+mypy-extensions==1.1.0
+types-aiofiles==24.1.0.20250708
+types-PyYAML==6.0.12.20250516
+types-setuptools==80.9.0.20250529
+timeout-decorator==0.5.0
diff --git a/nifi/requirements.txt b/nifi/requirements.txt
index a424dbc9d..9093822f4 100644
--- a/nifi/requirements.txt
+++ b/nifi/requirements.txt
@@ -1,45 +1,32 @@
+wheel==0.45.1
+
+uv==0.9.12
+
 # data science pkgs
-seaborn==0.13.2
-matplotlib==3.10.6
-graphviz==0.21
-plotly==6.3.0
-keras==3.12.0
 nltk==3.9.1
-numpy>=1.26.0,<2.0.0
-pandas==1.5.3
-dill>=0.3.6,<1.0.0
-bokeh==3.8.0
-psycopg[c,binary]==3.2.9
+numpy==2.3.5
+pandas==2.3.3
 
 # used in NiFi scripts: geolocation, avro conversion etc.
 py4j==0.10.9.9
 rancoord==0.0.6
 geocoder==1.38.1
-avro==1.12.0
+avro==1.12.1
 nipyapi==1.0.0
 py7zr==1.0.0
-ipyparallel==9.0.1
-cython==3.1.3
-tqdm==4.67.1
 jsonpickle==4.1.1
-certifi==2025.8.3
-xlsxwriter==3.2.5
-mysql-connector-python==9.4.0
-pymssql==2.3.7
+xlsxwriter==3.2.9
+mysql-connector-python==9.5.0
+pymssql==2.3.9
+psycopg[c,binary]==3.2.9
+requests==2.32.5
+PyYAML==6.0.3
+pydantic==2.12.5
+# assumed version floor; required by the pydantic-settings based configs in user_scripts/dto
+pydantic-settings>=2.0
 
 # other utils
 xnat==0.7.2
 
 # ElasticSearch/OpenSearch packages
-opensearch-py==3.0.0
 elasticsearch9==9.1.0
+opensearch-py==3.0.0
 neo4j==5.28.2
-eland==9.0.1
-
-# git utils
-dvc==3.62.0
-GitPython==3.1.45
-PyYAML==6.0.2
-
-# code utils
-ruff==0.12.12
diff --git a/nifi/user-scripts/dto/nifi_api_config.py b/nifi/user-scripts/dto/nifi_api_config.py
deleted file mode 100644
index 303bdd1b1..000000000
--- a/nifi/user-scripts/dto/nifi_api_config.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import os
-
-
-class NiFiAPIConfig:
-    NIFI_URL_SCHEME: str = "https"
-    NIFI_HOST: str = "localhost"
-    NIFI_PORT: int = 8443
-    NIFI_REGISTRY_PORT: int = 18443
-
-    NIFI_USERNAME: str = os.environ.get("NIFI_SINGLE_USER_CREDENTIALS_USERNAME", "admin")
-    NIFI_PASSWORD: str = os.environ.get("NIFI_SINGLE_USER_CREDENTIALS_PASSWORD", "cogstackNiFi")
-
-    ROOT_CERT_CA_PATH: str = os.path.abspath("../../../../security/certificates/root/root-ca.pem")
-    NIFI_CERT_PEM_PATH: str = os.path.abspath("../../../../security/certificates/nifi/nifi.pem")
-    NIFI_CERT_KEY_PATH: str = os.path.abspath("../../../../security/certificates/nifi/nifi.key")
-
-    VERIFY_SSL: bool = True
-
-    @property
-    def nifi_base_url(self) -> str:
-        """Full NiFi base URL, e.g. https://localhost:8443"""
-        return f"{self.NIFI_URL_SCHEME}://{self.NIFI_HOST}:{self.NIFI_PORT}"
-
-    @property
-    def nifi_api_url(self) -> str:
-        """"NiFi REST API root, e.g. https://localhost:8443/nifi-api"""
-        return f"{self.nifi_base_url}/nifi-api"
-
-    @property
-    def nifi_registry_base_url(self) -> str:
-        """"NiFi Registry REST API root, e.g. https://localhost:18443/nifi-registry"""
-        return f"{self.NIFI_URL_SCHEME}://{self.NIFI_HOST}:{self.NIFI_REGISTRY_PORT}/nifi-registry/"
-
-    @property
-    def nifi_registry_api_url(self) -> str:
-        """"NiFi Registry REST API root, e.g. https://localhost:18443/nifi-registry/nifi-registry-api"""
-        return f"{self.NIFI_URL_SCHEME}://{self.NIFI_HOST}:{self.NIFI_REGISTRY_PORT}/nifi-registry-api"
-
-    def auth_credentials(self) -> tuple[str, str]:
-        """Convenience for requests auth=(user, password)."""
-        return (self.NIFI_USERNAME, self.NIFI_PASSWORD)
-
-    def get_nifi_ssl_certs(self) -> tuple[str, str]:
-        """Convenience for requests cert=(cert_path, key_path)."""
-        return (self.NIFI_CERT_PEM_PATH, self.NIFI_CERT_KEY_PATH)
diff --git a/nifi/user-scripts/dto/pg_config.py b/nifi/user-scripts/dto/pg_config.py
deleted file mode 100644
index 19f15d029..000000000
--- a/nifi/user-scripts/dto/pg_config.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from pydantic import BaseModel, Field
-
-
-class PGConfig(BaseModel):
-    host: str = Field(default="localhost")
-    port: int = Field(default=5432)
-    db: str = Field(default="samples_db")
-    user: str = Field(default="test")
-    password: str = Field(default="test")
-    timeout: int = Field(default=50)
diff --git a/nifi/user-scripts/logs/.gitignore b/nifi/user-scripts/logs/.gitignore
deleted file mode 100644
index f59ec20aa..000000000
--- a/nifi/user-scripts/logs/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-*
\ No newline at end of file
diff --git a/nifi/user-scripts/utils/helpers/nifi_api_client.py b/nifi/user-scripts/utils/helpers/nifi_api_client.py
deleted file mode 100644
index 1c353d2c1..000000000
--- a/nifi/user-scripts/utils/helpers/nifi_api_client.py
+++ /dev/null
@@ -1,82 +0,0 @@
-from logging import Logger
-
-from dto.nifi_api_config import NiFiAPIConfig
-from nipyapi import canvas, security
-from nipyapi.nifi import ApiClient, ProcessGroupsApi
-from nipyapi.nifi.configuration import Configuration as NiFiConfiguration
-from nipyapi.nifi.models.process_group_entity import ProcessGroupEntity
-from nipyapi.nifi.models.processor_entity import ProcessorEntity
-from nipyapi.registry import ApiClient as RegistryApiClient
-from nipyapi.registry import BucketsApi
-from nipyapi.registry.configuration import Configuration as RegistryConfiguration
-from utils.generic import get_logger
-
-
-class NiFiRegistryClient:
-    def __init__(self, config: NiFiAPIConfig) -> None:
-        self.config = config or NiFiAPIConfig()
-        self.nipyapi_config = RegistryConfiguration()
-        self.nipyapi_config.host = self.config.nifi_registry_api_url
-        self.nipyapi_config.verify_ssl = self.config.VERIFY_SSL
-        self.nipyapi_config.cert_file = self.config.NIFI_CERT_PEM_PATH  # type: ignore
-        self.nipyapi_config.key_file = self.config.NIFI_CERT_KEY_PATH  # type: ignore
-        self.nipyapi_config.ssl_ca_cert = self.config.ROOT_CERT_CA_PATH  # type: ignore
-
-        self.logger: Logger = get_logger(self.__class__.__name__)
-
-        self.api_client = RegistryApiClient(self.nipyapi_config.host)
-        self.buckets_api = BucketsApi(self.api_client)
-
-    def list_buckets(self):
-        buckets = self.buckets_api.get_buckets()
-        for b in buckets:
-            self.logger.info("Bucket: %s (%s)", b.name, b.identifier)
-        return buckets
-
-
-class NiFiClient:
-    def __init__(self, config: NiFiAPIConfig) -> None:
-        self.config = config or NiFiAPIConfig()
-        self.nipyapi_config = NiFiConfiguration()
-        self.nipyapi_config.host = self.config.nifi_api_url
-        self.nipyapi_config.verify_ssl = self.config.VERIFY_SSL
-        self.nipyapi_config.cert_file = self.config.NIFI_CERT_PEM_PATH  # type: ignore
-        self.nipyapi_config.key_file = self.config.NIFI_CERT_KEY_PATH  # type: ignore
-        self.nipyapi_config.ssl_ca_cert = self.config.ROOT_CERT_CA_PATH  # type: ignore
-
-        self.logger: Logger = get_logger(self.__class__.__name__)
-
-        self.api_client = ApiClient(self.nipyapi_config)
-        self.process_group_api = ProcessGroupsApi(self.api_client)
-
-        self._login()
-
-    def _login(self) -> None:
-        security.service_login(
-            service='nifi',
-            username=self.config.NIFI_USERNAME,
-            password=self.config.NIFI_PASSWORD
-        )
-        self.logger.info("✅ Logged in to NiFi")
-
-    def get_root_process_group_id(self) -> str:
-        return canvas.get_root_pg_id()
-
-    def get_process_group_by_name(self, process_group_name: str) -> None | list[object] | object:
-        return canvas.get_process_group(process_group_name, identifier_type="nam")
-
-    def get_process_group_by_id(self, process_group_id: str) -> ProcessGroupEntity:
-        return canvas.get_process_group(process_group_id, identifier_type="id")
-
-    def start_process_group(self, process_group_id: str) -> bool:
-        return canvas.schedule_process_group(process_group_id, True)
-
-    def stop_process_group(self, process_group_id: str) -> bool:
-        return canvas.schedule_process_group(process_group_id, False)
-
-    def get_child_process_groups_from_parent_id(self, parent_process_group_id: str) -> list[ProcessGroupEntity]:
-        parent_pg = canvas.get_process_group(parent_process_group_id, identifier_type="id")
-        return canvas.list_all_process_groups(parent_pg.id)
-
-    def get_all_processors_in_process_group(self, process_group_id: str) -> list[ProcessorEntity]:
-        return canvas.list_all_processors(process_group_id)
diff --git a/nifi/user-scripts/utils/helpers/service.py b/nifi/user-scripts/utils/helpers/service.py
deleted file mode 100644
index 9d4b28080..000000000
--- a/nifi/user-scripts/utils/helpers/service.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import sys
-import time
-
-import psycopg2
-from psycopg2 import sql
-
-sys.path.append("../../dto/")
-
-from dto.pg_config import PGConfig
-
-
-def check_postgres(cfg: PGConfig) -> tuple[bool, float | None, str | None]:
-    """Return (is_healthy, latency_ms, error_detail)"""
-    start = time.perf_counter()
-    try:
-        conn = psycopg2.connect(
-            host=cfg.host,
-            port=cfg.port,
-            dbname=cfg.db,
-            user=cfg.user,
-            password=cfg.password,
-            connect_timeout=cfg.timeout
-        )
-        with conn.cursor() as cur:
-            cur.execute(sql.SQL("SELECT 1;"))
-            result = cur.fetchone()
-        conn.close()
-        if result != (1,):
-            return False, None, f"Unexpected result: {result}"
-        latency = (time.perf_counter() - start) * 1000
-        return True, latency, None
-    except Exception as e:
-        return False, None, str(e)
diff --git a/nifi/user-python-extensions/convert_avro_binary_field_to_base64.py b/nifi/user_python_extensions/convert_avro_binary_field_to_base64.py
similarity index 100%
rename from nifi/user-python-extensions/convert_avro_binary_field_to_base64.py
rename to nifi/user_python_extensions/convert_avro_binary_field_to_base64.py
diff --git a/nifi/user-python-extensions/convert_json_record_schema.py b/nifi/user_python_extensions/convert_json_record_schema.py
similarity index 100%
rename from nifi/user-python-extensions/convert_json_record_schema.py
rename to nifi/user_python_extensions/convert_json_record_schema.py
diff --git a/nifi/user-python-extensions/parse_service_response.py b/nifi/user_python_extensions/parse_service_response.py
similarity index 100%
rename from nifi/user-python-extensions/parse_service_response.py
rename to nifi/user_python_extensions/parse_service_response.py
diff --git a/nifi/user-python-extensions/prepare_record_for_nlp.py b/nifi/user_python_extensions/prepare_record_for_nlp.py
similarity index 100%
rename from nifi/user-python-extensions/prepare_record_for_nlp.py
rename to nifi/user_python_extensions/prepare_record_for_nlp.py
diff --git a/nifi/user-python-extensions/prepare_record_for_ocr.py b/nifi/user_python_extensions/prepare_record_for_ocr.py
similarity index 100%
rename from nifi/user-python-extensions/prepare_record_for_ocr.py
rename to nifi/user_python_extensions/prepare_record_for_ocr.py
diff --git a/nifi/user-python-extensions/record_add_geolocation.py b/nifi/user_python_extensions/record_add_geolocation.py
similarity index 100%
rename from nifi/user-python-extensions/record_add_geolocation.py
rename to nifi/user_python_extensions/record_add_geolocation.py
diff --git a/nifi/user-python-extensions/record_decompress_cerner_blob.py b/nifi/user_python_extensions/record_decompress_cerner_blob.py
similarity index 100%
rename from nifi/user-python-extensions/record_decompress_cerner_blob.py
rename to nifi/user_python_extensions/record_decompress_cerner_blob.py
diff --git a/nifi/user-python-extensions/sample_processor.py b/nifi/user_python_extensions/sample_processor.py
similarity index 100%
rename from nifi/user-python-extensions/sample_processor.py
rename to nifi/user_python_extensions/sample_processor.py
diff --git a/nifi/user-schemas/elasticsearch/indices/.keep b/nifi/user_schemas/avro/.keep
similarity index 100%
rename from nifi/user-schemas/elasticsearch/indices/.keep
rename to nifi/user_schemas/avro/.keep
diff --git a/nifi/user-schemas/elasticsearch/base_index_settings.json b/nifi/user_schemas/elasticsearch/base_index_settings.json
similarity index 100%
rename from nifi/user-schemas/elasticsearch/base_index_settings.json
rename to nifi/user_schemas/elasticsearch/base_index_settings.json
diff --git a/nifi/user-schemas/elasticsearch/templates/.keep b/nifi/user_schemas/elasticsearch/indices/.keep
similarity index 100%
rename from nifi/user-schemas/elasticsearch/templates/.keep
rename to nifi/user_schemas/elasticsearch/indices/.keep
diff --git a/nifi/user-schemas/json/.keep b/nifi/user_schemas/elasticsearch/templates/.keep
similarity index 100%
rename from nifi/user-schemas/json/.keep
rename to nifi/user_schemas/elasticsearch/templates/.keep
diff --git a/nifi/user-scripts/db/.gitignore b/nifi/user_schemas/json/.keep
similarity index 100%
rename from nifi/user-scripts/db/.gitignore
rename to nifi/user_schemas/json/.keep
diff --git a/nifi/user-schemas/legacy/annotation-medcat.avsc b/nifi/user_schemas/legacy/annotation-medcat.avsc
similarity index 100%
rename from nifi/user-schemas/legacy/annotation-medcat.avsc
rename to nifi/user_schemas/legacy/annotation-medcat.avsc
diff --git a/nifi/user-schemas/legacy/annotation_elasticsearch_index_mapping.json b/nifi/user_schemas/legacy/annotation_elasticsearch_index_mapping.json
similarity index 100%
rename from nifi/user-schemas/legacy/annotation_elasticsearch_index_mapping.json
rename to nifi/user_schemas/legacy/annotation_elasticsearch_index_mapping.json
diff --git a/nifi/user-schemas/legacy/cogstack_common_schema.avsc b/nifi/user_schemas/legacy/cogstack_common_schema.avsc
similarity index 100%
rename from nifi/user-schemas/legacy/cogstack_common_schema.avsc
rename to nifi/user_schemas/legacy/cogstack_common_schema.avsc
diff --git a/nifi/user-schemas/legacy/cogstack_common_schema_elasticsearch_index_mapping_template.json b/nifi/user_schemas/legacy/cogstack_common_schema_elasticsearch_index_mapping_template.json
similarity index 100%
rename from nifi/user-schemas/legacy/cogstack_common_schema_elasticsearch_index_mapping_template.json
rename to nifi/user_schemas/legacy/cogstack_common_schema_elasticsearch_index_mapping_template.json
diff --git a/nifi/user-schemas/legacy/cogstack_common_schema_full.avsc b/nifi/user_schemas/legacy/cogstack_common_schema_full.avsc
similarity index 100%
rename from nifi/user-schemas/legacy/cogstack_common_schema_full.avsc
rename to nifi/user_schemas/legacy/cogstack_common_schema_full.avsc
diff --git a/nifi/user-schemas/legacy/cogstack_common_schema_mapping.json b/nifi/user_schemas/legacy/cogstack_common_schema_mapping.json
similarity index 100%
rename from nifi/user-schemas/legacy/cogstack_common_schema_mapping.json
rename to nifi/user_schemas/legacy/cogstack_common_schema_mapping.json
diff --git a/nifi/user-schemas/legacy/document.avsc b/nifi/user_schemas/legacy/document.avsc
similarity index 100%
rename from nifi/user-schemas/legacy/document.avsc
rename to nifi/user_schemas/legacy/document.avsc
diff --git a/nifi/user-schemas/legacy/document_all_fields.avsc b/nifi/user_schemas/legacy/document_all_fields.avsc
similarity index 100%
rename from nifi/user-schemas/legacy/document_all_fields.avsc
rename to nifi/user_schemas/legacy/document_all_fields.avsc
diff --git a/nifi/user-scripts/logs/parse_json/.gitkeep b/nifi/user_scripts/__init__.py
similarity index 100%
rename from nifi/user-scripts/logs/parse_json/.gitkeep
rename to nifi/user_scripts/__init__.py
diff --git a/nifi/user-scripts/bootstrap_external_lib_imports.py b/nifi/user_scripts/bootstrap_external_lib_imports.py
similarity index 100%
rename from nifi/user-scripts/bootstrap_external_lib_imports.py
rename to nifi/user_scripts/bootstrap_external_lib_imports.py
diff --git a/nifi/user-scripts/clean_doc.py b/nifi/user_scripts/clean_doc.py
similarity index 100%
rename from nifi/user-scripts/clean_doc.py
rename to nifi/user_scripts/clean_doc.py
diff --git a/nifi/user-scripts/cogstack_cohort_generate_data.py b/nifi/user_scripts/cogstack_cohort_generate_data.py
similarity index 100%
rename from nifi/user-scripts/cogstack_cohort_generate_data.py
rename to nifi/user_scripts/cogstack_cohort_generate_data.py
diff --git a/nifi/user-scripts/cogstack_cohort_generate_random_data.py b/nifi/user_scripts/cogstack_cohort_generate_random_data.py
similarity index 100%
rename from nifi/user-scripts/cogstack_cohort_generate_random_data.py
rename to nifi/user_scripts/cogstack_cohort_generate_random_data.py
diff --git a/nifi/user-scripts/tmp/.gitignore b/nifi/user_scripts/db/.gitignore
similarity index 100%
rename from nifi/user-scripts/tmp/.gitignore
rename to nifi/user_scripts/db/.gitignore
diff --git a/nifi/user_scripts/dto/__init__.py b/nifi/user_scripts/dto/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/nifi/user_scripts/dto/database_config.py b/nifi/user_scripts/dto/database_config.py
new file mode 100644
index 000000000..1e29c86f7
--- /dev/null
+++ b/nifi/user_scripts/dto/database_config.py
@@ -0,0 +1,29 @@
+from pathlib import Path
+from typing import Any
+
+from pydantic import AliasChoices, Field, PositiveInt, SecretStr
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class DatabaseConfig(BaseSettings):
+    model_config = SettingsConfigDict(
+        env_prefix="DATABASE_",
+        env_file=[Path(__file__).resolve().parents[3] / "deploy" / "database.env",
+                  Path(__file__).resolve().parents[3] / "security" / "env" / "users_database.env",
+                  ],
+        extra="ignore",
+        env_ignore_empty=True,
+        populate_by_name=True
+    )
+
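+    # NOTE: validation_alias names are matched verbatim; env_prefix is only applied to plain field names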
diff --git a/nifi/user_scripts/dto/elastic_config.py b/nifi/user_scripts/dto/elastic_config.py
new file mode 100644
index 000000000..1419009b8
--- /dev/null
+++ b/nifi/user_scripts/dto/elastic_config.py
@@ -0,0 +1,65 @@
+import json
+from pathlib import Path
+from typing import ClassVar
+
+from pydantic import AliasChoices, Field, SecretStr, field_validator
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class ElasticConfig(BaseSettings):
+
+    # plain class constants, not model fields, so they must be ClassVar-annotated
+    ROOT_DIR: ClassVar[Path] = Path(__file__).resolve().parents[3]
+    CERT_ROOT_DIR: ClassVar[Path] = ROOT_DIR / "security" / "certificates" / "elastic"
+
+    model_config = SettingsConfigDict(
+        env_prefix="ELASTICSEARCH_",
+        env_file=[ROOT_DIR / "deploy" / "elasticsearch.env",
+                  ROOT_DIR / "security" / "env" / "users_elasticsearch.env",
+                  ],
+        extra="ignore",
+        env_ignore_empty=True,
+        populate_by_name=True
+    )
+
+    es_port_1: int = Field(validation_alias=AliasChoices("ELASTICSEARCH_NODE_1_OUTPUT_PORT"), ge=1, le=65535)
+    es_port_2: int = Field(validation_alias=AliasChoices("ELASTICSEARCH_NODE_2_OUTPUT_PORT"), ge=1, le=65535)
+    es_port_3: int = Field(validation_alias=AliasChoices("ELASTICSEARCH_NODE_3_OUTPUT_PORT"), ge=1, le=65535)
+
+    hosts: list[str] = Field(default_factory=list)
+    timeout: int = Field(default=60)
+    verify_ssl: bool = Field(default=False, validation_alias=AliasChoices("SSL_ENABLED"))
+    user: str = Field(default="admin", validation_alias=AliasChoices("ELASTIC_USER"))
+    password: SecretStr = Field(default_factory=lambda: SecretStr("test"),
+                                validation_alias=AliasChoices("ELASTIC_PASSWORD", "OPENSEARCH_INITIAL_ADMIN_PASSWORD"))
+
+    root_cert_ca_path: str = (CERT_ROOT_DIR / "opensearch/elasticsearch/" / "elastic-stack-ca.crt.pem").as_posix()
+    elastic_node_cert_key_path: str = (CERT_ROOT_DIR / "elastic" /
+                                       "opensearch/elasticsearch/elasticsearch-1/elasticsearch-1.key").as_posix()
+    elastic_node_cert_pem_path: str = (CERT_ROOT_DIR / "elastic" /
+                                       "opensearch/elasticsearch/elasticsearch-1/elasticsearch-1.crt").as_posix()
+
+    @property
+    def ports(self) -> list[int]:
+        # fields cannot reference each other at class-definition time,
+        # so the port list is derived from the validated instance instead
+        return [self.es_port_1, self.es_port_2, self.es_port_3]
+
+    @field_validator("hosts", mode="before")
+    @classmethod
+    def parse_list(cls, v):
+        if isinstance(v, str):
+            return json.loads(v)
+        return v
+
+    # def client_kwargs(self) -> dict[str, Any]:
+    #     """
+    #     Connection kwargs ready for elasticsearch client constructors.
+    #     """
+    #     return {
+    #         "hosts": self.hosts,
+    #         "basic_auth": (self.user, self.password.get_secret_value()),
+    #         "verify_certs": self.verify_ssl,
+    #         "ca_certs": self.root_cert_ca_path,
+    #         "request_timeout": self.timeout,
+    #     }
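The commented-out client_kwargs above hints at the intended consumer; a sketch of wiring ElasticConfig into a client, assuming the elasticsearch-py 8.x constructor keywords (opensearch-py differs slightly, e.g. http_auth instead of basic_auth) and assuming the deploy env files supply the three ELASTICSEARCH_NODE_*_OUTPUT_PORT values, since those fields have no defaults:

    from elasticsearch import Elasticsearch

    from nifi.user_scripts.dto.elastic_config import ElasticConfig

    config = ElasticConfig()  # hosts may arrive as a JSON string, e.g. '["https://elasticsearch-1:9200"]'
    es = Elasticsearch(
        hosts=config.hosts,
        basic_auth=(config.user, config.password.get_secret_value()),
        verify_certs=config.verify_ssl,
        ca_certs=config.root_cert_ca_path,
        request_timeout=config.timeout,
    )
    print(es.info())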
+ # """ + # return { + # "hosts": self.hosts, + # "basic_auth": (self.user, self.password.get_secret_value()), + # "verify_certs": self.verify_ssl, + # "ca_certs": self.ca_cert_path, + # "request_timeout": self.timeout, + # } diff --git a/nifi/user_scripts/dto/nifi_api_config.py b/nifi/user_scripts/dto/nifi_api_config.py new file mode 100644 index 000000000..b3a3c40df --- /dev/null +++ b/nifi/user_scripts/dto/nifi_api_config.py @@ -0,0 +1,47 @@ +import os +from pathlib import Path + +CERTS_ROOT = Path(__file__).resolve().parents[3] / "security" / "certificates" + + +class NiFiAPIConfig: + + def __init__(self): + self.nifi_url_scheme = "https" + self.nifi_host = "localhost" + self.nifi_port = 8443 + self.nifi_registry_port = 18443 + self.nifi_username = os.environ.get("NIFI_SINGLE_USER_CREDENTIALS_USERNAME", "admin") + self.nifi_password = os.environ.get("NIFI_SINGLE_USER_CREDENTIALS_PASSWORD", "cogstackNiFi") + self.root_cert_ca_path = (CERTS_ROOT / "root" / "root-ca.pem").as_posix() + self.nifi_cert_pem_path = (CERTS_ROOT / "nifi" / "nifi.pem").as_posix() + self.nifi_cert_key_path = (CERTS_ROOT / "nifi" / "nifi.key").as_posix() + self.verify_ssl = True + + @property + def nifi_base_url(self) -> str: + """Full NiFi base URL, e.g. https://localhost:8443""" + return f"{self.nifi_url_scheme}://{self.nifi_host}:{self.nifi_port}" + + @property + def nifi_api_url(self) -> str: + """NiFi REST API root, e.g. https://localhost:8443/nifi-api""" + return f"{self.nifi_base_url}/nifi-api" + + @property + def nifi_registry_base_url(self) -> str: + """NiFi Registry REST API root, e.g. https://localhost:18443/nifi-registry/""" + return f"{self.nifi_url_scheme}://{self.nifi_host}:{self.nifi_registry_port}/nifi-registry/" + + @property + def nifi_registry_api_url(self) -> str: + """nifi registry rest api root, e.g. https://localhost:18443/nifi-registry/nifi-registry-api""" + return f"{self.nifi_url_scheme}://{self.nifi_host}:{self.nifi_registry_port}/nifi-registry-api/" + + def auth_credentials(self) -> tuple[str, str]: + """convenience for requests auth=(user, password).""" + return (self.nifi_username, self.nifi_password) + + def get_nifi_ssl_certs_paths(self) -> tuple[str, str]: + """convenience for requests cert=(cert_path, key_path).""" + return (self.nifi_cert_pem_path, self.nifi_cert_key_path) diff --git a/nifi/user-scripts/dto/service_health.py b/nifi/user_scripts/dto/service_health.py similarity index 59% rename from nifi/user-scripts/dto/service_health.py rename to nifi/user_scripts/dto/service_health.py index 5f6455dbb..9fdfe371e 100644 --- a/nifi/user-scripts/dto/service_health.py +++ b/nifi/user_scripts/dto/service_health.py @@ -10,14 +10,15 @@ class ServiceHealth(BaseModel): """ service: str = Field(..., description="Service name, e.g. 
diff --git a/nifi/user-scripts/dto/service_health.py b/nifi/user_scripts/dto/service_health.py
similarity index 59%
rename from nifi/user-scripts/dto/service_health.py
rename to nifi/user_scripts/dto/service_health.py
index 5f6455dbb..9fdfe371e 100644
--- a/nifi/user-scripts/dto/service_health.py
+++ b/nifi/user_scripts/dto/service_health.py
@@ -10,14 +10,15 @@ class ServiceHealth(BaseModel):
     """
     service: str = Field(..., description="Service name, e.g. NiFi, PostgreSQL, OpenSearch/ElasticSearch, etc.")
-    status: Literal["healthy", "unhealthy", "degraded"] = Field(
-        ..., description="Current service status"
-    )
-    message: str | None = Field(None, description="Optional status message")
-    timestamp: datetime = Field(default_factory=datetime.utcnow)
-    avg_processing_ms: float | None = Field(None)
-    service_info: str | None = Field(None)
-    connected: bool | None = Field(None)
+    status: Literal["healthy", "unhealthy", "degraded"] = Field(description="Current service status",
+                                                                default="unhealthy")
+
+    message: str | None = Field(default=None, description="Optional status message")
+    timestamp: datetime | None = Field(default_factory=datetime.now)
+    avg_processing_ms: float | None = Field(default=None)
+    service_info: str | None = Field(default=None)
+    connected: bool = Field(default=False)
+    latency_ms: float = Field(default=0.0, description="Ping latency in milliseconds")
 
     class Config:
         extra = "ignore"
 
@@ -28,18 +29,17 @@ class MLServiceHealth(ServiceHealth):
     model_card: str | None = Field(None, description="URL or path to the model card")
 
 class NiFiHealth(ServiceHealth):
-    active_threads: int | None = Field(None, description="Number of active threads")
-    queued_bytes: int | None = Field(None, description="Total queued bytes")
-    queued_count: int | None = Field(None, description="Number of queued flowfiles")
+    active_threads: int | None = Field(default=None, description="Number of active threads")
+    queued_bytes: int | None = Field(default=None, description="Total queued bytes")
+    queued_count: int | None = Field(default=None, description="Number of queued flowfiles")
 
 class ElasticsearchHealth(ServiceHealth):
     cluster_status: str | None = Field(None, description="Cluster health status")
     node_count: int | None = Field(None)
     active_shards: int | None = Field(None)
 
-class PostgresHealth(ServiceHealth):
-    version: str | None = Field(None)
-    latency_ms: float | None = Field(None, description="Ping latency in milliseconds")
+class DatabaseHealth(ServiceHealth):
+    version: str | None = Field(None, description="Database version, e.g. PgSQL 17, MSSQL 21, etc.")
     db_name: str | None = Field(None, description="Database name")
 
 class MedCATTrainerHealth(ServiceHealth):
diff --git a/nifi/user_scripts/elastic_schema_converter.py b/nifi/user_scripts/elastic_schema_converter.py
new file mode 100644
index 000000000..ea880673d
--- /dev/null
+++ b/nifi/user_scripts/elastic_schema_converter.py
@@ -0,0 +1,63 @@
+import json
+import sys
+import traceback
+from logging import Logger
+
+logger: Logger = Logger(__name__)
+
+origin_index_mapping = json.loads(sys.stdin.read())
+
+INPUT_INDEX_NAME = ""
+OUTPUT_INDEX_NAME = ""
+OUTPUT_FILE_NAME = ""
+JSON_FIELD_MAPPER_SCHEMA_FILE_PATH = ""
+TRANSFORM_KEYS_LOWER_CASE = False
+
+for arg in sys.argv:
+    _arg = arg.split("=", 1)
+    _arg[0] = _arg[0].lower()
+    if _arg[0] == "input_index_name":
+        INPUT_INDEX_NAME = _arg[1]
+    if _arg[0] == "output_index_name":
+        OUTPUT_INDEX_NAME = _arg[1]
+    if _arg[0] == "output_file_name":
+        OUTPUT_FILE_NAME = _arg[1]
+    if _arg[0] == "json_field_mapper_schema_file_path":
+        JSON_FIELD_MAPPER_SCHEMA_FILE_PATH = _arg[1]
+    if _arg[0] == "transform_keys_lower_case":
+        TRANSFORM_KEYS_LOWER_CASE = _arg[1].lower() == "true"
+
+try:
+    with open(JSON_FIELD_MAPPER_SCHEMA_FILE_PATH) as file:
+        json_field_mapper: dict = json.load(file)
+
+    # responses from GET <index>/_mapping are wrapped in the index name, so unwrap first
+    origin_index_name = INPUT_INDEX_NAME if INPUT_INDEX_NAME else \
+        list(origin_index_mapping.keys())[0]
+    if origin_index_name in origin_index_mapping:
+        origin_index_mapping = origin_index_mapping[origin_index_name]
+
+    output_index_mapping: dict = {"mappings": {"properties": {}}}
+    output_properties: dict = output_index_mapping["mappings"]["properties"]
+
+    for origin_field_name, origin_field_es_properties in origin_index_mapping["mappings"]["properties"].items():
+        field_name = str(origin_field_name).lower() if TRANSFORM_KEYS_LOWER_CASE else origin_field_name
+        if field_name not in json_field_mapper or not json_field_mapper[field_name]:
+            continue
+        # flat field: rename it according to the mapper schema
+        if isinstance(json_field_mapper[field_name], str):
+            output_properties[json_field_mapper[field_name]] = origin_field_es_properties
+        # nested field: rename every sub-field present in the mapper schema
+        elif isinstance(json_field_mapper[field_name], dict):
+            output_properties[field_name] = {}
+            for nested_field_name, nested_field_value in origin_field_es_properties.items():
+                if nested_field_name in json_field_mapper[field_name]:
+                    output_properties[field_name][json_field_mapper[field_name][nested_field_name]] = \
+                        nested_field_value
+
+except Exception:
+    logger.error("Exception during flowfile processing: " + traceback.format_exc())
+    raise
+
+# Output cleaned JSON as UTF-8
+sys.stdout.buffer.write(json.dumps(output_index_mapping, ensure_ascii=False).encode("utf-8"))
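A tiny worked example of the mapper-schema convention the converter assumes (a string value renames a flat field, a dict value renames the sub-fields of a nested one); the field names are illustrative only:

    # mapper schema, passed via json_field_mapper_schema_file_path=<path>:
    #     {"docid": "document_id", "meta": {"dct": "document_datetime"}}
    # index mapping on stdin:
    #     {"mappings": {"properties": {"docid": {"type": "keyword"},
    #                                  "meta": {"dct": {"type": "date"}}}}}
    # mapping written to stdout:
    #     {"mappings": {"properties": {"document_id": {"type": "keyword"},
    #                                  "meta": {"document_datetime": {"type": "date"}}}}}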
diff --git a/nifi/user-scripts/generate_location.py b/nifi/user_scripts/generate_location.py
similarity index 85%
rename from nifi/user-scripts/generate_location.py
rename to nifi/user_scripts/generate_location.py
index 5d88f9af7..4eb9426f7 100644
--- a/nifi/user-scripts/generate_location.py
+++ b/nifi/user_scripts/generate_location.py
@@ -37,21 +37,21 @@ def poly_creator(city: str):
 
 
 def main():
     input_stream = sys.stdin.read()
+    log_file_path = os.path.join(NIFI_USER_SCRIPT_LOGS_DIR, str(LOG_FILE_NAME))
+    output_stream = []
 
     try:
-        log_file_path = os.path.join(NIFI_USER_SCRIPT_LOGS_DIR, str(LOG_FILE_NAME))
         patients = json.loads(input_stream)
         locations = [poly_creator(location) for location in LOCATIONS.split(",")]
-
-        output_stream = []
        for patient in patients:
             to_append = {}
             id = patient["_source"][SUBJECT_ID_FIELD_NAME]
-            idx = randrange(len(locations))  # pick a random location specified
-            lat, lon, _ = rc.coordinates_randomizer(polygon = locations[idx], num_locations = 1)  # generate latitude and longitude
-
+            # pick a random location specified
+            idx = randrange(len(locations))
+            # generate latitude and longitude
+            lat, lon, _ = rc.coordinates_randomizer(polygon=locations[idx], num_locations=1)
             to_append[SUBJECT_ID_FIELD_NAME] = id
             to_append[LOCATION_NAME_FIELD] = "POINT (" + str(lon[0]) + " " + str(lat[0]) + ")"
             output_stream.append(to_append)
 
@@ -62,8 +62,8 @@ def main():
     else:
         with open(log_file_path, "a+") as log_file:
             log_file.write("\n" + str(traceback.print_exc()))
-    finally:
-        return output_stream
+    return output_stream
 
 sys.stdout.write(json.dumps(main()))
 
+
diff --git a/nifi/user-scripts/get_files_from_storage.py b/nifi/user_scripts/get_files_from_storage.py
similarity index 98%
rename from nifi/user-scripts/get_files_from_storage.py
rename to nifi/user_scripts/get_files_from_storage.py
index f1aefbbb2..2dc9e1de7 100644
--- a/nifi/user-scripts/get_files_from_storage.py
+++ b/nifi/user_scripts/get_files_from_storage.py
@@ -170,7 +170,8 @@ def get_files_and_metadata():
             if generate_pseudo_doc_id is not False:
                 _file_id_dict["document_Pseudo_Id"] = str(uuid.uuid4().hex)
 
-            txt_file_df = pandas.concat([txt_file_df, pandas.DataFrame.from_dict([_file_id_dict], orient="columns")])
+            txt_file_df = pandas.concat([txt_file_df,
+                                         pandas.DataFrame.from_dict([_file_id_dict], orient="columns")])
 
             folders_ingested[root].append(file_id)
         else:
diff --git a/nifi/user_scripts/legacy_scripts/__init__.py b/nifi/user_scripts/legacy_scripts/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/nifi/user-scripts/legacy_scripts/annotation_creator.py b/nifi/user_scripts/legacy_scripts/annotation_creator.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/annotation_creator.py
rename to nifi/user_scripts/legacy_scripts/annotation_creator.py
diff --git a/nifi/user-scripts/legacy_scripts/annotation_manager.py b/nifi/user_scripts/legacy_scripts/annotation_manager.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/annotation_manager.py
rename to nifi/user_scripts/legacy_scripts/annotation_manager.py
diff --git a/nifi/user-scripts/legacy_scripts/annotation_manager_docs.py b/nifi/user_scripts/legacy_scripts/annotation_manager_docs.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/annotation_manager_docs.py
rename to nifi/user_scripts/legacy_scripts/annotation_manager_docs.py
diff --git a/nifi/user-scripts/legacy_scripts/anonymise_doc.py b/nifi/user_scripts/legacy_scripts/anonymise_doc.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/anonymise_doc.py
rename to nifi/user_scripts/legacy_scripts/anonymise_doc.py
diff --git a/nifi/user-scripts/legacy_scripts/flowfile_to_attribute_with_content.py b/nifi/user_scripts/legacy_scripts/flowfile_to_attribute_with_content.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/flowfile_to_attribute_with_content.py
rename to nifi/user_scripts/legacy_scripts/flowfile_to_attribute_with_content.py
diff --git a/nifi/user-scripts/legacy_scripts/ingest_into_es.py b/nifi/user_scripts/legacy_scripts/ingest_into_es.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/ingest_into_es.py
rename to nifi/user_scripts/legacy_scripts/ingest_into_es.py
diff --git a/nifi/user-scripts/legacy_scripts/parse-anns-from-nlp-response-bulk.py b/nifi/user_scripts/legacy_scripts/parse-anns-from-nlp-response-bulk.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/parse-anns-from-nlp-response-bulk.py
rename to nifi/user_scripts/legacy_scripts/parse-anns-from-nlp-response-bulk.py
diff --git a/nifi/user-scripts/legacy_scripts/parse-es-db-result-for-nlp-request-bulk.py b/nifi/user_scripts/legacy_scripts/parse-es-db-result-for-nlp-request-bulk.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/parse-es-db-result-for-nlp-request-bulk.py
rename to nifi/user_scripts/legacy_scripts/parse-es-db-result-for-nlp-request-bulk.py
diff --git a/nifi/user-scripts/legacy_scripts/parse-json-to-avro.py b/nifi/user_scripts/legacy_scripts/parse-json-to-avro.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/parse-json-to-avro.py
rename to nifi/user_scripts/legacy_scripts/parse-json-to-avro.py
diff --git a/nifi/user-scripts/legacy_scripts/parse-tika-result-json-to-avro.py b/nifi/user_scripts/legacy_scripts/parse-tika-result-json-to-avro.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/parse-tika-result-json-to-avro.py
rename to nifi/user_scripts/legacy_scripts/parse-tika-result-json-to-avro.py
diff --git a/nifi/user-scripts/legacy_scripts/prepare-db-record-for-tika-request-single.py b/nifi/user_scripts/legacy_scripts/prepare-db-record-for-tika-request-single.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/prepare-db-record-for-tika-request-single.py
rename to nifi/user_scripts/legacy_scripts/prepare-db-record-for-tika-request-single.py
diff --git a/nifi/user-scripts/legacy_scripts/prepare-file-for-tika-request-single-keep-db-fields.py b/nifi/user_scripts/legacy_scripts/prepare-file-for-tika-request-single-keep-db-fields.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/prepare-file-for-tika-request-single-keep-db-fields.py
rename to nifi/user_scripts/legacy_scripts/prepare-file-for-tika-request-single-keep-db-fields.py
diff --git a/nifi/user-scripts/tests/generate_big_ann_file.py b/nifi/user_scripts/tests/generate_big_ann_file.py
similarity index 100%
rename from nifi/user-scripts/tests/generate_big_ann_file.py
rename to nifi/user_scripts/tests/generate_big_ann_file.py
diff --git a/nifi/user-scripts/tests/generate_files.py b/nifi/user_scripts/tests/generate_files.py
similarity index 100%
rename from nifi/user-scripts/tests/generate_files.py
rename to nifi/user_scripts/tests/generate_files.py
diff --git a/nifi/user-scripts/tests/get_ingested_files.py b/nifi/user_scripts/tests/get_ingested_files.py
similarity index 100%
rename from nifi/user-scripts/tests/get_ingested_files.py
rename to nifi/user_scripts/tests/get_ingested_files.py
diff --git a/nifi/user_scripts/tests/nifi/test_nifi.py b/nifi/user_scripts/tests/nifi/test_nifi.py
new file mode 100644
index 000000000..0242088a0
--- /dev/null
+++ b/nifi/user_scripts/tests/nifi/test_nifi.py
@@ -0,0 +1,149 @@
+import json
+import os
+import sys
+import tempfile
+import unittest
+from pathlib import Path
+
+sys.path.insert(0, "../../")
+
+import requests
+from dto.nifi_api_config import NiFiAPIConfig
+from dto.database_config import DatabaseConfig
+from dto.service_health import NiFiHealth, DatabaseHealth
+from nipyapi import config as nipy_config
+from nipyapi import security, versioning
+from utils.helpers.nifi_api_client import NiFiClient, NiFiRegistryClient
+from utils.helpers.service import check_postgres
+
+
+class TestServices(unittest.TestCase):
+    """Service connectivity and health checks."""
+
+    @classmethod
+    def setUpClass(cls):
+        pass
+
+        # cls.pg_cfg = PGConfig()
+        # cls.nifi_api_config = NiFiAPIConfig()
+        # cls.nifi_client = NiFiClient(config=cls.nifi_api_config, login_on_init=False)
+        # cls.nifi_registry_client = NiFiRegistryClient(config=cls.nifi_api_config)
+        # cls.pg_config = PGConfig()
+        # cls.registry_bucket_name = os.environ.get("NIFI_REGISTRY_BUCKET", "cogstack")
+        # cls.flow_name = "opensearch_ingest_docs_db_to_es"
+        # cls.template_path = (
+        #     Path(__file__).resolve().parents[4]
+        #     / "nifi"
+        #     / "user-templates"
+        #     / f"{cls.flow_name}.json"
+        # )
+        # cls.es_hosts = os.environ.get("OPENSEARCH_URLS", "http://localhost:9200")
+        # cls.es_username = os.environ.get("OPENSEARCH_USERNAME", "admin")
+        # cls.es_password = os.environ.get("OPENSEARCH_PASSWORD", "admin")
+
+    #@classmethod
+    #def _configure_nipyapi(cls) -> None:
+    #    """Apply SSL + host config so nipyapi uses the same creds as the raw client."""
+    #    nipy_config.nifi_config.host = cls.nifi_api_config.nifi_api_url
+    #    nipy_config.registry_config.host = cls.nifi_api_config.nifi_registry_api_url
+    #
+    #    for cfg in (nipy_config.nifi_config, nipy_config.registry_config):
+    #        cfg.verify_ssl = cls.nifi_api_config.VERIFY_SSL
+    #        cfg.cert_file = cls.nifi_api_config.NIFI_CERT_PEM_PATH
+    #        cfg.key_file = cls.nifi_api_config.NIFI_CERT_KEY_PATH
+    #        cfg.ssl_ca_cert = cls.nifi_api_config.ROOT_CERT_CA_PATH
+    #
+    #def _prepare_snapshot_with_env_defaults(self) -> Path:
+    #    """
+    #    Load the opensearch template and pre-fill controller service properties
+    #    using env/default configs so the flow can start without manual clicks.
+    #    """
+    #    with self.template_path.open() as fp:
+    #        snapshot = json.load(fp)
+    #
+    #    db_url = f"jdbc:postgresql://{self.pg_cfg.host}:{self.pg_cfg.port}/{self.pg_cfg.db}"
+    #
+    #    for controller_service in snapshot.get("flowContents", {}).get("controllerServices", []):
+    #        name = controller_service.get("name")
+    #        properties = controller_service.setdefault("properties", {})
+    #
+    #        if name == "DBCPConnectionPool":
+    #            properties["Database Connection URL"] = db_url
+    #            properties["Database User"] = self.pg_cfg.user
+    #            properties["Password"] = self.pg_cfg.password
+    #        elif name == "ElasticSearchClientServiceImpl":
+    #            properties["el-cs-http-hosts"] = self.es_hosts
+    #            properties["el-cs-username"] = self.es_username
+    #            properties["el-cs-password"] = self.es_password
+    #
+    #    fd, tmp_path = tempfile.mkstemp(suffix=".json", prefix="nifi-template-")
+    #    with os.fdopen(fd, "w") as tmp_file:
+    #        json.dump(snapshot, tmp_file)
+    #
+    #    return Path(tmp_path)
+    #
+    #def test_nifi_health(self) -> None:
+    #    result = self.nifi_client._login()
+    #    self.assertTrue(result)
+    #
+    #def test_nifi_registry_health(self) -> None:
+    #    result = requests.head(
+    #        url=self.nifi_api_config.nifi_registry_base_url,
+    #        auth=self.nifi_api_config.auth_credentials(),
+    #        cert=self.nifi_api_config.get_nifi_ssl_certs_paths(),
+    #        verify=self.nifi_api_config.ROOT_CERT_CA_PATH,
+    #        timeout=15,
+    #    )
+    #    self.assertEqual(result.status_code, 200)
+    #
+    #def test_postgres_health(self):
+    #    result, latency, err = check_postgres(self.pg_config)
+    #    self.assertTrue(result, f"PostgreSQL unhealthy: {err}")
+    #    print(f"✅ PostgreSQL OK, latency {latency:.2f} ms")
+    #
+    #def test_import_opensearch_template_and_configure_controller_services(self) -> None:
+    #    """
+    #    Bring the opensearch template into the local NiFi Registry bucket and
+    #    patch the controller services so they use local PG/ES credentials.
+    #    """
+    #    self.assertTrue(self.nifi_client._login())
+    #    self._configure_nipyapi()
+    #
+    #    security.service_login(
+    #        service="registry",
+    #        username=self.nifi_api_config.NIFI_USERNAME,
+    #        password=self.nifi_api_config.NIFI_PASSWORD,
+    #    )
+    #
+    #    bucket = versioning.get_bucket(self.registry_bucket_name)
+    #    if bucket is None:
+    #        bucket = versioning.create_bucket(
+    #            bucket_name=self.registry_bucket_name,
+    #            bucket_desc="Auto-created for test imports",
+    #        )
+    #
+    #    flow = versioning.get_flow_in_bucket(
+    #        bucket_id=bucket.identifier,
+    #        identifier=self.flow_name,
+    #        identifier_type="name",
+    #    )
+    #    if flow is None:
+    #        flow = versioning.create_flow(
+    #            bucket_id=bucket.identifier,
+    #            flow_name=self.flow_name,
+    #            desc="Auto-imported from user-templates",
+    #        )
+    #
+    #    snapshot_path = self._prepare_snapshot_with_env_defaults()
+    #
+    #    try:
+    #        snapshot = versioning.import_flow_version(
+    #            bucket_id=bucket.identifier,
+    #            flow_id=flow.identifier,
+    #            file_path=str(snapshot_path),
+    #        )
+    #    finally:
+    #        snapshot_path.unlink(missing_ok=True)
+    #
+    #    self.assertIsNotNone(snapshot)
+    #
\ No newline at end of file
diff --git a/nifi/user_scripts/tests/nifi/test_opensearch_ingest.py b/nifi/user_scripts/tests/nifi/test_opensearch_ingest.py
new file mode 100644
index 000000000..a26238800
--- /dev/null
+++ b/nifi/user_scripts/tests/nifi/test_opensearch_ingest.py
@@ -0,0 +1,32 @@
+import unittest
+from io import BytesIO
+
+
+class DummyFlowFile:
+    def __init__(self, content: str):
+        self._data = BytesIO(content.encode())
+
+    def read(self):
+        return self._data.getvalue()
+
+    def write(self, data):
+        self._data = BytesIO(data)
+        return self
+
+class UppercaseProcessor:
+    """Placeholder for the real ingest processor: uppercases flowfile content,
+    which is exactly what the test below asserts."""
+
+    def transform(self, context, flowfile):
+        return flowfile.write(flowfile.read().decode().upper().encode())
+
+class TestMyProcessor(unittest.TestCase):
+    def test_uppercase(self):
+        proc = UppercaseProcessor()
+        ff_in = DummyFlowFile("hello nifi")
+        ff_out = proc.transform({}, ff_in)
+
+        self.assertEqual(ff_out.read().decode(), "HELLO NIFI")
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/nifi/user_scripts/tests/nifi/test_services_health.py b/nifi/user_scripts/tests/nifi/test_services_health.py
new file mode 100644
index 000000000..2dcbe42f6
--- /dev/null
+++ b/nifi/user_scripts/tests/nifi/test_services_health.py
@@ -0,0 +1,36 @@
+import unittest
+
+from nifi.user_scripts.dto.database_config import DatabaseConfig
+from nifi.user_scripts.dto.nifi_api_config import NiFiAPIConfig
+from nifi.user_scripts.dto.service_health import DatabaseHealth, NiFiHealth
+from nifi.user_scripts.utils.generic import get_logger
+from nifi.user_scripts.utils.helpers.nifi_api_client import NiFiClient, NiFiRegistryClient
+from nifi.user_scripts.utils.helpers.service import check_postgres
+
+
+class TestServices(unittest.TestCase):
+    """Service connectivity and health checks."""
+
+    logger = get_logger(__name__)
+
+    @classmethod
+    def setUpClass(cls):
+        cls.nifi_api_config: NiFiAPIConfig = NiFiAPIConfig()
+        cls.nifi_client: NiFiClient = NiFiClient(config=cls.nifi_api_config, health_check_on_init=False)
+        cls.nifi_registry_client: NiFiRegistryClient = NiFiRegistryClient(config=cls.nifi_api_config)
+        cls.pg_config: DatabaseConfig = DatabaseConfig(port=5554)
+
+    def test_nifi_health(self) -> None:
+        health: NiFiHealth = self.nifi_client.health_check()
+        self.assertTrue(health.connected)
+        self.assertEqual(health.status, "healthy")
+
+    def test_nifi_registry_health(self) -> None:
+        nifi_health: NiFiHealth = self.nifi_registry_client.health_check()
+        self.assertTrue(nifi_health.connected)
+        self.assertEqual(nifi_health.status, "healthy")
+
+    def test_postgres_health(self):
+        database_health: DatabaseHealth = check_postgres(self.pg_config)
+        self.assertTrue(database_health.connected)
+        self.assertEqual(database_health.status, "healthy")
\ No newline at end of file
diff --git a/nifi/user-scripts/tests/test_files/ex1.pdf b/nifi/user_scripts/tests/resources/ex1.pdf
old mode 100755
new mode 100644
similarity index 100%
rename from nifi/user-scripts/tests/test_files/ex1.pdf
rename to nifi/user_scripts/tests/resources/ex1.pdf
diff --git a/nifi/user_scripts/tests/test_avro.py b/nifi/user_scripts/tests/test_avro.py
new file mode 100644
index 000000000..7f893747e
--- /dev/null
+++ b/nifi/user_scripts/tests/test_avro.py
@@ -0,0 +1,62 @@
+import io
+import json
+
+import avro.schema
+from avro.datafile import DataFileWriter
+from avro.io import DatumWriter
+
+"""
+    Use this script to test avro schemas etc with python3
+"""
+
+# schemas were moved to nifi/user_schemas/legacy/ as part of this restructure
+json_mapper_schema = json.loads(open("../../user_schemas/legacy/cogstack_common_schema_mapping.json").read())
+avro_cogstack_schema = avro.schema.parse(open("../../user_schemas/legacy/cogstack_common_schema_full.avsc", "rb").read(),
+                                         validate_enum_symbols=False)
+
+test_records = [{"docid": "1",
+                 "sampleid": 1041,
+                 "dct": "2020-05-11 10:52:25.273518",
+                 "binarydoc": "blablabla"},
+                {"docid": "1",
+                 "sampleid": 1041,
+                 "dct": "2020-05-11 10:52:25.273518",
+                 "binarydoc": "blablabla"}]
+
+schema_fields = avro_cogstack_schema.props["fields"]
+dict_fields_types = {}
+for field in schema_fields:
+    dict_fields_types[field.name] = ""
+    tmp_list = json.loads(str(field.type))
+    if len(tmp_list) > 1 and type(tmp_list) is not str:
+        if type(tmp_list[1]) is dict:
+            dict_fields_types[field.name] = tmp_list[1]["type"]
+        else:
+            dict_fields_types[field.name] = tmp_list[1]
+    else:
+        dict_fields_types[field.name] = field.type
+
+available_mapping_keys = {}
+for k, v in json_mapper_schema.items():
+    if v:
+        available_mapping_keys[k] = v
+
+bytes_io = io.BytesIO()
+
+type_mapping = {"boolean": "bool", "long": "int", "int": "int", "float": "float",
+                "byte": "bytes", "string": "str", "double": "float"}
+
+
+print(avro_cogstack_schema)
+
+with DataFileWriter(bytes_io, DatumWriter(), avro_cogstack_schema) as writer:
+    # re-map the value to the new keys
+
+    for _record in test_records:
+        record = {}
+
+        for k, v in available_mapping_keys.items():
+            if v in _record.keys():
+                record[k] = _record[v]  # getattr(__builtins__, type_mapping[dict_fields_types[k]])(_record[v])
+
+        writer.append(record)
diff --git a/nifi/user_scripts/tmp/.gitignore b/nifi/user_scripts/tmp/.gitignore
new file mode 100644
index 000000000..e69de29bb
diff --git a/nifi/user_scripts/utils/__init__.py b/nifi/user_scripts/utils/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/nifi/user-scripts/utils/cerner_blob.py b/nifi/user_scripts/utils/cerner_blob.py
similarity index 100%
rename from nifi/user-scripts/utils/cerner_blob.py
rename to nifi/user_scripts/utils/cerner_blob.py
diff --git a/nifi/user-scripts/utils/ethnicity_map.py b/nifi/user_scripts/utils/ethnicity_map.py
similarity index 100%
rename from nifi/user-scripts/utils/ethnicity_map.py
rename to nifi/user_scripts/utils/ethnicity_map.py
diff --git a/nifi/user-scripts/utils/generic.py b/nifi/user_scripts/utils/generic.py
similarity index 98%
rename from nifi/user-scripts/utils/generic.py
rename to nifi/user_scripts/utils/generic.py
index 5d5be2ead..49cfcc870 100644
--- a/nifi/user-scripts/utils/generic.py
+++ b/nifi/user_scripts/utils/generic.py
@@ -79,7 +79,7 @@ def dict2jsonl_file(input_dict: dict | defaultdict, file_path: str) -> None:
         print('', file=outfile)
 
 
-def get_logger(name: str) -> logging.Logger:
+def get_logger(name: str, propagate: bool = True) -> logging.Logger:
     """Return a configured logger shared across all NiFi clients."""
     level_name = os.getenv("NIFI_LOG_LEVEL", "INFO").upper()
     level = getattr(logging, level_name, logging.INFO)
@@ -94,7 +94,7 @@ def get_logger(name: str) -> logging.Logger:
         handler.setFormatter(fmt)
         logger.addHandler(handler)
         logger.setLevel(level)
-    logger.propagate = False
+    logger.propagate = propagate
    return logger
 
 def download_file_from_url(url: str, output_path: str, ssl_verify: bool = False, chunk_size: int = 8192) -> None:
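The new propagate switch matters once these scripts run under a host that configures the root logger (pytest, NiFi's bundled interpreter); a quick sketch of the difference, using only the function changed in the hunk above:

    import logging

    from nifi.user_scripts.utils.generic import get_logger

    logging.basicConfig(level=logging.INFO)  # stand-in for a host that configures root logging
    noisy = get_logger("demo.noisy")                    # propagate=True: records also reach the root handler
    quiet = get_logger("demo.quiet", propagate=False)   # records stop at the logger's own handler
    noisy.info("printed twice: own handler plus root handler")
    quiet.info("printed once: own handler only")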
diff --git a/nifi/user-scripts/utils/helpers/avro_json_encoder.py b/nifi/user_scripts/utils/helpers/avro_json_encoder.py
similarity index 100%
rename from nifi/user-scripts/utils/helpers/avro_json_encoder.py
rename to nifi/user_scripts/utils/helpers/avro_json_encoder.py
diff --git a/nifi/user-scripts/utils/helpers/base_nifi_processor.py b/nifi/user_scripts/utils/helpers/base_nifi_processor.py
similarity index 100%
rename from nifi/user-scripts/utils/helpers/base_nifi_processor.py
rename to nifi/user_scripts/utils/helpers/base_nifi_processor.py
diff --git a/nifi/user_scripts/utils/helpers/nifi_api_client.py b/nifi/user_scripts/utils/helpers/nifi_api_client.py
new file mode 100644
index 000000000..03aa12ba1
--- /dev/null
+++ b/nifi/user_scripts/utils/helpers/nifi_api_client.py
@@ -0,0 +1,148 @@
+import time
+from logging import Logger
+
+import requests
+from nipyapi import canvas, security
+from nipyapi.nifi import ApiClient, ProcessGroupsApi
+from nipyapi.nifi.configuration import Configuration as NiFiConfiguration
+from nipyapi.nifi.models.process_group_entity import ProcessGroupEntity
+from nipyapi.nifi.models.processor_entity import ProcessorEntity
+from nipyapi.registry import ApiClient as RegistryApiClient
+from nipyapi.registry import BucketsApi
+from nipyapi.registry.configuration import Configuration as RegistryConfiguration
+
+from nifi.user_scripts.dto.nifi_api_config import NiFiAPIConfig
+from nifi.user_scripts.dto.service_health import NiFiHealth
+from nifi.user_scripts.utils.generic import get_logger
+
+
+class NiFiRegistryClient:
+
+    def __init__(self, config: NiFiAPIConfig, health_check_on_init: bool = True) -> None:
+        self.config = config or NiFiAPIConfig()
+        self.nipyapi_config = RegistryConfiguration()
+        self.nipyapi_config.host = self.config.nifi_registry_api_url
+        self.nipyapi_config.verify_ssl = self.config.verify_ssl
+        self.nipyapi_config.cert_file = self.config.nifi_cert_pem_path  # type: ignore
+        self.nipyapi_config.key_file = self.config.nifi_cert_key_path  # type: ignore
+        self.nipyapi_config.ssl_ca_cert = self.config.root_cert_ca_path  # type: ignore
+
+        self.logger: Logger = get_logger(self.__class__.__name__)
+
+        self.api_client = RegistryApiClient(self.nipyapi_config)
+        self.buckets_api = BucketsApi(self.api_client)
+
+        if health_check_on_init:
+            self.health_check()
+
+    def list_buckets(self):
+        buckets = self.buckets_api.get_buckets()
+        for b in buckets:
+            self.logger.info("Bucket: %s (%s)", b.name, b.identifier)
+        return buckets
+
+    def health_check(self, timeout: int = 15) -> NiFiHealth:
+        start = time.perf_counter()
+        nifi_health = NiFiHealth(
+            service="nifi-registry",
+            service_info=self.config.nifi_registry_base_url
+        )
+
+        try:
+            response = requests.head(
+                url=self.config.nifi_registry_base_url,
+                auth=self.config.auth_credentials(),
+                cert=self.config.get_nifi_ssl_certs_paths(),
+                verify=self.config.root_cert_ca_path,
+                timeout=timeout,
+            )
+
+            nifi_health.latency_ms = (time.perf_counter() - start) * 1000
+            nifi_health.connected = response.ok
+
+            if response.status_code == 200:
+                nifi_health.status = "healthy"
+                self.logger.info(f"✅ Logged in to NiFi Registry, latency {nifi_health.latency_ms:.2f} ms")
+            else:
+                nifi_health.status = "unhealthy"
+                nifi_health.message = f"❌ Unexpected status code {response.status_code}"
+
+        except Exception as exc:
+            nifi_health.latency_ms = (time.perf_counter() - start) * 1000
+            nifi_health.message = str(exc)
+            self.logger.info("❌ Failed to log in to NiFi Registry: %s", exc)
+
+        return nifi_health
+
+
+class NiFiClient:
+
+    def __init__(self, config: NiFiAPIConfig, health_check_on_init: bool = True) -> None:
+        self.config = config or NiFiAPIConfig()
+        self.nipyapi_config = NiFiConfiguration()
+        self.nipyapi_config.host = self.config.nifi_api_url
+        self.nipyapi_config.verify_ssl = self.config.verify_ssl
+        self.nipyapi_config.cert_file = self.config.nifi_cert_pem_path  # type: ignore
+        self.nipyapi_config.key_file = self.config.nifi_cert_key_path  # type: ignore
+        self.nipyapi_config.ssl_ca_cert = self.config.root_cert_ca_path  # type: ignore
+
+        self.logger: Logger = get_logger(self.__class__.__name__)
+
+        self.api_client = ApiClient(self.nipyapi_config)
+        self.process_group_api = ProcessGroupsApi(self.api_client)
+
+        if health_check_on_init:
+            self.health_check()
+
+    def health_check(self) -> NiFiHealth:
+        start = time.perf_counter()
+        nifi_health = NiFiHealth(
+            service="nifi",
+            service_info=self.config.nifi_api_url
+        )
+
+        try:
+            result = security.service_login(
+                service='nifi',
+                username=self.config.nifi_username,
+                password=self.config.nifi_password
+            )
+
+            nifi_health.connected = bool(result)
+            nifi_health.latency_ms = (time.perf_counter() - start) * 1000
+
+            if result:
+                nifi_health.status = "healthy"
+                self.logger.info(f"✅ Logged in to NiFi, latency {nifi_health.latency_ms:.2f} ms")
+            else:
+                nifi_health.message = "Authentication returned False"
+                self.logger.info("❌ Failed to log in to NiFi")
+
+        except Exception as exc:
+            nifi_health.latency_ms = (time.perf_counter() - start) * 1000
+            nifi_health.message = str(exc)
+            self.logger.info("❌ Failed to log in to NiFi: %s", exc)
+
+        return nifi_health
+
+    def get_root_process_group_id(self) -> str:
+        return canvas.get_root_pg_id()
+
+    def get_process_group_by_name(self, process_group_name: str) -> None | list[object] | object:
+        return canvas.get_process_group(process_group_name, identifier_type="name")
+
+    def get_process_group_by_id(self, process_group_id: str) -> ProcessGroupEntity:
+        return canvas.get_process_group(process_group_id, identifier_type="id")
+
+    def start_process_group(self, process_group_id: str) -> bool:
+        return canvas.schedule_process_group(process_group_id, True)
+
+    def stop_process_group(self, process_group_id: str) -> bool:
+        return canvas.schedule_process_group(process_group_id, False)
+
+    def get_child_process_groups_from_parent_id(self, parent_process_group_id: str) -> list[ProcessGroupEntity]:
+        parent_pg = canvas.get_process_group(parent_process_group_id, identifier_type="id")
+        return canvas.list_all_process_groups(parent_pg.id)
+
+    def get_all_processors_in_process_group(self, process_group_id: str) -> list[ProcessorEntity]:
+        return canvas.list_all_processors(process_group_id)
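A minimal driver for the two clients above, assuming a local deployment with the default self-signed certificates in place:

    from nifi.user_scripts.dto.nifi_api_config import NiFiAPIConfig
    from nifi.user_scripts.utils.helpers.nifi_api_client import NiFiClient, NiFiRegistryClient

    config = NiFiAPIConfig()
    nifi = NiFiClient(config=config, health_check_on_init=False)

    health = nifi.health_check()
    if health.status == "healthy":
        root_id = nifi.get_root_process_group_id()
        nifi.start_process_group(root_id)   # schedules every processor under the root group

    registry_health = NiFiRegistryClient(config=config, health_check_on_init=False).health_check()
    print(health.latency_ms, registry_health.latency_ms)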
diff --git a/nifi/user_scripts/utils/helpers/service.py b/nifi/user_scripts/utils/helpers/service.py
new file mode 100644
index 000000000..f255e5665
--- /dev/null
+++ b/nifi/user_scripts/utils/helpers/service.py
@@ -0,0 +1,49 @@
+import time
+
+import psycopg
+
+from nifi.user_scripts.dto.database_config import DatabaseConfig
+from nifi.user_scripts.dto.service_health import DatabaseHealth
+from nifi.user_scripts.utils.generic import get_logger
+
+logger = get_logger(__name__)
+
+
+def check_postgres(config: DatabaseConfig) -> DatabaseHealth:
+
+    start = time.perf_counter()
+    database_health = DatabaseHealth(service="cogstack-samples-db",
+                                     db_name=config.database_name,
+                                     version=None
+                                     )
+
+    try:
+        with psycopg.connect(
+            host=config.host,
+            port=config.port,
+            user=config.username,
+            password=config.password.get_secret_value(),
+            dbname=config.database_name,
+            connect_timeout=config.timeout,
+        ) as connection, connection.cursor() as cursor:
+            cursor.execute("SELECT version();")
+            result = cursor.fetchone()
+
+        latency = (time.perf_counter() - start) * 1000
+        database_health.latency_ms = latency
+        if result and result[0]:
+            database_health.version = result[0]
+            database_health.status = "healthy"
+            database_health.connected = True
+            logger.info(f"✅ PostgreSQL OK, latency {database_health.latency_ms:.2f} ms")
+        else:
+            database_health.message = "No version returned from database"
+            database_health.status = "unhealthy"
+            database_health.connected = True
+
+    except Exception as e:
+        database_health.message = str(e)
+        database_health.status = "unhealthy"
+        database_health.connected = False
+        logger.info("❌ Failed to query PostgreSQL: %s", str(e))
+    return database_health
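A sketch of running the probe standalone, assuming the samples database from deploy/database.env is reachable; since DatabaseHealth is a pydantic model, the result serialises cleanly for dashboards or NiFi attributes:

    from nifi.user_scripts.dto.database_config import DatabaseConfig
    from nifi.user_scripts.utils.helpers.service import check_postgres

    health = check_postgres(DatabaseConfig())
    print(health.model_dump_json(indent=2))   # includes status, latency_ms, version, db_name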
diff --git a/nifi/user_scripts/utils/lint_env.py b/nifi/user_scripts/utils/lint_env.py
new file mode 100644
index 000000000..8918e9152
--- /dev/null
+++ b/nifi/user_scripts/utils/lint_env.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+"""
+    Lightweight env file validator used by deploy/export_env_vars.sh.
+"""
+
+from __future__ import annotations
+
+import sys
+from collections.abc import Iterable
+from pathlib import Path
+
+PORT_SUFFIXES = ("_PORT", "_OUTPUT_PORT", "_INPUT_PORT")
+BOOL_SUFFIXES = ("_ENABLED", "_SSL_ENABLED", "_BAKE")
+BOOL_VALUES = {"true", "false", "1", "0", "yes", "no", "on", "off"}
+
+
+def strip_quotes(value: str) -> str:
+    if (value.startswith('"') and value.endswith('"')) or (value.startswith("'") and value.endswith("'")):
+        return value[1:-1]
+    return value
+
+
+def parse_env_file(path: Path) -> tuple[list[str], list[str], list[tuple[str, str, int]]]:
+    errors: list[str] = []
+    warnings: list[str] = []
+    entries: list[tuple[str, str, int]] = []
+
+    for lineno, raw_line in enumerate(path.read_text().splitlines(), start=1):
+        line = raw_line.strip()
+        if not line or line.startswith("#"):
+            continue
+
+        if line.startswith("export "):
+            line = line[len("export "):].strip()
+
+        if "=" not in line:
+            errors.append(f"{path}:{lineno}: missing '=' (got: {raw_line})")
+            continue
+
+        key, value = line.split("=", 1)
+        key = key.strip()
+        value = value.strip()
+
+        if not key:
+            errors.append(f"{path}:{lineno}: empty key (got: {raw_line})")
+            continue
+
+        entries.append((key, value, lineno))
+
+    seen: dict[str, int] = {}
+    for key, _, lineno in entries:
+        if key in seen:
+            warnings.append(f"{path}:{lineno}: duplicate key '{key}' (also on line {seen[key]})")
+        else:
+            seen[key] = lineno
+
+    return errors, warnings, entries
+
+
+def validate_entries(path: Path, entries: Iterable[tuple[str, str, int]]) -> list[str]:
+    errors: list[str] = []
+
+    for key, value, lineno in entries:
+        normalized = strip_quotes(value)
+
+        if any(key.endswith(suffix) for suffix in PORT_SUFFIXES):
+            if not normalized.isdigit():
+                errors.append(f"{path}:{lineno}: '{key}' should be an integer port (got '{value}')")
+
+        if any(key.endswith(suffix) for suffix in BOOL_SUFFIXES):
+            if normalized.lower() not in BOOL_VALUES:
+                errors.append(
+                    f"{path}:{lineno}: '{key}' should be one of {sorted(BOOL_VALUES)} (got '{value}')"
+                )
+
+    return errors
+
+
+def main(args: list[str]) -> int:
+    if not args:
+        script = Path(__file__).name
+        print(f"Usage: {script} ENV_FILE [ENV_FILE ...]")
+        return 1
+
+    warnings: list[str] = []
+    errors: list[str] = []
+    checked_files = 0
+
+    for path_str in args:
+        path = Path(path_str).resolve()
+        if not path.exists():
+            warnings.append(f"Skipping missing env file: {path}")
+            continue
+
+        checked_files += 1
+        parse_errors, parse_warnings, entries = parse_env_file(path)
+        errors.extend(parse_errors)
+        warnings.extend(parse_warnings)
+        errors.extend(validate_entries(path, entries))
+
+    for warning in warnings:
+        print(f"⚠️ {warning}")
+
+    if errors:
+        print("❌ Env validation failed:")
+        for err in errors:
+            print(f" - {err}")
+        return 1
+
+    print(f"✅ Env validation passed ({checked_files} files checked)")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))
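A sketch of exercising the validator directly, with a deliberately broken entry; the file path and contents are illustrative only:

    from pathlib import Path

    from nifi.user_scripts.utils.lint_env import main, parse_env_file

    demo = Path("/tmp/demo.env")
    demo.write_text("FOO_OUTPUT_PORT=not-a-number\nTLS_SSL_ENABLED=maybe\n")

    errors, warnings, entries = parse_env_file(demo)   # structural problems only
    print(errors, warnings, len(entries))
    print(main([str(demo)]))                           # prints the ❌ report, returns 1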
diff --git a/nifi/user-scripts/utils/pgsql_query.py b/nifi/user_scripts/utils/pgsql_query.py
similarity index 71%
rename from nifi/user-scripts/utils/pgsql_query.py
rename to nifi/user_scripts/utils/pgsql_query.py
index 12e32d96e..33c639f2f 100644
--- a/nifi/user-scripts/utils/pgsql_query.py
+++ b/nifi/user_scripts/utils/pgsql_query.py
@@ -1,6 +1,6 @@
-import psycopg2
+import psycopg
 
-conn = psycopg2.connect(
+conn = psycopg.connect(
     host="localhost",
-    database="suppliers",
+    dbname="suppliers",
     user="YourUsername",
diff --git a/nifi/user-scripts/utils/sqlite_query.py b/nifi/user_scripts/utils/sqlite_query.py
similarity index 100%
rename from nifi/user-scripts/utils/sqlite_query.py
rename to nifi/user_scripts/utils/sqlite_query.py
diff --git a/nifi/user-templates/dt4h/annotate_dt4h_ann_manager.xml b/nifi/user_templates/dt4h/annotate_dt4h_ann_manager.xml
similarity index 100%
rename from nifi/user-templates/dt4h/annotate_dt4h_ann_manager.xml
rename to nifi/user_templates/dt4h/annotate_dt4h_ann_manager.xml
diff --git a/nifi/user-templates/dt4h/raw_ingest_dt4h.xml b/nifi/user_templates/dt4h/raw_ingest_dt4h.xml
similarity index 100%
rename from nifi/user-templates/dt4h/raw_ingest_dt4h.xml
rename to nifi/user_templates/dt4h/raw_ingest_dt4h.xml
diff --git a/nifi/user-templates/legacy/CogStack_Cohort_create_source_docs.xml b/nifi/user_templates/legacy/CogStack_Cohort_create_source_docs.xml
similarity index 100%
rename from nifi/user-templates/legacy/CogStack_Cohort_create_source_docs.xml
rename to nifi/user_templates/legacy/CogStack_Cohort_create_source_docs.xml
diff --git a/nifi/user-templates/legacy/Common_schema_example_ingest.xml b/nifi/user_templates/legacy/Common_schema_example_ingest.xml
similarity index 100%
rename from nifi/user-templates/legacy/Common_schema_example_ingest.xml
rename to nifi/user_templates/legacy/Common_schema_example_ingest.xml
diff --git a/nifi/user-templates/legacy/DEID_sample_pipeline.xml b/nifi/user_templates/legacy/DEID_sample_pipeline.xml
similarity index 100%
rename from nifi/user-templates/legacy/DEID_sample_pipeline.xml
rename to nifi/user_templates/legacy/DEID_sample_pipeline.xml
diff --git a/nifi/user-templates/legacy/Generate_location_ES.xml b/nifi/user_templates/legacy/Generate_location_ES.xml
similarity index 100%
rename from nifi/user-templates/legacy/Generate_location_ES.xml
rename to nifi/user_templates/legacy/Generate_location_ES.xml
diff --git a/nifi/user-templates/legacy/Grab_non_annotated_docs.xml b/nifi/user_templates/legacy/Grab_non_annotated_docs.xml
similarity index 100%
rename from nifi/user-templates/legacy/Grab_non_annotated_docs.xml
rename to nifi/user_templates/legacy/Grab_non_annotated_docs.xml
diff --git a/nifi/user-templates/legacy/HealTAC_23.xml b/nifi/user_templates/legacy/HealTAC_23.xml
similarity index 100%
rename from nifi/user-templates/legacy/HealTAC_23.xml
rename to nifi/user_templates/legacy/HealTAC_23.xml
diff --git a/nifi/user-templates/legacy/OS_annotate_per_doc.xml b/nifi/user_templates/legacy/OS_annotate_per_doc.xml
similarity index 100%
rename from nifi/user-templates/legacy/OS_annotate_per_doc.xml
rename to nifi/user_templates/legacy/OS_annotate_per_doc.xml
diff --git a/nifi/user-templates/legacy/OpenSearch_Ingest_DB_OCR_service_to_ES.xml b/nifi/user_templates/legacy/OpenSearch_Ingest_DB_OCR_service_to_ES.xml
similarity index 100%
rename from nifi/user-templates/legacy/OpenSearch_Ingest_DB_OCR_service_to_ES.xml
rename to nifi/user_templates/legacy/OpenSearch_Ingest_DB_OCR_service_to_ES.xml
diff --git a/nifi/user-templates/legacy/OpenSearch_ingest_annotate_DB_MedCATService_to_ES.xml b/nifi/user_templates/legacy/OpenSearch_ingest_annotate_DB_MedCATService_to_ES.xml
similarity index 100%
rename from nifi/user-templates/legacy/OpenSearch_ingest_annotate_DB_MedCATService_to_ES.xml
rename to nifi/user_templates/legacy/OpenSearch_ingest_annotate_DB_MedCATService_to_ES.xml
diff --git a/nifi/user-templates/legacy/OpenSearch_ingest_annotate_DB_to_ES_and_DB_ann_manager.xml b/nifi/user_templates/legacy/OpenSearch_ingest_annotate_DB_to_ES_and_DB_ann_manager.xml
similarity index 100%
rename from nifi/user-templates/legacy/OpenSearch_ingest_annotate_DB_to_ES_and_DB_ann_manager.xml
rename to nifi/user_templates/legacy/OpenSearch_ingest_annotate_DB_to_ES_and_DB_ann_manager.xml
diff --git a/nifi/user-templates/legacy/OpenSearch_ingest_annotate_ES_MedCATService_to_ES.xml b/nifi/user_templates/legacy/OpenSearch_ingest_annotate_ES_MedCATService_to_ES.xml
similarity index 100%
rename from nifi/user-templates/legacy/OpenSearch_ingest_annotate_ES_MedCATService_to_ES.xml
rename to nifi/user_templates/legacy/OpenSearch_ingest_annotate_ES_MedCATService_to_ES.xml
diff --git a/nifi/user-templates/legacy/OpenSearch_ingest_docs_DB_to_ES.xml b/nifi/user_templates/legacy/OpenSearch_ingest_docs_DB_to_ES.xml
similarity index 100%
rename from nifi/user-templates/legacy/OpenSearch_ingest_docs_DB_to_ES.xml
rename to nifi/user_templates/legacy/OpenSearch_ingest_docs_DB_to_ES.xml
diff --git a/nifi/user-templates/legacy/Raw_file_read_from_disk_ocr_custom.xml b/nifi/user_templates/legacy/Raw_file_read_from_disk_ocr_custom.xml
similarity index 100%
rename from nifi/user-templates/legacy/Raw_file_read_from_disk_ocr_custom.xml
rename to nifi/user_templates/legacy/Raw_file_read_from_disk_ocr_custom.xml
diff --git a/nifi/user-templates/opensearch_docs_ingest_annotations_to_es.json b/nifi/user_templates/opensearch_docs_ingest_annotations_to_es.json
similarity index 100%
rename from nifi/user-templates/opensearch_docs_ingest_annotations_to_es.json
rename to nifi/user_templates/opensearch_docs_ingest_annotations_to_es.json
diff --git a/nifi/user-templates/opensearch_ingest_docs_db_ocr_service_to_es.json b/nifi/user_templates/opensearch_ingest_docs_db_ocr_service_to_es.json
similarity index 100%
rename from nifi/user-templates/opensearch_ingest_docs_db_ocr_service_to_es.json
rename to nifi/user_templates/opensearch_ingest_docs_db_ocr_service_to_es.json
diff --git a/nifi/user-templates/opensearch_ingest_docs_db_to_es.json b/nifi/user_templates/opensearch_ingest_docs_db_to_es.json
similarity index 100%
rename from nifi/user-templates/opensearch_ingest_docs_db_to_es.json
rename to nifi/user_templates/opensearch_ingest_docs_db_to_es.json
diff --git a/pyproject.toml b/pyproject.toml
index 2f17afdc7..342cec353 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.ruff]
 line-length = 120
-exclude = ["nifi/user-scripts/legacy_scripts"]
+exclude = ["nifi/user_scripts/legacy_scripts"]
 target-version = "py311"
 indent-width = 4
 
@@ -25,15 +25,23 @@ fixable = ["ALL"]
 
 [tool.mypy]
 plugins = ["pydantic.mypy"]
+python_version = "3.11"
 ignore_missing_imports = true
 strict = false
 files = "."
 mypy_path = "./typings/"
+warn_unused_configs = true
 
-[tool.isort]
-line_length = 120
-skip = ["venv", "venv-test", "envs", "docker", "models"]
+[tool.setuptools.packages.find]
+include = ["nifi*"]
+exclude = ["*egg-info*", "nifi/conf*", "nifi/drivers*", "nifi/user_schemas*", "nifi/user_templates*"]
+
+[project]
+name = "cogstack_nifi"
+version = "0.1.0"
+requires-python = ">=3.11"
+
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
 
-[tool.flake8]
-max-line-length = 120
-exclude = ["venv", "venv-test", "envs", "docker", "models"]
diff --git a/security/env/users_database.env b/security/env/users_database.env
index 9da92b17c..b2140eccf 100644
--- a/security/env/users_database.env
+++ b/security/env/users_database.env
@@ -3,8 +3,8 @@ POSTGRES_USER_SAMPLES=test
 POSTGRES_PASSWORD_SAMPLES=test
 
 # Production DATABASE user
-POSTGRES_USER=admin
-POSTGRES_PASSWORD=admin
+DATABASE_USER=admin
+DATABASE_PASSWORD=admin
 
 # Production DATABASE MSSQL user
 MSSQL_SA_USER=sa
diff --git a/security/env/users_elasticsearch.env b/security/env/users_elasticsearch.env
index 918334a3f..3ba87ca1a 100644
--- a/security/env/users_elasticsearch.env
+++ b/security/env/users_elasticsearch.env
@@ -42,4 +42,3 @@ ES_LOGSTASH_PASS=kibanaserver
 ES_KIBANARO_PASS=kibanaserver
 ES_READALL_PASS=kibanaserver
 ES_SNAPSHOTRESTORE_PASS=kibanaserver
-