diff --git a/.env.template b/.env.template index 286c3cda..85567f6f 100644 --- a/.env.template +++ b/.env.template @@ -13,7 +13,7 @@ SPEECH_SYNTHESIS_LANGUAGE="Add the text-to-speech synthesis language. Ex: es-MX" SPEECH_SYNTHESIS_VOICE_NAME="Add the neural voice name for speech synthesis. Ex: es-MX-BeatrizNeural" # Document storage settings -STORAGE_ACCOUNT="Add the Azure Storage account name for document storage" +STORAGE_ACCOUNT_NAME="Add the Azure Storage account name for document storage" # Logging configuration LOGLEVEL="Add logging level. Ex: DEBUG, INFO, WARNING, ERROR" diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..077a2012 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,42 @@ +# Use the latest LTS version of Node.js +FROM node:18-alpine + +RUN apk add g++ make python3 py3-pip git + +# Set the working directory inside the container +WORKDIR /temp + +# Copy the rest of your application files +COPY frontend/ . + +# Install dependencies +RUN npm install +RUN npm run build + +RUN python3 -m venv ~/pyvenv --system-site-packages +RUN . ~/pyvenv/bin/activate +RUN pip install --upgrade pip --break-system-packages + +WORKDIR /app + +RUN cp -r /backend /app + +COPY backend/ . + +ADD backend/host-keys.json /runtime/secrets/host.json + +ENV FUNCTIONS_SECRETS_PATH=/runtime/secrets + +#USER ContainerAdministrator +#RUN icacls "/runtime/secrets" /t /grant Users:M +#USER ContainerUser +#ENV AzureWebJobsSecretStorageType=files + +RUN pip install -r requirements.txt --break-system-packages + +# Expose the port your app runs on +EXPOSE 8000 + +# Define the command to run your app +CMD ["python3", "app.py"] +#CMD ["gunicorn", "-c", "python:config.gunicorn", "backend.app:create_app()"] \ No newline at end of file diff --git a/backend/app.py b/backend/app.py index 4c2c8ed5..97fb2a6a 100644 --- a/backend/app.py +++ b/backend/app.py @@ -6,7 +6,6 @@ import uuid import requests -from azure.identity import ManagedIdentityCredential, AzureCliCredential, ChainedTokenCredential from azure.storage.blob import BlobServiceClient from dotenv import load_dotenv from flask import Flask, Response, jsonify, request, session, redirect, url_for @@ -15,39 +14,40 @@ from flask_session import Session from werkzeug.middleware.proxy_fix import ProxyFix -# Import the asynchronous secret retrieval function -from keyvault import get_secret - load_dotenv() +from configuration import Configuration +config = Configuration() + # Helper functions for reading environment variables def read_env_variable(var_name, default=None): - value = os.getenv(var_name, default) + value = config.get_value(var_name, default) return value.strip() if value else default def read_env_list(var_name): - value = os.getenv(var_name, "") + value = config.get_value(var_name, "") return [item.strip() for item in value.split(",") if item.strip()] def read_env_boolean(var_name, default=False): - value = os.getenv(var_name, str(default)).strip().lower() + value = config.get_value(var_name, str(default)).strip().lower() return value in ['true', '1', 'yes'] # Read Environment Variables SPEECH_REGION = read_env_variable('SPEECH_REGION') ORCHESTRATOR_ENDPOINT = read_env_variable('ORCHESTRATOR_ENDPOINT') -STORAGE_ACCOUNT = read_env_variable('STORAGE_ACCOUNT') -LOGLEVEL = read_env_variable('LOGLEVEL', 'INFO').upper() +STORAGE_ACCOUNT_NAME = read_env_variable('STORAGE_ACCOUNT_NAME') +LOGLEVEL = read_env_variable('LOGLEVEL', 'DEBUG').upper() +LOGLEVEL = getattr(logging, LOGLEVEL, logging.INFO) # MSAL / OIDC configuration for custom authentication ENABLE_AUTHENTICATION = read_env_boolean('ENABLE_AUTHENTICATION') FORWARD_ACCESS_TOKEN_TO_ORCHESTRATOR = read_env_boolean('FORWARD_ACCESS_TOKEN_TO_ORCHESTRATOR') OTHER_AUTH_SCOPES = read_env_list('OTHER_AUTH_SCOPES') -CLIENT_ID = os.getenv("CLIENT_ID", "your_client_id") -APP_SERVICE_CLIENT_SECRET_NAME = os.getenv("APP_SERVICE_CLIENT_SECRET_NAME", "appServiceClientSecretKey") -FLASK_SECRET_KEY_NAME = os.getenv("FLASK_SECRET_KEY_NAME", "flaskSecretKey") -AUTHORITY = os.getenv("AUTHORITY", "https://login.microsoftonline.com/your_tenant_id") -REDIRECT_PATH = os.getenv("REDIRECT_PATH", "/getAToken") # Must match the Azure AD app registration redirect URI. +CLIENT_ID = config.get_value("CLIENT_ID", "your_client_id") +APP_SERVICE_CLIENT_SECRET_NAME = config.get_value("APP_SERVICE_CLIENT_SECRET_NAME", "appServiceClientSecretKey") +FLASK_SECRET_KEY_NAME = config.get_value("FLASK_SECRET_KEY_NAME", "flaskSecretKey") +AUTHORITY = config.get_value("AUTHORITY", "https://login.microsoftonline.com/your_tenant_id") +REDIRECT_PATH = config.get_value("REDIRECT_PATH", "/getAToken") # Must match the Azure AD app registration redirect URI. SCOPE = [ "User.Read" ] @@ -68,42 +68,43 @@ def read_env_boolean(var_name, default=False): # Load secrets from Key Vault using the asynchronous function at startup. # This avoids having to call asyncio.run() repeatedly in your helper functions. # ------------------------------------------------------------------------------ -FLASK_SECRET_KEY = get_secret(FLASK_SECRET_KEY_NAME) -APP_SERVICE_CLIENT_SECRET = get_secret(APP_SERVICE_CLIENT_SECRET_NAME) +FLASK_SECRET_KEY = read_env_variable(FLASK_SECRET_KEY_NAME) +APP_SERVICE_CLIENT_SECRET = read_env_variable(APP_SERVICE_CLIENT_SECRET_NAME) # Obtain the token using Managed Identity def get_managed_identity_token(): - credential = ChainedTokenCredential( - ManagedIdentityCredential(), - AzureCliCredential() - ) - token = credential.get_token("https://management.azure.com/.default").token + token = config.credential.get_token("https://management.azure.com/.default").token return token def get_function_key(): - subscription_id = os.getenv('AZURE_SUBSCRIPTION_ID') - resource_group = os.getenv('AZURE_RESOURCE_GROUP_NAME') - function_app_name = os.getenv('AZURE_ORCHESTRATOR_FUNC_NAME') - token = get_managed_identity_token() - logging.info("[webbackend] Obtaining function key.") - - # URL to get all function keys, including the default one - requestUrl = f"https://management.azure.com/subscriptions/{subscription_id}/resourceGroups/{resource_group}/providers/Microsoft.Web/sites/{function_app_name}/functions/orc/listKeys?api-version=2022-03-01" - - requestHeaders = { - "Authorization": f"Bearer {token}", - "Content-Type": "application/json" - } - - response = requests.post(requestUrl, headers=requestHeaders) - response_json = json.loads(response.content.decode('utf-8')) - - try: - # Assuming you want to get the 'default' key - function_key = response_json['default'] - except KeyError as e: - function_key = None - logging.error(f"[webbackend] Error when getting function key. Details: {str(e)}.") + + function_key = config.get_value('AZURE_ORCHESTRATOR_FUNC_KEY') + + if (function_key == None) or (function_key == ""): + + subscription_id = config.get_value('AZURE_SUBSCRIPTION_ID') + resource_group = config.get_value('AZURE_RESOURCE_GROUP_NAME') + function_app_name = config.get_value('AZURE_ORCHESTRATOR_FUNC_NAME') + token = get_managed_identity_token() + logging.info("[webbackend] Obtaining function key.") + + # URL to get all function keys, including the default one + requestUrl = f"https://management.azure.com/subscriptions/{subscription_id}/resourceGroups/{resource_group}/providers/Microsoft.Web/sites/{function_app_name}/functions/orc/listKeys?api-version=2022-03-01" + + requestHeaders = { + "Authorization": f"Bearer {token}", + "Content-Type": "application/json" + } + + response = requests.post(requestUrl, headers=requestHeaders) + response_json = json.loads(response.content.decode('utf-8')) + + try: + # Assuming you want to get the 'default' key + function_key = response_json['default'] + except KeyError as e: + function_key = None + logging.error(f"[webbackend] Error when getting function key. Details: {str(e)}.") return function_key @@ -350,11 +351,24 @@ def chatgpt(): payload['access_token'] = access_token headers = { - 'Content-Type': 'application/json', - 'x-functions-key': function_key + 'Content-Type': 'application/json' } + + if function_key != None: + headers['x-functions-key'] = function_key + logging.info(f"[webbackend] calling orchestrator at: {ORCHESTRATOR_ENDPOINT}") response = requests.post(url, headers=headers, json=payload) + + if (response.status_code != 200): + logging.error(f"[webbackend] Error from orchestrator: {response.status_code} - {response.content}") + response = { + "answer": "Error in application backend.", + "thoughts": "", + "conversation_id": conversation_id + } + return jsonify(response) + logging.info(f"[webbackend] response: {response.text[:100]}...") return response.text except Exception as e: @@ -395,10 +409,10 @@ def getGptSpeechToken(): @app.route("/api/get-storage-account", methods=["GET"]) def getStorageAccount(): - if not STORAGE_ACCOUNT: - return jsonify({"error": "Add STORAGE_ACCOUNT to frontend app settings"}), 500 + if not STORAGE_ACCOUNT_NAME: + return jsonify({"error": "Add STORAGE_ACCOUNT_NAME to frontend app settings"}), 500 try: - return json.dumps({'storageaccount': STORAGE_ACCOUNT}) + return json.dumps({'storageaccount': STORAGE_ACCOUNT_NAME}) except Exception as e: logging.exception("[webbackend] exception in /api/get-storage-account") return jsonify({"error": str(e)}), 500 @@ -408,13 +422,9 @@ def getBlob(): blob_name = unquote(request.json["blob_name"]) logging.info(f"Starting getBlob function for blob: {blob_name}") try: - client_credential = ChainedTokenCredential( - ManagedIdentityCredential(), - AzureCliCredential() - ) blob_service_client = BlobServiceClient( - f"https://{STORAGE_ACCOUNT}.blob.core.windows.net", - client_credential + f"https://{STORAGE_ACCOUNT_NAME}.blob.core.windows.net", + config.credential ) blob_client = blob_service_client.get_blob_client(container='documents', blob=blob_name) blob_data = blob_client.download_blob() @@ -427,4 +437,4 @@ def getBlob(): return jsonify({"error": str(e)}), 500 if __name__ == "__main__": - app.run(host='0.0.0.0', port=8000) + app.run(host='0.0.0.0', port=8000) \ No newline at end of file diff --git a/backend/config.py b/backend/config.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/configuration/__init__.py b/backend/configuration/__init__.py new file mode 100644 index 00000000..fdd1e28e --- /dev/null +++ b/backend/configuration/__init__.py @@ -0,0 +1 @@ +from .configuration import Configuration \ No newline at end of file diff --git a/backend/configuration/configuration.py b/backend/configuration/configuration.py new file mode 100644 index 00000000..254da70b --- /dev/null +++ b/backend/configuration/configuration.py @@ -0,0 +1,112 @@ +import os +import logging +from azure.identity import DefaultAzureCredential +from azure.appconfiguration.provider import ( + AzureAppConfigurationKeyVaultOptions, + load +) + +from tenacity import retry, wait_random_exponential, stop_after_attempt, RetryError + +class Configuration: + + credential = None + + def __init__(self): + + try: + self.tenant_id = os.environ.get('AZURE_TENANT_ID', "*") + except Exception as e: + raise e + + self.credential = DefaultAzureCredential( + additionally_allowed_tenants=self.tenant_id, + exclude_environment_credential=True, + exclude_managed_identity_credential=False, + exclude_cli_credential=False, + exclude_powershell_credential=True, + exclude_shared_token_cache_credential=True, + exclude_developer_cli_credential=True, + exclude_interactive_browser_credential=True + ) + + try: + app_config_uri = os.environ['APP_CONFIGURATION_URI'] + self.config = load(endpoint=app_config_uri, credential=self.credential,key_vault_options=AzureAppConfigurationKeyVaultOptions(credential=self.credential)) + except Exception as e: + try: + logging.log("error", f"Unable to connect to Azure App Configuration. Please check APP_CONFIGURATION_URI setting. {e}") + connection_string = os.environ["AZURE_APPCONFIG_CONNECTION_STRING"] + # Connect to Azure App Configuration using a connection string. + self.config = load(connection_string=connection_string, key_vault_options=AzureAppConfigurationKeyVaultOptions(credential=self.credential)) + except Exception as e: + raise Exception(f"Unable to connect to Azure App Configuration. Please check your connection string or endpoint. {e}") + + # Connect to Azure App Configuration. + + def get_value(self, key: str, default: str = None) -> str: + + if key is None: + raise Exception('The key parameter is required for get_value().') + + value = None + + allow_env_vars = False + if "allow_environment_variables" in os.environ: + allow_env_vars = bool(os.environ[ + "allow_environment_variables" + ]) + + if allow_env_vars is True: + value = os.environ.get(key) + + if value is None: + try: + value = self.get_config_with_retry(name=key) + except Exception as e: + pass + + if value is not None: + return value + else: + if default is not None: + return default + + raise Exception(f'The configuration variable {key} not found.') + + def retry_before_sleep(self, retry_state): + # Log the outcome of each retry attempt. + message = f"""Retrying {retry_state.fn}: + attempt {retry_state.attempt_number} + ended with: {retry_state.outcome}""" + if retry_state.outcome.failed: + ex = retry_state.outcome.exception() + message += f"; Exception: {ex.__class__.__name__}: {ex}" + if retry_state.attempt_number < 1: + logging.info(message) + else: + logging.warning(message) + + @retry( + wait=wait_random_exponential(multiplier=1, max=5), + stop=stop_after_attempt(5), + before_sleep=retry_before_sleep + ) + def get_config_with_retry(self, name): + try: + return self.config[name] + except RetryError: + pass + + # Helper functions for reading environment variables + def read_env_variable(self, var_name, default=None): + value = self.get_value(var_name, default) + return value.strip() if value else default + + def read_env_list(self, var_name): + value = self.get_value(var_name, "") + return [item.strip() for item in value.split(",") if item.strip()] + + def read_env_boolean(self, var_name, default=False): + value = self.get_value(var_name, str(default)).strip().lower() + return value in ['true', '1', 'yes'] \ No newline at end of file diff --git a/backend/host-keys.json b/backend/host-keys.json new file mode 100644 index 00000000..d4cf6a27 --- /dev/null +++ b/backend/host-keys.json @@ -0,0 +1,14 @@ +{ + "masterKey": { + "name": "master", + "value": "test", + "encrypted": false + }, + "functionKeys": [ + { + "name": "default", + "value": "test", + "encrypted": false + } + ] +} \ No newline at end of file diff --git a/backend/keyvault.py b/backend/keyvault.py deleted file mode 100644 index 9035220f..00000000 --- a/backend/keyvault.py +++ /dev/null @@ -1,69 +0,0 @@ -import os -import logging -import re -from azure.identity.aio import ManagedIdentityCredential, AzureCliCredential, ChainedTokenCredential -from azure.keyvault.secrets.aio import SecretClient as AsyncSecretClient -from azure.identity import ManagedIdentityCredential, AzureCliCredential, ChainedTokenCredential -from azure.keyvault.secrets import SecretClient -from azure.core.exceptions import ResourceNotFoundError, ClientAuthenticationError - -########################################################## -# KEY VAULT -########################################################## - -async def async_get_secret(secretName): - try: - keyVaultName = os.environ["AZURE_KEY_VAULT_NAME"] - KVUri = f"https://{keyVaultName}.vault.azure.net" - async with ChainedTokenCredential( - ManagedIdentityCredential(), - AzureCliCredential() - ) as credential: - async with AsyncSecretClient(vault_url=KVUri, credential=credential) as client: - retrieved_secret = await client.get_secret(secretName) - value = retrieved_secret.value - return value - except KeyError: - logging.info("Environment variable AZURE_KEY_VAULT_NAME not found.") - return None - except ClientAuthenticationError: - logging.info("Authentication failed. Please check your credentials.") - return None - except ResourceNotFoundError: - logging.info(f"Secret '{secretName}' not found in the Key Vault.") - return None - except Exception as e: - logging.info(f"An unexpected error occurred: {e}") - return None - - -def get_secret(secretName): - try: - keyVaultName = os.environ["AZURE_KEY_VAULT_NAME"] - KVUri = f"https://{keyVaultName}.vault.azure.net" - - # Create the chained credential using synchronous classes. - credential = ChainedTokenCredential( - ManagedIdentityCredential(), - AzureCliCredential() - ) - - # Create and use the SecretClient within a context manager. - with SecretClient(vault_url=KVUri, credential=credential) as client: - retrieved_secret = client.get_secret(secretName) - value = retrieved_secret.value - - return value - - except KeyError: - logging.info("Environment variable AZURE_KEY_VAULT_NAME not found.") - return None - except ClientAuthenticationError: - logging.info("Authentication failed. Please check your credentials.") - return None - except ResourceNotFoundError: - logging.info(f"Secret '{secretName}' not found in the Key Vault.") - return None - except Exception as e: - logging.info(f"An unexpected error occurred: {e}") - return None diff --git a/backend/requirements.txt b/backend/requirements.txt index e52244d7..59b0cadf 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -7,4 +7,7 @@ python-dotenv==1.0.0 azure-identity azure-keyvault-secrets azure-storage-blob==12.19.0 +azure-appconfiguration==1.7.1 +azure-appconfiguration-provider==2.0.1 Flask-Session==0.8.0 +gunicorn diff --git a/frontend/src/api/api.ts b/frontend/src/api/api.ts index 5851e351..125e6d16 100644 --- a/frontend/src/api/api.ts +++ b/frontend/src/api/api.ts @@ -43,7 +43,7 @@ export function getCitationFilePath(citation: string): string { xhr.send(); if (xhr.status > 299) { - console.log("Please check if STORAGE_ACCOUNT is in frontend app settings"); + console.log("Please check if STORAGE_ACCOUNT_NAME is in frontend app settings"); return storage_account } else { const parsedResponse = JSON.parse(xhr.responseText);