diff --git a/.cspell.json b/.cspell.json index 1a47ffe0..3f79c94b 100644 --- a/.cspell.json +++ b/.cspell.json @@ -4,7 +4,7 @@ "cache": { "useCache": false }, - "ignorePaths": ["node_modules/**", "**/node_modules/**", "packages/**", "**/packages/**", "vendor/**", "**/vendor/**", "dist/**", "build/**", ".git/**", "**/.terraform/**", "**/.terraform.lock.hcl", ".vscode/**", ".copilot-tracking/**", ".github/copilot-instructions.md", ".github/instructions/**", ".github/prompts/**", ".github/agents/**", "venv/**", "**/*.min.js", "**/*.min.css", "package-lock.json", "**/package-lock.json", "Cargo.lock", "**/Cargo.lock"], + "ignorePaths": ["node_modules/**", "**/node_modules/**", "packages/**", "**/packages/**", "vendor/**", "**/vendor/**", "dist/**", "build/**", ".git/**", "**/.terraform/**", "**/.terraform.lock.hcl", ".vscode/**", ".copilot-tracking/**", ".github/copilot-instructions.md", ".github/instructions/**", ".github/prompts/**", ".github/agents/**", "venv/**", "**/*.min.js", "**/*.min.css", "package-lock.json", "**/package-lock.json", "**/packages.lock.json", "Cargo.lock", "**/Cargo.lock"], "ignoreRegExpList": ["/#.*/g", "/^authors?:.*(?:\\r?\\n\\s*-.*)*$/gmi"], "dictionaryDefinitions": [ { diff --git a/.cspell/project-specific.txt b/.cspell/project-specific.txt index 537e5d28..bc26ab4f 100644 --- a/.cspell/project-specific.txt +++ b/.cspell/project-specific.txt @@ -2,14 +2,18 @@ AADSTS Burstable COMMITMSG DCAM +EOBS Fanuc GHCP Hikvision Keycloak Linfa Multimodal +Obtenez Ollama +Ouvrir SARIF +Standardised TMDL WIQL Workback @@ -32,12 +36,14 @@ azureuser bicepconfig bicepparam bluenviron +chipsets cloudapp colorbars commitish conseils corax curlimages +denoising dimproducts dimstore dlqc @@ -45,6 +51,7 @@ docsmcp docstool docstrings dorny +easyops edgeai edgeserver efrecon @@ -66,6 +73,8 @@ isengineering jointable jspx kalypso +leakdet +libonnxruntime libopencv logissue managedidentity @@ -73,6 +82,7 @@ mcpservers mediamtx minioadmin mobilenet +mobilenetv mqttui 
myuniqueeventhub namespacing @@ -83,6 +93,7 @@ notebookutils ocation octocat onelake +optimises orientationmeasure oxsecurity pipx @@ -124,6 +135,7 @@ testpassword tfstate tftest timescaledb +tinyyolov toolkits traceidratio ullaakut @@ -135,16 +147,5 @@ workstreams wowza xychart yolov -acsalocalsharedtestfile -asyncua -conseils -denoising -émojis -interactifs -libonnxruntime -mobilenetv -Obtenez -Ouvrir -tinyyolov -ullaakut youracr +émojis diff --git a/.npmrc b/.npmrc index 61655dc0..61ed3138 100644 --- a/.npmrc +++ b/.npmrc @@ -15,4 +15,6 @@ package-lock=true # Use color in npm output color=true # Set log level to warn by default -loglevel=warn \ No newline at end of file +loglevel=warn +# Disable postinstall scripts for supply chain security hardening +ignore-scripts=true diff --git a/.ruff.toml b/.ruff.toml index 2dfa3672..d360d92c 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -6,6 +6,7 @@ select = ["E", "F", "W", "I", "N", "UP", "S", "B", "A", "C4"] [lint.per-file-ignores] "**/tests/**" = ["S101", "S108", "S603", "S607"] +"**/test_*.py" = ["S101"] "**/*simulator*/**" = ["S311", "S104"] "**/*simulator*.py" = ["S311", "S104"] "**/services/**/app.py" = ["S311", "S104"] diff --git a/blueprints/azure-local/terraform/README.md b/blueprints/azure-local/terraform/README.md index ce57141b..c486f3cd 100644 --- a/blueprints/azure-local/terraform/README.md +++ b/blueprints/azure-local/terraform/README.md @@ -56,7 +56,7 @@ Deploys the cloud and edge resources required to run Azure IoT Operations on an | azure\_local\_control\_plane\_count | Number of control plane nodes for Azure Local cluster | `number` | `1` | no | | azure\_local\_control\_plane\_vm\_size | VM size for control plane nodes in Azure Local cluster | `string` | `"Standard_A4_v2"` | no | | azure\_local\_node\_pool\_count | Number of worker nodes in the default node pool for Azure Local cluster | `number` | `1` | no | -| azure\_local\_node\_pool\_vm\_size | VM size for worker nodes in Azure Local cluster | 
`string` | `"Standard_D8s_v3"` | no | +| azure\_local\_node\_pool\_vm\_size | VM size for worker nodes in Azure Local cluster | `string` | `"Standard_D8s_v6"` | no | | azure\_local\_pod\_cidr | CIDR range for Kubernetes pods in Azure Local cluster | `string` | `"10.244.0.0/16"` | no | | custom\_locations\_oid | Resource ID of the custom location for the Azure Stack HCI cluster | `string` | `null` | no | | instance | Instance identifier for naming resources: 001, 002, etc | `string` | `"001"` | no | diff --git a/blueprints/azure-local/terraform/main.tf b/blueprints/azure-local/terraform/main.tf index 8cb8031e..33ba2456 100644 --- a/blueprints/azure-local/terraform/main.tf +++ b/blueprints/azure-local/terraform/main.tf @@ -58,6 +58,8 @@ module "cloud_security_identity" { should_enable_purge_protection = var.should_enable_key_vault_purge_protection should_create_aks_identity = false should_create_ml_workload_identity = false + log_analytics_workspace_id = module.cloud_observability.log_analytics_workspace.id + should_enable_diagnostic_settings = true } module "cloud_observability" { @@ -102,7 +104,9 @@ module "cloud_messaging" { resource_prefix = var.resource_prefix instance = var.instance - should_create_azure_functions = var.should_create_azure_functions + should_create_azure_functions = var.should_create_azure_functions + log_analytics_workspace_id = module.cloud_observability.log_analytics_workspace.id + should_enable_diagnostic_settings = true } module "azure_local_host" { diff --git a/blueprints/azure-local/terraform/variables.tf b/blueprints/azure-local/terraform/variables.tf index eddc94e2..c4acde80 100644 --- a/blueprints/azure-local/terraform/variables.tf +++ b/blueprints/azure-local/terraform/variables.tf @@ -101,7 +101,7 @@ variable "azure_local_control_plane_vm_size" { variable "azure_local_node_pool_vm_size" { type = string description = "VM size for worker nodes in Azure Local cluster" - default = "Standard_D8s_v3" + default = "Standard_D8s_v6" } 
variable "azure_local_pod_cidr" { diff --git a/blueprints/azureml/terraform/README.md b/blueprints/azureml/terraform/README.md index 8dc3d3a5..d4e0400f 100644 --- a/blueprints/azureml/terraform/README.md +++ b/blueprints/azureml/terraform/README.md @@ -70,7 +70,7 @@ This blueprint provides Azure Machine Learning capabilities with optional founda | nat\_gateway\_zones | Availability zones for NAT gateway resources when zone-redundancy is required (example: ['1','2']) | `list(string)` | `[]` | no | | node\_count | Number of nodes for the agent pool in the AKS cluster. | `number` | `1` | no | | node\_pools | Additional node pools for the AKS cluster. Map key is used as the node pool name. | ```map(object({ node_count = optional(number, null) vm_size = string subnet_address_prefixes = list(string) pod_subnet_address_prefixes = list(string) node_taints = optional(list(string), []) enable_auto_scaling = optional(bool, false) min_count = optional(number, null) max_count = optional(number, null) priority = optional(string, "Regular") zones = optional(list(string), null) eviction_policy = optional(string, "Deallocate") gpu_driver = optional(string, null) }))``` | `{}` | no | -| node\_vm\_size | VM size for the agent pool in the AKS cluster. Default is Standard\_D8ds\_v5. | `string` | `"Standard_D8ds_v5"` | no | +| node\_vm\_size | VM size for the agent pool in the AKS cluster. Default is Standard\_D8ds\_v6. | `string` | `"Standard_D8ds_v6"` | no | | postgresql\_admin\_password | Administrator password for PostgreSQL server. (Otherwise, generated when postgresql\_should\_generate\_admin\_password is true). 
| `string` | `null` | no | | postgresql\_admin\_username | Administrator username for PostgreSQL server | `string` | `"pgadmin"` | no | | postgresql\_databases | Map of databases to create with collation and charset | ```map(object({ collation = string charset = string }))``` | `null` | no | @@ -137,7 +137,7 @@ This blueprint provides Azure Machine Learning capabilities with optional founda | vm\_host\_count | Number of VM hosts to create for multi-node scenarios | `number` | `1` | no | | vm\_max\_bid\_price | Maximum hourly price in USD for Spot VM. Set to -1 (recommended) to pay current spot price without price-based eviction. Custom values support up to 5 decimal places. Only applies when vm\_priority is Spot | `number` | `-1` | no | | vm\_priority | VM priority: Regular (production, guaranteed capacity) or Spot (cost-optimized, up to 90% savings, can be evicted). Recommended: Spot for dev/test GPU workloads | `string` | `"Regular"` | no | -| vm\_sku\_size | VM SKU size for the host. Examples: Standard\_D8s\_v3 (general purpose), Standard\_NV36ads\_A10\_v5 (GPU workload) | `string` | `"Standard_D8s_v3"` | no | +| vm\_sku\_size | VM SKU size for the host. Examples: Standard\_D8s\_v6 (general purpose), Standard\_NV36ads\_A10\_v5 (GPU workload) | `string` | `"Standard_D8s_v6"` | no | | vm\_user\_principals | Map of Azure AD principals for Virtual Machine User Login role (standard access). Keys are descriptive identifiers (e.g., `user@company.com`), values are principal object IDs. | `map(string)` | `{}` | no | | vpn\_gateway\_azure\_ad\_config | Azure AD configuration for VPN Gateway authentication. tenant\_id is required when vpn\_gateway\_should\_use\_azure\_ad\_auth is true. audience defaults to Microsoft-registered app. 
issuer will default to `https://sts.windows.net/{tenant_id}/` when not provided | ```object({ tenant_id = optional(string) audience = optional(string, "c632b3df-fb67-4d84-bdcf-b95ad541b5c8") issuer = optional(string) })``` | `{}` | no | | vpn\_gateway\_config | VPN Gateway configuration including SKU, generation, client address pool, and supported protocols | ```object({ sku = optional(string, "VpnGw1") generation = optional(string, "Generation1") client_address_pool = optional(list(string), ["192.168.200.0/24"]) protocols = optional(list(string), ["OpenVPN", "IkeV2"]) })``` | `{}` | no | diff --git a/blueprints/azureml/terraform/variables.tf b/blueprints/azureml/terraform/variables.tf index 307420c9..3bec4301 100644 --- a/blueprints/azureml/terraform/variables.tf +++ b/blueprints/azureml/terraform/variables.tf @@ -118,8 +118,8 @@ variable "node_count" { variable "node_vm_size" { type = string - description = "VM size for the agent pool in the AKS cluster. Default is Standard_D8ds_v5." - default = "Standard_D8ds_v5" + description = "VM size for the agent pool in the AKS cluster. Default is Standard_D8ds_v6." + default = "Standard_D8ds_v6" } variable "subnet_address_prefixes_aks" { @@ -756,8 +756,8 @@ variable "vm_host_count" { variable "vm_sku_size" { type = string - description = "VM SKU size for the host. Examples: Standard_D8s_v3 (general purpose), Standard_NV36ads_A10_v5 (GPU workload)" - default = "Standard_D8s_v3" + description = "VM SKU size for the host. 
Examples: Standard_D8s_v6 (general purpose), Standard_NV36ads_A10_v5 (GPU workload)" + default = "Standard_D8s_v6" } variable "vm_priority" { diff --git a/blueprints/dual-peered-single-node-cluster/terraform/README.md b/blueprints/dual-peered-single-node-cluster/terraform/README.md index 246dd560..346e76b2 100644 --- a/blueprints/dual-peered-single-node-cluster/terraform/README.md +++ b/blueprints/dual-peered-single-node-cluster/terraform/README.md @@ -84,18 +84,19 @@ Each cluster operates independently but can communicate through the peered virtu | cluster\_a\_min\_count | The minimum number of nodes which should exist in the default node pool for Cluster A. Valid values are between 0 and 1000. | `number` | `null` | no | | cluster\_a\_node\_count | Number of nodes for the agent pool in the AKS cluster for Cluster A. | `number` | `1` | no | | cluster\_a\_node\_pools | Additional node pools for the AKS cluster for Cluster A. Map key is used as the node pool name. | ```map(object({ node_count = number vm_size = string subnet_address_prefixes = list(string) pod_subnet_address_prefixes = list(string) node_taints = optional(list(string), []) enable_auto_scaling = optional(bool, false) min_count = optional(number, null) max_count = optional(number, null) }))``` | `{}` | no | -| cluster\_a\_node\_vm\_size | VM size for the agent pool in the AKS cluster for Cluster A. Default is Standard\_D8ds\_v5. | `string` | `"Standard_D8ds_v5"` | no | +| cluster\_a\_node\_vm\_size | VM size for the agent pool in the AKS cluster for Cluster A. Default is Standard\_D8ds\_v6. | `string` | `"Standard_D8ds_v6"` | no | | cluster\_a\_subnet\_address\_prefixes\_acr | Address prefixes for the ACR subnet. | `list(string)` | ```[ "10.1.2.0/24" ]``` | no | | cluster\_a\_subnet\_address\_prefixes\_aks | Address prefixes for the AKS subnet. | `list(string)` | ```[ "10.1.3.0/24" ]``` | no | | cluster\_a\_subnet\_address\_prefixes\_aks\_pod | Address prefixes for the AKS pod subnet. 
| `list(string)` | ```[ "10.1.4.0/24" ]``` | no | | cluster\_a\_virtual\_network\_config | Configuration for Cluster A virtual network including address space and subnet prefix. | ```object({ address_space = string subnet_address_prefix = string })``` | ```{ "address_space": "10.1.0.0/16", "subnet_address_prefix": "10.1.1.0/24" }``` | no | +| cluster\_admin\_group\_oid | The Entra ID group Object ID that will be given cluster-admin permissions and Azure Arc RBAC access for 'az connectedk8s proxy' | `string` | `null` | no | | cluster\_b\_dns\_prefix | DNS prefix for the AKS cluster for Cluster B. This is used to create a unique DNS name for the cluster. If not provided, a default value will be generated. | `string` | `null` | no | | cluster\_b\_enable\_auto\_scaling | Should enable auto-scaler for the default node pool for Cluster B. | `bool` | `false` | no | | cluster\_b\_max\_count | The maximum number of nodes which should exist in the default node pool for Cluster B. Valid values are between 0 and 1000. | `number` | `null` | no | | cluster\_b\_min\_count | The minimum number of nodes which should exist in the default node pool for Cluster B. Valid values are between 0 and 1000. | `number` | `null` | no | | cluster\_b\_node\_count | Number of nodes for the agent pool in the AKS cluster for Cluster B. | `number` | `1` | no | | cluster\_b\_node\_pools | Additional node pools for the AKS cluster for Cluster B. Map key is used as the node pool name. | ```map(object({ node_count = number vm_size = string subnet_address_prefixes = list(string) pod_subnet_address_prefixes = list(string) node_taints = optional(list(string), []) enable_auto_scaling = optional(bool, false) min_count = optional(number, null) max_count = optional(number, null) }))``` | `{}` | no | -| cluster\_b\_node\_vm\_size | VM size for the agent pool in the AKS cluster for Cluster B. Default is Standard\_D8ds\_v5. 
| `string` | `"Standard_D8ds_v5"` | no | +| cluster\_b\_node\_vm\_size | VM size for the agent pool in the AKS cluster for Cluster B. Default is Standard\_D8ds\_v6. | `string` | `"Standard_D8ds_v6"` | no | | cluster\_b\_subnet\_address\_prefixes\_acr | Address prefixes for the ACR subnet. | `list(string)` | ```[ "10.2.2.0/24" ]``` | no | | cluster\_b\_subnet\_address\_prefixes\_aks | Address prefixes for the AKS subnet. | `list(string)` | ```[ "10.2.3.0/24" ]``` | no | | cluster\_b\_subnet\_address\_prefixes\_aks\_pod | Address prefixes for the AKS pod subnet. | `list(string)` | ```[ "10.2.4.0/24" ]``` | no | diff --git a/blueprints/dual-peered-single-node-cluster/terraform/main.tf b/blueprints/dual-peered-single-node-cluster/terraform/main.tf index 2e49efb3..ac0da6c3 100644 --- a/blueprints/dual-peered-single-node-cluster/terraform/main.tf +++ b/blueprints/dual-peered-single-node-cluster/terraform/main.tf @@ -187,6 +187,7 @@ module "cluster_a_edge_cncf_cluster" { should_deploy_arc_machines = false should_get_custom_locations_oid = var.should_get_custom_locations_oid custom_locations_oid = var.custom_locations_oid + cluster_admin_group_oid = var.cluster_admin_group_oid // Key Vault for script retrieval key_vault = module.cluster_a_cloud_security_identity.key_vault @@ -440,6 +441,7 @@ module "cluster_b_edge_cncf_cluster" { should_deploy_arc_machines = false should_get_custom_locations_oid = var.should_get_custom_locations_oid custom_locations_oid = var.custom_locations_oid + cluster_admin_group_oid = var.cluster_admin_group_oid // Key Vault for script retrieval key_vault = module.cluster_b_cloud_security_identity.key_vault diff --git a/blueprints/dual-peered-single-node-cluster/terraform/variables.tf b/blueprints/dual-peered-single-node-cluster/terraform/variables.tf index 1064569c..b4d7e887 100644 --- a/blueprints/dual-peered-single-node-cluster/terraform/variables.tf +++ b/blueprints/dual-peered-single-node-cluster/terraform/variables.tf @@ -131,6 +131,12 @@ 
variable "aio_namespace" { default = "azure-iot-operations" } +variable "cluster_admin_group_oid" { + type = string + description = "The Entra ID group Object ID that will be given cluster-admin permissions and Azure Arc RBAC access for 'az connectedk8s proxy'" + default = null +} + variable "should_get_custom_locations_oid" { type = bool description = <<-EOF @@ -177,8 +183,8 @@ variable "cluster_a_node_count" { variable "cluster_a_node_vm_size" { type = string - description = "VM size for the agent pool in the AKS cluster for Cluster A. Default is Standard_D8ds_v5." - default = "Standard_D8ds_v5" + description = "VM size for the agent pool in the AKS cluster for Cluster A. Default is Standard_D8ds_v6." + default = "Standard_D8ds_v6" } variable "cluster_a_enable_auto_scaling" { @@ -232,8 +238,8 @@ variable "cluster_b_node_count" { variable "cluster_b_node_vm_size" { type = string - description = "VM size for the agent pool in the AKS cluster for Cluster B. Default is Standard_D8ds_v5." - default = "Standard_D8ds_v5" + description = "VM size for the agent pool in the AKS cluster for Cluster B. Default is Standard_D8ds_v6." 
+ default = "Standard_D8ds_v6" } variable "cluster_b_enable_auto_scaling" { diff --git a/blueprints/full-multi-node-cluster/terraform/README.md b/blueprints/full-multi-node-cluster/terraform/README.md index 955f341c..2ad0ca09 100644 --- a/blueprints/full-multi-node-cluster/terraform/README.md +++ b/blueprints/full-multi-node-cluster/terraform/README.md @@ -91,6 +91,7 @@ with the single-node blueprint while preserving multi-node specific capabilities | azureml\_should\_enable\_public\_network\_access | Whether to enable public network access to the Azure Machine Learning workspace | `bool` | `true` | no | | certificate\_subject | Certificate subject information for auto-generated certificates | ```object({ common_name = optional(string, "Full Multi Node VPN Gateway Root Certificate") organization = optional(string, "Edge AI Accelerator") organizational_unit = optional(string, "IT") country = optional(string, "US") province = optional(string, "WA") locality = optional(string, "Redmond") })``` | `{}` | no | | certificate\_validity\_days | Validity period in days for auto-generated certificates | `number` | `365` | no | +| cluster\_admin\_group\_oid | The Entra ID group Object ID that will be given cluster-admin permissions and Azure Arc RBAC access for 'az connectedk8s proxy' | `string` | `null` | no | | cluster\_server\_host\_machine\_username | Username for the Arc or VM host machines that receive kube-config during setup Otherwise, resource\_prefix when the user exists on the machine | `string` | `null` | no | | cluster\_server\_ip | IP address for the cluster server used by node machines when should\_use\_arc\_machines is true | `string` | `null` | no | | custom\_akri\_connectors | List of custom Akri connector templates with user-defined endpoint types and container images. Supports built-in types (rest, media, onvif, sse) or custom types with custom\_endpoint\_type and custom\_image\_name. 
Built-in connectors default to mcr.microsoft.com/azureiotoperations/akri-connectors/connector\_type:0.5.1. | ```list(object({ name = string type = string // "rest", "media", "onvif", "sse", "custom" // Custom Connector Fields (required when type = "custom") custom_endpoint_type = optional(string) // e.g., "Contoso.Modbus", "Acme.CustomProtocol" custom_image_name = optional(string) // e.g., "my_acr.azurecr.io/custom-connector" custom_endpoint_version = optional(string, "1.0") // Runtime Configuration (defaults applied based on connector type) registry = optional(string) // Defaults: mcr.microsoft.com for built-in types image_tag = optional(string) // Defaults: 0.5.1 for built-in types, latest for custom replicas = optional(number, 1) image_pull_policy = optional(string) // Default: IfNotPresent // Diagnostics log_level = optional(string) // Default: info (lowercase: trace, debug, info, warning, error, critical) // MQTT Override (uses shared config if not provided) mqtt_config = optional(object({ host = string audience = string ca_configmap = string keep_alive_seconds = optional(number, 60) max_inflight_messages = optional(number, 100) session_expiry_seconds = optional(number, 600) })) // Optional Advanced Fields aio_min_version = optional(string) aio_max_version = optional(string) allocation = optional(object({ policy = string // "Bucketized" bucket_size = number // 1-100 })) additional_configuration = optional(map(string)) secrets = optional(list(object({ secret_alias = string secret_key = string secret_ref = string }))) trust_settings = optional(object({ trust_list_secret_ref = string })) }))``` | `[]` | no | @@ -112,7 +113,7 @@ with the single-node blueprint while preserving multi-node specific capabilities | nat\_gateway\_zones | Availability zones for NAT gateway resources when zone redundancy is required (example: ['1','2']) | `list(string)` | `[]` | no | | node\_count | Number of nodes for the agent pool in the AKS cluster | `number` | `1` | no | | 
node\_pools | Additional node pools for the AKS cluster; map key is used as the node pool name | ```map(object({ node_count = number vm_size = string subnet_address_prefixes = list(string) pod_subnet_address_prefixes = list(string) node_taints = optional(list(string), []) enable_auto_scaling = optional(bool, false) min_count = optional(number, null) max_count = optional(number, null) }))``` | `{}` | no | -| node\_vm\_size | VM size for the agent pool in the AKS cluster | `string` | `"Standard_D8ds_v5"` | no | +| node\_vm\_size | VM size for the agent pool in the AKS cluster | `string` | `"Standard_D8ds_v6"` | no | | onboard\_identity\_type | Identity type to use for onboarding the cluster to Azure Arc. Allowed values: - id: User-assigned managed identity (default for VM-based deployments) - sp: Service principal - skip: Skip identity creation (use when Arc machines already have system-assigned identity) | `string` | `"id"` | no | | postgresql\_admin\_password | Administrator password for PostgreSQL server. (Otherwise, generated when postgresql\_should\_generate\_admin\_password is true). 
| `string` | `null` | no | | postgresql\_admin\_username | Administrator username for PostgreSQL server | `string` | `"pgadmin"` | no | diff --git a/blueprints/full-multi-node-cluster/terraform/main.tf b/blueprints/full-multi-node-cluster/terraform/main.tf index ee8cd61d..f27f3fae 100644 --- a/blueprints/full-multi-node-cluster/terraform/main.tf +++ b/blueprints/full-multi-node-cluster/terraform/main.tf @@ -103,6 +103,8 @@ module "cloud_security_identity" { should_create_aks_identity = var.should_create_aks_identity should_create_ml_workload_identity = var.azureml_should_create_ml_workload_identity should_create_secret_sync_identity = var.should_deploy_aio + log_analytics_workspace_id = module.cloud_observability.log_analytics_workspace.id + should_enable_diagnostic_settings = true } module "cloud_vpn_gateway" { @@ -243,7 +245,9 @@ module "cloud_messaging" { resource_prefix = var.resource_prefix instance = var.instance - should_create_azure_functions = var.should_create_azure_functions + should_create_azure_functions = var.should_create_azure_functions + log_analytics_workspace_id = module.cloud_observability.log_analytics_workspace.id + should_enable_diagnostic_settings = true } module "cloud_vm_host" { @@ -287,6 +291,8 @@ module "cloud_acr" { public_network_access_enabled = var.acr_public_network_access_enabled should_enable_data_endpoints = var.acr_data_endpoint_enabled should_enable_export_policy = var.acr_export_policy_enabled + log_analytics_workspace_id = module.cloud_observability.log_analytics_workspace.id + should_enable_diagnostic_settings = true } module "cloud_kubernetes" { @@ -361,6 +367,7 @@ module "cloud_azureml" { should_enable_nat_gateway = var.should_enable_managed_outbound_access should_enable_public_network_access = var.azureml_should_enable_public_network_access should_create_compute_cluster = var.azureml_should_create_compute_cluster + compute_cluster_node_public_ip_enabled = !var.azureml_should_enable_private_endpoint ml_workload_identity = 
try(module.cloud_security_identity.ml_workload_identity, null) ml_workload_subjects = var.azureml_ml_workload_subjects @@ -430,6 +437,7 @@ module "edge_cncf_cluster" { should_generate_cluster_server_token = true should_get_custom_locations_oid = var.should_get_custom_locations_oid should_add_current_user_cluster_admin = var.should_add_current_user_cluster_admin + cluster_admin_group_oid = var.cluster_admin_group_oid custom_locations_oid = var.custom_locations_oid cluster_server_host_machine_username = var.cluster_server_host_machine_username diff --git a/blueprints/full-multi-node-cluster/terraform/variables.tf b/blueprints/full-multi-node-cluster/terraform/variables.tf index d06591cb..edaece70 100644 --- a/blueprints/full-multi-node-cluster/terraform/variables.tf +++ b/blueprints/full-multi-node-cluster/terraform/variables.tf @@ -95,6 +95,12 @@ variable "should_add_current_user_cluster_admin" { default = true } +variable "cluster_admin_group_oid" { + type = string + description = "The Entra ID group Object ID that will be given cluster-admin permissions and Azure Arc RBAC access for 'az connectedk8s proxy'" + default = null +} + variable "should_get_custom_locations_oid" { type = bool description = <<-EOT @@ -402,7 +408,7 @@ variable "node_count" { variable "node_vm_size" { type = string description = "VM size for the agent pool in the AKS cluster" - default = "Standard_D8ds_v5" + default = "Standard_D8ds_v6" } variable "enable_auto_scaling" { diff --git a/blueprints/full-single-node-cluster/bicep/main.bicep b/blueprints/full-single-node-cluster/bicep/main.bicep index b2326ee6..ca60059e 100644 --- a/blueprints/full-single-node-cluster/bicep/main.bicep +++ b/blueprints/full-single-node-cluster/bicep/main.bicep @@ -694,7 +694,13 @@ output messaging object = { ? cloudMessaging.outputs.eventHubNamespaceName : 'Not deployed' } - +@description('Alert notification pipeline resources. 
Bicep deployment does not currently wire the 045-notification component; output is stubbed for parity with Terraform.') +output notification object = { + logicApp: 'Not deployed' + closeLogicApp: 'Not deployed' + closeSessionEndpoint: 'Not deployed' + storageAccount: 'Not deployed' +} @description('Map of dataflow graph resources by name.') output dataflowGraphs string[] = edgeMessaging.outputs.dataflowGraphNames diff --git a/blueprints/full-single-node-cluster/terraform/README.md b/blueprints/full-single-node-cluster/terraform/README.md index e9affc06..550e223c 100644 --- a/blueprints/full-single-node-cluster/terraform/README.md +++ b/blueprints/full-single-node-cluster/terraform/README.md @@ -25,6 +25,7 @@ for a single-node cluster deployment, including observability, messaging, and da | cloud\_managed\_redis | ../../../src/000-cloud/036-managed-redis/terraform | n/a | | cloud\_messaging | ../../../src/000-cloud/040-messaging/terraform | n/a | | cloud\_networking | ../../../src/000-cloud/050-networking/terraform | n/a | +| cloud\_notification | ../../../src/000-cloud/045-notification/terraform | n/a | | cloud\_observability | ../../../src/000-cloud/020-observability/terraform | n/a | | cloud\_postgresql | ../../../src/000-cloud/035-postgresql/terraform | n/a | | cloud\_resource\_group | ../../../src/000-cloud/000-resource-group/terraform | n/a | @@ -74,6 +75,8 @@ for a single-node cluster deployment, including observability, messaging, and da | azureml\_should\_enable\_public\_network\_access | Whether to enable public network access to the Azure Machine Learning workspace | `bool` | `true` | no | | certificate\_subject | Certificate subject information for auto-generated certificates | ```object({ common_name = optional(string, "Full Single Node VPN Gateway Root Certificate") organization = optional(string, "Edge AI Accelerator") organizational_unit = optional(string, "IT") country = optional(string, "US") province = optional(string, "WA") locality = 
optional(string, "Redmond") })``` | `{}` | no | | certificate\_validity\_days | Validity period in days for auto-generated certificates | `number` | `365` | no | +| closure\_message\_template | HTML message body for session-closure Teams notifications. Supports Logic App expression syntax for dynamic fields | `string` | `"

Session closed for event.

"` | no | +| cluster\_admin\_group\_oid | The Entra ID group Object ID that will be given cluster-admin permissions and Azure Arc RBAC access for 'az connectedk8s proxy' | `string` | `null` | no | | custom\_akri\_connectors | List of custom Akri connector templates with user-defined endpoint types and container images. Supports built-in types (rest, media, onvif, sse) or custom types with custom\_endpoint\_type and custom\_image\_name. Built-in connectors default to mcr.microsoft.com/azureiotoperations/akri-connectors/connector\_type:0.5.1. | ```list(object({ name = string type = string // "rest", "media", "onvif", "sse", "custom" // Custom Connector Fields (required when type = "custom") custom_endpoint_type = optional(string) // e.g., "Contoso.Modbus", "Acme.CustomProtocol" custom_image_name = optional(string) // e.g., "my_acr.azurecr.io/custom-connector" custom_endpoint_version = optional(string, "1.0") // Runtime Configuration (defaults applied based on connector type) registry = optional(string) // Defaults: mcr.microsoft.com for built-in types image_tag = optional(string) // Defaults: 0.5.1 for built-in types, latest for custom replicas = optional(number, 1) image_pull_policy = optional(string) // Default: IfNotPresent // Diagnostics log_level = optional(string) // Default: info (lowercase: trace, debug, info, warning, error, critical) // MQTT Override (uses shared config if not provided) mqtt_config = optional(object({ host = string audience = string ca_configmap = string keep_alive_seconds = optional(number, 60) max_inflight_messages = optional(number, 100) session_expiry_seconds = optional(number, 600) })) // Optional Advanced Fields aio_min_version = optional(string) aio_max_version = optional(string) allocation = optional(object({ policy = string // "Bucketized" bucket_size = number // 1-100 })) additional_configuration = optional(map(string)) secrets = optional(list(object({ secret_alias = string secret_key = string secret_ref = string }))) 
trust_settings = optional(object({ trust_list_secret_ref = string })) }))``` | `[]` | no | | custom\_locations\_oid | The object id of the Custom Locations Entra ID application for your tenant If none is provided, the script attempts to retrieve this value which requires 'Application.Read.All' or 'Directory.Read.All' permissions ```sh az ad sp show --id bc313c14-388c-4e7d-a58e-70017303ee3b --query id -o tsv``` | `string` | `null` | no | | dataflow\_endpoints | List of dataflow endpoints to create with their type-specific configurations | ```list(object({ name = string endpointType = string hostType = optional(string) dataExplorerSettings = optional(object({ authentication = object({ method = string systemAssignedManagedIdentitySettings = optional(object({ audience = optional(string) })) userAssignedManagedIdentitySettings = optional(object({ clientId = string scope = optional(string) tenantId = string })) }) batching = optional(object({ latencySeconds = optional(number) maxMessages = optional(number) })) database = string host = string })) dataLakeStorageSettings = optional(object({ authentication = object({ accessTokenSettings = optional(object({ secretRef = string })) method = string systemAssignedManagedIdentitySettings = optional(object({ audience = optional(string) })) userAssignedManagedIdentitySettings = optional(object({ clientId = string scope = optional(string) tenantId = string })) }) batching = optional(object({ latencySeconds = optional(number) maxMessages = optional(number) })) host = string })) fabricOneLakeSettings = optional(object({ authentication = object({ method = string systemAssignedManagedIdentitySettings = optional(object({ audience = optional(string) })) userAssignedManagedIdentitySettings = optional(object({ clientId = string scope = optional(string) tenantId = string })) }) batching = optional(object({ latencySeconds = optional(number) maxMessages = optional(number) })) host = string names = object({ lakehouseName = string workspaceName 
= string }) oneLakePathType = string })) kafkaSettings = optional(object({ authentication = object({ method = string saslSettings = optional(object({ saslType = string secretRef = string })) systemAssignedManagedIdentitySettings = optional(object({ audience = optional(string) })) userAssignedManagedIdentitySettings = optional(object({ clientId = string scope = optional(string) tenantId = string })) x509CertificateSettings = optional(object({ secretRef = string })) }) batching = optional(object({ latencyMs = optional(number) maxBytes = optional(number) maxMessages = optional(number) mode = optional(string) })) cloudEventAttributes = optional(string) compression = optional(string) consumerGroupId = optional(string) copyMqttProperties = optional(string) host = string kafkaAcks = optional(string) partitionStrategy = optional(string) tls = optional(object({ mode = optional(string) trustedCaCertificateConfigMapRef = optional(string) })) })) localStorageSettings = optional(object({ persistentVolumeClaimRef = string })) mqttSettings = optional(object({ authentication = object({ method = string serviceAccountTokenSettings = optional(object({ audience = string })) systemAssignedManagedIdentitySettings = optional(object({ audience = optional(string) })) userAssignedManagedIdentitySettings = optional(object({ clientId = string scope = optional(string) tenantId = string })) x509CertificateSettings = optional(object({ secretRef = string })) }) clientIdPrefix = optional(string) cloudEventAttributes = optional(string) host = optional(string) keepAliveSeconds = optional(number) maxInflightMessages = optional(number) protocol = optional(string) qos = optional(number) retain = optional(string) sessionExpirySeconds = optional(number) tls = optional(object({ mode = optional(string) trustedCaCertificateConfigMapRef = optional(string) })) })) openTelemetrySettings = optional(object({ authentication = object({ method = string anonymousSettings = optional(any) serviceAccountTokenSettings = 
optional(object({ audience = string })) x509CertificateSettings = optional(object({ secretRef = string })) }) batching = optional(object({ latencySeconds = optional(number) maxMessages = optional(number) })) host = string tls = optional(object({ mode = optional(string) trustedCaCertificateConfigMapRef = optional(string) })) })) }))``` | `[]` | no | @@ -90,7 +93,10 @@ for a single-node cluster deployment, including observability, messaging, and da | nat\_gateway\_zones | Availability zones for NAT gateway resources when zone redundancy is required (example: ['1','2']) | `list(string)` | `[]` | no | | node\_count | Number of nodes for the agent pool in the AKS cluster | `number` | `1` | no | | node\_pools | Additional node pools for the AKS cluster; map key is used as the node pool name | ```map(object({ node_count = number vm_size = string subnet_address_prefixes = list(string) pod_subnet_address_prefixes = list(string) node_taints = optional(list(string), []) enable_auto_scaling = optional(bool, false) min_count = optional(number, null) max_count = optional(number, null) }))``` | `{}` | no | -| node\_vm\_size | VM size for the agent pool in the AKS cluster | `string` | `"Standard_D8ds_v5"` | no | +| node\_vm\_size | VM size for the agent pool in the AKS cluster | `string` | `"Standard_D8ds_v6"` | no | +| notification\_event\_schema | JSON schema object for parsing Event Hub events in the Logic App Parse\_Event action | `any` | `{}` | no | +| notification\_message\_template | HTML template for new-event Teams notifications. Supports Terraform template variable: close\_session\_url. Supports Logic App expression syntax for dynamic event fields | `string` | `"

New alert event detected.

"` | no | +| notification\_partition\_key\_field | Caller's event schema field name to use as the Table Storage partition key for session-state deduplication lookups (e.g. "event\_id", "asset\_id"). Must be set by the scenario tfvars. | `string` | `"event_id"` | no | | postgresql\_admin\_password | Administrator password for PostgreSQL server. (Otherwise, generated when postgresql\_should\_generate\_admin\_password is true). | `string` | `null` | no | | postgresql\_admin\_username | Administrator username for PostgreSQL server | `string` | `"pgadmin"` | no | | postgresql\_databases | Map of databases to create with collation and charset | ```map(object({ collation = string charset = string }))``` | `null` | no | @@ -119,6 +125,7 @@ for a single-node cluster deployment, including observability, messaging, and da | should\_deploy\_aio | Whether to deploy Azure IoT Operations and its dependent edge components (assets, edge messaging). When false, deploys Arc-connected cluster with extensions and observability only | `bool` | `true` | no | | should\_deploy\_azureml | Whether to deploy the Azure Machine Learning workspace and optional compute cluster | `bool` | `false` | no | | should\_deploy\_edge\_azureml | Whether to deploy the Azure Machine Learning edge extension when Azure ML is enabled | `bool` | `false` | no | +| should\_deploy\_notification | Whether to deploy the 045-notification Logic App for alert deduplication and Teams posting | `bool` | `false` | no | | should\_deploy\_postgresql | Whether to deploy PostgreSQL Flexible Server component | `bool` | `false` | no | | should\_deploy\_redis | Whether to deploy Azure Managed Redis component | `bool` | `false` | no | | should\_deploy\_resource\_sync\_rules | Whether to deploy resource sync rules | `bool` | `true` | no | @@ -140,6 +147,9 @@ for a single-node cluster deployment, including observability, messaging, and da | should\_include\_acr\_registry\_endpoint | Whether to include the deployed ACR as a registry 
endpoint with System Assigned Managed Identity authentication | `bool` | `false` | no | | storage\_account\_is\_hns\_enabled | Whether to enable hierarchical namespace on the storage account when Azure Machine Learning is not deployed; automatically forced to false when should\_deploy\_azureml is true | `bool` | `true` | no | | tags | Tags to apply to all resources in this blueprint | `map(string)` | `{}` | no | +| teams\_group\_id | Microsoft 365 Group ID (Team ID) for posting to a Teams channel. Required when teams\_post\_location is 'Channel' | `string` | `null` | no | +| teams\_post\_location | Teams posting location type for the notification message: 'Channel' for a Teams channel or 'Group chat' for a group chat | `string` | `"Channel"` | no | +| teams\_recipient\_id | Teams chat or channel thread ID for posting event notifications | `string` | `null` | no | | use\_existing\_resource\_group | Whether to use an existing resource group with the provided or computed name instead of creating a new one | `bool` | `false` | no | | vpn\_gateway\_config | VPN gateway configuration including SKU, generation, client address pool, and supported protocols | ```object({ sku = optional(string, "VpnGw1") generation = optional(string, "Generation1") client_address_pool = optional(list(string), ["192.168.200.0/24"]) protocols = optional(list(string), ["OpenVPN", "IkeV2"]) })``` | `{}` | no | | vpn\_gateway\_should\_generate\_ca | Whether to generate a new CA certificate; when false, uses an existing certificate from Key Vault | `bool` | `true` | no | @@ -151,44 +161,46 @@ for a single-node cluster deployment, including observability, messaging, and da ## Outputs -| Name | Description | -|----------------------------------|------------------------------------------------------------------------------| -| acr\_network\_posture | Azure Container Registry network posture metadata. | -| ai\_foundry | Azure AI Foundry account resources. 
| -| ai\_foundry\_deployments | Azure AI Foundry model deployments. | -| ai\_foundry\_projects | Azure AI Foundry project resources. | -| arc\_connected\_cluster | Azure Arc connected cluster resources. | -| assets | IoT asset resources. | -| azure\_iot\_operations | Azure IoT Operations deployment details. | -| azureml\_compute\_cluster | Azure Machine Learning compute cluster resources. | -| azureml\_extension | Azure Machine Learning extension for AKS cluster integration. | -| azureml\_inference\_cluster | Azure Machine Learning inference cluster compute target for AKS integration. | -| azureml\_workspace | Azure Machine Learning workspace resources. | -| cluster\_connection | Commands and information to connect to the deployed cluster. | -| container\_registry | Azure Container Registry resources. | -| data\_storage | Data storage resources. | -| dataflow\_endpoints | Map of dataflow endpoint resources by name. | -| dataflow\_graphs | Map of dataflow graph resources by name. | -| dataflows | Map of dataflow resources by name. | -| deployment\_summary | Summary of the deployment configuration. | -| event\_grid\_topic\_endpoint | Event Grid topic endpoint. | -| event\_grid\_topic\_name | Event Grid topic name. | -| eventhub\_name | Event Hub name. | -| eventhub\_namespace\_name | Event Hub namespace name. | -| function\_app | Azure Function App for alert notifications. | -| kubernetes | Azure Kubernetes Service resources. | -| managed\_redis | Azure Managed Redis cache object. | -| managed\_redis\_connection\_info | Azure Managed Redis connection information. | -| nat\_gateway | NAT gateway resource when managed outbound access is enabled. | -| nat\_gateway\_public\_ips | Public IP resources associated with the NAT gateway keyed by name. | -| observability | Monitoring and observability resources. | -| postgresql\_connection\_info | PostgreSQL connection information. | -| postgresql\_databases | Map of PostgreSQL databases. 
| -| postgresql\_server | PostgreSQL Flexible Server object. | -| private\_resolver\_dns\_ip | Private Resolver DNS IP address for VPN client configuration. | -| security\_identity | Security and identity resources. | -| vm\_host | Virtual machine host resources. | -| vpn\_client\_connection\_info | VPN client connection information including download URLs. | -| vpn\_gateway | VPN Gateway configuration when enabled. | -| vpn\_gateway\_public\_ip | VPN Gateway public IP address for client configuration. | +| Name | Description | +|----------------------------------|-------------------------------------------------------------------------------------------------------------------| +| acr\_network\_posture | Azure Container Registry network posture metadata. | +| ai\_foundry | Azure AI Foundry account resources. | +| ai\_foundry\_deployments | Azure AI Foundry model deployments. | +| ai\_foundry\_projects | Azure AI Foundry project resources. | +| arc\_connected\_cluster | Azure Arc connected cluster resources. | +| assets | IoT asset resources. | +| azure\_iot\_operations | Azure IoT Operations deployment details. | +| azureml\_compute\_cluster | Azure Machine Learning compute cluster resources. | +| azureml\_extension | Azure Machine Learning extension for AKS cluster integration. | +| azureml\_inference\_cluster | Azure Machine Learning inference cluster compute target for AKS integration. | +| azureml\_workspace | Azure Machine Learning workspace resources. | +| cluster\_connection | Commands and information to connect to the deployed cluster. | +| container\_registry | Azure Container Registry resources. | +| data\_storage | Data storage resources. | +| dataflow\_endpoints | Map of dataflow endpoint resources by name. | +| dataflow\_graphs | Map of dataflow graph resources by name. | +| dataflows | Map of dataflow resources by name. | +| deployment\_summary | Summary of the deployment configuration. | +| event\_grid\_topic\_endpoint | Event Grid topic endpoint. 
| +| event\_grid\_topic\_name | Event Grid topic name. | +| eventhub\_name | Event Hub name. | +| eventhub\_namespace\_name | Event Hub namespace name. | +| function\_app | Azure Function App for alert notifications. | +| kubernetes | Azure Kubernetes Service resources. | +| managed\_redis | Azure Managed Redis cache object. | +| managed\_redis\_connection\_info | Azure Managed Redis connection information. | +| messaging | Cloud messaging resources (aggregate, mirrors bicep/main.bicep `messaging` output for cross-IaC contract parity). | +| nat\_gateway | NAT gateway resource when managed outbound access is enabled. | +| nat\_gateway\_public\_ips | Public IP resources associated with the NAT gateway keyed by name. | +| notification | Alert notification pipeline resources. | +| observability | Monitoring and observability resources. | +| postgresql\_connection\_info | PostgreSQL connection information. | +| postgresql\_databases | Map of PostgreSQL databases. | +| postgresql\_server | PostgreSQL Flexible Server object. | +| private\_resolver\_dns\_ip | Private Resolver DNS IP address for VPN client configuration. | +| security\_identity | Security and identity resources. | +| vm\_host | Virtual machine host resources. | +| vpn\_client\_connection\_info | VPN client connection information including download URLs. | +| vpn\_gateway | VPN Gateway configuration when enabled. | +| vpn\_gateway\_public\_ip | VPN Gateway public IP address for client configuration. | diff --git a/blueprints/full-single-node-cluster/terraform/leak-detection.tfvars.example b/blueprints/full-single-node-cluster/terraform/leak-detection.tfvars.example new file mode 100644 index 00000000..5986c874 --- /dev/null +++ b/blueprints/full-single-node-cluster/terraform/leak-detection.tfvars.example @@ -0,0 +1,254 @@ +/* + * Leak Detection Scenario on the full-single-node-cluster blueprint + * + * This is NOT a separate blueprint. 
It is an example tfvars overlay for the + * `full-single-node-cluster` Terraform that configures the alert dataflow + * routing, Azure Functions for alert processing, and the 045-notification + * Logic App pipeline for Teams-based leak detection alerts with session + * deduplication. + */ + +// Core Parameters +environment = "dev" +location = "eastus2" +resource_prefix = "aio" +instance = "001" + +// Use existing resource group when layering on a previous deployment +use_existing_resource_group = false + +// Enable the Akri Media Connector template on the IoT Operations instance +should_enable_akri_media_connector = true + +// Camera and media source devices +namespaced_devices = [ + { + name = "warehouse-camera-01" + enabled = true + endpoints = { + inbound = { + "warehouse-camera-endpoint" = { + endpoint_type = "Microsoft.Media" + address = "rtsp://192.168.1.100:554/stream1" + authentication = { + method = "UsernamePassword" + usernamePasswordCredentials = { + usernameSecretName = "camera-credentials-username" + passwordSecretName = "camera-credentials-password" + } + } + } + } + } + }, + { + name = "loading-dock-camera-01" + enabled = true + endpoints = { + inbound = { + "loading-dock-endpoint" = { + endpoint_type = "Microsoft.Media" + address = "rtsp://192.168.1.101:554/stream1" + authentication = { + method = "Anonymous" + } + } + } + } + } +] + +// Media capture task assets +namespaced_assets = [ + { + name = "warehouse-camera-01-snapshots" + display_name = "Warehouse Camera 01 Snapshots" + device_ref = { + device_name = "warehouse-camera-01" + endpoint_name = "warehouse-camera-endpoint" + } + description = "Snapshot capture from warehouse camera for AI processing" + attributes = { + assetType = "media-snapshots" + location = "Warehouse Main Entrance" + } + datasets = [ + { + name = "snapshots" + data_source = "" + dataset_configuration = "{\"taskType\":\"snapshot-to-mqtt\",\"intervalSeconds\":5,\"quality\":85}" + data_points = [] + destinations = [ + { + 
target = "Mqtt" + configuration = { + topic = "warehouse/camera-01/snapshots" + } + } + ] + } + ] + }, + { + name = "warehouse-camera-01-clips" + display_name = "Warehouse Camera 01 Video Clips" + device_ref = { + device_name = "warehouse-camera-01" + endpoint_name = "warehouse-camera-endpoint" + } + description = "Video clip recording from warehouse camera" + attributes = { + assetType = "media-clips" + location = "Warehouse Main Entrance" + } + datasets = [ + { + name = "clips" + data_source = "" + dataset_configuration = "{\"taskType\":\"clip-to-fs\",\"durationSeconds\":30,\"storagePath\":\"/clips\"}" + data_points = [] + destinations = [] + } + ] + }, + { + name = "loading-dock-camera-01-snapshots" + display_name = "Loading Dock Camera Snapshots" + device_ref = { + device_name = "loading-dock-camera-01" + endpoint_name = "loading-dock-endpoint" + } + description = "Periodic snapshot capture from loading dock camera" + attributes = { + assetType = "media-snapshots" + location = "Loading Dock" + } + datasets = [ + { + name = "snapshots" + data_source = "" + dataset_configuration = "{\"taskType\":\"snapshot-to-fs\",\"intervalSeconds\":10,\"quality\":90,\"storagePath\":\"/snapshots\"}" + data_points = [] + destinations = [] + } + ] + } +] + +// Alert Event Hub +alert_eventhub_name = "evh-aio-alerts-dev-001" + +// Alert Notification Function App +should_create_azure_functions = true +function_app_settings = { + "NOTIFICATION_WEBHOOK_URL" = "https://your-teams-or-slack-webhook-url" + "ALERT_SEVERITY_THRESHOLD" = "high" + "ALERT_EVENTHUB_CONSUMER_GROUP" = "fn-notifications" +} + +eventhubs = { + "evh-aio-sample" = {} + "evh-aio-alerts-dev-001" = { + message_retention = 1 + partition_count = 2 + consumer_groups = { + "fn-notifications" = { user_metadata = "Alert notification function consumer group" } + "notification" = { user_metadata = "Logic App notification consumer" } + } + } +} + +// Notification Pipeline (045-notification) +should_deploy_notification = true + 
+// Teams chat or channel thread ID (replace with your actual ID) +teams_recipient_id = "REPLACE_WITH_TEAMS_CHAT_OR_CHANNEL_ID" + +notification_event_schema = { + "type" = "object" + "properties" = { + "camera_id" = { "type" = "string" } + "timestamp" = { "type" = "string" } + "confidence" = { "type" = "number" } + "leak_type" = { "type" = "string" } + } +} + +notification_message_template = <<-EOT +

Leak Detected

+

Camera: @{body('Parse_Event')?['camera_id']}

+

Type: @{body('Parse_Event')?['leak_type']}

+

Confidence: @{body('Parse_Event')?['confidence']}

+

Time: @{body('Parse_Event')?['timestamp']}

+

Close Session

+EOT + +closure_message_template = <<-EOT +

Leak Session Closed

+

Camera: @{triggerBody()?['camera_id']}

+

Session closed at @{utcNow()}

+EOT + +notification_partition_key_field = "camera_id" + +// Alert Dataflow Endpoint +dataflow_endpoints = [ + { + name = "alert-eventhub-endpoint" + endpointType = "Kafka" + kafkaSettings = { + host = "evhns-aio-aio-dev-001.servicebus.windows.net:9093" + batching = { + latencyMs = 0 + maxMessages = 100 + } + authentication = { + method = "SystemAssignedManagedIdentity" + systemAssignedManagedIdentitySettings = {} + } + tls = { + mode = "Enabled" + } + } + } +] + +// Alert Dataflow +dataflows = [ + { + name = "alert-eventhub-dataflow" + operations = [ + { + operationType = "Source" + name = "source" + sourceSettings = { + endpointRef = "default" + serializationFormat = "Json" + dataSources = [ + "edge-ai/+/+/+/inference/+/+/high", + "edge-ai/+/+/+/alerts/triggers" + ] + } + }, + { + operationType = "BuiltInTransformation" + name = "passthrough" + builtInTransformationSettings = { + serializationFormat = "Json" + map = [{ + inputs = ["*"] + output = "*" + }] + } + }, + { + operationType = "Destination" + name = "destination" + destinationSettings = { + endpointRef = "alert-eventhub-endpoint" + dataDestination = "evh-aio-alerts-dev-001" + } + } + ] + } +] diff --git a/blueprints/full-single-node-cluster/terraform/main.tf b/blueprints/full-single-node-cluster/terraform/main.tf index 8222ca3a..54e44c2e 100644 --- a/blueprints/full-single-node-cluster/terraform/main.tf +++ b/blueprints/full-single-node-cluster/terraform/main.tf @@ -19,6 +19,7 @@ locals { function_app_computed_settings = var.should_create_azure_functions ? 
{ "EventHubConnection__fullyQualifiedNamespace" = "${local.eventhub_namespace_name}.servicebus.windows.net" "EventHubConnection__credential" = "managedidentity" + "EventHubConnection__clientId" = module.cloud_messaging.function_identity.client_id "ALERT_EVENTHUB_NAME" = local.alert_eventhub_name "ALERT_EVENTHUB_CONSUMER_GROUP" = var.alert_eventhub_consumer_group } : {} @@ -95,6 +96,8 @@ module "cloud_security_identity" { should_create_aks_identity = var.should_create_aks_identity should_create_ml_workload_identity = var.azureml_should_create_ml_workload_identity should_create_secret_sync_identity = var.should_deploy_aio + log_analytics_workspace_id = module.cloud_observability.log_analytics_workspace.id + should_enable_diagnostic_settings = true } module "cloud_vpn_gateway" { @@ -243,6 +246,35 @@ module "cloud_messaging" { eventhubs = local.eventhubs function_app_settings = merge(var.function_app_settings, local.function_app_computed_settings) + + log_analytics_workspace_id = module.cloud_observability.log_analytics_workspace.id + should_enable_diagnostic_settings = true +} + +module "cloud_notification" { + count = var.should_deploy_notification ? 
1 : 0 + source = "../../../src/000-cloud/045-notification/terraform" + + depends_on = [module.cloud_messaging] + + environment = var.environment + location = var.location + resource_prefix = var.resource_prefix + instance = var.instance + + resource_group = module.cloud_resource_group.resource_group + + eventhub_namespace = module.cloud_messaging.eventhub_namespace + eventhub_name = local.alert_eventhub_name + storage_account = module.cloud_data.storage_account + + event_schema = var.notification_event_schema + notification_message_template = var.notification_message_template + closure_message_template = var.closure_message_template + partition_key_field = var.notification_partition_key_field + teams_recipient_id = var.teams_recipient_id + teams_group_id = var.teams_group_id + teams_post_location = var.teams_post_location } module "cloud_vm_host" { @@ -283,6 +315,8 @@ module "cloud_acr" { public_network_access_enabled = var.acr_public_network_access_enabled should_enable_data_endpoints = var.acr_data_endpoint_enabled should_enable_export_policy = var.acr_export_policy_enabled + log_analytics_workspace_id = module.cloud_observability.log_analytics_workspace.id + should_enable_diagnostic_settings = true } module "cloud_kubernetes" { @@ -351,6 +385,7 @@ module "cloud_azureml" { should_enable_nat_gateway = var.should_enable_managed_outbound_access should_enable_public_network_access = var.azureml_should_enable_public_network_access should_create_compute_cluster = var.azureml_should_create_compute_cluster + compute_cluster_node_public_ip_enabled = !var.azureml_should_enable_private_endpoint ml_workload_identity = try(module.cloud_security_identity.ml_workload_identity, null) ml_workload_subjects = var.azureml_ml_workload_subjects @@ -409,6 +444,7 @@ module "edge_cncf_cluster" { should_deploy_arc_machines = false should_get_custom_locations_oid = var.should_get_custom_locations_oid should_add_current_user_cluster_admin = var.should_add_current_user_cluster_admin + 
cluster_admin_group_oid = var.cluster_admin_group_oid custom_locations_oid = var.custom_locations_oid // Key Vault for script retrieval diff --git a/blueprints/full-single-node-cluster/terraform/onvif-connector-assets.tfvars.example b/blueprints/full-single-node-cluster/terraform/onvif-connector-assets.tfvars.example index 40f859ec..91401585 100644 --- a/blueprints/full-single-node-cluster/terraform/onvif-connector-assets.tfvars.example +++ b/blueprints/full-single-node-cluster/terraform/onvif-connector-assets.tfvars.example @@ -6,8 +6,8 @@ * * The ONVIF connector enables standardized IP camera integration with support for: * - Device discovery and capability introspection (Profile S/T) - * - PTZ (Pan-Tilt-Zoom) control via MQTT commands - * - Event monitoring (motion detection, tampering alerts) + * - PTZ (Pan-Tilt-Zoom) control via management_groups with actions + * - Event monitoring (motion detection, tampering alerts) via event_groups * - Media stream URI retrieval (H.264, JPEG, H.265) * * Usage: @@ -111,11 +111,11 @@ namespaced_devices = [ * ONVIF Connector Assets * * Defines the actual assets that reference the devices above and configure: - * - PTZ commands for camera control (via MQTT topics) - * - Event subscriptions (motion detection, tampering alerts) + * - PTZ control via management_groups with actions (ONVIF PTZ service calls) + * - Event subscriptions via event_groups (motion detection, tampering alerts) * - Media profile configurations * - * ONVIF assets support both commands (PTZ control) and events (camera notifications). + * ONVIF assets support both management_groups (PTZ control) and event_groups (camera notifications). 
*/ namespaced_assets = [ // Warehouse Camera Asset - Full PTZ Control + Events @@ -146,110 +146,128 @@ namespaced_assets = [ max_resolution = "1920x1080" } - // PTZ Commands - Control camera movement via MQTT - commands = [ + // PTZ management groups - Control camera movement via ONVIF PTZ service + management_groups = [ { - name = "pan_right" - display_name = "Pan Camera Right" - description = "Pan camera to the right" - topic = "cameras/company/cloud/region/environment/warehouse-01/ptz/pan" - payload = jsonencode({ - direction = "right" - speed = 0.5 - }) - }, - { - name = "pan_left" - display_name = "Pan Camera Left" - description = "Pan camera to the left" - topic = "cameras/company/cloud/region/environment/warehouse-01/ptz/pan" - payload = jsonencode({ - direction = "left" - speed = 0.5 - }) - }, - { - name = "tilt_up" - display_name = "Tilt Camera Up" - description = "Tilt camera upward" - topic = "cameras/company/cloud/region/environment/warehouse-01/ptz/tilt" - payload = jsonencode({ - direction = "up" - speed = 0.5 - }) - }, - { - name = "tilt_down" - display_name = "Tilt Camera Down" - description = "Tilt camera downward" - topic = "cameras/company/cloud/region/environment/warehouse-01/ptz/tilt" - payload = jsonencode({ - direction = "down" - speed = 0.5 - }) - }, - { - name = "zoom_in" - display_name = "Zoom In" - description = "Zoom camera in" - topic = "cameras/company/cloud/region/environment/warehouse-01/ptz/zoom" - payload = jsonencode({ - direction = "in" - speed = 0.3 - }) - }, - { - name = "zoom_out" - display_name = "Zoom Out" - description = "Zoom camera out" - topic = "cameras/company/cloud/region/environment/warehouse-01/ptz/zoom" - payload = jsonencode({ - direction = "out" - speed = 0.3 - }) - }, - { - name = "goto_home" - display_name = "Return to Home Position" - description = "Move camera to preset home position" - topic = "cameras/company/cloud/region/environment/warehouse-01/ptz/home" - payload = jsonencode({}) + name = "ptz-controls" 
+ data_source = "ptz" + actions = [ + { + name = "pan_right" + action_type = "Call" + target_uri = "http://onvif.org/onvif/ver20/ptz/wsdl/ContinuousMove" + action_configuration = jsonencode({ + direction = "right" + speed = 0.5 + }) + }, + { + name = "pan_left" + action_type = "Call" + target_uri = "http://onvif.org/onvif/ver20/ptz/wsdl/ContinuousMove" + action_configuration = jsonencode({ + direction = "left" + speed = 0.5 + }) + }, + { + name = "tilt_up" + action_type = "Call" + target_uri = "http://onvif.org/onvif/ver20/ptz/wsdl/ContinuousMove" + action_configuration = jsonencode({ + direction = "up" + speed = 0.5 + }) + }, + { + name = "tilt_down" + action_type = "Call" + target_uri = "http://onvif.org/onvif/ver20/ptz/wsdl/ContinuousMove" + action_configuration = jsonencode({ + direction = "down" + speed = 0.5 + }) + }, + { + name = "zoom_in" + action_type = "Call" + target_uri = "http://onvif.org/onvif/ver20/ptz/wsdl/ContinuousMove" + action_configuration = jsonencode({ + direction = "in" + speed = 0.3 + }) + }, + { + name = "zoom_out" + action_type = "Call" + target_uri = "http://onvif.org/onvif/ver20/ptz/wsdl/ContinuousMove" + action_configuration = jsonencode({ + direction = "out" + speed = 0.3 + }) + }, + { + name = "stop" + action_type = "Call" + target_uri = "http://onvif.org/onvif/ver20/ptz/wsdl/Stop" + action_configuration = jsonencode({}) + }, + { + name = "goto_home" + action_type = "Call" + target_uri = "http://onvif.org/onvif/ver20/ptz/wsdl/GotoHomePosition" + action_configuration = jsonencode({}) + } + ] } ] - // ONVIF Events - Camera notifications (motion, tampering) - events = [ + // ONVIF event groups - Camera notifications (motion, tampering) + event_groups = [ { - name = "MOTION_DETECTED" - event_notifier = "motion" - destinations = [ + name = "camera-events" + events = [ { - target = "Mqtt" - configuration = { - topic = "cameras/company/cloud/region/environment/warehouse-01/events/motion" - retain = "Never" - qos = "Qos1" - } - } - ] - }, - 
{ - name = "TAMPERING_ALERT" - event_notifier = "tampering" - destinations = [ + name = "motion-detected" + data_source = "motion" + event_configuration = jsonencode({ + topic = "tns1:VideoAnalytics/MotionDetection" + capability_id = "http://onvif.org/onvif/ver10/events/wsdl/EventsBinding/PullPointSubscription" + }) + destinations = [ + { + target = "Mqtt" + configuration = { + topic = "cameras/company/cloud/region/environment/warehouse-01/events/motion" + retain = "Never" + qos = "Qos1" + } + } + ] + }, { - target = "Mqtt" - configuration = { - topic = "cameras/company/cloud/region/environment/warehouse-01/events/tampering" - retain = "Never" - qos = "Qos1" - } + name = "tampering-alert" + data_source = "tampering" + event_configuration = jsonencode({ + topic = "tns1:VideoSource/Tampering" + capability_id = "http://onvif.org/onvif/ver10/events/wsdl/EventsBinding/PullPointSubscription" + }) + destinations = [ + { + target = "Mqtt" + configuration = { + topic = "cameras/company/cloud/region/environment/warehouse-01/events/tampering" + retain = "Never" + qos = "Qos1" + } + } + ] } ] } ] - // No datasets for ONVIF connectors - use events and commands + // No datasets for ONVIF connectors - use event_groups and management_groups datasets = [] default_events_configuration = "{\"publishingInterval\":5000,\"samplingInterval\":5000,\"queueSize\":10}" @@ -278,35 +296,48 @@ namespaced_assets = [ max_resolution = "2688x1520" } - // No PTZ commands for fixed cameras - commands = [] + // No PTZ management groups for fixed cameras + management_groups = [] - events = [ + event_groups = [ { - name = "MOTION_DETECTED" - event_notifier = "motion" - destinations = [ + name = "camera-events" + events = [ { - target = "Mqtt" - configuration = { - topic = "cameras/company/cloud/region/environment/perimeter-02/events/motion" - retain = "Never" - qos = "Qos1" - } - } - ] - }, - { - name = "TAMPERING_ALERT" - event_notifier = "tampering" - destinations = [ + name = "motion-detected" + 
data_source = "motion" + event_configuration = jsonencode({ + topic = "tns1:VideoAnalytics/MotionDetection" + capability_id = "http://onvif.org/onvif/ver10/events/wsdl/EventsBinding/PullPointSubscription" + }) + destinations = [ + { + target = "Mqtt" + configuration = { + topic = "cameras/company/cloud/region/environment/perimeter-02/events/motion" + retain = "Never" + qos = "Qos1" + } + } + ] + }, { - target = "Mqtt" - configuration = { - topic = "cameras/company/cloud/region/environment/perimeter-02/events/tampering" - retain = "Never" - qos = "Qos1" - } + name = "tampering-alert" + data_source = "tampering" + event_configuration = jsonencode({ + topic = "tns1:VideoSource/Tampering" + capability_id = "http://onvif.org/onvif/ver10/events/wsdl/EventsBinding/PullPointSubscription" + }) + destinations = [ + { + target = "Mqtt" + configuration = { + topic = "cameras/company/cloud/region/environment/perimeter-02/events/tampering" + retain = "Never" + qos = "Qos1" + } + } + ] } ] } @@ -339,20 +370,29 @@ namespaced_assets = [ max_resolution = "3840x2160" } - commands = [] + management_groups = [] - events = [ + event_groups = [ { - name = "MOTION_DETECTED" - event_notifier = "motion" - destinations = [ + name = "camera-events" + events = [ { - target = "Mqtt" - configuration = { - topic = "cameras/company/cloud/region/environment/assembly-03/events/motion" - retain = "Never" - qos = "Qos1" - } + name = "motion-detected" + data_source = "motion" + event_configuration = jsonencode({ + topic = "tns1:VideoAnalytics/MotionDetection" + capability_id = "http://onvif.org/onvif/ver10/events/wsdl/EventsBinding/PullPointSubscription" + }) + destinations = [ + { + target = "Mqtt" + configuration = { + topic = "cameras/company/cloud/region/environment/assembly-03/events/motion" + retain = "Never" + qos = "Qos1" + } + } + ] } ] } @@ -375,12 +415,14 @@ namespaced_assets = [ * - UsernamePassword: HTTP Digest Auth (most common) * - X509Certificate: Client certificate auth (most secure) * 
- * 3. PTZ Commands: - * - Only include PTZ commands for cameras with PTZ capabilities - * - Speed values range from 0.0 (slowest) to 1.0 (fastest) + * 3. PTZ Management Groups: + * - Only include management_groups with PTZ actions for cameras with PTZ capabilities + * - Each action requires action_type ("Call", "Read", or "Write") and target_uri + * - Speed values in action_configuration range from 0.0 (slowest) to 1.0 (fastest) * - Direction values: pan (left/right), tilt (up/down), zoom (in/out) * - * 4. Event Types: + * 4. Event Groups: + * - Each event requires a data_source matching the ONVIF event source * - motion: Motion detection events from camera analytics * - tampering: Camera tampering/obstruction alerts * - Custom events may vary by camera manufacturer @@ -405,5 +447,5 @@ namespaced_assets = [ * 8. Testing: * - Use local Docker Compose environment first (src/500-application/510-onvif-connector) * - Verify ONVIF compliance with ONVIF Device Test Tool - * - Test PTZ commands with mosquitto_pub before production deployment + * - Test PTZ actions with mosquitto_pub before production deployment */ diff --git a/blueprints/full-single-node-cluster/terraform/outputs.tf b/blueprints/full-single-node-cluster/terraform/outputs.tf index ac68a0be..492c289f 100644 --- a/blueprints/full-single-node-cluster/terraform/outputs.tf +++ b/blueprints/full-single-node-cluster/terraform/outputs.tf @@ -159,6 +159,31 @@ output "function_app" { value = try(module.cloud_messaging.function_app, null) } +output "messaging" { + description = "Cloud messaging resources (aggregate, mirrors bicep/main.bicep `messaging` output for cross-IaC contract parity)." 
+ value = { + event_grid_topic_endpoint = try(module.cloud_messaging.eventgrid.endpoint, "Not deployed") + event_grid_topic_name = try(module.cloud_messaging.eventgrid.topic_name, "Not deployed") + eventhub_name = try(module.cloud_messaging.eventhubs[0].eventhub_name, "Not deployed") + eventhub_namespace_name = try(module.cloud_messaging.eventhubs[0].namespace_name, "Not deployed") + } +} + +/* + * Notification Outputs + */ + +output "notification" { + description = "Alert notification pipeline resources." + value = { + logic_app = try(module.cloud_notification[0].logic_app, null) + close_logic_app = try(module.cloud_notification[0].close_logic_app, null) + close_session_endpoint = try(module.cloud_notification[0].close_session_endpoint, null) + storage_account = try(module.cloud_notification[0].storage_account, null) + } + sensitive = true +} + /* * Dataflow Outputs */ diff --git a/blueprints/full-single-node-cluster/terraform/sse-connector-assets.tfvars.example b/blueprints/full-single-node-cluster/terraform/sse-connector-assets.tfvars.example index 0bb39820..fa9bbac5 100644 --- a/blueprints/full-single-node-cluster/terraform/sse-connector-assets.tfvars.example +++ b/blueprints/full-single-node-cluster/terraform/sse-connector-assets.tfvars.example @@ -105,81 +105,86 @@ namespaced_assets = [ analytics_type = "leak-detection" } - // SSE Connectors use Events instead of Datasets for real-time streaming - events = [ + // SSE Connectors use event_groups instead of datasets for real-time streaming + event_groups = [ { - name = "HEARTBEAT" - event_notifier = "HEARTBEAT" - destinations = [ + name = "sse-events" + events = [ { - target = "Mqtt" - configuration = { - topic = "events/company/cloud/region/environment/analytics-camera-01/heartbeat" - retain = "Never" - qos = "Qos1" - } - } - ] - }, - { - name = "ALERT" - event_notifier = "ALERT" - destinations = [ + name = "heartbeat" + data_source = "HEARTBEAT" + destinations = [ + { + target = "Mqtt" + configuration = { + 
topic = "events/company/cloud/region/environment/analytics-camera-01/heartbeat" + retain = "Never" + qos = "Qos1" + } + } + ] + }, { - target = "Mqtt" - configuration = { - topic = "events/company/cloud/region/environment/analytics-camera-01/alert" - retain = "Never" - qos = "Qos1" - } - } - ] - }, - { - name = "ALERT_DLQC" - event_notifier = "ALERT_DLQC" - destinations = [ + name = "alert" + data_source = "ALERT" + destinations = [ + { + target = "Mqtt" + configuration = { + topic = "events/company/cloud/region/environment/analytics-camera-01/alert" + retain = "Never" + qos = "Qos1" + } + } + ] + }, { - target = "Mqtt" - configuration = { - topic = "events/company/cloud/region/environment/analytics-camera-01/alert-dlqc" - retain = "Never" - qos = "Qos1" - } - } - ] - }, - { - name = "ANALYTICS_ENABLED" - event_notifier = "ANALYTICS_ENABLED" - destinations = [ + name = "alert-dlqc" + data_source = "ALERT_DLQC" + destinations = [ + { + target = "Mqtt" + configuration = { + topic = "events/company/cloud/region/environment/analytics-camera-01/alert-dlqc" + retain = "Never" + qos = "Qos1" + } + } + ] + }, { - target = "Mqtt" - configuration = { - topic = "events/company/cloud/region/environment/analytics-camera-01/analytics-enabled" - retain = "Never" - qos = "Qos1" - } - } - ] - }, - { - name = "ANALYTICS_DISABLED" - event_notifier = "ANALYTICS_DISABLED" - destinations = [ + name = "analytics-enabled" + data_source = "ANALYTICS_ENABLED" + destinations = [ + { + target = "Mqtt" + configuration = { + topic = "events/company/cloud/region/environment/analytics-camera-01/analytics-enabled" + retain = "Never" + qos = "Qos1" + } + } + ] + }, { - target = "Mqtt" - configuration = { - topic = "events/company/cloud/region/environment/analytics-camera-01/analytics-disabled" - retain = "Never" - qos = "Qos1" - } + name = "analytics-disabled" + data_source = "ANALYTICS_DISABLED" + destinations = [ + { + target = "Mqtt" + configuration = { + topic = 
"events/company/cloud/region/environment/analytics-camera-01/analytics-disabled" + retain = "Never" + qos = "Qos1" + } + } + ] } ] } ] - // No datasets for SSE connectors - use events instead + // No datasets for SSE connectors - use event_groups instead datasets = [] default_events_configuration = "{\"publishingInterval\":1000,\"samplingInterval\":1000,\"queueSize\":10}" @@ -203,32 +208,37 @@ namespaced_assets = [ location = "datacenter-1" } - events = [ + event_groups = [ { - name = "notification" - event_notifier = "notification" - destinations = [ + name = "sse-events" + events = [ { - target = "Mqtt" - configuration = { - topic = "events/company/cloud/region/environment/generic-sse/notifications" - retain = "Never" - qos = "Qos1" - } - } - ] - }, - { - name = "status-change" - event_notifier = "status" - destinations = [ + name = "notification" + data_source = "notification" + destinations = [ + { + target = "Mqtt" + configuration = { + topic = "events/company/cloud/region/environment/generic-sse/notifications" + retain = "Never" + qos = "Qos1" + } + } + ] + }, { - target = "Mqtt" - configuration = { - topic = "events/company/cloud/region/environment/generic-sse/status-changes" - retain = "Never" - qos = "Qos1" - } + name = "status-change" + data_source = "status" + destinations = [ + { + target = "Mqtt" + configuration = { + topic = "events/company/cloud/region/environment/generic-sse/status-changes" + retain = "Never" + qos = "Qos1" + } + } + ] } ] } diff --git a/blueprints/full-single-node-cluster/terraform/variables.tf b/blueprints/full-single-node-cluster/terraform/variables.tf index ff68d8ce..2829f440 100644 --- a/blueprints/full-single-node-cluster/terraform/variables.tf +++ b/blueprints/full-single-node-cluster/terraform/variables.tf @@ -66,6 +66,12 @@ variable "should_add_current_user_cluster_admin" { default = true } +variable "cluster_admin_group_oid" { + type = string + description = "The Entra ID group Object ID that will be given cluster-admin 
permissions and Azure Arc RBAC access for 'az connectedk8s proxy'" + default = null +} + variable "should_get_custom_locations_oid" { type = bool description = <<-EOT @@ -344,6 +350,64 @@ variable "function_app_settings" { sensitive = true } +/* + * Notification Parameters (045-notification) + */ + +variable "should_deploy_notification" { + type = bool + description = "Whether to deploy the 045-notification Logic App for alert deduplication and Teams posting" + default = false +} + +variable "closure_message_template" { + type = string + description = "HTML message body for session-closure Teams notifications. Supports Logic App expression syntax for dynamic fields" + default = "

Session closed for event.

" +} + +variable "notification_event_schema" { + type = any + description = "JSON schema object for parsing Event Hub events in the Logic App Parse_Event action" + default = {} +} + +variable "notification_message_template" { + type = string + description = "HTML template for new-event Teams notifications. Supports Terraform template variable: close_session_url. Supports Logic App expression syntax for dynamic event fields" + default = "

New alert event detected.

" +} + +variable "notification_partition_key_field" { + type = string + description = "Caller's event schema field name to use as the Table Storage partition key for session-state deduplication lookups (e.g. \"event_id\", \"asset_id\"). Must be set by the scenario tfvars." + default = "event_id" +} + +variable "teams_recipient_id" { + type = string + description = "Teams chat or channel thread ID for posting event notifications" + sensitive = true + default = null +} + +variable "teams_group_id" { + type = string + description = "Microsoft 365 Group ID (Team ID) for posting to a Teams channel. Required when teams_post_location is 'Channel'" + default = null +} + +variable "teams_post_location" { + type = string + description = "Teams posting location type for the notification message: 'Channel' for a Teams channel or 'Group chat' for a group chat" + default = "Channel" + + validation { + condition = contains(["Channel", "Group chat"], var.teams_post_location) + error_message = "teams_post_location must be 'Channel' or 'Group chat'" + } +} + /* * Azure Kubernetes Service Parameters */ @@ -384,7 +448,7 @@ variable "node_pools" { variable "node_vm_size" { type = string description = "VM size for the agent pool in the AKS cluster" - default = "Standard_D8ds_v5" + default = "Standard_D8ds_v6" } variable "should_create_aks" { diff --git a/blueprints/full-single-node-cluster/tests/outputs.go b/blueprints/full-single-node-cluster/tests/outputs.go index 102caf34..35f62f2b 100644 --- a/blueprints/full-single-node-cluster/tests/outputs.go +++ b/blueprints/full-single-node-cluster/tests/outputs.go @@ -21,6 +21,7 @@ type BlueprintOutputs struct { DataStorage map[string]any `output:"data_storage"` ContainerRegistry map[string]any `output:"container_registry"` Messaging map[string]any `output:"messaging"` + Notification map[string]any `output:"notification"` VmHost any `output:"vm_host"` ArcConnectedCluster map[string]any `output:"arc_connected_cluster"` ClusterConnection 
map[string]any `output:"cluster_connection"` diff --git a/blueprints/minimum-single-node-cluster/terraform/README.md b/blueprints/minimum-single-node-cluster/terraform/README.md index ad8d52a1..0b7564db 100644 --- a/blueprints/minimum-single-node-cluster/terraform/README.md +++ b/blueprints/minimum-single-node-cluster/terraform/README.md @@ -30,19 +30,20 @@ It includes only the essential components and minimizes resource usage. ## Inputs -| Name | Description | Type | Default | Required | -|---------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|:--------:| -| environment | Environment for all resources in this module: dev, test, or prod | `string` | n/a | yes | -| location | Azure region where all resources will be deployed | `string` | n/a | yes | -| resource\_prefix | Prefix for all resources in this module | `string` | n/a | yes | -| custom\_locations\_oid | The object id of the Custom Locations Entra ID application for your tenant. 
If none is provided, the script will attempt to retrieve this requiring 'Application.Read.All' or 'Directory.Read.All' permissions. ```sh az ad sp show --id bc313c14-388c-4e7d-a58e-70017303ee3b --query id -o tsv``` | `string` | `null` | no | -| instance | Instance identifier for naming resources: 001, 002, etc | `string` | `"001"` | no | -| namespaced\_assets | List of namespaced assets with enhanced configuration support | ```list(object({ name = string display_name = optional(string) device_ref = optional(object({ device_name = string endpoint_name = string })) asset_endpoint_profile_ref = optional(string) default_datasets_configuration = optional(string) default_streams_configuration = optional(string) default_events_configuration = optional(string) description = optional(string) documentation_uri = optional(string) enabled = optional(bool, true) hardware_revision = optional(string) manufacturer = optional(string) manufacturer_uri = optional(string) model = optional(string) product_code = optional(string) serial_number = optional(string) software_revision = optional(string) attributes = optional(map(string), {}) datasets = optional(list(object({ name = string data_points = list(object({ data_point_configuration = optional(string) data_source = string name = string observability_mode = optional(string) rest_sampling_interval_ms = optional(number) rest_mqtt_topic = optional(string) rest_include_state_store = optional(bool) rest_state_store_key = optional(string) })) dataset_configuration = optional(string) data_source = optional(string) destinations = optional(list(object({ target = string configuration = object({ topic = optional(string) retain = optional(string) qos = optional(string) }) })), []) type_ref = optional(string) })), []) streams = optional(list(object({ name = string stream_configuration = optional(string) type_ref = optional(string) destinations = optional(list(object({ target = string configuration = object({ topic = optional(string) retain = 
optional(string) qos = optional(string) }) })), []) })), []) event_groups = optional(list(object({ name = string data_source = optional(string) event_group_configuration = optional(string) type_ref = optional(string) default_destinations = optional(list(object({ target = string configuration = object({ topic = optional(string) retain = optional(string) qos = optional(string) }) })), []) events = list(object({ name = string data_source = string event_configuration = optional(string) type_ref = optional(string) destinations = optional(list(object({ target = string configuration = object({ topic = optional(string) retain = optional(string) qos = optional(string) }) })), []) })) })), []) management_groups = optional(list(object({ name = string data_source = optional(string) management_group_configuration = optional(string) type_ref = optional(string) default_topic = optional(string) default_timeout_in_seconds = optional(number, 100) actions = list(object({ name = string action_type = string target_uri = string topic = optional(string) timeout_in_seconds = optional(number) action_configuration = optional(string) type_ref = optional(string) })) })), []) }))``` | `[]` | no | -| namespaced\_devices | List of namespaced devices to create. Otherwise, an empty list. 
| ```list(object({ name = string enabled = optional(bool, true) endpoints = object({ outbound = optional(object({ assigned = object({}) }), { assigned = {} }) inbound = map(object({ endpoint_type = string address = string version = optional(string, null) additionalConfiguration = optional(string) authentication = object({ method = string usernamePasswordCredentials = optional(object({ usernameSecretName = string passwordSecretName = string })) x509Credentials = optional(object({ certificateSecretName = string })) }) trustSettings = optional(object({ trustList = string })) })) }) }))``` | `[]` | no | -| should\_add\_current\_user\_cluster\_admin | Gives the current logged in user cluster-admin permissions with the new cluster. | `bool` | `true` | no | -| should\_create\_anonymous\_broker\_listener | Whether to enable an insecure anonymous AIO MQ Broker Listener. Should only be used for dev or test environments | `bool` | `false` | no | -| should\_deploy\_aio | Whether to deploy Azure IoT Operations and its dependent edge components (assets). When false, deploys Arc-connected cluster with extensions only | `bool` | `true` | no | -| should\_enable\_private\_endpoints | Whether to enable private endpoints for Key Vault and Storage Account | `bool` | `false` | no | -| should\_get\_custom\_locations\_oid | Whether to get Custom Locations Object ID using Terraform's azuread provider. (Otherwise, provided by 'custom\_locations\_oid' or `az connectedk8s enable-features` for custom-locations on cluster setup if not provided.) 
| `bool` | `true` | no | -| vm\_sku\_size | Size of the VM | `string` | `"Standard_D4_v4"` | no | +| Name | Description | Type | Default | Required | +|---------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------|:--------:| +| environment | Environment for all resources in this module: dev, test, or prod | `string` | n/a | yes | +| location | Azure region where all resources will be deployed | `string` | n/a | yes | +| resource\_prefix | Prefix for all resources in this module | `string` | n/a | yes | +| cluster\_admin\_group\_oid | The Entra ID group Object ID that will be given cluster-admin permissions and Azure Arc RBAC access for 'az connectedk8s proxy' | `string` | `null` | no | +| custom\_locations\_oid | The object id of the Custom Locations Entra ID application for your tenant. If none is provided, the script will attempt to retrieve this requiring 'Application.Read.All' or 'Directory.Read.All' permissions. 
```sh az ad sp show --id bc313c14-388c-4e7d-a58e-70017303ee3b --query id -o tsv``` | `string` | `null` | no | +| instance | Instance identifier for naming resources: 001, 002, etc | `string` | `"001"` | no | +| namespaced\_assets | List of namespaced assets with enhanced configuration support | ```list(object({ name = string display_name = optional(string) device_ref = optional(object({ device_name = string endpoint_name = string })) asset_endpoint_profile_ref = optional(string) default_datasets_configuration = optional(string) default_streams_configuration = optional(string) default_events_configuration = optional(string) description = optional(string) documentation_uri = optional(string) enabled = optional(bool, true) hardware_revision = optional(string) manufacturer = optional(string) manufacturer_uri = optional(string) model = optional(string) product_code = optional(string) serial_number = optional(string) software_revision = optional(string) attributes = optional(map(string), {}) datasets = optional(list(object({ name = string data_points = list(object({ data_point_configuration = optional(string) data_source = string name = string observability_mode = optional(string) rest_sampling_interval_ms = optional(number) rest_mqtt_topic = optional(string) rest_include_state_store = optional(bool) rest_state_store_key = optional(string) })) dataset_configuration = optional(string) data_source = optional(string) destinations = optional(list(object({ target = string configuration = object({ topic = optional(string) retain = optional(string) qos = optional(string) }) })), []) type_ref = optional(string) })), []) streams = optional(list(object({ name = string stream_configuration = optional(string) type_ref = optional(string) destinations = optional(list(object({ target = string configuration = object({ topic = optional(string) retain = optional(string) qos = optional(string) }) })), []) })), []) event_groups = optional(list(object({ name = string data_source = 
optional(string) event_group_configuration = optional(string) type_ref = optional(string) default_destinations = optional(list(object({ target = string configuration = object({ topic = optional(string) retain = optional(string) qos = optional(string) }) })), []) events = list(object({ name = string data_source = string event_configuration = optional(string) type_ref = optional(string) destinations = optional(list(object({ target = string configuration = object({ topic = optional(string) retain = optional(string) qos = optional(string) }) })), []) })) })), []) management_groups = optional(list(object({ name = string data_source = optional(string) management_group_configuration = optional(string) type_ref = optional(string) default_topic = optional(string) default_timeout_in_seconds = optional(number, 100) actions = list(object({ name = string action_type = string target_uri = string topic = optional(string) timeout_in_seconds = optional(number) action_configuration = optional(string) type_ref = optional(string) })) })), []) }))``` | `[]` | no | +| namespaced\_devices | List of namespaced devices to create. Otherwise, an empty list. | ```list(object({ name = string enabled = optional(bool, true) endpoints = object({ outbound = optional(object({ assigned = object({}) }), { assigned = {} }) inbound = map(object({ endpoint_type = string address = string version = optional(string, null) additionalConfiguration = optional(string) authentication = object({ method = string usernamePasswordCredentials = optional(object({ usernameSecretName = string passwordSecretName = string })) x509Credentials = optional(object({ certificateSecretName = string })) }) trustSettings = optional(object({ trustList = string })) })) }) }))``` | `[]` | no | +| should\_add\_current\_user\_cluster\_admin | Gives the current logged in user cluster-admin permissions with the new cluster. 
| `bool` | `true` | no | +| should\_create\_anonymous\_broker\_listener | Whether to enable an insecure anonymous AIO MQ Broker Listener. Should only be used for dev or test environments | `bool` | `false` | no | +| should\_deploy\_aio | Whether to deploy Azure IoT Operations and its dependent edge components (assets). When false, deploys Arc-connected cluster with extensions only | `bool` | `true` | no | +| should\_enable\_private\_endpoints | Whether to enable private endpoints for Key Vault and Storage Account | `bool` | `false` | no | +| should\_get\_custom\_locations\_oid | Whether to get Custom Locations Object ID using Terraform's azuread provider. (Otherwise, provided by 'custom\_locations\_oid' or `az connectedk8s enable-features` for custom-locations on cluster setup if not provided.) | `bool` | `true` | no | +| vm\_sku\_size | Size of the VM | `string` | `"Standard_D4s_v6"` | no | diff --git a/blueprints/minimum-single-node-cluster/terraform/main.tf b/blueprints/minimum-single-node-cluster/terraform/main.tf index b5363f33..61437bd7 100644 --- a/blueprints/minimum-single-node-cluster/terraform/main.tf +++ b/blueprints/minimum-single-node-cluster/terraform/main.tf @@ -106,6 +106,7 @@ module "edge_cncf_cluster" { should_get_custom_locations_oid = var.should_get_custom_locations_oid custom_locations_oid = var.custom_locations_oid should_add_current_user_cluster_admin = var.should_add_current_user_cluster_admin + cluster_admin_group_oid = var.cluster_admin_group_oid key_vault = module.cloud_security_identity.key_vault } diff --git a/blueprints/minimum-single-node-cluster/terraform/variables.tf b/blueprints/minimum-single-node-cluster/terraform/variables.tf index 0950415b..ea81fd2b 100644 --- a/blueprints/minimum-single-node-cluster/terraform/variables.tf +++ b/blueprints/minimum-single-node-cluster/terraform/variables.tf @@ -66,6 +66,12 @@ variable "should_add_current_user_cluster_admin" { default = true } +variable "cluster_admin_group_oid" { + type = string 
+ description = "The Entra ID group Object ID that will be given cluster-admin permissions and Azure Arc RBAC access for 'az connectedk8s proxy'" + default = null +} + variable "should_enable_private_endpoints" { type = bool description = "Whether to enable private endpoints for Key Vault and Storage Account" @@ -92,7 +98,7 @@ variable "vm_sku_size" { type = string // Minimize resource usage - set smaller VM size description = "Size of the VM" - default = "Standard_D4_v4" + default = "Standard_D4s_v6" } variable "namespaced_devices" { diff --git a/blueprints/modules/robotics/terraform/README.md b/blueprints/modules/robotics/terraform/README.md index c0d90afe..5a117428 100644 --- a/blueprints/modules/robotics/terraform/README.md +++ b/blueprints/modules/robotics/terraform/README.md @@ -105,7 +105,7 @@ Adds Azure Machine Learning capabilities with optional foundational resource cre | nat\_gateway\_zones | Availability zones for NAT gateway resources when zone-redundancy is required (example: ['1','2']) | `list(string)` | `[]` | no | | node\_count | Number of nodes for the agent pool in the AKS cluster. | `number` | `1` | no | | node\_pools | Additional node pools for the AKS cluster. Map key is used as the node pool name. | ```map(object({ node_count = optional(number, null) vm_size = string subnet_address_prefixes = list(string) pod_subnet_address_prefixes = list(string) node_taints = optional(list(string), []) enable_auto_scaling = optional(bool, false) min_count = optional(number, null) max_count = optional(number, null) priority = optional(string, "Regular") zones = optional(list(string), null) eviction_policy = optional(string, "Deallocate") gpu_driver = optional(string, null) }))``` | `{}` | no | -| node\_vm\_size | VM size for the agent pool in the AKS cluster. Default is Standard\_D8ds\_v5. | `string` | `"Standard_D8ds_v5"` | no | +| node\_vm\_size | VM size for the agent pool in the AKS cluster. Default is Standard\_D8ds\_v6. 
| `string` | `"Standard_D8ds_v6"` | no | | postgresql\_admin\_password | Administrator password for PostgreSQL server. (Otherwise, generated when postgresql\_should\_generate\_admin\_password is true). | `string` | `null` | no | | postgresql\_admin\_username | Administrator username for PostgreSQL server | `string` | `"pgadmin"` | no | | postgresql\_databases | Map of databases to create with collation and charset | ```map(object({ collation = string charset = string }))``` | `null` | no | @@ -177,7 +177,7 @@ Adds Azure Machine Learning capabilities with optional foundational resource cre | vm\_host\_count | Number of VM hosts to create for multi-node scenarios | `number` | `1` | no | | vm\_max\_bid\_price | Maximum hourly price in USD for Spot VM. Set to -1 (recommended) to pay current spot price without price-based eviction. Custom values support up to 5 decimal places. Only applies when vm\_priority is Spot | `number` | `-1` | no | | vm\_priority | VM priority: Regular (production, guaranteed capacity) or Spot (cost-optimized, up to 90% savings, can be evicted). Recommended: Spot for dev/test GPU workloads | `string` | `"Regular"` | no | -| vm\_sku\_size | VM SKU size for the host. Examples: Standard\_D8s\_v3 (general purpose), Standard\_NV36ads\_A10\_v5 (GPU workload) | `string` | `"Standard_D8s_v3"` | no | +| vm\_sku\_size | VM SKU size for the host. Examples: Standard\_D8s\_v6 (general purpose), Standard\_NV36ads\_A10\_v5 (GPU workload) | `string` | `"Standard_D8s_v6"` | no | | vm\_user\_principals | Map of Azure AD principals for Virtual Machine User Login role (standard access). Keys are descriptive identifiers (e.g., `user@company.com`), values are principal object IDs. | `map(string)` | `{}` | no | | vpn\_gateway\_azure\_ad\_config | Azure AD configuration for VPN Gateway authentication. tenant\_id is required when vpn\_gateway\_should\_use\_azure\_ad\_auth is true. audience defaults to Microsoft-registered app. 
issuer will default to `https://sts.windows.net/{tenant_id}/` when not provided | ```object({ tenant_id = optional(string) audience = optional(string, "c632b3df-fb67-4d84-bdcf-b95ad541b5c8") issuer = optional(string) })``` | `{}` | no | | vpn\_gateway\_config | VPN Gateway configuration including SKU, generation, client address pool, and supported protocols | ```object({ sku = optional(string, "VpnGw1") generation = optional(string, "Generation1") client_address_pool = optional(list(string), ["192.168.200.0/24"]) protocols = optional(list(string), ["OpenVPN", "IkeV2"]) })``` | `{}` | no | diff --git a/blueprints/modules/robotics/terraform/main.tf b/blueprints/modules/robotics/terraform/main.tf index 1e2ad384..b5244d6f 100644 --- a/blueprints/modules/robotics/terraform/main.tf +++ b/blueprints/modules/robotics/terraform/main.tf @@ -141,6 +141,8 @@ module "cloud_security_identity" { key_vault_virtual_network_id = try(module.cloud_networking[0].virtual_network.id, data.azurerm_virtual_network.existing[0].id, null) should_enable_public_network_access = var.should_enable_public_network_access should_enable_purge_protection = var.should_enable_key_vault_purge_protection + log_analytics_workspace_id = try(module.cloud_observability[0].log_analytics_workspace.id, null) + should_enable_diagnostic_settings = true } module "cloud_vpn_gateway" { @@ -337,11 +339,13 @@ module "cloud_acr" { should_enable_nat_gateway = var.should_enable_managed_outbound_access nat_gateway = try(module.cloud_networking[0].nat_gateway, null) - allow_trusted_services = var.acr_allow_trusted_services - allowed_public_ip_ranges = var.acr_allowed_public_ip_ranges - public_network_access_enabled = var.acr_public_network_access_enabled - should_enable_data_endpoints = var.acr_data_endpoint_enabled - should_enable_export_policy = var.acr_export_policy_enabled + allow_trusted_services = var.acr_allow_trusted_services + allowed_public_ip_ranges = var.acr_allowed_public_ip_ranges + 
public_network_access_enabled = var.acr_public_network_access_enabled + should_enable_data_endpoints = var.acr_data_endpoint_enabled + should_enable_export_policy = var.acr_export_policy_enabled + log_analytics_workspace_id = try(module.cloud_observability[0].log_analytics_workspace.id, null) + should_enable_diagnostic_settings = true } module "cloud_kubernetes" { @@ -438,6 +442,8 @@ module "cloud_azureml" { compute_cluster_vm_priority = var.compute_cluster_vm_priority compute_cluster_vm_size = var.compute_cluster_vm_size + compute_cluster_node_public_ip_enabled = !var.should_enable_private_endpoints + key_vault = try(module.cloud_security_identity[0].key_vault, data.azurerm_key_vault.existing[0], null) application_insights = try(module.cloud_observability[0].application_insights, data.azurerm_application_insights.existing[0], null) storage_account = try(module.cloud_data[0].storage_account, data.azurerm_storage_account.existing[0], null) diff --git a/blueprints/modules/robotics/terraform/variables.tf b/blueprints/modules/robotics/terraform/variables.tf index 418a25e5..23aee356 100644 --- a/blueprints/modules/robotics/terraform/variables.tf +++ b/blueprints/modules/robotics/terraform/variables.tf @@ -109,8 +109,8 @@ variable "node_count" { variable "node_vm_size" { type = string - description = "VM size for the agent pool in the AKS cluster. Default is Standard_D8ds_v5." - default = "Standard_D8ds_v5" + description = "VM size for the agent pool in the AKS cluster. Default is Standard_D8ds_v6." + default = "Standard_D8ds_v6" } variable "subnet_address_prefixes_aks" { @@ -777,8 +777,8 @@ variable "vm_host_count" { variable "vm_sku_size" { type = string - description = "VM SKU size for the host. Examples: Standard_D8s_v3 (general purpose), Standard_NV36ads_A10_v5 (GPU workload)" - default = "Standard_D8s_v3" + description = "VM SKU size for the host. 
Examples: Standard_D8s_v6 (general purpose), Standard_NV36ads_A10_v5 (GPU workload)" + default = "Standard_D8s_v6" } variable "vm_priority" { diff --git a/blueprints/only-cloud-single-node-cluster/terraform/README.md b/blueprints/only-cloud-single-node-cluster/terraform/README.md index e19cb05b..5bfe57d4 100644 --- a/blueprints/only-cloud-single-node-cluster/terraform/README.md +++ b/blueprints/only-cloud-single-node-cluster/terraform/README.md @@ -43,7 +43,7 @@ This blueprint deploys a complete end-to-end cloud environment as preparation fo | nat\_gateway\_zones | Availability zones for NAT gateway resources when zone-redundancy is required (example: ['1','2']) | `list(string)` | `[]` | no | | node\_count | Number of nodes for the agent pool in the AKS cluster. | `number` | `1` | no | | node\_pools | Additional node pools for the AKS cluster. Map key is used as the node pool name. | ```map(object({ node_count = number vm_size = string subnet_address_prefixes = list(string) pod_subnet_address_prefixes = list(string) node_taints = optional(list(string), []) enable_auto_scaling = optional(bool, false) min_count = optional(number, null) max_count = optional(number, null) }))``` | `{}` | no | -| node\_vm\_size | VM size for the agent pool in the AKS cluster. Default is Standard\_D8ds\_v5. | `string` | `"Standard_D8ds_v5"` | no | +| node\_vm\_size | VM size for the agent pool in the AKS cluster. Default is Standard\_D8ds\_v6. | `string` | `"Standard_D8ds_v6"` | no | | resource\_group\_name | Name of the resource group | `string` | `null` | no | | should\_create\_aks | Should create Azure Kubernetes Service. Default is false. 
| `bool` | `false` | no | | should\_create\_azure\_functions | Whether to create the Azure Functions resources including App Service Plan | `bool` | `false` | no | diff --git a/blueprints/only-cloud-single-node-cluster/terraform/main.tf b/blueprints/only-cloud-single-node-cluster/terraform/main.tf index 3b4ec96b..0db25baf 100644 --- a/blueprints/only-cloud-single-node-cluster/terraform/main.tf +++ b/blueprints/only-cloud-single-node-cluster/terraform/main.tf @@ -38,6 +38,8 @@ module "cloud_security_identity" { should_create_key_vault_private_endpoint = var.should_enable_private_endpoints key_vault_private_endpoint_subnet_id = var.should_enable_private_endpoints ? module.cloud_networking.subnet_id : null key_vault_virtual_network_id = var.should_enable_private_endpoints ? module.cloud_networking.virtual_network.id : null + log_analytics_workspace_id = module.cloud_observability.log_analytics_workspace.id + should_enable_diagnostic_settings = true } module "cloud_observability" { @@ -76,7 +78,9 @@ module "cloud_messaging" { resource_prefix = var.resource_prefix instance = var.instance - should_create_azure_functions = var.should_create_azure_functions + should_create_azure_functions = var.should_create_azure_functions + log_analytics_workspace_id = module.cloud_observability.log_analytics_workspace.id + should_enable_diagnostic_settings = true } module "cloud_networking" { @@ -126,6 +130,8 @@ module "cloud_acr" { should_create_acr_private_endpoint = var.should_enable_private_endpoints default_outbound_access_enabled = local.default_outbound_access_enabled should_enable_nat_gateway = var.should_enable_managed_outbound_access + log_analytics_workspace_id = module.cloud_observability.log_analytics_workspace.id + should_enable_diagnostic_settings = true } module "cloud_kubernetes" { diff --git a/blueprints/only-cloud-single-node-cluster/terraform/variables.tf b/blueprints/only-cloud-single-node-cluster/terraform/variables.tf index 871a9770..eb682117 100644 --- 
a/blueprints/only-cloud-single-node-cluster/terraform/variables.tf +++ b/blueprints/only-cloud-single-node-cluster/terraform/variables.tf @@ -53,8 +53,8 @@ variable "node_count" { variable "node_vm_size" { type = string - description = "VM size for the agent pool in the AKS cluster. Default is Standard_D8ds_v5." - default = "Standard_D8ds_v5" + description = "VM size for the agent pool in the AKS cluster. Default is Standard_D8ds_v6." + default = "Standard_D8ds_v6" } variable "enable_auto_scaling" { diff --git a/blueprints/only-output-cncf-cluster-script/terraform/README.md b/blueprints/only-output-cncf-cluster-script/terraform/README.md index c396ef67..f093117b 100644 --- a/blueprints/only-output-cncf-cluster-script/terraform/README.md +++ b/blueprints/only-output-cncf-cluster-script/terraform/README.md @@ -33,25 +33,27 @@ them to Key Vault as secrets for secure storage and retrieval. ## Inputs -| Name | Description | Type | Default | Required | -|--------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------|---------|:--------:| -| environment | Environment for all resources in this module: dev, test, or prod | `string` | n/a | yes | -| resource\_prefix | Prefix for all resources in this module | `string` | n/a | yes | -| aio\_resource\_group\_name | The name of the Resource Group that will be used to connect the new cluster to Azure Arc. Otherwise, 'rg-{var.resource\_prefix}-{var.environment}-{var.instance}'. Does not need to exist for output script. | `string` | `null` | no | -| arc\_onboarding\_identity\_name | The Principal ID for the identity that will be used for onboarding the cluster to Arc. 
| `string` | `null` | no | -| arc\_onboarding\_sp | n/a | ```object({ client_id = string object_id = string client_secret = string })``` | `null` | no | -| cluster\_admin\_oid | The Object ID that will be given cluster-admin permissions with the new cluster. (Otherwise, current logged in user Object ID if 'should\_add\_current\_user\_cluster\_admin=true') | `string` | `null` | no | -| cluster\_admin\_upn | The User Principal Name that will be given cluster-admin permissions with the new cluster. (Otherwise, current logged in user UPN if 'should\_add\_current\_user\_cluster\_admin=true') | `string` | `null` | no | -| cluster\_server\_host\_machine\_username | Username used for the host machines that will be given kube-config settings on setup. (Otherwise, 'resource\_prefix' if it exists as a user) | `string` | `null` | no | -| custom\_locations\_oid | The object id of the Custom Locations Entra ID application for your tenant. If none is provided, the script will attempt to retrieve this requiring 'Application.Read.All' or 'Directory.Read.All' permissions. ```sh az ad sp show --id bc313c14-388c-4e7d-a58e-70017303ee3b --query id -o tsv``` | `string` | `null` | no | -| enable\_arc\_auto\_upgrade | Enable or disable auto-upgrades of Arc agents. (Otherwise, 'false' for 'env=prod' else 'true' for all other envs). | `bool` | `null` | no | -| instance | Instance identifier for naming resources: 001, 002, etc | `string` | `"001"` | no | -| key\_vault\_name | The name of the Key Vault to store secrets. If not provided, defaults to 'kv-{resource\_prefix}-{environment}-{instance}' | `string` | `null` | no | -| script\_output\_filepath | The location of where to write out the script file. (Otherwise, '{path.root}/out') | `string` | `null` | no | -| should\_add\_current\_user\_cluster\_admin | Gives the current logged in user cluster-admin permissions with the new cluster. 
| `bool` | `true` | no | -| should\_assign\_roles | Whether to assign Key Vault roles to identity or service principal. | `bool` | `false` | no | -| should\_get\_custom\_locations\_oid | Whether to get Custom Locations Object ID using Terraform's azuread provider. (Otherwise, provided by 'custom\_locations\_oid' or `az connectedk8s enable-features` for custom-locations on cluster setup if not provided.) | `bool` | `true` | no | -| should\_output\_cluster\_node\_script | Whether to write out the script for setting up cluster node host machines. (Needed for multi-node clusters) | `bool` | `false` | no | -| should\_output\_cluster\_server\_script | Whether to write out the script for setting up the cluster server host machine. | `bool` | `true` | no | -| should\_upload\_to\_key\_vault | Whether to upload the scripts to Key Vault as secrets. | `bool` | `false` | no | +| Name | Description | Type | Default | Required | +|--------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------|----------|:--------:| +| environment | Environment for all resources in this module: dev, test, or prod | `string` | n/a | yes | +| resource\_prefix | Prefix for all resources in this module | `string` | n/a | yes | +| aio\_resource\_group\_name | The name of the Resource Group that will be used to connect the new cluster to Azure Arc. Otherwise, 'rg-{var.resource\_prefix}-{var.environment}-{var.instance}'. Does not need to exist for output script. | `string` | `null` | no | +| arc\_onboarding\_identity\_name | The Principal ID for the identity that will be used for onboarding the cluster to Arc. 
| `string` | `null` | no | +| arc\_onboarding\_sp | n/a | ```object({ client_id = string object_id = string client_secret = string })``` | `null` | no | +| cluster\_admin\_group\_oid | The Entra ID group Object ID that will be given cluster-admin permissions and Azure Arc RBAC access for 'az connectedk8s proxy' | `string` | `null` | no | +| cluster\_admin\_oid | The Object ID that will be given cluster-admin permissions with the new cluster. (Otherwise, current logged in user Object ID if 'should\_add\_current\_user\_cluster\_admin=true') | `string` | `null` | no | +| cluster\_admin\_oid\_type | The principal type of cluster\_admin\_oid for Azure RBAC assignments. Ignored when using current user (defaults to 'User') | `string` | `"User"` | no | +| cluster\_admin\_upn | The User Principal Name that will be given cluster-admin permissions with the new cluster. (Otherwise, current logged in user UPN if 'should\_add\_current\_user\_cluster\_admin=true') | `string` | `null` | no | +| cluster\_server\_host\_machine\_username | Username used for the host machines that will be given kube-config settings on setup. (Otherwise, 'resource\_prefix' if it exists as a user) | `string` | `null` | no | +| custom\_locations\_oid | The object id of the Custom Locations Entra ID application for your tenant. If none is provided, the script will attempt to retrieve this requiring 'Application.Read.All' or 'Directory.Read.All' permissions. ```sh az ad sp show --id bc313c14-388c-4e7d-a58e-70017303ee3b --query id -o tsv``` | `string` | `null` | no | +| enable\_arc\_auto\_upgrade | Enable or disable auto-upgrades of Arc agents. (Otherwise, 'false' for 'env=prod' else 'true' for all other envs). | `bool` | `null` | no | +| instance | Instance identifier for naming resources: 001, 002, etc | `string` | `"001"` | no | +| key\_vault\_name | The name of the Key Vault to store secrets. 
If not provided, defaults to 'kv-{resource\_prefix}-{environment}-{instance}' | `string` | `null` | no | +| script\_output\_filepath | The location of where to write out the script file. (Otherwise, '{path.root}/out') | `string` | `null` | no | +| should\_add\_current\_user\_cluster\_admin | Gives the current logged in user cluster-admin permissions with the new cluster. | `bool` | `true` | no | +| should\_assign\_roles | Whether to assign Key Vault roles to identity or service principal. | `bool` | `false` | no | +| should\_get\_custom\_locations\_oid | Whether to get Custom Locations Object ID using Terraform's azuread provider. (Otherwise, provided by 'custom\_locations\_oid' or `az connectedk8s enable-features` for custom-locations on cluster setup if not provided.) | `bool` | `true` | no | +| should\_output\_cluster\_node\_script | Whether to write out the script for setting up cluster node host machines. (Needed for multi-node clusters) | `bool` | `false` | no | +| should\_output\_cluster\_server\_script | Whether to write out the script for setting up the cluster server host machine. | `bool` | `true` | no | +| should\_upload\_to\_key\_vault | Whether to upload the scripts to Key Vault as secrets. 
| `bool` | `false` | no | diff --git a/blueprints/only-output-cncf-cluster-script/terraform/main.tf b/blueprints/only-output-cncf-cluster-script/terraform/main.tf index 08910445..5c3b0c34 100644 --- a/blueprints/only-output-cncf-cluster-script/terraform/main.tf +++ b/blueprints/only-output-cncf-cluster-script/terraform/main.tf @@ -48,7 +48,9 @@ module "edge_cncf_cluster" { should_add_current_user_cluster_admin = var.should_add_current_user_cluster_admin should_assign_roles = var.should_assign_roles cluster_admin_oid = var.cluster_admin_oid + cluster_admin_oid_type = var.cluster_admin_oid_type cluster_admin_upn = var.cluster_admin_upn + cluster_admin_group_oid = var.cluster_admin_group_oid script_output_filepath = var.script_output_filepath should_get_custom_locations_oid = var.should_get_custom_locations_oid diff --git a/blueprints/only-output-cncf-cluster-script/terraform/variables.tf b/blueprints/only-output-cncf-cluster-script/terraform/variables.tf index 4c17a02d..ab0f1a68 100644 --- a/blueprints/only-output-cncf-cluster-script/terraform/variables.tf +++ b/blueprints/only-output-cncf-cluster-script/terraform/variables.tf @@ -106,12 +106,28 @@ variable "cluster_admin_oid" { default = null } +variable "cluster_admin_oid_type" { + type = string + description = "The principal type of cluster_admin_oid for Azure RBAC assignments. Ignored when using current user (defaults to 'User')" + default = "User" + validation { + condition = contains(["User", "Group", "ServicePrincipal"], var.cluster_admin_oid_type) + error_message = "Must be one of: User, Group, ServicePrincipal" + } +} + variable "cluster_admin_upn" { type = string description = "The User Principal Name that will be given cluster-admin permissions with the new cluster. 
(Otherwise, current logged in user UPN if 'should_add_current_user_cluster_admin=true')" default = null } +variable "cluster_admin_group_oid" { + type = string + description = "The Entra ID group Object ID that will be given cluster-admin permissions and Azure Arc RBAC access for 'az connectedk8s proxy'" + default = null +} + variable "should_output_cluster_server_script" { type = bool description = "Whether to write out the script for setting up the cluster server host machine." diff --git a/blueprints/partial-single-node-cluster/terraform/README.md b/blueprints/partial-single-node-cluster/terraform/README.md index 800170b3..b261346a 100644 --- a/blueprints/partial-single-node-cluster/terraform/README.md +++ b/blueprints/partial-single-node-cluster/terraform/README.md @@ -37,6 +37,7 @@ This blueprint will: | environment | Environment for all resources in this module: dev, test, or prod | `string` | n/a | yes | | location | Azure region where all resources will be deployed | `string` | n/a | yes | | resource\_prefix | Prefix for all resources in this module | `string` | n/a | yes | +| cluster\_admin\_group\_oid | The Entra ID group Object ID that will be given cluster-admin permissions and Azure Arc RBAC access for 'az connectedk8s proxy' | `string` | `null` | no | | custom\_locations\_oid | The object id of the Custom Locations Entra ID application for your tenant. If none is provided, the script will attempt to retrieve this requiring 'Application.Read.All' or 'Directory.Read.All' permissions. ```sh az ad sp show --id bc313c14-388c-4e7d-a58e-70017303ee3b --query id -o tsv``` | `string` | `null` | no | | instance | Instance identifier for naming resources: 001, 002, etc | `string` | `"001"` | no | | should\_add\_current\_user\_cluster\_admin | Gives the current logged in user cluster-admin permissions with the new cluster. 
| `bool` | `true` | no | diff --git a/blueprints/partial-single-node-cluster/terraform/main.tf b/blueprints/partial-single-node-cluster/terraform/main.tf index d21dbd87..e67a54ab 100644 --- a/blueprints/partial-single-node-cluster/terraform/main.tf +++ b/blueprints/partial-single-node-cluster/terraform/main.tf @@ -85,6 +85,7 @@ module "edge_cncf_cluster" { should_get_custom_locations_oid = var.should_get_custom_locations_oid custom_locations_oid = var.custom_locations_oid should_add_current_user_cluster_admin = var.should_add_current_user_cluster_admin + cluster_admin_group_oid = var.cluster_admin_group_oid // Key Vault configuration key_vault = module.cloud_security_identity.key_vault diff --git a/blueprints/partial-single-node-cluster/terraform/variables.tf b/blueprints/partial-single-node-cluster/terraform/variables.tf index b16fcc72..51ea351d 100644 --- a/blueprints/partial-single-node-cluster/terraform/variables.tf +++ b/blueprints/partial-single-node-cluster/terraform/variables.tf @@ -63,6 +63,12 @@ variable "should_add_current_user_cluster_admin" { default = true } +variable "cluster_admin_group_oid" { + type = string + description = "The Entra ID group Object ID that will be given cluster-admin permissions and Azure Arc RBAC access for 'az connectedk8s proxy'" + default = null +} + variable "should_enable_private_endpoints" { type = bool description = "Whether to enable private endpoints for Key Vault and Storage Account" diff --git a/blueprints/robotics/terraform/README.md b/blueprints/robotics/terraform/README.md index 7ff56010..154ab164 100644 --- a/blueprints/robotics/terraform/README.md +++ b/blueprints/robotics/terraform/README.md @@ -41,7 +41,7 @@ and optional Azure Machine Learning integration. | min\_count | The minimum number of nodes which should exist in the default node pool. 
Valid values are between 0 and 1000 | `number` | `null` | no | | node\_count | Number of nodes for the agent pool in the AKS cluster | `number` | `1` | no | | node\_pools | Additional node pools for the AKS cluster. Map key is used as the node pool name | ```map(object({ node_count = optional(number, null) vm_size = string subnet_address_prefixes = list(string) pod_subnet_address_prefixes = list(string) node_taints = optional(list(string), []) enable_auto_scaling = optional(bool, false) min_count = optional(number, null) max_count = optional(number, null) priority = optional(string, "Regular") zones = optional(list(string), null) eviction_policy = optional(string, "Deallocate") gpu_driver = optional(string, null) }))``` | `{}` | no | -| node\_vm\_size | VM size for the agent pool in the AKS cluster. Default is Standard\_D8ds\_v5 | `string` | `"Standard_D8ds_v5"` | no | +| node\_vm\_size | VM size for the agent pool in the AKS cluster. Default is Standard\_D8ds\_v6 | `string` | `"Standard_D8ds_v6"` | no | | postgresql\_admin\_password | Administrator password for PostgreSQL server. (Otherwise, generated when postgresql\_should\_generate\_admin\_password is true). | `string` | `null` | no | | postgresql\_admin\_username | Administrator username for PostgreSQL server | `string` | `"pgadmin"` | no | | postgresql\_databases | Map of databases to create with collation and charset | ```map(object({ collation = string charset = string }))``` | `null` | no | @@ -94,7 +94,7 @@ and optional Azure Machine Learning integration. 
| vm\_host\_count | Number of VM hosts to create | `number` | `1` | no | | vm\_max\_bid\_price | Maximum hourly price for Spot VM (-1 for Azure default) | `number` | `-1` | no | | vm\_priority | VM priority: Regular or Spot for cost optimization | `string` | `"Regular"` | no | -| vm\_sku\_size | VM SKU size for the host | `string` | `"Standard_D8s_v3"` | no | +| vm\_sku\_size | VM SKU size for the host | `string` | `"Standard_D8s_v6"` | no | | vpn\_site\_connections | Site-to-site VPN site definitions for connecting on-premises networks | ```list(object({ name = string address_spaces = list(string) shared_key_reference = string gateway_ip_address = optional(string) gateway_fqdn = optional(string) bgp_asn = optional(number) bgp_peering_address = optional(string) ike_protocol = optional(string, "IKEv2") }))``` | `[]` | no | | vpn\_site\_default\_ipsec\_policy | Fallback IPsec policy applied when vpn\_site\_connections omit ipsec\_policy overrides | ```object({ dh_group = string ike_encryption = string ike_integrity = string ipsec_encryption = string ipsec_integrity = string pfs_group = string sa_datasize_kb = optional(number) sa_lifetime_seconds = optional(number) })``` | `null` | no | | vpn\_site\_shared\_keys | Pre-shared keys for site-to-site VPN connections indexed by connection name | `map(string)` | `{}` | no | diff --git a/blueprints/robotics/terraform/variables.tf b/blueprints/robotics/terraform/variables.tf index d2226a5f..c45295eb 100644 --- a/blueprints/robotics/terraform/variables.tf +++ b/blueprints/robotics/terraform/variables.tf @@ -320,8 +320,8 @@ variable "subnet_address_prefixes_aks_pod" { variable "node_vm_size" { type = string - description = "VM size for the agent pool in the AKS cluster. Default is Standard_D8ds_v5" - default = "Standard_D8ds_v5" + description = "VM size for the agent pool in the AKS cluster. 
Default is Standard_D8ds_v6" + default = "Standard_D8ds_v6" } variable "node_count" { @@ -510,7 +510,7 @@ variable "vm_host_count" { variable "vm_sku_size" { type = string description = "VM SKU size for the host" - default = "Standard_D8s_v3" + default = "Standard_D8s_v6" } variable "vm_priority" { diff --git a/docs/getting-started/README.md b/docs/getting-started/README.md index a10c3ef4..8287e8d0 100644 --- a/docs/getting-started/README.md +++ b/docs/getting-started/README.md @@ -47,6 +47,12 @@ Welcome to the AI on Edge Flagship Accelerator! This guide helps you choose the **Perfect for:** Platform engineers, open source contributors, and teams extending platform capabilities +## Scenario Deployment Guides + +End-to-end deployment walkthroughs for specific use cases combining multiple components: + +- **[Leak Detection Scenario on full-single-node-cluster](leak-detection-scenario.md)** — Deploy a vision-based leak detection scenario built on top of the full-single-node-cluster blueprint with edge AI inference, video capture, and cloud alerting (~2 hours) + ## 🎓 Accelerate Your Learning **New to edge AI development?** Our [Learning Platform](../../learning/) provides hands-on training: diff --git a/docs/getting-started/leak-detection-scenario.md b/docs/getting-started/leak-detection-scenario.md new file mode 100644 index 00000000..c945de0b --- /dev/null +++ b/docs/getting-started/leak-detection-scenario.md @@ -0,0 +1,286 @@ +--- +title: Deploy a Leak Detection Scenario on full-single-node-cluster +description: End-to-end deployment of a vision-based leak detection scenario built on the full-single-node-cluster blueprint, using edge AI inference, video capture, and cloud alerting +author: Edge AI Team +ms.date: 2026-03-12 +ms.topic: tutorial +estimated_reading_time: 60 +keywords: + - leak detection + - vision + - inference + - video capture + - edge AI + - scenario deployment +--- + +## Deploy a Leak Detection Scenario on full-single-node-cluster + +This guide 
walks through a vision-based leak detection scenario built on top of the [`full-single-node-cluster`](../../blueprints/full-single-node-cluster/README.md) blueprint. There is no dedicated `leak-detection` blueprint; the scenario is enabled by applying the `full-single-node-cluster` Terraform with the provided `leak-detection.tfvars.example` and supporting CI/CD scripts. + +The pipeline captures camera frames at the edge, runs AI inference for leak detection, routes alerts to Microsoft Teams, and stores video clips for review. + +**Total time:** ~2 hours (including infrastructure provisioning) + +### Overview + +#### Architecture + +```mermaid +graph LR + CAM[Camera / RTSP Source] + MC[508-media-connector] + INF[507-ai-inference] + MQTT((MQTT Broker)) + DF[130-messaging Dataflows] + EH[Event Hub] + FN[Azure Functions] + NOTIFY[045-notification → Teams] + CAP[503-media-capture] + VQ[520-video-query-api] + BLOB[(Blob Storage)] + + CAM -->|RTSP frames| MC + MC -->|Frames via MQTT| MQTT + MQTT -->|Inference input| INF + INF -->|ALERT events| MQTT + MQTT -->|Dataflow routing| DF + DF -->|Alert events| EH + EH --> FN + EH --> NOTIFY + NOTIFY -->|Teams message| TEAMS[Microsoft Teams] + INF -->|Capture trigger| CAP + CAP -->|Video clips| BLOB + BLOB -->|Query API| VQ +``` + +#### Component Map + +| Component | Name | Role in Pipeline | +|-----------|-------------------|--------------------------------------------------------------| +| 508 | media-connector | Captures RTSP/ONVIF camera frames, publishes to MQTT | +| 507 | ai-inference | Runs ONNX leak detection model on frames, emits ALERT events | +| 503 | media-capture | Records video clips to blob storage on alert trigger | +| 509 | sse-connector | Server-Sent Events connector for real-time UI streaming | +| 130 | messaging | Dataflows routing ALERT events from MQTT to Event Hub | +| 045 | notification | Logic App deduplicating alerts and posting to Teams | +| 040 | messaging (cloud) | Event Hub and Event Grid for 
cloud-side event processing | +| 520 | video-query-api | REST API for querying stored video captures | + +#### Data Flow + +1. **Camera Ingestion** — 508-media-connector captures frames from ONVIF/RTSP cameras via Akri connectors and publishes them to the MQTT broker +2. **AI Inference** — 507-ai-inference consumes frames, runs the ONNX leak detection model, and publishes ALERT events back to MQTT +3. **Edge Routing** — 130-messaging dataflows route ALERT events from MQTT to Event Hub +4. **Cloud Processing** — Azure Functions process events; 045-notification deduplicates and posts to Teams +5. **Video Capture** — 503-media-capture stores video clips to blob storage for later review via 520-video-query-api + +### Prerequisites + +* **Azure subscription** with Contributor access +* **Azure CLI** authenticated (`az login`) +* **Terraform** >= 1.9.8 +* **Docker** installed and running +* **kubectl** configured for your cluster +* **jq** installed for JSON processing +* **Basic understanding** of Azure IoT Operations — see the [General User Guide](general-user.md) for orientation + +### Phase 1: Deploy Infrastructure + +**Estimated time:** ~20 minutes + provisioning + +The `blueprints/full-single-node-cluster/terraform/` directory contains the infrastructure-as-code for this scenario. A dedicated variable file `leak-detection.tfvars.example` enables the leak-detection-specific components. + +#### Configure Variables + +```bash +source scripts/az-sub-init.sh +cd blueprints/full-single-node-cluster/terraform +cp leak-detection.tfvars.example leak-detection.tfvars +``` + +Edit `leak-detection.tfvars` with your environment values. 
Key variables to set: + +* `environment` — Deployment environment name (e.g., `dev`) +* `resource_prefix` — Prefix for all resource names (e.g., `leakdet`) +* `location` — Azure region (e.g., `westus3`) +* `instance` — Instance identifier (e.g., `001`) +* `teams_recipient_id` — Your Teams chat or channel thread ID for alert notifications + +#### Deploy + +```bash +terraform init +terraform apply -var-file=leak-detection.tfvars +``` + +#### Verify Outputs + +After deployment completes, verify the key resources: + +```bash +terraform output deployment_summary +``` + +Confirm the following resources are provisioned: + +* Resource group +* Virtual network and subnets +* Key Vault and managed identities +* Storage account and Schema Registry +* Event Hub namespace with alert Event Hub +* Container Registry +* VM host with K3s cluster connected to Arc +* IoT Operations instance with assets and dataflows + +### Phase 2: Build and Push Application Images + +**Estimated time:** ~30 minutes + +Application container images must be built and pushed to the Azure Container Registry created in Phase 1. + +#### Option A: Automated Build + +```bash +cd blueprints/full-single-node-cluster + +../../src/501-ci-cd/scripts/build-leak-detection-images.sh \ + --acr-name "$(cd terraform && terraform output -json container_registry | jq -r .name)" \ + --resource-group "$(cd terraform && terraform output -json deployment_summary | jq -r .resource_group)" +``` + +#### Option B: Manual Build + +For each application component (507-ai-inference, 508-media-connector, 503-media-capture, 509-sse-connector): + +```bash +ACR_NAME=$(cd blueprints/full-single-node-cluster/terraform && terraform output -json container_registry | jq -r .name) + +az acr login --name "$ACR_NAME" + +docker build -t "$ACR_NAME.azurecr.io/507-ai-inference:latest" \ + ../../src/500-application/507-ai-inference/ + +docker push "$ACR_NAME.azurecr.io/507-ai-inference:latest" +``` + +Repeat for each application image.
+ +#### Verify Images + +```bash +az acr repository list --name "$ACR_NAME" --output table +``` + +### Phase 3: Deploy Kubernetes Workloads + +**Estimated time:** ~15 minutes + +#### Option A: Automated Deployment + +```bash +cd blueprints/full-single-node-cluster + +../../src/501-ci-cd/scripts/deploy-leak-detection-apps.sh +``` + +#### Option B: Manual Deployment + +Apply manifests in dependency order: + +```bash +kubectl apply -f ../../src/500-application/508-media-connector/kubernetes/ +kubectl apply -f ../../src/500-application/507-ai-inference/kubernetes/ +kubectl apply -f ../../src/500-application/503-media-capture/kubernetes/ +kubectl apply -f ../../src/500-application/509-sse-connector/kubernetes/ +``` + +#### Verify Pods + +```bash +kubectl get pods -n azure-iot-operations +``` + +All application pods should reach `Running` status. + +### Phase 4: Configure IoT Operations + +**Estimated time:** ~15 minutes + +#### Camera Asset Definitions + +Camera assets are configured through the 111-assets component deployed in Phase 1. Verify the asset definitions: + +```bash +kubectl get assets -n azure-iot-operations +``` + +#### MQTT Topic Routing + +Verify MQTT topics are configured for the inference pipeline: + +* Input topic: frames from 508-media-connector +* Output topic: ALERT events from 507-ai-inference +* Dataflow routing: ALERT events forwarded to Event Hub + +#### Dataflow Verification + +Confirm the dataflow resources are active: + +```bash +kubectl get dataflows -n azure-iot-operations +``` + +### Phase 5: Validate End-to-End + +**Estimated time:** ~10 minutes + +#### Test Event Flow + +1. Verify camera frames are being captured: + + ```bash + kubectl logs -n azure-iot-operations -l app=media-connector --tail=20 + ``` + +2. 
Verify inference is processing frames: + + ```bash + kubectl logs -n azure-iot-operations -l app=ai-inference --tail=20 + ``` + +#### Check Notifications + +Trigger a test event and verify the alert appears in the configured Teams channel. The 045-notification Logic App deduplicates alerts by `camera_id` before posting. + +#### Query Stored Video + +After a capture event: + +```bash +curl -s "https://<video-query-api-endpoint>/api/captures?camera_id=<camera-id>" | jq +``` + +Replace `<video-query-api-endpoint>` and `<camera-id>` with values from your deployment. + +### Troubleshooting + +* **ACR authentication failures** — Run `az acr login --name <acr-name>` and verify the managed identity has `AcrPull` role on the cluster +* **MQTT topic mismatches** — Check the asset definitions in 111-assets match the topic names expected by 507-ai-inference and 508-media-connector +* **kubectl context** — Ensure `kubectl config current-context` points to your Arc-connected K3s cluster +* **Notification webhook not firing** — Verify `teams_recipient_id` in `leak-detection.tfvars` is a valid Teams chat or channel thread ID +* **Pods in CrashLoopBackOff** — Check container image names match the ACR repository names; verify image pull secrets are configured +* **No alert events in Event Hub** — Confirm the 130-messaging dataflows are active and the MQTT topics are correct + +### Known Limitations + +* The 507-ai-inference component ships with a placeholder ONNX model (~0.001 MB). Real leak detection requires a trained industrial safety model. +* Container image builds are local-only. CI/CD automation for image builds is a follow-on item. +* The blueprint assumes a single-node K3s cluster. Multi-node deployments require the `full-multi-node-cluster` blueprint as a base. 
+ +### Next Steps + +* **Customize the inference model** — Replace the placeholder ONNX model in 507-ai-inference with a trained leak detection model +* **Add camera sources** — Extend 111-assets definitions to include additional ONVIF/RTSP cameras +* **Scale to multi-node** — Use the [full-multi-node-cluster](../../blueprints/full-multi-node-cluster/) blueprint as a base, then layer leak detection components +* **Explore the Learning Platform** — Visit the [Learning Platform](../../learning/) for hands-on katas and training labs diff --git a/docs/solution-adr-library/leak-detection-e2e-pipeline-architecture.md b/docs/solution-adr-library/leak-detection-e2e-pipeline-architecture.md new file mode 100644 index 00000000..9a2f7cdc --- /dev/null +++ b/docs/solution-adr-library/leak-detection-e2e-pipeline-architecture.md @@ -0,0 +1,485 @@ +--- +title: End-to-End Leak Detection Pipeline Architecture for Edge AI +description: Architecture Decision Record for implementing a visual leak detection pipeline using Azure IoT Operations on the edge. Covers the end-to-end architecture from camera ingestion through on-site AI inference to cloud notification, with analysis of substitutable components including inference models, camera connectors, and notification channels. +author: Edge AI Team +ms.date: 2026-03-09 +ms.topic: architecture +estimated_reading_time: 15 +keywords: + - leak-detection + - edge-ai + - azure-iot-operations + - inference-pipeline + - onnx + - yolov8 + - sse-connector + - media-connector + - onvif + - rtsp + - mqtt-broker + - eventhub + - notification + - logic-app + - oil-and-gas + - energy-utilities + - computer-vision + - architecture-decision-record + - adr +--- + +## Status + +- [X] Draft +- [ ] Proposed +- [ ] Accepted +- [ ] Deprecated + +## Context + +Pipeline operators in oil & gas, water utilities, and industrial facilities require continuous, real-time visibility into infrastructure integrity. 
+Manual inspections are infrequent, cover limited ground, and miss slow-developing leaks. +A single major leak event can cost $100M+ in remediation, fines, and reputational damage. +Operators need to detect leaks faster, respond before incidents escalate, and demonstrate regulatory compliance — all while working within the constraints of remote sites with intermittent connectivity, limited compute, and harsh physical environments. + +The Edge AI accelerator provides reusable infrastructure components for building edge AI solutions on Azure IoT Operations. +This ADR documents how those components are composed into an end-to-end leak detection pipeline — and where the architecture supports substitution so that Forward Deployment Engineers (FDEs) can adapt the pipeline to customer-specific requirements. + +### Business Drivers + +The following drivers shape the architecture (sourced from BDR-001): + +- **Detect leaks faster**: Reduce mean time to detection by ~70% compared to manual inspection cycles +- **Operate without cloud dependency**: Core detection and alerting must function on-site with no cloud round-trip +- **Support model flexibility**: Operators may bring their own models or require vendor-neutral model hosting +- **Accommodate diverse camera setups**: Deployment sites vary in camera types, protocols, and capabilities +- **Build operator trust**: Every alert must include visual evidence (timestamp, camera ID, bounding box, confidence score) +- **Enable manage-by-exception**: Replace routine site visits with continuous AI-based monitoring and Remote Operations Centre awareness + +### Product Design Constraints + +The PDR-001 defines the accelerator as a **narrow, opinionated inference pipeline** — from camera frame to alert — with explicit extensibility points where customers integrate, replace, or extend capabilities. The accelerator owns the detection path; severity classification, escalation, dispatch, and compliance are customer-owned. 
+ +### Scope + +This ADR addresses the architectural question: + +> **How should an FDE architect a visual leak detection pipeline using Azure IoT Operations on the edge, given that the inference model, camera ingestion method, and notification channel are substitutable?** + +The decision covers five pipeline layers: + +1. **Camera ingestion** — How camera feeds enter the system +2. **On-site inference** — How frames are processed for leak detection +3. **On-site messaging** — How components communicate on the edge +4. **Cloud routing** — How detection events reach cloud services +5. **Notification** — How operators are alerted + +This ADR is scoped to **single-node deployments** — one Kubernetes cluster per site running all pipeline components on a single VM. Multi-site and multi-node deployment topologies require additional triage and are not covered here. + +## Decision + +Implement the leak detection pipeline as a five-layer architecture deployed on a single-node Azure IoT Operations cluster, where each layer is independently substitutable: + +```text +┌────────────────────────────────────────────────────────────────────────┐ +│ EDGE (On-Site) │ +│ │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────────────────┐ │ +│ │ IP Camera │ │ Analytics │ │ Camera Simulator │ │ +│ │ (RTSP) │ │ Camera (SSE)│ │ (ONVIF/RTSP) │ │ +│ └──────┬───────┘ └──────┬───────┘ └────────────┬─────────────┘ │ +│ │ │ │ │ +│ ▼ ▼ ▼ │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────────────────┐ │ +│ │ Media │ │ SSE │ │ ONVIF │ │ +│ │ Connector │ │ Connector │ │ Connector │ │ +│ │ (508) │ │ (509) │ │ (510) │ │ +│ └──────┬───────┘ └──────┬───────┘ └────────────┬─────────────┘ │ +│ │ Snapshots │ Events │ Events │ +│ ▼ ▼ ▼ │ +│ ┌─────────────────────────────────────────────────────────────────┐ │ +│ │ AIO MQTT Broker (FC-03) │ │ +│ │ Topics: │ │ +│ │ snapshots/{site}/{camera}/image (QoS 0) │ │ +│ │ events/{site}/{camera}/heartbeat (QoS 0) │ │ +│ │ alerts/{site}/{camera}/leak/dlqc (QoS 1) 
│ │ +│ │ alerts/{site}/{camera}/leak/basic (QoS 1) │ │ +│ │ edge-ai/+/+/+/inference/onnx/# (QoS 1) │ │ +│ └──────────────────────────┬──────────────────────────────────────┘ │ +│ │ │ +│ ┌───────────────────┼────────────────────┐ │ +│ ▼ │ ▼ │ +│ ┌──────────────┐ │ ┌──────────────────────────┐ │ +│ │ AI Edge │ │ │ Media Capture Service │ │ +│ │ Inference │ │ │ (503) │ │ +│ │ (507) │ │ │ Evidence snapshots │ │ +│ │ ONNX model │ │ │ → ACSA cloud storage │ │ +│ └──────┬───────┘ │ └──────────────────────────┘ │ +│ │ Detection results │ │ +│ ▼ │ │ +│ ┌──────────────────────────┴──────────────────────────────────────┐ │ +│ │ AIO Dataflow Engine │ │ +│ │ EventHub dataflows: edge-ai/+/+/+/inference/onnx/# │ │ +│ └──────────────────────────┬──────────────────────────────────────┘ │ +│ │ │ +└─────────────────────────────┼──────────────────────────────────────────┘ + │ Detection events + ▼ +┌────────────────────────────────────────────────────────────────────────┐ +│ CLOUD (Azure) │ +│ │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────────────────┐ │ +│ │ Azure │ │ Logic App │ │ Azure Blob Storage │ │ +│ │ Event Hub │───▶│ (Stateful │ │ (Evidence snapshots) │ │ +│ │ │ │ dedup) │ │ │ │ +│ └──────────────┘ └──────┬───────┘ └──────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌──────────────┐ │ +│ │ Microsoft │ │ +│ │ Teams │ │ +│ │ (Alert) │ │ +│ └──────────────┘ │ +│ │ +│ ┌─────────────────────────────────────────────────────────────────┐ │ +│ │ Observability: Grafana · Log Analytics · Azure Monitor │ │ +│ └─────────────────────────────────────────────────────────────────┘ │ +│ │ +└────────────────────────────────────────────────────────────────────────┘ +``` + +### Reference Implementation + +The `blueprints/full-single-node-cluster` blueprint (applied with `leak-detection.tfvars.example`) implements this architecture using: + +| Layer | Reference Implementation | Component | 
+|-------------------|--------------------------------------------------------------------------|---------------------------------------------------| +| Camera ingestion | ONVIF Camera Simulator (simulated RTSP) + Media Connector (snapshotting) | onvif-camera-simulator, 503-media-capture-service | +| On-site inference | AI Edge Inference with YOLOv8n ONNX model (server-side) | 507-ai-inference | +| On-site messaging | AIO MQTT Broker with structured topic hierarchy | 110-iot-ops | +| Cloud routing | AIO EventHub Dataflows | 130-messaging | +| Notification | Logic App → Microsoft Teams with Table Storage deduplication | 045-notification | + +The current end-to-end pipeline uses a **simulated RTSP camera** — the ONVIF Camera Simulator — which produces real H.264 RTSP streams from JPEG or MP4 sources. +The Media Connector ingests these RTSP streams, extracts JPEG snapshots at a configurable interval, and publishes them to the MQTT broker for server-side inference by the AI Edge Inference service. +The SSE Connector (509-sse-connector) is also deployed and available as an alternative ingestion path for analytics cameras with onboard inference, but the primary detection flow runs through RTSP snapshotting. + +### Substitutable Components + +The architecture is designed so that each layer can be swapped independently. The MQTT broker is the integration backbone — components are decoupled through well-defined topic contracts. + +## Decision Drivers + +1. **Cloud independence for core detection** (TR-01): The on-site pipeline must detect leaks and produce alerts without any cloud connectivity. Cloud is used for notification routing and analytics, not for detection. +2. **Component modularity** (NFR-07, PDR EXT-01 through EXT-08): Each pipeline stage must be independently replaceable so FDEs can adapt to customer camera types, model preferences, and notification requirements. +3. 
**Operator trust through visual evidence** (BDR §4): Every detection event must carry a timestamp, camera ID, detection type, confidence score, and snapshot with bounding box overlay. +4. **Latency within seconds** (NFR-01): Detection results must be produced within seconds of snapshot extraction, bounded by model inference time. +5. **Alert deduplication** (BDR Q4): A continuous leak must generate a single actionable alert, not repeated notifications. Alert state management (open / acknowledged / closed) prevents operator fatigue. +6. **Disconnected resilience** (NFR-04): If cloud connectivity is lost, detection continues on-site; alerts queue and deliver when connectivity resumes. + +## Considered Options + +### Layer 1: Camera Ingestion + +#### Option A: Media Connector with RTSP Cameras (Server-Side Inference) + +The AIO Media Connector ingests RTSP streams from commodity IP cameras, extracts JPEG snapshots at a configurable interval (default ~5 seconds), and publishes them to MQTT for server-side inference. + +**Pros:** + +- Works with any RTSP-capable IP camera (widest hardware compatibility) +- Proven integration pattern with AIO MQTT broker +- Snapshot interval is configurable; adaptive intervals possible +- Decoupled from inference — multiple models can consume the same snapshot stream +- Camera simulator available for development and demonstration + +**Cons:** + +- Latency limited by snapshot interval (0.5–5 seconds between frames) +- Snapshots may miss fast events occurring between capture intervals +- Higher network bandwidth for JPEG image payloads over MQTT +- Server-side compute bears full inference load + +**Best fit:** Sites with commodity RTSP cameras and no onboard analytics capability. Most common deployment scenario. 
+ +#### Option B: SSE Connector with Analytics Cameras (Camera-Side Inference) + +The SSE Connector maintains a persistent HTTP connection to analytics cameras that perform onboard inference and emit detection events via Server-Sent Events. The connector maps SSE event types (HEARTBEAT, ALERT, ALERT_DLQC) to MQTT topics. + +**Pros:** + +- Near-real-time event delivery (sub-second latency) +- Camera performs inference — reduces edge compute requirements +- Structured event types (HEARTBEAT, ALERT, ALERT_DLQC) with well-defined schemas +- Lower network bandwidth (events, not images) +- Automatic reconnection with built-in SSE retry + +**Cons:** + +- Requires analytics cameras with onboard inference and SSE endpoint (limited hardware selection) +- Camera vendor controls the detection model and confidence thresholds +- Less flexibility to run custom models server-side +- SSE is unidirectional (server to client only) + +**Best fit:** Sites with analytics cameras that have onboard leak detection models and SSE capability. + +#### Option C: ONVIF Connector with PTZ Cameras + +The ONVIF Connector discovers ONVIF-compliant cameras, subscribes to camera events (motion, tampering), controls PTZ operations, and retrieves media stream URIs. Events are published to MQTT. + +**Pros:** + +- Standardised protocol (ONVIF Profile S/T) reduces vendor lock-in +- Device discovery and capability introspection +- PTZ control enables dynamic camera positioning in response to detected events +- Event subscription for motion detection and alarms + +**Cons:** + +- ONVIF event types (motion, tampering) are generic — not leak-specific +- Still requires server-side inference for leak detection +- More complex integration (SOAP-based protocol) +- Not all cameras support the required ONVIF profiles + +**Best fit:** Sites with ONVIF-compliant pan-tilt-zoom cameras where dynamic repositioning adds value to the detection workflow. 
+ +#### Selected Approach: Option A (RTSP + Media Connector) as Primary Detection Path + +The leak detection scenario uses a **simulated RTSP camera** (ONVIF Camera Simulator) with the **Media Connector for snapshotting** as the primary detection path. +The Media Connector extracts JPEG snapshots from the RTSP stream and publishes them to MQTT, where the AI Edge Inference service performs server-side leak detection. +The SSE Connector is deployed alongside as an alternative ingestion path for analytics cameras with onboard detection, but the current end-to-end pipeline exercises the RTSP → snapshot → server-side inference flow. + +FDEs should select the ingestion path based on customer camera capabilities: + +- **Commodity RTSP cameras** (most common): Use Option A for detection and evidence capture — this is the path the reference scenario demonstrates +- **Analytics cameras with SSE**: Use Option B for detection events, Option A for post-event evidence capture +- **ONVIF cameras**: Use Option C for discovery and PTZ, combined with Option A for frame extraction + +### Layer 2: On-Site Inference + +#### Option A: ONNX Runtime with YOLOv8 (Reference Implementation) + +AI Edge Inference service subscribes to MQTT snapshot topics, runs frames through a YOLOv8n ONNX model, and publishes detection results (bounding box, confidence, detection type) back to MQTT. 
+ +**Pros:** + +- ONNX is vendor-neutral and runs on CPU, GPU, or NPU via execution providers +- YOLOv8n is optimised for edge deployment (small model size, fast inference) +- Well-defined model interface contract (input: JPEG image → output: detection JSON) +- Model swap via container redeployment or PVC-based model loading +- Sample water leak detection model provided for demonstration + +**Cons:** + +- General-purpose object detection; not optimised for specific leak types without fine-tuning +- CPU-only inference on standard edge hardware (no GPU acceleration in reference VM) +- Single-model architecture; multi-model requires additional inference instances + +**Best fit:** Most deployments. YOLOv8n provides a strong baseline; customers fine-tune or replace with domain-specific models. + +#### Option B: Analytics Camera Onboard Inference + +Analytics cameras with embedded AI chipsets perform inference on-device and emit structured detection events directly. No server-side inference is required. + +**Pros:** + +- Zero server-side compute for inference +- Camera vendor optimises model for their hardware (dedicated NPU/VPU) +- Lower latency (no frame transfer to server) +- Scales naturally with camera count (each camera is self-contained) + +**Cons:** + +- Camera vendor controls the model; limited flexibility to run custom models +- Detection quality depends on vendor's training data and model updates +- Vendor lock-in for inference capability +- Difficult to run multi-model pipelines or ensemble approaches + +**Best fit:** Sites where the camera vendor provides a validated leak detection model and the operator accepts vendor-managed inference. + +#### Option C: Multi-Model Pipeline (Parallel Inference) + +Multiple inference instances subscribe to the same MQTT snapshot stream, each running a different model (e.g., liquid leak detection + gas plume detection + flame detection). 
+ +**Pros:** + +- Detects multiple hazard types simultaneously +- Models can be developed and updated independently +- Confidence scoring across models enables multi-signal correlation +- Supports the BDR target of up to 85% false positive reduction through correlation + +**Cons:** + +- Linear increase in compute requirements per model +- Results aggregation logic required (customer-owned) +- More complex deployment and monitoring +- Resource contention on constrained edge hardware + +**Best fit:** Sites with sufficient compute capacity and multi-hazard detection requirements. Extends Option A with additional model instances. + +#### Selected Approach: Option A (ONNX/YOLOv8) as Reference + +The reference scenario provides a YOLOv8n ONNX model as the default implementation. The model interface contract — input image format, output schema (detection flag, type, bounding box, confidence), and ONNX packaging — enables customers to substitute their own models (EXT-01). FDEs deploy the sample model for initial demonstration and guide customers through model replacement. + +### Layer 3: On-Site Messaging + +The AIO MQTT Broker is the only considered option. It is the messaging backbone of Azure IoT Operations, operates entirely on-site with no cloud dependency, and provides the decoupling point between all pipeline components. Topic structure follows the UNS (Unified Namespace) pattern established in the accelerator. + +### Layer 4: Cloud Routing + +#### Option A: EventHub Dataflows (Reference Implementation) + +AIO Dataflow Engine routes detection results from MQTT topics to Azure Event Hub for cloud-side processing. EventHub provides high-throughput event ingestion, consumer group isolation, and integration with downstream Azure services. 
+ +**Pros:** + +- High throughput and built-in partitioning +- Consumer groups enable multiple downstream subscribers without contention +- Native integration with Logic Apps, Azure Functions, Stream Analytics, and Fabric RTI +- Retention period configurable for replay and reprocessing + +**Cons:** + +- Requires Event Hub namespace provisioning and management +- Cost scales with throughput units and retention +- Not bidirectional (cloud-to-edge commands require a separate channel) + +#### Option B: EventGrid Dataflows + +AIO Dataflow Engine routes detection results to Azure Event Grid for event-driven cloud processing. + +**Pros:** + +- Native event routing with filtering and fan-out +- Pay-per-event pricing for low-volume workloads +- Built-in dead-lettering and retry + +**Cons:** + +- Lower throughput ceiling than EventHub for high-volume streams +- Less suited for ordered event processing +- Filtering and routing logic adds complexity + +#### Selected Approach: Option A (EventHub Dataflows) + +EventHub Dataflows are the reference implementation. The reference scenario explicitly disables EventGrid dataflows. FDEs may enable EventGrid for customers who need event-driven fan-out to multiple Azure services or prefer pay-per-event pricing. + +### Layer 5: Notification + +#### Option A: Logic App → Microsoft Teams (Reference Implementation) + +A Logic App triggered by EventHub receives detection events, checks alert state in Azure Table Storage to deduplicate ongoing leaks, and delivers alerts to a Microsoft Teams channel with timestamp, camera ID, detection type, confidence score, and snapshot image. 
+ +**Pros:** + +- Low-code integration with Teams (familiar to operators) +- Stateful deduplication prevents alert fatigue from continuous leaks +- "Close leak" action re-arms alerting per camera +- Evidence snapshots persisted to Azure Blob Storage +- No custom code required for the notification path + +**Cons:** + +- Teams dependency (not suitable for organisations without Microsoft 365) +- Logic App execution latency adds seconds to notification delivery +- Alert payload limited by Teams message card format +- Logic App pricing based on connector executions + +**Best fit:** Organisations using Microsoft Teams as their collaboration platform. + +#### Option B: Azure Functions → Email / SMS + +An Azure Function triggered by EventHub processes detection events and delivers notifications via SendGrid (email), Twilio (SMS), or other programmable communication APIs. + +**Pros:** + +- Flexible delivery targets (email, SMS, push notification, webhook) +- Full programmatic control over alert formatting and routing +- Lower per-execution cost than Logic App for high volumes + +**Cons:** + +- Requires custom code development and maintenance +- Third-party service dependencies (SendGrid, Twilio) +- Alert deduplication must be implemented in code +- No visual low-code designer + +**Best fit:** Organisations needing multi-channel notification or not using Microsoft Teams. + +#### Option C: Direct SCADA / Process Control Integration + +Detection events routed from EventHub (or directly from MQTT via edge gateway) into existing process control systems (SCADA, DCS, historian). 
+ +**Pros:** + +- Integrates into the operator's existing operational workflow +- No new notification tool for operators to learn +- Enables automated response (e.g., valve closure, pump shutdown) + +**Cons:** + +- Requires site-specific SCADA integration (OPC UA, Modbus, proprietary APIs) +- Integration complexity varies significantly by customer +- Security boundaries between IT and OT networks complicate deployment +- Not provided by the accelerator; customer-owned integration + +**Best fit:** Mature operations with existing SCADA infrastructure and defined automated response procedures. + +#### Selected Approach: Option A (Logic App → Teams) as Reference + +The reference scenario provides Teams notification with stateful deduplication. FDEs guide customers to extend or replace the notification target (EXT-02) based on their operational tools and collaboration platform. + +## Decision Conclusion + +The leak detection pipeline architecture uses a **layered, MQTT-brokered design** where each layer is decoupled through topic contracts and independently substitutable. 
The reference implementation is realized as a *scenario* on top of `blueprints/full-single-node-cluster` (using `leak-detection.tfvars.example`) and provides an opinionated starting point: + +| Layer | Reference Choice | Substitution Guidance | +|------------------|--------------------------------------------------------|-------------------------------------------------------------------------------------------| +| Camera ingestion | RTSP Camera Simulator + Media Connector (snapshotting) | Swap to SSE Connector (analytics cameras) or ONVIF Connector based on camera capabilities | +| Inference | YOLOv8n ONNX model via AI Edge Inference | Replace ONNX model file; conform to model interface contract (EXT-01) | +| Messaging | AIO MQTT Broker | Not substitutable — foundational to Azure IoT Operations | +| Cloud routing | EventHub Dataflows | Enable EventGrid Dataflows for event-driven fan-out scenarios | +| Notification | Logic App → Teams (stateful dedup) | Replace with Azure Functions, SCADA integration, or custom webhook (EXT-02) | + +### Key Architectural Principles + +1. **MQTT as the integration backbone**: All on-site components communicate through the AIO MQTT Broker. This decoupling enables independent deployment, scaling, and replacement of pipeline stages. +2. **Cloud-independent detection**: The on-site pipeline (camera → MQTT → inference → MQTT) operates without cloud connectivity. Cloud services handle notification routing and analytics — not detection. +3. **Model interface contract over model lock-in**: The inference service defines an input/output contract (JPEG in, detection JSON out). Any ONNX model conforming to this contract can be deployed without changing the pipeline. +4. **Alert deduplication at the notification layer**: Stateful deduplication in the Logic App (or customer equivalent) ensures one alert per leak event, with explicit close/re-arm actions. This directly addresses the operator trust condition of minimal false alarms. +5. 
**Evidence capture alongside detection**: Media Capture Service persists snapshot evidence to ACSA-backed cloud storage, providing visual proof independent of the alert delivery mechanism. + +## Consequences + +### Positive + +- **FDEs can adapt to customer environments** without rearchitecting the pipeline — swap camera connectors, replace inference models, or redirect notifications independently +- **Detection operates without cloud** — sites with intermittent connectivity maintain continuous monitoring +- **Sample model accelerates time to demo** — ≤ 2 weeks from engagement start to working demonstration (BDR target) +- **Visual evidence in every alert** builds operator trust — timestamp, camera, bounding box, confidence, and snapshot image +- **Stateful deduplication** prevents alert fatigue from continuous leaks +- **Observability stack** (Grafana, Log Analytics, Azure Monitor) provides system health visibility from day one + +### Negative + +- **Sample model is not production-grade** — customers must bring their own trained model for production deployment; model training and lifecycle management are out of scope +- **Teams notification is a starting point** — operators using SCADA, email, or SMS must implement their own notification integration (EXT-02) +- **Single-node cluster limits** — the reference implementation targets a single VM; multi-camera deployments exceeding hardware capacity require scaling guidance; multi-site deployment topology requires further triage +- **Severity classification is customer-owned** — the accelerator produces a confidence score but does not map it to green/yellow/red thresholds (EXT-03) +- **No real-time video streaming** — the pipeline processes snapshots, not live video; real-time streaming for ROC verification is a desirable capability not included in the initial delivery + +### Neutral + +- **Multi-model pipelines** are supported architecturally (multiple inference instances subscribing to the same MQTT topics) but not 
implemented in the reference scenario +- **Edge-local event storage** is an open question (PDR OQ-04) — currently detection events are persisted only when they reach cloud; fully disconnected audit review requires additional implementation + +## References + +- [full-single-node-cluster blueprint (host of the leak detection scenario)](../../blueprints/full-single-node-cluster/README.md) + +## Related ADRs + +- [SSE Connector for Real-Time Event Streaming](./sse-connector-real-time-event-streaming.md) +- [Edge Video Streaming and Image Capture](./edge-video-streaming-and-image-capture.md) +- [ONVIF Connector for IP Camera Integration](./onvif-connector-camera-integration.md) +- [AI Edge Inference Dual Backend Architecture](./ai-edge-inference-dual-backend-architecture.md) +- [UNS Asset Metadata Topic Structure](./uns-asset-metadata-topic-structure.md) diff --git a/docs/solution-adr-library/onvif-connector-camera-integration.md b/docs/solution-adr-library/onvif-connector-camera-integration.md index ecb94703..e3e3a036 100644 --- a/docs/solution-adr-library/onvif-connector-camera-integration.md +++ b/docs/solution-adr-library/onvif-connector-camera-integration.md @@ -416,29 +416,43 @@ terraform apply -var-file="onvif-connector-assets.tfvars" Configuration example in `onvif-connector-assets.tfvars.example`: ```hcl -onvif_connector_devices = [ +namespaced_assets = [ { - name = "warehouse-camera-01" - endpoint = "https://192.168.1.100/onvif/device_service" - # username = "admin" - # password = "secure-password" - - assets = [ + name = "warehouse-ptz-control" + display_name = "Warehouse PTZ Camera System" + enabled = true + device_ref = { + device_name = "warehouse-camera-01" + endpoint_name = "warehouse-camera-endpoint" + } + description = "ONVIF PTZ camera for warehouse monitoring with motion detection" + + // PTZ control via management_groups + management_groups = [ { - name = "warehouse-ptz-control" - - commands = [ + name = "ptz-controls" + actions = [ { - name = 
"pan_right" - topic = "cameras/warehouse/ptz/pan" - payload = jsonencode({direction = "right", speed = 0.5}) + name = "pan_right" + action_type = "Call" + target_uri = "http://onvif.org/onvif/ver20/ptz/wsdl/ContinuousMove" + action_configuration = jsonencode({ + direction = "right" + speed = 0.5 + }) } ] + } + ] + // Camera events via event_groups + event_groups = [ + { + name = "camera-events" events = [ { - name = "MOTION_DETECTED" - event_notifier = "motion" + name = "motion-detected" + data_source = "motion" destinations = [ { target = "Mqtt" @@ -451,6 +465,8 @@ onvif_connector_devices = [ ] } ] + + datasets = [] } ] ``` @@ -466,7 +482,7 @@ The ONVIF Connector leverages the Akri connector module: ### Configuration Variables -Terraform variables in `src/100-edge/110-iot-ops/terraform/variables.akri.tf`: +Terraform variables in `src/100-edge/111-assets/terraform/variables.tf`: ```terraform variable "should_enable_akri_onvif_connector" { @@ -475,33 +491,45 @@ variable "should_enable_akri_onvif_connector" { description = "Deploy Akri ONVIF Connector template" } -variable "onvif_connector_devices" { +variable "namespaced_assets" { type = list(object({ - name = string - description = optional(string) - endpoint = string - username = optional(string) - password = optional(string) - assets = list(object({ - name = string - description = optional(string) - commands = optional(list(object({ - name = string - topic = string - payload = string - }))) - events = optional(list(object({ - name = string - event_notifier = string - destinations = list(object({ - target = string - configuration = map(string) - })) - }))) + name = string + display_name = optional(string) + device_ref = optional(object({ + device_name = string + endpoint_name = string })) + enabled = optional(bool, true) + description = optional(string) + attributes = optional(map(string), {}) + datasets = optional(list(object({...})), []) + event_groups = optional(list(object({ + name = string + events = 
list(object({ + name = string + data_source = string + destinations = optional(list(object({ + target = string + configuration = object({ + topic = optional(string) + retain = optional(string) + qos = optional(string) + }) + })), []) + })) + })), []) + management_groups = optional(list(object({ + name = string + actions = list(object({ + name = string + action_type = string + target_uri = string + action_configuration = optional(string) + })) + })), []) })) default = [] - description = "ONVIF camera devices and assets" + description = "List of namespaced assets with enhanced configuration support" } ``` diff --git a/src/000-cloud/010-security-identity/terraform/README.md b/src/000-cloud/010-security-identity/terraform/README.md index 280756a3..989c8d4a 100644 --- a/src/000-cloud/010-security-identity/terraform/README.md +++ b/src/000-cloud/010-security-identity/terraform/README.md @@ -46,6 +46,7 @@ access to resources. | key\_vault\_name | The name of the Key Vault to store secrets. If not provided, defaults to 'kv-{resource\_prefix}-{environment}-{instance}' | `string` | `null` | no | | key\_vault\_private\_endpoint\_subnet\_id | The ID of the subnet where the Key Vault private endpoint will be created. Required if should\_create\_key\_vault\_private\_endpoint is true. | `string` | `null` | no | | key\_vault\_virtual\_network\_id | The ID of the virtual network to link to the Key Vault private DNS zone. Required if should\_create\_key\_vault\_private\_endpoint is true. | `string` | `null` | no | +| log\_analytics\_workspace\_id | The ID of the Log Analytics workspace for diagnostic settings. If null, diagnostics are not enabled | `string` | `null` | no | | onboard\_identity\_type | Identity type to use for onboarding the cluster to Azure Arc. Allowed values: - id - sp - skip | `string` | `"id"` | no | | should\_create\_aio\_identity | Whether to create a user-assigned identity for Azure IoT Operations. 
| `bool` | `true` | no | | should\_create\_aks\_identity | Whether to create a user-assigned identity for AKS cluster when using custom private DNS zones. | `bool` | `false` | no | @@ -54,6 +55,7 @@ access to resources. | should\_create\_key\_vault\_private\_endpoint | Whether to create a private endpoint for the Key Vault. | `bool` | `false` | no | | should\_create\_ml\_workload\_identity | Whether to create a user-assigned identity for AzureML workloads. | `bool` | `false` | no | | should\_create\_secret\_sync\_identity | Whether to create a user-assigned identity for Secret Sync Extension. | `bool` | `true` | no | +| should\_enable\_diagnostic\_settings | Whether to enable diagnostic settings for Key Vault | `bool` | `false` | no | | should\_enable\_public\_network\_access | Whether to enable public network access for the Key Vault | `bool` | `true` | no | | should\_enable\_purge\_protection | Whether to enable purge protection for the Key Vault. Enable for production to prevent accidental or malicious secret deletion | `bool` | `false` | no | | should\_use\_current\_user\_key\_vault\_admin | Whether to give the current user the Key Vault Secrets Officer Role. 
| `bool` | `true` | no | diff --git a/src/000-cloud/010-security-identity/terraform/main.tf b/src/000-cloud/010-security-identity/terraform/main.tf index d3533669..21be803a 100644 --- a/src/000-cloud/010-security-identity/terraform/main.tf +++ b/src/000-cloud/010-security-identity/terraform/main.tf @@ -31,6 +31,8 @@ module "key_vault" { should_enable_public_network_access = var.should_enable_public_network_access should_enable_purge_protection = var.should_enable_purge_protection should_add_key_vault_role_assignment = local.should_add_key_vault_role_assignment + log_analytics_workspace_id = var.log_analytics_workspace_id + should_enable_diagnostic_settings = var.should_enable_diagnostic_settings } module "identity" { diff --git a/src/000-cloud/010-security-identity/terraform/modules/key-vault/README.md b/src/000-cloud/010-security-identity/terraform/modules/key-vault/README.md index 86a9fb2e..f50120e7 100644 --- a/src/000-cloud/010-security-identity/terraform/modules/key-vault/README.md +++ b/src/000-cloud/010-security-identity/terraform/modules/key-vault/README.md @@ -21,6 +21,7 @@ Create or use and existing a Key Vault for Secret Sync Extension | Name | Type | |----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------| | [azurerm_key_vault.new](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/key_vault) | resource | +| [azurerm_monitor_diagnostic_setting.key_vault](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/monitor_diagnostic_setting) | resource | | [azurerm_private_dns_a_record.a_record](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/private_dns_a_record) | resource | | [azurerm_private_dns_zone.dns_zone](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/private_dns_zone) | resource | | 
[azurerm_private_dns_zone_virtual_network_link.vnet_link](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/private_dns_zone_virtual_network_link) | resource | @@ -37,11 +38,13 @@ Create or use and existing a Key Vault for Secret Sync Extension | key\_vault\_admin\_principal\_id | The Principal ID or Object ID for the admin that will have access to update secrets on the Key Vault. | `string` | n/a | yes | | key\_vault\_name | The name of the Key Vault to store secrets. If not provided, defaults to 'kv-{resource\_prefix}-{environment}-{instance}' | `string` | n/a | yes | | location | Azure region where all resources will be deployed | `string` | n/a | yes | +| log\_analytics\_workspace\_id | The ID of the Log Analytics workspace for diagnostic settings | `string` | n/a | yes | | private\_endpoint\_subnet\_id | The ID of the subnet where the private endpoint will be created | `string` | n/a | yes | | resource\_group | Resource group object containing name and id where resources will be deployed | ```object({ name = string })``` | n/a | yes | | resource\_prefix | Prefix for all resources in this module | `string` | n/a | yes | | should\_add\_key\_vault\_role\_assignment | Whether to add role assignment to the Key Vault | `bool` | n/a | yes | | should\_create\_private\_endpoint | Whether to create a private endpoint for the Key Vault | `bool` | n/a | yes | +| should\_enable\_diagnostic\_settings | Whether to enable diagnostic settings for the Key Vault | `bool` | n/a | yes | | should\_enable\_public\_network\_access | Whether to enable public network access for the Key Vault | `bool` | n/a | yes | | should\_enable\_purge\_protection | Whether to enable purge protection for the Key Vault | `bool` | n/a | yes | | virtual\_network\_id | The ID of the virtual network to link to the private DNS zone | `string` | n/a | yes | diff --git a/src/000-cloud/010-security-identity/terraform/modules/key-vault/main.tf 
b/src/000-cloud/010-security-identity/terraform/modules/key-vault/main.tf index fa015402..6ad8e684 100644 --- a/src/000-cloud/010-security-identity/terraform/modules/key-vault/main.tf +++ b/src/000-cloud/010-security-identity/terraform/modules/key-vault/main.tf @@ -46,6 +46,26 @@ resource "terraform_data" "defer" { depends_on = [azurerm_role_assignment.user_key_vault_secrets_officer] } +/* + * Diagnostic Settings + */ + +resource "azurerm_monitor_diagnostic_setting" "key_vault" { + count = var.should_enable_diagnostic_settings ? 1 : 0 + + name = "diag-${azurerm_key_vault.new.name}" + target_resource_id = azurerm_key_vault.new.id + log_analytics_workspace_id = var.log_analytics_workspace_id + + enabled_log { + category = "AuditEvent" + } + + enabled_metric { + category = "AllMetrics" + } +} + /* * Private Endpoint */ diff --git a/src/000-cloud/010-security-identity/terraform/modules/key-vault/variables.tf b/src/000-cloud/010-security-identity/terraform/modules/key-vault/variables.tf index 1c31d9e3..54831f75 100644 --- a/src/000-cloud/010-security-identity/terraform/modules/key-vault/variables.tf +++ b/src/000-cloud/010-security-identity/terraform/modules/key-vault/variables.tf @@ -37,3 +37,13 @@ variable "should_enable_purge_protection" { type = bool description = "Whether to enable purge protection for the Key Vault" } + +variable "log_analytics_workspace_id" { + type = string + description = "The ID of the Log Analytics workspace for diagnostic settings" +} + +variable "should_enable_diagnostic_settings" { + type = bool + description = "Whether to enable diagnostic settings for the Key Vault" +} diff --git a/src/000-cloud/010-security-identity/terraform/variables.tf b/src/000-cloud/010-security-identity/terraform/variables.tf index 2f936b8f..5ab975b6 100644 --- a/src/000-cloud/010-security-identity/terraform/variables.tf +++ b/src/000-cloud/010-security-identity/terraform/variables.tf @@ -38,6 +38,22 @@ variable "should_enable_purge_protection" { default = false } 
+/* + * Key Vault Diagnostic Settings - Optional + */ + +variable "log_analytics_workspace_id" { + description = "The ID of the Log Analytics workspace for diagnostic settings. If null, diagnostics are not enabled" + type = string + default = null +} + +variable "should_enable_diagnostic_settings" { + description = "Whether to enable diagnostic settings for Key Vault" + type = bool + default = false +} + /* * Key Vault Private Endpoint - Optional */ diff --git a/src/000-cloud/040-messaging/terraform/README.md b/src/000-cloud/040-messaging/terraform/README.md index 93761aaa..d5fdc848 100644 --- a/src/000-cloud/040-messaging/terraform/README.md +++ b/src/000-cloud/040-messaging/terraform/README.md @@ -54,9 +54,11 @@ Azure IoT Operations Dataflow to send and receive data from edge to cloud. | function\_node\_version | The version of Node.js to use | `string` | `"20"` | no | | function\_python\_version | The version of Python to use. | `string` | `null` | no | | instance | Instance identifier for naming resources: 001, 002, etc | `string` | `"001"` | no | +| log\_analytics\_workspace\_id | The ID of the Log Analytics workspace for diagnostic settings. If null, diagnostics are not enabled | `string` | `null` | no | | should\_create\_azure\_functions | Whether to create the Azure Functions resources including App Service Plan | `bool` | `false` | no | | should\_create\_eventgrid | Whether to create the Event Grid resources. | `bool` | `true` | no | | should\_create\_eventhub | Whether to create the Event Hubs resources. | `bool` | `true` | no | +| should\_enable\_diagnostic\_settings | Whether to enable diagnostic settings for Event Grid and Event Hubs | `bool` | `false` | no | | tags | Tags to apply to all resources | `map(string)` | `{}` | no | ## Outputs @@ -68,5 +70,6 @@ Azure IoT Operations Dataflow to send and receive data from edge to cloud. 
| eventhub\_namespace | Event Hub namespace configuration | | eventhubs | Event Hub(s) configuration | | function\_app | Function App configuration and details. | +| function\_identity | User Assigned Managed Identity used by the Function App. | | function\_storage\_account | Storage Account used by the Function App. | diff --git a/src/000-cloud/040-messaging/terraform/main.tf b/src/000-cloud/040-messaging/terraform/main.tf index 018662be..ed2bc020 100644 --- a/src/000-cloud/040-messaging/terraform/main.tf +++ b/src/000-cloud/040-messaging/terraform/main.tf @@ -10,14 +10,16 @@ module "eventhub" { source = "./modules/eventhub" - environment = var.environment - resource_prefix = var.resource_prefix - instance = var.instance - resource_group_name = var.resource_group.name - location = var.resource_group.location - aio_uami_principal_id = var.aio_identity.principal_id - capacity = var.eventhub_capacity - eventhubs = var.eventhubs + environment = var.environment + resource_prefix = var.resource_prefix + instance = var.instance + resource_group_name = var.resource_group.name + location = var.resource_group.location + aio_uami_principal_id = var.aio_identity.principal_id + capacity = var.eventhub_capacity + eventhubs = var.eventhubs + log_analytics_workspace_id = var.log_analytics_workspace_id + should_enable_diagnostic_settings = var.should_enable_diagnostic_settings } module "eventgrid" { @@ -36,6 +38,8 @@ module "eventgrid" { capacity = var.eventgrid_capacity eventgrid_max_client_sessions_per_auth_name = var.eventgrid_max_client_sessions topic_name = var.eventgrid_topic_name + log_analytics_workspace_id = var.log_analytics_workspace_id + should_enable_diagnostic_settings = var.should_enable_diagnostic_settings } module "app_service_plan" { diff --git a/src/000-cloud/040-messaging/terraform/modules/azure-functions/README.md b/src/000-cloud/040-messaging/terraform/modules/azure-functions/README.md index 43ee0283..eca546e3 100644 --- 
a/src/000-cloud/040-messaging/terraform/modules/azure-functions/README.md +++ b/src/000-cloud/040-messaging/terraform/modules/azure-functions/README.md @@ -47,8 +47,9 @@ This module creates the Function App with necessary configuration for messaging ## Outputs -| Name | Description | -|------------------|-----------------------------------------------| -| function\_app | The Function App resource object. | -| storage\_account | The Storage Account used by the Function App. | +| Name | Description | +|--------------------|--------------------------------------------------------------| +| function\_app | The Function App resource object. | +| function\_identity | The User Assigned Managed Identity used by the Function App. | +| storage\_account | The Storage Account used by the Function App. | diff --git a/src/000-cloud/040-messaging/terraform/modules/azure-functions/main.tf b/src/000-cloud/040-messaging/terraform/modules/azure-functions/main.tf index 3bf4b877..900ca6de 100644 --- a/src/000-cloud/040-messaging/terraform/modules/azure-functions/main.tf +++ b/src/000-cloud/040-messaging/terraform/modules/azure-functions/main.tf @@ -77,8 +77,7 @@ resource "azurerm_linux_function_app" "function_app" { app_settings = merge( var.app_settings, { - AZURE_CLIENT_ID = azurerm_user_assigned_identity.function_identity.client_id - EventHubConnection__clientId = azurerm_user_assigned_identity.function_identity.client_id + AZURE_CLIENT_ID = azurerm_user_assigned_identity.function_identity.client_id } ) @@ -124,8 +123,7 @@ resource "azurerm_windows_function_app" "function_app" { app_settings = merge( var.app_settings, { - AZURE_CLIENT_ID = azurerm_user_assigned_identity.function_identity.client_id - EventHubConnection__clientId = azurerm_user_assigned_identity.function_identity.client_id + AZURE_CLIENT_ID = azurerm_user_assigned_identity.function_identity.client_id } ) diff --git a/src/000-cloud/040-messaging/terraform/modules/azure-functions/outputs.tf 
b/src/000-cloud/040-messaging/terraform/modules/azure-functions/outputs.tf index 70ee6b15..007f740f 100644 --- a/src/000-cloud/040-messaging/terraform/modules/azure-functions/outputs.tf +++ b/src/000-cloud/040-messaging/terraform/modules/azure-functions/outputs.tf @@ -2,6 +2,15 @@ * Function App Outputs */ +output "function_identity" { + description = "The User Assigned Managed Identity used by the Function App." + value = { + id = azurerm_user_assigned_identity.function_identity.id + principal_id = azurerm_user_assigned_identity.function_identity.principal_id + client_id = azurerm_user_assigned_identity.function_identity.client_id + } +} + output "function_app" { description = "The Function App resource object." value = { diff --git a/src/000-cloud/040-messaging/terraform/modules/eventgrid/README.md b/src/000-cloud/040-messaging/terraform/modules/eventgrid/README.md index 3e1b7725..39c8d6d4 100644 --- a/src/000-cloud/040-messaging/terraform/modules/eventgrid/README.md +++ b/src/000-cloud/040-messaging/terraform/modules/eventgrid/README.md @@ -18,11 +18,12 @@ Create a new Event Grid namespace and namespace topic and assign the AIO instanc ## Resources -| Name | Type | -|----------------------------------------------------------------------------------------------------------------------------------------------|----------| -| [azapi_resource.eventgrid_namespace_topic_space](https://registry.terraform.io/providers/Azure/azapi/latest/docs/resources/resource) | resource | -| [azurerm_eventgrid_namespace.aio_eg_ns](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/eventgrid_namespace) | resource | -| [azurerm_role_assignment.data_sender](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/role_assignment) | resource | +| Name | Type | +|------------------------------------------------------------------------------------------------------------------------------------------------------------|----------| +| 
[azapi_resource.eventgrid_namespace_topic_space](https://registry.terraform.io/providers/Azure/azapi/latest/docs/resources/resource) | resource | +| [azurerm_eventgrid_namespace.aio_eg_ns](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/eventgrid_namespace) | resource | +| [azurerm_monitor_diagnostic_setting.eventgrid](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/monitor_diagnostic_setting) | resource | +| [azurerm_role_assignment.data_sender](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/role_assignment) | resource | ## Inputs @@ -32,8 +33,10 @@ Create a new Event Grid namespace and namespace topic and assign the AIO instanc | environment | Environment for all resources in this module: dev, test, or prod | `string` | n/a | yes | | instance | Instance identifier for naming resources: 001, 002, etc | `string` | n/a | yes | | location | Azure region where all resources will be deployed | `string` | n/a | yes | +| log\_analytics\_workspace\_id | The ID of the Log Analytics workspace for diagnostic settings | `string` | n/a | yes | | resource\_group\_name | Name of the resource group | `string` | n/a | yes | | resource\_prefix | Prefix for all resources in this module | `string` | n/a | yes | +| should\_enable\_diagnostic\_settings | Whether to enable diagnostic settings for the Event Grid namespace | `bool` | n/a | yes | | capacity | Specifies the Capacity / Throughput Units for a Standard SKU namespace. | `number` | `1` | no | | eventgrid\_max\_client\_sessions\_per\_auth\_name | Specifies the maximum number of client sessions per authentication name. Valid values are from 3 to 100. 
This parameter should be greater than the number of dataflows | `number` | `8` | no | | topic\_name | Topic template name to create in the Event Grid namespace | `string` | `"default"` | no | diff --git a/src/000-cloud/040-messaging/terraform/modules/eventgrid/main.tf b/src/000-cloud/040-messaging/terraform/modules/eventgrid/main.tf index 4b6d5c8c..34e21526 100644 --- a/src/000-cloud/040-messaging/terraform/modules/eventgrid/main.tf +++ b/src/000-cloud/040-messaging/terraform/modules/eventgrid/main.tf @@ -29,6 +29,26 @@ resource "azapi_resource" "eventgrid_namespace_topic_space" { } } +/* + * Diagnostic Settings + */ + +resource "azurerm_monitor_diagnostic_setting" "eventgrid" { + count = var.should_enable_diagnostic_settings ? 1 : 0 + + name = "diag-${azurerm_eventgrid_namespace.aio_eg_ns.name}" + target_resource_id = azurerm_eventgrid_namespace.aio_eg_ns.id + log_analytics_workspace_id = var.log_analytics_workspace_id + + enabled_log { + category_group = "allLogs" + } + + enabled_metric { + category = "AllMetrics" + } +} + resource "azurerm_role_assignment" "data_sender" { scope = azapi_resource.eventgrid_namespace_topic_space.id role_definition_name = "EventGrid TopicSpaces Publisher" diff --git a/src/000-cloud/040-messaging/terraform/modules/eventgrid/variables.tf b/src/000-cloud/040-messaging/terraform/modules/eventgrid/variables.tf index 9409a367..3a2b5f86 100644 --- a/src/000-cloud/040-messaging/terraform/modules/eventgrid/variables.tf +++ b/src/000-cloud/040-messaging/terraform/modules/eventgrid/variables.tf @@ -53,3 +53,13 @@ variable "topic_name" { type = string default = "default" } + +variable "log_analytics_workspace_id" { + type = string + description = "The ID of the Log Analytics workspace for diagnostic settings" +} + +variable "should_enable_diagnostic_settings" { + type = bool + description = "Whether to enable diagnostic settings for the Event Grid namespace" +} diff --git a/src/000-cloud/040-messaging/terraform/modules/eventhub/README.md 
b/src/000-cloud/040-messaging/terraform/modules/eventhub/README.md index 1220af99..8248982d 100644 --- a/src/000-cloud/040-messaging/terraform/modules/eventhub/README.md +++ b/src/000-cloud/040-messaging/terraform/modules/eventhub/README.md @@ -22,20 +22,23 @@ Create a new Event Hub namespace and Event Hub and assign the AIO instance UAMI | [azurerm_eventhub.destination_eh](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/eventhub) | resource | | [azurerm_eventhub_consumer_group.destination_eh_cg](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/eventhub_consumer_group) | resource | | [azurerm_eventhub_namespace.destination_eventhub_namespace](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/eventhub_namespace) | resource | +| [azurerm_monitor_diagnostic_setting.eventhub](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/monitor_diagnostic_setting) | resource | | [azurerm_role_assignment.data_sender](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/role_assignment) | resource | ## Inputs -| Name | Description | Type | Default | Required | -|--------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------|:--------:| -| aio\_uami\_principal\_id | 
Principal ID of the User Assigned Managed Identity for the Azure IoT Operations instance | `string` | n/a | yes | -| capacity | Specifies the Capacity / Throughput Units for a Standard SKU namespace. | `number` | n/a | yes | -| environment | Environment for all resources in this module: dev, test, or prod | `string` | n/a | yes | -| eventhubs | Per-Event Hub configuration. Keys are Event Hub names. - **Message retention**: Specifies the number of days to retain events for this Event Hub, from 1 to 7. - **Partition count**: Specifies the number of partitions for the Event Hub. Valid values are from 1 to 32. - **Consumer group user metadata**: A placeholder to store user-defined string data with maximum length 1024. It can be used to store descriptive data, such as list of teams and their contact information, or user-defined configuration settings. | ```map(object({ message_retention = optional(number, 1) partition_count = optional(number, 1) consumer_groups = optional(map(object({ user_metadata = optional(string, null) })), {}) }))``` | n/a | yes | -| instance | Instance identifier for naming resources: 001, 002, etc | `string` | n/a | yes | -| location | Azure region where all resources will be deployed | `string` | n/a | yes | -| resource\_group\_name | Name of the resource group | `string` | n/a | yes | -| resource\_prefix | Prefix for all resources in this module | `string` | n/a | yes | +| Name | Description | Type | Default | Required | 
+|--------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------|:--------:| +| aio\_uami\_principal\_id | Principal ID of the User Assigned Managed Identity for the Azure IoT Operations instance | `string` | n/a | yes | +| capacity | Specifies the Capacity / Throughput Units for a Standard SKU namespace. | `number` | n/a | yes | +| environment | Environment for all resources in this module: dev, test, or prod | `string` | n/a | yes | +| eventhubs | Per-Event Hub configuration. Keys are Event Hub names. - **Message retention**: Specifies the number of days to retain events for this Event Hub, from 1 to 7. - **Partition count**: Specifies the number of partitions for the Event Hub. Valid values are from 1 to 32. - **Consumer group user metadata**: A placeholder to store user-defined string data with maximum length 1024. It can be used to store descriptive data, such as list of teams and their contact information, or user-defined configuration settings. 
| ```map(object({ message_retention = optional(number, 1) partition_count = optional(number, 1) consumer_groups = optional(map(object({ user_metadata = optional(string, null) })), {}) }))``` | n/a | yes | +| instance | Instance identifier for naming resources: 001, 002, etc | `string` | n/a | yes | +| location | Azure region where all resources will be deployed | `string` | n/a | yes | +| log\_analytics\_workspace\_id | The ID of the Log Analytics workspace for diagnostic settings | `string` | n/a | yes | +| resource\_group\_name | Name of the resource group | `string` | n/a | yes | +| resource\_prefix | Prefix for all resources in this module | `string` | n/a | yes | +| should\_enable\_diagnostic\_settings | Whether to enable diagnostic settings for the Event Hubs namespace | `bool` | n/a | yes | ## Outputs diff --git a/src/000-cloud/040-messaging/terraform/modules/eventhub/main.tf b/src/000-cloud/040-messaging/terraform/modules/eventhub/main.tf index 3936f9e4..0999af1f 100644 --- a/src/000-cloud/040-messaging/terraform/modules/eventhub/main.tf +++ b/src/000-cloud/040-messaging/terraform/modules/eventhub/main.tf @@ -45,6 +45,26 @@ resource "azurerm_eventhub_consumer_group" "destination_eh_cg" { depends_on = [azurerm_eventhub.destination_eh] } +/* + * Diagnostic Settings + */ + +resource "azurerm_monitor_diagnostic_setting" "eventhub" { + count = var.should_enable_diagnostic_settings ? 
1 : 0 + + name = "diag-${azurerm_eventhub_namespace.destination_eventhub_namespace.name}" + target_resource_id = azurerm_eventhub_namespace.destination_eventhub_namespace.id + log_analytics_workspace_id = var.log_analytics_workspace_id + + enabled_log { + category_group = "allLogs" + } + + enabled_metric { + category = "AllMetrics" + } +} + resource "azurerm_role_assignment" "data_sender" { scope = azurerm_eventhub_namespace.destination_eventhub_namespace.id role_definition_name = "Azure Event Hubs Data Sender" diff --git a/src/000-cloud/040-messaging/terraform/modules/eventhub/variables.tf b/src/000-cloud/040-messaging/terraform/modules/eventhub/variables.tf index ab28c52a..5087c4bc 100644 --- a/src/000-cloud/040-messaging/terraform/modules/eventhub/variables.tf +++ b/src/000-cloud/040-messaging/terraform/modules/eventhub/variables.tf @@ -37,6 +37,16 @@ variable "capacity" { } } +variable "log_analytics_workspace_id" { + type = string + description = "The ID of the Log Analytics workspace for diagnostic settings" +} + +variable "should_enable_diagnostic_settings" { + type = bool + description = "Whether to enable diagnostic settings for the Event Hubs namespace" +} + variable "eventhubs" { description = <<-EOF Per-Event Hub configuration. Keys are Event Hub names. diff --git a/src/000-cloud/040-messaging/terraform/outputs.tf b/src/000-cloud/040-messaging/terraform/outputs.tf index 021c3bec..63d7d51a 100644 --- a/src/000-cloud/040-messaging/terraform/outputs.tf +++ b/src/000-cloud/040-messaging/terraform/outputs.tf @@ -22,6 +22,11 @@ output "app_service_plan" { value = try(module.app_service_plan[0].app_service_plan, null) } +output "function_identity" { + description = "User Assigned Managed Identity used by the Function App." + value = try(module.azure_functions[0].function_identity, null) +} + output "function_app" { description = "Function App configuration and details." 
value = try(module.azure_functions[0].function_app, null) diff --git a/src/000-cloud/040-messaging/terraform/variables.tf b/src/000-cloud/040-messaging/terraform/variables.tf index 15ae5048..e0a5dc13 100644 --- a/src/000-cloud/040-messaging/terraform/variables.tf +++ b/src/000-cloud/040-messaging/terraform/variables.tf @@ -115,3 +115,19 @@ variable "tags" { description = "Tags to apply to all resources" default = {} } + +/* + * Diagnostic Settings - Optional + */ + +variable "log_analytics_workspace_id" { + type = string + description = "The ID of the Log Analytics workspace for diagnostic settings. If null, diagnostics are not enabled" + default = null +} + +variable "should_enable_diagnostic_settings" { + type = bool + description = "Whether to enable diagnostic settings for Event Grid and Event Hubs" + default = false +} diff --git a/src/000-cloud/045-notification/README.md b/src/000-cloud/045-notification/README.md index 7816fee5..ad6f50d1 100644 --- a/src/000-cloud/045-notification/README.md +++ b/src/000-cloud/045-notification/README.md @@ -87,16 +87,16 @@ Both API connections require manual authorization in the Azure Portal after Terr ### Required Dependencies -| Variable | Type | Description | -|---------------------------------|----------|------------------------------------------------------------------------------------| -| `closure_message_template` | `string` | HTML template for the Teams closure summary message | -| `event_schema` | `any` | JSON schema object for parsing Event Hub event payloads | -| `eventhub_name` | `string` | Name of the Event Hub to subscribe to for events | -| `eventhub_namespace` | `object` | Event Hub namespace with `id` and `name` attributes | -| `notification_message_template` | `string` | HTML template for Teams notification (supports `${close_session_url}` placeholder) | -| `partition_key_field` | `string` | JSON field name from parsed event used as the Table Storage PartitionKey | -| `resource_group` | `object` | Resource 
group with `name`, `id`, and `location` attributes | -| `teams_recipient_id` | `string` | Teams chat or channel thread ID for posting notifications | +| Variable | Type | Description | +|---------------------------------|----------|--------------------------------------------------------------------------------------------------------------| +| `closure_message_template` | `string` | HTML template for the Teams closure summary message | +| `event_schema` | `any` | JSON schema object for parsing Event Hub event payloads | +| `eventhub_name` | `string` | Name of the Event Hub to subscribe to for events | +| `eventhub_namespace` | `object` | Event Hub namespace with `id` and `name` attributes | +| `notification_message_template` | `string` | HTML template for Teams notification (supports `$${close_session_url}` placeholder, with Terraform escaping) | +| `partition_key_field` | `string` | JSON field name from parsed event used as the Table Storage PartitionKey | +| `resource_group` | `object` | Resource group with `name`, `id`, and `location` attributes | +| `teams_recipient_id` | `string` | Teams chat or channel thread ID for posting notifications | ### Optional Configuration diff --git a/src/000-cloud/045-notification/terraform/README.md b/src/000-cloud/045-notification/terraform/README.md index 9c460b26..6923e523 100644 --- a/src/000-cloud/045-notification/terraform/README.md +++ b/src/000-cloud/045-notification/terraform/README.md @@ -75,7 +75,8 @@ The Teams connection requires user consent after deployment via the Azure Portal | should\_assign\_roles | Whether to create role assignments for the Logic App managed identity | `bool` | `true` | no | | table\_name | Azure Table Storage table name for session state tracking. Otherwise, 'notifications' | `string` | `"notifications"` | no | | tags | Tags to apply to all resources in this module | `map(string)` | `{}` | no | -| teams\_post\_location | Teams posting location type for the notification message. 
Otherwise, 'Group chat' | `string` | `"Group chat"` | no | +| teams\_group\_id | Microsoft 365 Group ID (Team ID) for posting to a Teams channel. Required when teams\_post\_location is 'Channel' | `string` | `null` | no | +| teams\_post\_location | Teams posting location type for the notification message: 'Channel' for a Teams channel or 'Group chat' for a group chat | `string` | `"Channel"` | no | | update\_entity\_body | Table Storage entity body for updating an existing session record. Otherwise, auto-generated with LastEventAt timestamp and EventCount increment | `any` | `null` | no | ## Outputs diff --git a/src/000-cloud/045-notification/terraform/main.tf b/src/000-cloud/045-notification/terraform/main.tf index 5129a88f..2a7dbf86 100644 --- a/src/000-cloud/045-notification/terraform/main.tf +++ b/src/000-cloud/045-notification/terraform/main.tf @@ -34,6 +34,11 @@ locals { insert_entity_body = coalesce(var.insert_entity_body, local.default_insert_entity_body) update_entity_body = coalesce(var.update_entity_body, local.default_update_entity_body) + + teams_notification_recipient = var.teams_post_location == "Channel" ? 
jsonencode({ + groupId = var.teams_group_id + channelId = var.teams_recipient_id + }) : jsonencode(var.teams_recipient_id) } // ── Managed API Lookups ────────────────────────────────────── @@ -306,7 +311,7 @@ resource "azurerm_logic_app_action_custom" "for_each_event" { } method = "post" body = { - recipient = var.teams_recipient_id + recipient = jsondecode(local.teams_notification_recipient) messageBody = templatestring(var.notification_message_template, { close_session_url = azapi_resource_action.close_session_callback_url.output.value }) @@ -415,7 +420,7 @@ resource "azurerm_logic_app_action_custom" "post_closure_summary" { } method = "post" body = { - recipient = var.teams_recipient_id + recipient = jsondecode(local.teams_notification_recipient) messageBody = var.closure_message_template } path = "/beta/teams/conversation/message/poster/Flow bot/location/@{encodeURIComponent('${var.teams_post_location}')}" diff --git a/src/000-cloud/045-notification/terraform/variables.deps.tf b/src/000-cloud/045-notification/terraform/variables.deps.tf index 2651d0f5..7230e74a 100644 --- a/src/000-cloud/045-notification/terraform/variables.deps.tf +++ b/src/000-cloud/045-notification/terraform/variables.deps.tf @@ -64,6 +64,12 @@ variable "storage_account" { description = "Storage account for event session state tracking via Table Storage" } +variable "teams_group_id" { + type = string + description = "Microsoft 365 Group ID (Team ID) for posting to a Teams channel. 
Required when teams_post_location is 'Channel'" + default = null +} + variable "teams_recipient_id" { type = string description = "Teams chat or channel thread ID for posting event notifications" diff --git a/src/000-cloud/045-notification/terraform/variables.tf b/src/000-cloud/045-notification/terraform/variables.tf index 5ee50c75..c222abd8 100644 --- a/src/000-cloud/045-notification/terraform/variables.tf +++ b/src/000-cloud/045-notification/terraform/variables.tf @@ -70,6 +70,11 @@ variable "tags" { variable "teams_post_location" { type = string - description = "Teams posting location type for the notification message. Otherwise, 'Group chat'" - default = "Group chat" + description = "Teams posting location type for the notification message: 'Channel' for a Teams channel or 'Group chat' for a group chat" + default = "Channel" + + validation { + condition = contains(["Channel", "Group chat"], var.teams_post_location) + error_message = "teams_post_location must be 'Channel' or 'Group chat'" + } } diff --git a/src/000-cloud/051-vm-host/terraform/README.md b/src/000-cloud/051-vm-host/terraform/README.md index 6f9ce91a..dee89132 100644 --- a/src/000-cloud/051-vm-host/terraform/README.md +++ b/src/000-cloud/051-vm-host/terraform/README.md @@ -62,7 +62,7 @@ Deploys one or more Linux VMs for Arc-connected K3s cluster | vm\_eviction\_policy | Eviction policy for Spot VMs: Deallocate (VM stopped, disk retained, can restart) or Delete (VM and disks removed, no storage charges). Only used when vm\_priority is Spot | `string` | `"Delete"` | no | | vm\_max\_bid\_price | Maximum price per hour in USD for Spot VM. Set to -1 (default) for no price-based eviction - VM will not be evicted for price reasons. Custom values support up to 5 decimal places (e.g., 0.98765). Only used when vm\_priority is Spot | `number` | `-1` | no | | vm\_priority | VM priority: Regular (production, guaranteed capacity) or Spot (cost-optimized, can be evicted with 30s notice). 
Spot VMs offer up to 90% cost savings | `string` | `"Regular"` | no | -| vm\_sku\_size | Size of the VM | `string` | `"Standard_D8s_v3"` | no | +| vm\_sku\_size | Size of the VM | `string` | `"Standard_D8s_v6"` | no | | vm\_user\_principals | Map of Azure AD principals for Virtual Machine User Login role (standard access). Keys are descriptive identifiers (e.g., `user@company.com`), values are principal object IDs. | `map(string)` | `{}` | no | | vm\_username | Username for the VM admin account | `string` | `null` | no | diff --git a/src/000-cloud/051-vm-host/terraform/tests/setup/main.tf b/src/000-cloud/051-vm-host/terraform/tests/setup/main.tf index dffc8ad1..0c585e7e 100644 --- a/src/000-cloud/051-vm-host/terraform/tests/setup/main.tf +++ b/src/000-cloud/051-vm-host/terraform/tests/setup/main.tf @@ -49,7 +49,7 @@ output "arc_onboarding_user_assigned_identity" { output "vm_expected_values" { value = { - default_vm_size = "Standard_D8s_v3" + default_vm_size = "Standard_D8s_v6" default_admin_username = local.resource_prefix os_disk_type = "Standard_LRS" vm_publisher = "Canonical" diff --git a/src/000-cloud/051-vm-host/terraform/variables.tf b/src/000-cloud/051-vm-host/terraform/variables.tf index a6cc18eb..fb5ac719 100644 --- a/src/000-cloud/051-vm-host/terraform/variables.tf +++ b/src/000-cloud/051-vm-host/terraform/variables.tf @@ -11,7 +11,7 @@ variable "host_machine_count" { variable "vm_sku_size" { type = string description = "Size of the VM" - default = "Standard_D8s_v3" + default = "Standard_D8s_v6" } variable "vm_username" { diff --git a/src/000-cloud/060-acr/terraform/README.md b/src/000-cloud/060-acr/terraform/README.md index bf9ffc3b..a10d63fe 100644 --- a/src/000-cloud/060-acr/terraform/README.md +++ b/src/000-cloud/060-acr/terraform/README.md @@ -31,10 +31,12 @@ Deploys Azure Container Registry resources | allowed\_public\_ip\_ranges | CIDR ranges permitted to reach the registry public endpoint | `list(string)` | `[]` | no | | 
default\_outbound\_access\_enabled | Whether to enable default outbound internet access for the ACR subnet | `bool` | `false` | no | | instance | Instance identifier for naming resources: 001, 002, etc | `string` | `"001"` | no | +| log\_analytics\_workspace\_id | The ID of the Log Analytics workspace for diagnostic settings. If null, diagnostics are not enabled | `string` | `null` | no | | nat\_gateway | NAT gateway object from the networking component for managed outbound access | ```object({ id = string name = string })``` | `null` | no | | public\_network\_access\_enabled | Whether to enable the registry public endpoint alongside private connectivity | `bool` | `false` | no | | should\_create\_acr\_private\_endpoint | Whether to create a private endpoint for the Azure Container Registry (default false) | `bool` | `false` | no | | should\_enable\_data\_endpoints | Whether to enable dedicated data endpoints for the registry | `bool` | `true` | no | +| should\_enable\_diagnostic\_settings | Whether to enable diagnostic settings for ACR | `bool` | `false` | no | | should\_enable\_export\_policy | Whether to allow container image export from the registry. 
Requires public\_network\_access\_enabled to be true when enabled | `bool` | `false` | no | | should\_enable\_nat\_gateway | Whether to associate the ACR subnet with a NAT gateway for managed outbound egress | `bool` | `false` | no | | sku | SKU name for the resource | `string` | `"Premium"` | no | diff --git a/src/000-cloud/060-acr/terraform/main.tf b/src/000-cloud/060-acr/terraform/main.tf index d42604fa..4ed61911 100644 --- a/src/000-cloud/060-acr/terraform/main.tf +++ b/src/000-cloud/060-acr/terraform/main.tf @@ -47,4 +47,6 @@ module "container_registry" { sku = var.sku should_enable_data_endpoints = var.should_enable_data_endpoints should_enable_export_policy = var.should_enable_export_policy + log_analytics_workspace_id = var.log_analytics_workspace_id + should_enable_diagnostic_settings = var.should_enable_diagnostic_settings } diff --git a/src/000-cloud/060-acr/terraform/modules/container-registry/README.md b/src/000-cloud/060-acr/terraform/modules/container-registry/README.md index 96da2bb0..4902b8d3 100644 --- a/src/000-cloud/060-acr/terraform/modules/container-registry/README.md +++ b/src/000-cloud/060-acr/terraform/modules/container-registry/README.md @@ -20,6 +20,7 @@ Deploys Azure Container Registry with a private endpoint and private DNS zone. 
| Name | Type | |----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------| | [azurerm_container_registry.acr](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/container_registry) | resource | +| [azurerm_monitor_diagnostic_setting.acr](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/monitor_diagnostic_setting) | resource | | [azurerm_private_dns_a_record.a_record](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/private_dns_a_record) | resource | | [azurerm_private_dns_a_record.data_endpoint](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/private_dns_a_record) | resource | | [azurerm_private_dns_zone.dns_zone](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/private_dns_zone) | resource | @@ -35,11 +36,13 @@ Deploys Azure Container Registry with a private endpoint and private DNS zone. | environment | Environment for all resources in this module: dev, test, or prod | `string` | n/a | yes | | instance | Instance identifier for naming resources: 001, 002, etc | `string` | n/a | yes | | location | Azure region where all resources will be deployed | `string` | n/a | yes | +| log\_analytics\_workspace\_id | The ID of the Log Analytics workspace for diagnostic settings | `string` | n/a | yes | | public\_network\_access\_enabled | Whether to enable the registry public endpoint alongside private connectivity | `bool` | n/a | yes | | resource\_group | Resource group object containing name and id where resources will be deployed | ```object({ name = string })``` | n/a | yes | | resource\_prefix | Prefix for all resources in this module | `string` | n/a | yes | | should\_create\_acr\_private\_endpoint | Should create a private endpoint for the Azure Container Registry. Default is false. 
| `bool` | n/a | yes | | should\_enable\_data\_endpoints | Whether to enable dedicated data endpoints for the registry | `bool` | n/a | yes | +| should\_enable\_diagnostic\_settings | Whether to enable diagnostic settings for the container registry | `bool` | n/a | yes | | should\_enable\_export\_policy | Whether to allow container image export from the registry | `bool` | n/a | yes | | sku | SKU name for the resource | `string` | n/a | yes | | snet\_acr | Subnet for the Azure Container Registry private endpoint. | ```object({ id = string })``` | n/a | yes | diff --git a/src/000-cloud/060-acr/terraform/modules/container-registry/main.tf b/src/000-cloud/060-acr/terraform/modules/container-registry/main.tf index ca60850c..f6e51c13 100644 --- a/src/000-cloud/060-acr/terraform/modules/container-registry/main.tf +++ b/src/000-cloud/060-acr/terraform/modules/container-registry/main.tf @@ -38,6 +38,30 @@ resource "azurerm_container_registry" "acr" { } } +/* + * Diagnostic Settings + */ + +resource "azurerm_monitor_diagnostic_setting" "acr" { + count = var.should_enable_diagnostic_settings ? 1 : 0 + + name = "diag-${azurerm_container_registry.acr.name}" + target_resource_id = azurerm_container_registry.acr.id + log_analytics_workspace_id = var.log_analytics_workspace_id + + enabled_log { + category = "ContainerRegistryRepositoryEvents" + } + + enabled_log { + category = "ContainerRegistryLoginEvents" + } + + enabled_metric { + category = "AllMetrics" + } +} + resource "azurerm_private_endpoint" "pep" { count = var.should_create_acr_private_endpoint ? 
1 : 0 diff --git a/src/000-cloud/060-acr/terraform/modules/container-registry/variables.tf b/src/000-cloud/060-acr/terraform/modules/container-registry/variables.tf index e46df9eb..04ca9e29 100644 --- a/src/000-cloud/060-acr/terraform/modules/container-registry/variables.tf +++ b/src/000-cloud/060-acr/terraform/modules/container-registry/variables.tf @@ -39,3 +39,13 @@ variable "should_enable_export_policy" { type = bool description = "Whether to allow container image export from the registry" } + +variable "log_analytics_workspace_id" { + type = string + description = "The ID of the Log Analytics workspace for diagnostic settings" +} + +variable "should_enable_diagnostic_settings" { + type = bool + description = "Whether to enable diagnostic settings for the container registry" +} diff --git a/src/000-cloud/060-acr/terraform/variables.tf b/src/000-cloud/060-acr/terraform/variables.tf index d553e46a..b4bf1268 100644 --- a/src/000-cloud/060-acr/terraform/variables.tf +++ b/src/000-cloud/060-acr/terraform/variables.tf @@ -62,6 +62,22 @@ variable "should_enable_export_policy" { default = false } +/* + * Diagnostic Settings - Optional + */ + +variable "log_analytics_workspace_id" { + type = string + description = "The ID of the Log Analytics workspace for diagnostic settings. 
If null, diagnostics are not enabled" + default = null +} + +variable "should_enable_diagnostic_settings" { + type = bool + description = "Whether to enable diagnostic settings for ACR" + default = false +} + /* * Outbound Access Controls - Optional */ diff --git a/src/000-cloud/070-kubernetes/terraform/README.md b/src/000-cloud/070-kubernetes/terraform/README.md index 6a0e59a8..5f5b88d6 100644 --- a/src/000-cloud/070-kubernetes/terraform/README.md +++ b/src/000-cloud/070-kubernetes/terraform/README.md @@ -62,7 +62,7 @@ Deploys Azure Kubernetes Service resources | nat\_gateway | NAT gateway object from networking component for managed outbound access | ```object({ id = string name = string })``` | `null` | no | | node\_count | Number of nodes for the agent pool in the AKS cluster. | `number` | `1` | no | | node\_pools | Additional node pools for the AKS cluster. Map key is used as the node pool name. | ```map(object({ node_count = optional(number, null) vm_size = string subnet_address_prefixes = list(string) pod_subnet_address_prefixes = list(string) node_taints = optional(list(string), []) enable_auto_scaling = optional(bool, false) min_count = optional(number, null) max_count = optional(number, null) priority = optional(string, "Regular") zones = optional(list(string), null) eviction_policy = optional(string, "Deallocate") gpu_driver = optional(string, null) }))``` | `{}` | no | -| node\_vm\_size | VM size for the agent pool in the AKS cluster. Default is Standard\_D8ds\_v5. | `string` | `"Standard_D8ds_v5"` | no | +| node\_vm\_size | VM size for the agent pool in the AKS cluster. Default is Standard\_D8ds\_v6. | `string` | `"Standard_D8ds_v6"` | no | | private\_dns\_zone\_id | ID of the private DNS zone for the private cluster. 
Use 'system' to have AKS manage it, 'none' for no private DNS zone, or a resource ID for custom zone | `string` | `null` | no | | private\_endpoint\_subnet\_id | The ID of the subnet where the private endpoint will be created | `string` | `null` | no | | should\_add\_current\_user\_cluster\_admin | Whether to assign the current logged in user Azure Kubernetes Cluster Admin Role permissions on the cluster when 'cluster\_admin\_oid' is not provided. | `bool` | `true` | no | diff --git a/src/000-cloud/070-kubernetes/terraform/modules/aks-cluster/README.md b/src/000-cloud/070-kubernetes/terraform/modules/aks-cluster/README.md index 4ae789cc..5b409d1c 100644 --- a/src/000-cloud/070-kubernetes/terraform/modules/aks-cluster/README.md +++ b/src/000-cloud/070-kubernetes/terraform/modules/aks-cluster/README.md @@ -50,7 +50,7 @@ Supports private clusters with optional private endpoints and DNS zone managemen | min\_count | The minimum number of nodes which should exist in the default node pool. | `number` | n/a | yes | | node\_count | Number of nodes for the agent pool in the AKS cluster. | `number` | n/a | yes | | node\_pools | Additional node pools for the AKS cluster. Map key is used as the node pool name. | ```map(object({ node_count = optional(number, null) vm_size = string vnet_subnet_id = string pod_subnet_id = string node_taints = optional(list(string), []) enable_auto_scaling = optional(bool, false) min_count = optional(number, null) max_count = optional(number, null) priority = optional(string, "Regular") zones = optional(list(string), null) eviction_policy = optional(string) gpu_driver = optional(string, null) }))``` | n/a | yes | -| node\_vm\_size | VM size for the agent pool in the AKS cluster. Default is Standard\_D8ds\_v5. | `string` | n/a | yes | +| node\_vm\_size | VM size for the agent pool in the AKS cluster. Default is Standard\_D8ds\_v6. | `string` | n/a | yes | | private\_dns\_zone\_id | ID of the private DNS zone for the private cluster. 
Use 'system' to have AKS manage it, 'none' for no private DNS zone, or a resource ID for custom zone | `string` | n/a | yes | | private\_endpoint\_subnet\_id | The ID of the subnet where the private endpoint will be created | `string` | n/a | yes | | resource\_group | Resource group object containing name and id where resources will be deployed | ```object({ name = string })``` | n/a | yes | diff --git a/src/000-cloud/070-kubernetes/terraform/modules/aks-cluster/variables.tf b/src/000-cloud/070-kubernetes/terraform/modules/aks-cluster/variables.tf index 0632d09e..6a00705f 100644 --- a/src/000-cloud/070-kubernetes/terraform/modules/aks-cluster/variables.tf +++ b/src/000-cloud/070-kubernetes/terraform/modules/aks-cluster/variables.tf @@ -44,7 +44,7 @@ variable "node_count" { variable "node_vm_size" { type = string - description = "VM size for the agent pool in the AKS cluster. Default is Standard_D8ds_v5." + description = "VM size for the agent pool in the AKS cluster. Default is Standard_D8ds_v6." } variable "dns_prefix" { diff --git a/src/000-cloud/070-kubernetes/terraform/variables.tf b/src/000-cloud/070-kubernetes/terraform/variables.tf index 1db7dfa0..9742d510 100644 --- a/src/000-cloud/070-kubernetes/terraform/variables.tf +++ b/src/000-cloud/070-kubernetes/terraform/variables.tf @@ -40,8 +40,8 @@ variable "node_count" { variable "node_vm_size" { type = string - description = "VM size for the agent pool in the AKS cluster. Default is Standard_D8ds_v5." - default = "Standard_D8ds_v5" + description = "VM size for the agent pool in the AKS cluster. Default is Standard_D8ds_v6." 
+ default = "Standard_D8ds_v6" } variable "enable_auto_scaling" { diff --git a/src/000-cloud/071-aks-host/terraform/README.md b/src/000-cloud/071-aks-host/terraform/README.md index 6a0e59a8..5f5b88d6 100644 --- a/src/000-cloud/071-aks-host/terraform/README.md +++ b/src/000-cloud/071-aks-host/terraform/README.md @@ -62,7 +62,7 @@ Deploys Azure Kubernetes Service resources | nat\_gateway | NAT gateway object from networking component for managed outbound access | ```object({ id = string name = string })``` | `null` | no | | node\_count | Number of nodes for the agent pool in the AKS cluster. | `number` | `1` | no | | node\_pools | Additional node pools for the AKS cluster. Map key is used as the node pool name. | ```map(object({ node_count = optional(number, null) vm_size = string subnet_address_prefixes = list(string) pod_subnet_address_prefixes = list(string) node_taints = optional(list(string), []) enable_auto_scaling = optional(bool, false) min_count = optional(number, null) max_count = optional(number, null) priority = optional(string, "Regular") zones = optional(list(string), null) eviction_policy = optional(string, "Deallocate") gpu_driver = optional(string, null) }))``` | `{}` | no | -| node\_vm\_size | VM size for the agent pool in the AKS cluster. Default is Standard\_D8ds\_v5. | `string` | `"Standard_D8ds_v5"` | no | +| node\_vm\_size | VM size for the agent pool in the AKS cluster. Default is Standard\_D8ds\_v6. | `string` | `"Standard_D8ds_v6"` | no | | private\_dns\_zone\_id | ID of the private DNS zone for the private cluster. 
Use 'system' to have AKS manage it, 'none' for no private DNS zone, or a resource ID for custom zone | `string` | `null` | no | | private\_endpoint\_subnet\_id | The ID of the subnet where the private endpoint will be created | `string` | `null` | no | | should\_add\_current\_user\_cluster\_admin | Whether to assign the current logged in user Azure Kubernetes Cluster Admin Role permissions on the cluster when 'cluster\_admin\_oid' is not provided. | `bool` | `true` | no | diff --git a/src/000-cloud/071-aks-host/terraform/modules/aks-cluster/README.md b/src/000-cloud/071-aks-host/terraform/modules/aks-cluster/README.md index 00f556c7..0e809022 100644 --- a/src/000-cloud/071-aks-host/terraform/modules/aks-cluster/README.md +++ b/src/000-cloud/071-aks-host/terraform/modules/aks-cluster/README.md @@ -47,7 +47,7 @@ Supports private clusters with optional private endpoints and DNS zone managemen | min\_count | The minimum number of nodes which should exist in the default node pool. | `number` | n/a | yes | | node\_count | Number of nodes for the agent pool in the AKS cluster. | `number` | n/a | yes | | node\_pools | Additional node pools for the AKS cluster. Map key is used as the node pool name. | ```map(object({ node_count = optional(number, null) vm_size = string vnet_subnet_id = string pod_subnet_id = string node_taints = optional(list(string), []) enable_auto_scaling = optional(bool, false) min_count = optional(number, null) max_count = optional(number, null) priority = optional(string, "Regular") zones = optional(list(string), null) eviction_policy = optional(string) gpu_driver = optional(string, null) }))``` | n/a | yes | -| node\_vm\_size | VM size for the agent pool in the AKS cluster. Default is Standard\_D8ds\_v5. | `string` | n/a | yes | +| node\_vm\_size | VM size for the agent pool in the AKS cluster. Default is Standard\_D8ds\_v6. | `string` | n/a | yes | | private\_dns\_zone\_id | ID of the private DNS zone for the private cluster. 
Use 'system' to have AKS manage it, 'none' for no private DNS zone, or a resource ID for custom zone | `string` | n/a | yes | | private\_endpoint\_subnet\_id | The ID of the subnet where the private endpoint will be created | `string` | n/a | yes | | resource\_group | Resource group object containing name and id where resources will be deployed | ```object({ name = string })``` | n/a | yes | diff --git a/src/000-cloud/071-aks-host/terraform/modules/aks-cluster/variables.tf b/src/000-cloud/071-aks-host/terraform/modules/aks-cluster/variables.tf index 0632d09e..6a00705f 100644 --- a/src/000-cloud/071-aks-host/terraform/modules/aks-cluster/variables.tf +++ b/src/000-cloud/071-aks-host/terraform/modules/aks-cluster/variables.tf @@ -44,7 +44,7 @@ variable "node_count" { variable "node_vm_size" { type = string - description = "VM size for the agent pool in the AKS cluster. Default is Standard_D8ds_v5." + description = "VM size for the agent pool in the AKS cluster. Default is Standard_D8ds_v6." } variable "dns_prefix" { diff --git a/src/000-cloud/071-aks-host/terraform/variables.tf b/src/000-cloud/071-aks-host/terraform/variables.tf index 1db7dfa0..9742d510 100644 --- a/src/000-cloud/071-aks-host/terraform/variables.tf +++ b/src/000-cloud/071-aks-host/terraform/variables.tf @@ -40,8 +40,8 @@ variable "node_count" { variable "node_vm_size" { type = string - description = "VM size for the agent pool in the AKS cluster. Default is Standard_D8ds_v5." - default = "Standard_D8ds_v5" + description = "VM size for the agent pool in the AKS cluster. Default is Standard_D8ds_v6." 
+ default = "Standard_D8ds_v6" } variable "enable_auto_scaling" { diff --git a/src/000-cloud/072-azure-local-host/terraform/README.md b/src/000-cloud/072-azure-local-host/terraform/README.md index 1a930e6f..cf79b3e1 100644 --- a/src/000-cloud/072-azure-local-host/terraform/README.md +++ b/src/000-cloud/072-azure-local-host/terraform/README.md @@ -52,7 +52,7 @@ Creates Azure Stack HCI (Azure Local) cluster resources. | load\_balancer\_count | Number of load balancers for the cluster (Otherwise, 0). | `number` | `0` | no | | nfs\_csi\_driver\_enabled | Enable NFS CSI driver for persistent storage (Otherwise, false). | `bool` | `false` | no | | node\_pool\_count | Number of worker nodes in the default node pool (Otherwise, 1). | `number` | `1` | no | -| node\_pool\_vm\_size | VM size for worker nodes (Otherwise, 'Standard\_D8s\_v3'). | `string` | `"Standard_D8s_v3"` | no | +| node\_pool\_vm\_size | VM size for worker nodes (Otherwise, 'Standard\_D8s\_v6'). | `string` | `"Standard_D8s_v6"` | no | | pod\_cidr | CIDR range for Kubernetes pods (Otherwise, '10.244.0.0/16'). | `string` | `"10.244.0.0/16"` | no | | smb\_csi\_driver\_enabled | Enable SMB CSI driver for persistent storage (Otherwise, false). | `bool` | `false` | no | | ssh\_public\_key | SSH public key for Linux nodes (Otherwise, generated). | `string` | `null` | no | diff --git a/src/000-cloud/072-azure-local-host/terraform/variables.tf b/src/000-cloud/072-azure-local-host/terraform/variables.tf index 6fdc34b3..449e81bd 100644 --- a/src/000-cloud/072-azure-local-host/terraform/variables.tf +++ b/src/000-cloud/072-azure-local-host/terraform/variables.tf @@ -71,8 +71,8 @@ variable "node_pool_count" { variable "node_pool_vm_size" { type = string - description = "VM size for worker nodes (Otherwise, 'Standard_D8s_v3')." - default = "Standard_D8s_v3" + description = "VM size for worker nodes (Otherwise, 'Standard_D8s_v6')." 
+ default = "Standard_D8s_v6" } variable "kubernetes_version" { diff --git a/src/000-cloud/073-vm-host/terraform/README.md b/src/000-cloud/073-vm-host/terraform/README.md index 11b6f9b8..736f2c45 100644 --- a/src/000-cloud/073-vm-host/terraform/README.md +++ b/src/000-cloud/073-vm-host/terraform/README.md @@ -60,7 +60,7 @@ Deploys one or more Linux VMs for Arc-connected K3s cluster | vm\_eviction\_policy | Eviction policy for Spot VMs: Deallocate (VM stopped, disk retained, can restart) or Delete (VM and disks removed, no storage charges). Only used when vm\_priority is Spot | `string` | `"Delete"` | no | | vm\_max\_bid\_price | Maximum price per hour in USD for Spot VM. Set to -1 (default) for no price-based eviction - VM will not be evicted for price reasons. Custom values support up to 5 decimal places (e.g., 0.98765). Only used when vm\_priority is Spot | `number` | `-1` | no | | vm\_priority | VM priority: Regular (production, guaranteed capacity) or Spot (cost-optimized, can be evicted with 30s notice). Spot VMs offer up to 90% cost savings | `string` | `"Regular"` | no | -| vm\_sku\_size | Size of the VM | `string` | `"Standard_D8s_v3"` | no | +| vm\_sku\_size | Size of the VM | `string` | `"Standard_D8s_v6"` | no | | vm\_user\_principals | Map of Azure AD principals for Virtual Machine User Login role (standard access). Keys are descriptive identifiers (e.g., `user@company.com`), values are principal object IDs. 
| `map(string)` | `{}` | no | | vm\_username | Username for the VM admin account | `string` | `null` | no | diff --git a/src/000-cloud/073-vm-host/terraform/tests/setup/main.tf b/src/000-cloud/073-vm-host/terraform/tests/setup/main.tf index dffc8ad1..0c585e7e 100644 --- a/src/000-cloud/073-vm-host/terraform/tests/setup/main.tf +++ b/src/000-cloud/073-vm-host/terraform/tests/setup/main.tf @@ -49,7 +49,7 @@ output "arc_onboarding_user_assigned_identity" { output "vm_expected_values" { value = { - default_vm_size = "Standard_D8s_v3" + default_vm_size = "Standard_D8s_v6" default_admin_username = local.resource_prefix os_disk_type = "Standard_LRS" vm_publisher = "Canonical" diff --git a/src/000-cloud/073-vm-host/terraform/variables.tf b/src/000-cloud/073-vm-host/terraform/variables.tf index 498a18b7..bbe5a51d 100644 --- a/src/000-cloud/073-vm-host/terraform/variables.tf +++ b/src/000-cloud/073-vm-host/terraform/variables.tf @@ -11,7 +11,7 @@ variable "host_machine_count" { variable "vm_sku_size" { type = string description = "Size of the VM" - default = "Standard_D8s_v3" + default = "Standard_D8s_v6" } variable "vm_username" { diff --git a/src/100-edge/100-cncf-cluster/README.md b/src/100-edge/100-cncf-cluster/README.md index ec4f096a..f542840d 100644 --- a/src/100-edge/100-cncf-cluster/README.md +++ b/src/100-edge/100-cncf-cluster/README.md @@ -112,7 +112,7 @@ The script performs the following steps: - Install K3s, Azure CLI, kubectl - Login to Azure CLI (Service Principal or Managed Identity) - Connect to Azure Arc and enable features: `custom-locations`, `oidc-issuer`, `workload-identity`, `cluster-connect` and optionally `auto-upgrade` -- Optionally add the provided Azure AD user as a cluster admin to enable `kubectl` access via `connectedk8s proxy` +- Optionally add the provided Entra ID user or group as a cluster admin and assign Azure Arc RBAC roles (`Azure Arc Kubernetes Viewer`, `Azure Arc Enabled Kubernetes Cluster User Role`) to enable `az connectedk8s proxy` 
- Configure OIDC issuer url for Azure Arc within K3s - Increase limits for Azure container storage within the host machine - In non production environments will install k9s and configure `.bashrc` with auto complete and aliases for development @@ -142,6 +142,25 @@ ENVIRONMENT=dev \ ./k3s-device-setup.sh ``` +## Cluster Admin Access + +By default, the deploying user receives cluster-admin permissions. To grant access to an entire Entra ID group (enabling `az connectedk8s proxy` for all group members), set the following in your Terraform configuration (e.g. `terraform.tfvars`): + +```hcl +cluster_admin_group_oid = "" +``` + +This creates: + +- A Kubernetes `ClusterRoleBinding` with `--group` for in-cluster access +- Azure RBAC role assignments (`Azure Arc Kubernetes Viewer` and `Azure Arc Enabled Kubernetes Cluster User Role`) on the Arc connected cluster resource for `az connectedk8s proxy` access + +Group members can then connect via: + +```sh +az connectedk8s proxy -n -g +``` + --- diff --git a/src/100-edge/100-cncf-cluster/terraform/README.md b/src/100-edge/100-cncf-cluster/terraform/README.md index 76cc778a..69946038 100644 --- a/src/100-edge/100-cncf-cluster/terraform/README.md +++ b/src/100-edge/100-cncf-cluster/terraform/README.md @@ -25,14 +25,18 @@ install extensions for cluster connect and custom locations. 
## Resources -| Name | Type | -|----------------------------------------------------------------------------------------------------------------------------------------------------|-------------| -| [terraform_data.defer_azuread_user](https://registry.terraform.io/providers/hashicorp/terraform/latest/docs/resources/data) | resource | -| [terraform_data.defer_custom_locations](https://registry.terraform.io/providers/hashicorp/terraform/latest/docs/resources/data) | resource | -| [azapi_resource.arc_connected_cluster](https://registry.terraform.io/providers/Azure/azapi/latest/docs/data-sources/resource) | data source | -| [azuread_service_principal.custom_locations](https://registry.terraform.io/providers/hashicorp/azuread/latest/docs/data-sources/service_principal) | data source | -| [azuread_user.current](https://registry.terraform.io/providers/hashicorp/azuread/latest/docs/data-sources/user) | data source | -| [azurerm_client_config.current](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/data-sources/client_config) | data source | +| Name | Type | +|--------------------------------------------------------------------------------------------------------------------------------------------------------|-------------| +| [azurerm_role_assignment.arc_cluster_user_group](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/role_assignment) | resource | +| [azurerm_role_assignment.arc_cluster_user_user](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/role_assignment) | resource | +| [azurerm_role_assignment.arc_kubernetes_viewer_group](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/role_assignment) | resource | +| [azurerm_role_assignment.arc_kubernetes_viewer_user](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/role_assignment) | resource | +| 
[terraform_data.defer_azuread_user](https://registry.terraform.io/providers/hashicorp/terraform/latest/docs/resources/data) | resource | +| [terraform_data.defer_custom_locations](https://registry.terraform.io/providers/hashicorp/terraform/latest/docs/resources/data) | resource | +| [azapi_resource.arc_connected_cluster](https://registry.terraform.io/providers/Azure/azapi/latest/docs/data-sources/resource) | data source | +| [azuread_service_principal.custom_locations](https://registry.terraform.io/providers/hashicorp/azuread/latest/docs/data-sources/service_principal) | data source | +| [azuread_user.current](https://registry.terraform.io/providers/hashicorp/azuread/latest/docs/data-sources/user) | data source | +| [azurerm_client_config.current](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/data-sources/client_config) | data source | ## Modules @@ -48,43 +52,45 @@ install extensions for cluster connect and custom locations. ## Inputs -| Name | Description | Type | Default | Required | -|-------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------|---------|:--------:| -| environment | Environment for all resources in this module: dev, test, or prod | `string` | n/a | yes | -| resource\_group | Resource group object containing name and id where resources will be deployed | ```object({ name = string id = optional(string) })``` | n/a | yes | -| resource\_prefix | Prefix for all resources in this module | `string` | n/a | yes | -| should\_get\_custom\_locations\_oid | Whether to get Custom Locations Object ID using Terraform's azuread provider. 
(Otherwise, provided by 'custom\_locations\_oid' or `az connectedk8s enable-features` for custom-locations on cluster setup if not provided.) | `bool` | n/a | yes | -| arc\_onboarding\_identity | The User Assigned Managed Identity that will be used for onboarding the cluster to Arc | ```object({ id = string name = string principal_id = string client_id = string tenant_id = string })``` | `null` | no | -| arc\_onboarding\_principal\_ids | The Principal IDs for the identity or service principal that will be used for onboarding the cluster to Arc | `list(string)` | `null` | no | -| arc\_onboarding\_sp | n/a | ```object({ client_id = string object_id = string client_secret = string })``` | `null` | no | -| cluster\_admin\_oid | The Object ID that will be given cluster-admin permissions with the new cluster. (Otherwise, current logged in user Object ID if 'should\_add\_current\_user\_cluster\_admin=true') | `string` | `null` | no | -| cluster\_admin\_upn | The User Principal Name that will be given cluster-admin permissions with the new cluster. (Otherwise, current logged in user UPN if 'should\_add\_current\_user\_cluster\_admin=true') | `string` | `null` | no | -| cluster\_node\_machine | n/a | ```list(object({ id = string location = string }))``` | `null` | no | -| cluster\_node\_machine\_count | Number of cluster node machines referenced by cluster\_node\_machine when deploying scripts | `number` | `null` | no | -| cluster\_server\_host\_machine\_username | Username used for the host machines that will be given kube-config settings on setup. (Otherwise, 'resource\_prefix' if it exists as a user) | `string` | `null` | no | -| cluster\_server\_ip | The IP Address for the cluster server that the cluster nodes will use to connect. 
| `string` | `null` | no | -| cluster\_server\_machine | n/a | ```object({ id = string location = string })``` | `null` | no | -| cluster\_server\_token | The token that will be given to the server for the cluster or used by the agent nodes to connect them to the cluster. (ex. ) | `string` | `null` | no | -| custom\_locations\_oid | The object id of the Custom Locations Entra ID application for your tenant. If none is provided, the script will attempt to retrieve this requiring 'Application.Read.All' or 'Directory.Read.All' permissions. ```sh az ad sp show --id bc313c14-388c-4e7d-a58e-70017303ee3b --query id -o tsv``` | `string` | `null` | no | -| http\_proxy | HTTP proxy URL | `string` | `null` | no | -| instance | Instance identifier for naming resources: 001, 002, etc | `string` | `"001"` | no | -| key\_vault | The Key Vault object containing id, name, and vault\_uri properties | ```object({ id = string name = string vault_uri = string })``` | `null` | no | -| key\_vault\_script\_secret\_prefix | Optional prefix for the Key Vault script secret name when should\_use\_script\_from\_secrets\_for\_deploy is true. | `string` | `""` | no | -| private\_key\_pem | Private key for onboarding | `string` | `null` | no | -| script\_output\_filepath | The location of where to write out the script file. (Otherwise, '{path.root}/out') | `string` | `null` | no | -| should\_add\_current\_user\_cluster\_admin | Gives the current logged in user cluster-admin permissions with the new cluster. | `bool` | `true` | no | -| should\_assign\_roles | Whether to assign Key Vault roles to identity or service principal. | `bool` | `true` | no | -| should\_deploy\_arc\_agents | Should deploy arc agents using helm charts instead of Azure CLI. | `bool` | `false` | no | -| should\_deploy\_arc\_machines | Should deploy to Arc-connected servers instead of Azure VMs. When true, machine\_id refers to an Arc-connected server ID. 
| `bool` | `false` | no | -| should\_deploy\_script\_to\_vm | Should deploy the scripts to the provided Azure VMs. | `bool` | `true` | no | -| should\_enable\_arc\_auto\_upgrade | Enable or disable auto-upgrades of Arc agents. (Otherwise, 'false' for 'env=prod' else 'true' for all other envs). | `bool` | `null` | no | -| should\_generate\_cluster\_server\_token | Should generate token used by the server. ('cluster\_server\_token' must be null if this is 'true') | `bool` | `false` | no | -| should\_output\_cluster\_node\_script | Whether to write out the script for setting up cluster node host machines. (Needed for multi-node clusters) | `bool` | `false` | no | -| should\_output\_cluster\_server\_script | Whether to write out the script for setting up the cluster server host machine. | `bool` | `false` | no | -| should\_skip\_az\_cli\_login | Should skip login process with Azure CLI on the server. (Skipping assumes 'az login' has been completed prior to script execution) | `bool` | `false` | no | -| should\_skip\_installing\_az\_cli | Should skip downloading and installing Azure CLI on the server. (Skipping assumes the server will already have the Azure CLI) | `bool` | `false` | no | -| should\_upload\_to\_key\_vault | Whether to upload the scripts to Key Vault as secrets. 
| `bool` | `true` | no | -| should\_use\_script\_from\_secrets\_for\_deploy | Whether to use the deploy-script-secrets.sh script to fetch and execute deployment scripts from Key Vault | `bool` | `true` | no | +| Name | Description | Type | Default | Required | +|-------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------|----------|:--------:| +| environment | Environment for all resources in this module: dev, test, or prod | `string` | n/a | yes | +| resource\_group | Resource group object containing name and id where resources will be deployed | ```object({ name = string id = optional(string) })``` | n/a | yes | +| resource\_prefix | Prefix for all resources in this module | `string` | n/a | yes | +| should\_get\_custom\_locations\_oid | Whether to get Custom Locations Object ID using Terraform's azuread provider. (Otherwise, provided by 'custom\_locations\_oid' or `az connectedk8s enable-features` for custom-locations on cluster setup if not provided.) 
| `bool` | n/a | yes | +| arc\_onboarding\_identity | The User Assigned Managed Identity that will be used for onboarding the cluster to Arc | ```object({ id = string name = string principal_id = string client_id = string tenant_id = string })``` | `null` | no | +| arc\_onboarding\_principal\_ids | The Principal IDs for the identity or service principal that will be used for onboarding the cluster to Arc | `list(string)` | `null` | no | +| arc\_onboarding\_sp | n/a | ```object({ client_id = string object_id = string client_secret = string })``` | `null` | no | +| cluster\_admin\_group\_oid | The Entra ID group Object ID that will be given cluster-admin permissions and Azure Arc RBAC access for 'az connectedk8s proxy' | `string` | `null` | no | +| cluster\_admin\_oid | The Object ID that will be given cluster-admin permissions with the new cluster. (Otherwise, current logged in user Object ID if 'should\_add\_current\_user\_cluster\_admin=true') | `string` | `null` | no | +| cluster\_admin\_oid\_type | The principal type of cluster\_admin\_oid for Azure RBAC assignments. Ignored when using current user (defaults to 'User') | `string` | `"User"` | no | +| cluster\_admin\_upn | The User Principal Name that will be given cluster-admin permissions with the new cluster. (Otherwise, current logged in user UPN if 'should\_add\_current\_user\_cluster\_admin=true') | `string` | `null` | no | +| cluster\_node\_machine | n/a | ```list(object({ id = string location = string }))``` | `null` | no | +| cluster\_node\_machine\_count | Number of cluster node machines referenced by cluster\_node\_machine when deploying scripts | `number` | `null` | no | +| cluster\_server\_host\_machine\_username | Username used for the host machines that will be given kube-config settings on setup. (Otherwise, 'resource\_prefix' if it exists as a user) | `string` | `null` | no | +| cluster\_server\_ip | The IP Address for the cluster server that the cluster nodes will use to connect. 
| `string` | `null` | no | +| cluster\_server\_machine | n/a | ```object({ id = string location = string })``` | `null` | no | +| cluster\_server\_token | The token that will be given to the server for the cluster or used by the agent nodes to connect them to the cluster. (ex. ) | `string` | `null` | no | +| custom\_locations\_oid | The object id of the Custom Locations Entra ID application for your tenant. If none is provided, the script will attempt to retrieve this requiring 'Application.Read.All' or 'Directory.Read.All' permissions. ```sh az ad sp show --id bc313c14-388c-4e7d-a58e-70017303ee3b --query id -o tsv``` | `string` | `null` | no | +| http\_proxy | HTTP proxy URL | `string` | `null` | no | +| instance | Instance identifier for naming resources: 001, 002, etc | `string` | `"001"` | no | +| key\_vault | The Key Vault object containing id, name, and vault\_uri properties | ```object({ id = string name = string vault_uri = string })``` | `null` | no | +| key\_vault\_script\_secret\_prefix | Optional prefix for the Key Vault script secret name when should\_use\_script\_from\_secrets\_for\_deploy is true. | `string` | `""` | no | +| private\_key\_pem | Private key for onboarding | `string` | `null` | no | +| script\_output\_filepath | The location of where to write out the script file. (Otherwise, '{path.root}/out') | `string` | `null` | no | +| should\_add\_current\_user\_cluster\_admin | Gives the current logged in user cluster-admin permissions with the new cluster. | `bool` | `true` | no | +| should\_assign\_roles | Whether to assign Key Vault roles to identity or service principal. | `bool` | `true` | no | +| should\_deploy\_arc\_agents | Should deploy arc agents using helm charts instead of Azure CLI. | `bool` | `false` | no | +| should\_deploy\_arc\_machines | Should deploy to Arc-connected servers instead of Azure VMs. When true, machine\_id refers to an Arc-connected server ID. 
| `bool` | `false` | no | +| should\_deploy\_script\_to\_vm | Should deploy the scripts to the provided Azure VMs. | `bool` | `true` | no | +| should\_enable\_arc\_auto\_upgrade | Enable or disable auto-upgrades of Arc agents. (Otherwise, 'false' for 'env=prod' else 'true' for all other envs). | `bool` | `null` | no | +| should\_generate\_cluster\_server\_token | Should generate token used by the server. ('cluster\_server\_token' must be null if this is 'true') | `bool` | `false` | no | +| should\_output\_cluster\_node\_script | Whether to write out the script for setting up cluster node host machines. (Needed for multi-node clusters) | `bool` | `false` | no | +| should\_output\_cluster\_server\_script | Whether to write out the script for setting up the cluster server host machine. | `bool` | `false` | no | +| should\_skip\_az\_cli\_login | Should skip login process with Azure CLI on the server. (Skipping assumes 'az login' has been completed prior to script execution) | `bool` | `false` | no | +| should\_skip\_installing\_az\_cli | Should skip downloading and installing Azure CLI on the server. (Skipping assumes the server will already have the Azure CLI) | `bool` | `false` | no | +| should\_upload\_to\_key\_vault | Whether to upload the scripts to Key Vault as secrets. 
| `bool` | `true` | no | +| should\_use\_script\_from\_secrets\_for\_deploy | Whether to use the deploy-script-secrets.sh script to fetch and execute deployment scripts from Key Vault | `bool` | `true` | no | ## Outputs diff --git a/src/100-edge/100-cncf-cluster/terraform/main.tf b/src/100-edge/100-cncf-cluster/terraform/main.tf index 3c72e611..594b26cc 100644 --- a/src/100-edge/100-cncf-cluster/terraform/main.tf +++ b/src/100-edge/100-cncf-cluster/terraform/main.tf @@ -62,6 +62,55 @@ module "role_assignments" { arc_onboarding_principal_ids = local.arc_onboarding_principal_ids } +/* + * Arc Connected Cluster RBAC - enables 'az connectedk8s proxy' access + */ + +locals { + arc_cluster_id = try(data.azapi_resource.arc_connected_cluster[0].id, null) + cluster_admin_oid = try(coalesce(var.cluster_admin_oid, local.current_user_oid), null) + has_arc_cluster = var.should_deploy_script_to_vm && !var.should_deploy_arc_agents + has_cluster_admin = var.cluster_admin_oid != null || var.should_add_current_user_cluster_admin + should_assign_arc_rbac_user = var.should_assign_roles && local.has_arc_cluster && local.has_cluster_admin + should_assign_arc_rbac_group = var.should_assign_roles && local.has_arc_cluster && var.cluster_admin_group_oid != null +} + +resource "azurerm_role_assignment" "arc_kubernetes_viewer_user" { + count = local.should_assign_arc_rbac_user ? 1 : 0 + + scope = local.arc_cluster_id + role_definition_name = "Azure Arc Kubernetes Viewer" + principal_id = local.cluster_admin_oid + principal_type = var.cluster_admin_oid_type +} + +resource "azurerm_role_assignment" "arc_cluster_user_user" { + count = local.should_assign_arc_rbac_user ? 1 : 0 + + scope = local.arc_cluster_id + role_definition_name = "Azure Arc Enabled Kubernetes Cluster User Role" + principal_id = local.cluster_admin_oid + principal_type = var.cluster_admin_oid_type +} + +resource "azurerm_role_assignment" "arc_kubernetes_viewer_group" { + count = local.should_assign_arc_rbac_group ? 
1 : 0 + + scope = local.arc_cluster_id + role_definition_name = "Azure Arc Kubernetes Viewer" + principal_id = var.cluster_admin_group_oid + principal_type = "Group" +} + +resource "azurerm_role_assignment" "arc_cluster_user_group" { + count = local.should_assign_arc_rbac_group ? 1 : 0 + + scope = local.arc_cluster_id + role_definition_name = "Azure Arc Enabled Kubernetes Cluster User Role" + principal_id = var.cluster_admin_group_oid + principal_type = "Group" +} + /* * Ubuntu K3s Cluster Setup */ @@ -78,6 +127,7 @@ module "ubuntu_k3s" { arc_tenant_id = data.azurerm_client_config.current.tenant_id cluster_admin_oid = try(coalesce(var.cluster_admin_oid, local.current_user_oid), null) cluster_admin_upn = try(coalesce(var.cluster_admin_upn, local.current_user_upn), null) + cluster_admin_group_oid = var.cluster_admin_group_oid custom_locations_oid = local.custom_locations_oid should_enable_arc_auto_upgrade = var.should_enable_arc_auto_upgrade environment = var.environment diff --git a/src/100-edge/100-cncf-cluster/terraform/modules/role-assignments/README.md b/src/100-edge/100-cncf-cluster/terraform/modules/role-assignments/README.md index 27e01074..36ef7a78 100644 --- a/src/100-edge/100-cncf-cluster/terraform/modules/role-assignments/README.md +++ b/src/100-edge/100-cncf-cluster/terraform/modules/role-assignments/README.md @@ -1,7 +1,7 @@ -# Key Vault Role Assignment +# Role Assignments -Assigns Azure RBAC roles for Key Vault access +Assigns Azure RBAC roles for Arc onboarding and Key Vault access. 
## Requirements diff --git a/src/100-edge/100-cncf-cluster/terraform/modules/role-assignments/main.tf b/src/100-edge/100-cncf-cluster/terraform/modules/role-assignments/main.tf index 788cd597..1466a269 100644 --- a/src/100-edge/100-cncf-cluster/terraform/modules/role-assignments/main.tf +++ b/src/100-edge/100-cncf-cluster/terraform/modules/role-assignments/main.tf @@ -1,11 +1,11 @@ /** - * # Key Vault Role Assignment + * # Role Assignments * - * Assigns Azure RBAC roles for Key Vault access + * Assigns Azure RBAC roles for Arc onboarding and Key Vault access. */ /* - * Role Assignments + * Role Assignments - Arc Onboarding */ resource "azurerm_role_assignment" "connected_machine_onboarding" { diff --git a/src/100-edge/100-cncf-cluster/terraform/modules/ubuntu-k3s/README.md b/src/100-edge/100-cncf-cluster/terraform/modules/ubuntu-k3s/README.md index 22248261..bf394fd1 100644 --- a/src/100-edge/100-cncf-cluster/terraform/modules/ubuntu-k3s/README.md +++ b/src/100-edge/100-cncf-cluster/terraform/modules/ubuntu-k3s/README.md @@ -39,6 +39,7 @@ along with installing extensions for cluster connect and custom locations. | arc\_onboarding\_sp | n/a | ```object({ client_id = string object_id = string client_secret = string })``` | n/a | yes | | arc\_resource\_name | The name of the new Azure Arc resource. | `string` | n/a | yes | | arc\_tenant\_id | The ID of the Tenant for the new Azure Arc resource. | `string` | n/a | yes | +| cluster\_admin\_group\_oid | The Entra ID group Object ID that will be given cluster-admin permissions and Azure Arc RBAC access for 'az connectedk8s proxy' | `string` | n/a | yes | | cluster\_admin\_oid | The Object ID that will be given cluster-admin permissions with the new cluster. (Otherwise, current logged in user Object ID if 'should\_add\_current\_user\_cluster\_admin=true') | `string` | n/a | yes | | cluster\_admin\_upn | The User Principal Name that will be given cluster-admin permissions with the new cluster. 
(Otherwise, current logged in user UPN if 'should\_add\_current\_user\_cluster\_admin=true') | `string` | n/a | yes | | cluster\_server\_host\_machine\_username | Username used for the host machines that will be given kube-config settings on setup. (Otherwise, 'resource\_prefix' if it exists as a user) | `string` | n/a | yes | diff --git a/src/100-edge/100-cncf-cluster/terraform/modules/ubuntu-k3s/main.tf b/src/100-edge/100-cncf-cluster/terraform/modules/ubuntu-k3s/main.tf index 0b7c0f3b..908e0956 100644 --- a/src/100-edge/100-cncf-cluster/terraform/modules/ubuntu-k3s/main.tf +++ b/src/100-edge/100-cncf-cluster/terraform/modules/ubuntu-k3s/main.tf @@ -41,20 +41,22 @@ locals { # Server specific environment variables for the k3s node setup. server_env_var = { - CLUSTER_ADMIN_OID = coalesce(var.cluster_admin_oid, "$${CLUSTER_ADMIN_OID}") - CLUSTER_ADMIN_UPN = coalesce(var.cluster_admin_upn, "$${CLUSTER_ADMIN_UPN}") - CLIENT_ID = "$${CLIENT_ID}" - K3S_NODE_TYPE = "server" - SKIP_ARC_CONNECT = "$${SKIP_ARC_CONNECT}" + CLUSTER_ADMIN_OID = coalesce(var.cluster_admin_oid, "$${CLUSTER_ADMIN_OID}") + CLUSTER_ADMIN_UPN = coalesce(var.cluster_admin_upn, "$${CLUSTER_ADMIN_UPN}") + CLUSTER_ADMIN_GROUP_OID = coalesce(var.cluster_admin_group_oid, "$${CLUSTER_ADMIN_GROUP_OID}") + CLIENT_ID = "$${CLIENT_ID}" + K3S_NODE_TYPE = "server" + SKIP_ARC_CONNECT = "$${SKIP_ARC_CONNECT}" } # Agent specific environment variables for the k3s node setup. node_env_var = { - CLUSTER_ADMIN_OID = "$${CLUSTER_ADMIN_OID}" - CLUSTER_ADMIN_UPN = "$${CLUSTER_ADMIN_UPN}" - CLIENT_ID = "$${CLIENT_ID}" - K3S_NODE_TYPE = "agent" - SKIP_ARC_CONNECT = "true" + CLUSTER_ADMIN_OID = "$${CLUSTER_ADMIN_OID}" + CLUSTER_ADMIN_UPN = "$${CLUSTER_ADMIN_UPN}" + CLUSTER_ADMIN_GROUP_OID = "$${CLUSTER_ADMIN_GROUP_OID}" + CLIENT_ID = "$${CLIENT_ID}" + K3S_NODE_TYPE = "agent" + SKIP_ARC_CONNECT = "true" } # Read in script file and remove any carriage returns then split on separator in file '###\n' for parameters. 
diff --git a/src/100-edge/100-cncf-cluster/terraform/modules/ubuntu-k3s/variables.tf b/src/100-edge/100-cncf-cluster/terraform/modules/ubuntu-k3s/variables.tf index 16416643..30afa6e7 100644 --- a/src/100-edge/100-cncf-cluster/terraform/modules/ubuntu-k3s/variables.tf +++ b/src/100-edge/100-cncf-cluster/terraform/modules/ubuntu-k3s/variables.tf @@ -80,6 +80,11 @@ variable "cluster_admin_upn" { description = "The User Principal Name that will be given cluster-admin permissions with the new cluster. (Otherwise, current logged in user UPN if 'should_add_current_user_cluster_admin=true')" } +variable "cluster_admin_group_oid" { + type = string + description = "The Entra ID group Object ID that will be given cluster-admin permissions and Azure Arc RBAC access for 'az connectedk8s proxy'" +} + variable "cluster_server_ip" { type = string description = "The IP Address for the cluster server that the cluster nodes will use to connect." diff --git a/src/100-edge/100-cncf-cluster/terraform/variables.tf b/src/100-edge/100-cncf-cluster/terraform/variables.tf index dce33242..f53ceafa 100644 --- a/src/100-edge/100-cncf-cluster/terraform/variables.tf +++ b/src/100-edge/100-cncf-cluster/terraform/variables.tf @@ -117,12 +117,28 @@ variable "cluster_admin_oid" { default = null } +variable "cluster_admin_oid_type" { + type = string + description = "The principal type of cluster_admin_oid for Azure RBAC assignments. Ignored when using current user (defaults to 'User')" + default = "User" + validation { + condition = contains(["User", "Group", "ServicePrincipal"], var.cluster_admin_oid_type) + error_message = "Must be one of: User, Group, ServicePrincipal" + } +} + variable "cluster_admin_upn" { type = string description = "The User Principal Name that will be given cluster-admin permissions with the new cluster. 
(Otherwise, current logged in user UPN if 'should_add_current_user_cluster_admin=true')" default = null } +variable "cluster_admin_group_oid" { + type = string + description = "The Entra ID group Object ID that will be given cluster-admin permissions and Azure Arc RBAC access for 'az connectedk8s proxy'" + default = null +} + variable "cluster_server_ip" { type = string description = "The IP Address for the cluster server that the cluster nodes will use to connect." diff --git a/src/100-edge/110-iot-ops/scripts/deploy-cluster-admin-oid.sh b/src/100-edge/110-iot-ops/scripts/deploy-cluster-admin-oid.sh deleted file mode 100755 index c1b617a1..00000000 --- a/src/100-edge/110-iot-ops/scripts/deploy-cluster-admin-oid.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env bash - -set -e - -# Refer to: https://learn.microsoft.com/azure/azure-arc/kubernetes/cluster-connect?tabs=azure-cli - -# ARC_RESOURCE_GROUP_NAME= -# ARC_RESOURCE_NAME= - -if [[ -n $SHOULD_USE_CURRENT_USER ]]; then - DEPLOY_ADMIN_OID=$(az ad signed-in-user show --query id -o tsv) - echo "DEPLOY_ADMIN_OID=$DEPLOY_ADMIN_OID" - echo "" -fi - -# From a place that has role assignment privs: - -if [[ -n $SHOULD_ASSIGN_ROLES ]]; then - CONNECTED_CLUSTER_ID=$(az resource show -g "$ARC_RESOURCE_GROUP_NAME" -n "$ARC_RESOURCE_NAME" --resource-type "microsoft.kubernetes/connectedclusters" --query id --output tsv) - az role assignment create --role "Azure Arc Kubernetes Viewer" --assignee "$DEPLOY_ADMIN_OID" --scope "$CONNECTED_CLUSTER_ID" - az role assignment create --role "Azure Arc Enabled Kubernetes Cluster User Role" --assignee "$DEPLOY_ADMIN_OID" --scope "$CONNECTED_CLUSTER_ID" -fi - -echo "Adding $DEPLOY_ADMIN_OID as deployment admin" - -kubectl create clusterrolebinding "$(echo "$DEPLOY_ADMIN_OID" | cut -c1-7)-deploy-binding" --clusterrole cluster-admin --user="$DEPLOY_ADMIN_OID" --dry-run=client -o yaml | kubectl apply -f - - -echo "" -echo "az connectedk8s proxy -n $ARC_RESOURCE_NAME -g $ARC_RESOURCE_GROUP_NAME" 
diff --git a/src/100-edge/111-assets/terraform/main.tf b/src/100-edge/111-assets/terraform/main.tf index 52c319f2..30bc4136 100644 --- a/src/100-edge/111-assets/terraform/main.tf +++ b/src/100-edge/111-assets/terraform/main.tf @@ -478,9 +478,10 @@ resource "azapi_resource" "asset_endpoint_profile" { resource "azapi_resource" "asset" { for_each = local.processed_assets - type = "Microsoft.DeviceRegistry/assets@2026-04-01" - name = each.value.name - parent_id = var.resource_group.id + type = "Microsoft.DeviceRegistry/assets@2026-04-01" + name = each.value.name + parent_id = var.resource_group.id + schema_validation_enabled = false body = { location = var.location diff --git a/src/500-application/505-akri-rest-http-connector/services/sensor-simulator/test_models.py b/src/500-application/505-akri-rest-http-connector/services/sensor-simulator/test_models.py index c5a39d93..4f7f2dad 100644 --- a/src/500-application/505-akri-rest-http-connector/services/sensor-simulator/test_models.py +++ b/src/500-application/505-akri-rest-http-connector/services/sensor-simulator/test_models.py @@ -1,8 +1,6 @@ """Unit tests for Pydantic models in the sensor simulator.""" import pytest -from pydantic import ValidationError - from models import ( DataType, FieldConfig, @@ -10,6 +8,7 @@ FieldValueResponse, SimulatorMetadata, ) +from pydantic import ValidationError class TestFieldConfigValidation: diff --git a/src/500-application/506-ros2-connector/services/ros2-connector/src/message_types/test_base_handler.py b/src/500-application/506-ros2-connector/services/ros2-connector/src/message_types/test_base_handler.py index 2208f2f7..343a7e0f 100644 --- a/src/500-application/506-ros2-connector/services/ros2-connector/src/message_types/test_base_handler.py +++ b/src/500-application/506-ros2-connector/services/ros2-connector/src/message_types/test_base_handler.py @@ -5,7 +5,6 @@ from unittest.mock import MagicMock import pytest - from base_handler import BaseMessageHandler diff --git 
a/src/500-application/509-sse-connector/services/sse-server/test_events_simulator.py b/src/500-application/509-sse-connector/services/sse-server/test_events_simulator.py index ece89367..90ee1d0f 100644 --- a/src/500-application/509-sse-connector/services/sse-server/test_events_simulator.py +++ b/src/500-application/509-sse-connector/services/sse-server/test_events_simulator.py @@ -1,7 +1,6 @@ """Unit tests for AnalyticsEventSimulator event generation methods.""" import pytest - from events_simulator import AnalyticsEventSimulator diff --git a/src/500-application/510-onvif-connector/services/onvif-camera-simulator/test_onvif_camera.py b/src/500-application/510-onvif-connector/services/onvif-camera-simulator/test_onvif_camera.py index 5be4aee7..5064163c 100644 --- a/src/500-application/510-onvif-connector/services/onvif-camera-simulator/test_onvif_camera.py +++ b/src/500-application/510-onvif-connector/services/onvif-camera-simulator/test_onvif_camera.py @@ -1,10 +1,8 @@ """Unit tests for ONVIFCameraSimulator pure methods.""" -import os import pytest from lxml import etree - from onvif_camera import ONVIFCameraSimulator diff --git a/src/501-ci-cd/scripts/build-leak-detection-images.sh b/src/501-ci-cd/scripts/build-leak-detection-images.sh new file mode 100755 index 00000000..a40bd0ee --- /dev/null +++ b/src/501-ci-cd/scripts/build-leak-detection-images.sh @@ -0,0 +1,148 @@ +#!/bin/bash +set -euo pipefail + +########################################################################### +# Build and Push Leak Detection Images to ACR +########################################################################### +# +# Builds and pushes the complete set of container images required by the +# leak-detection vision pipeline scenario. 
Each image corresponds to one +# application component deployed at the edge: +# +# - ai-edge-inference ONNX-based vision inference service +# - sse-server Server-Sent Events connector +# - media-capture-service Video capture and storage service +# +# All three images are built in a single invocation to ensure version +# consistency across the pipeline components. +# +# Usage: +# ./build-leak-detection-images.sh --acr-name --resource-group \ +# [--tag ] +# +########################################################################### + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +readonly SCRIPT_DIR +REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)" +readonly REPO_ROOT + +usage() { + cat <&2 + usage 1 + ;; + esac +done + +if [[ -z "${ACR_NAME}" || -z "${RESOURCE_GROUP}" ]]; then + echo "ERROR: --acr-name and --resource-group are required" >&2 + usage 1 +fi + +readonly ACR_LOGIN="${ACR_NAME}.azurecr.io" + +# Leak-detection pipeline component images: name|dockerfile|context +# All components are built together to maintain version consistency. +readonly -a COMPONENTS=( + "ai-edge-inference|\ +src/500-application/507-ai-inference/\ +services/ai-edge-inference/Dockerfile.acr|\ +src/500-application/507-ai-inference/\ +services/ai-edge-inference" + "sse-server|\ +src/500-application/509-sse-connector/\ +services/sse-server/Dockerfile|\ +src/500-application/509-sse-connector/\ +services/sse-server" + "media-capture-service|\ +src/500-application/503-media-capture-service/\ +services/media-capture-service/Dockerfile|\ +src/500-application/503-media-capture-service/\ +services/media-capture-service" +) + +build_count=0 +fail_count=0 + +echo "=== Logging into ACR: ${ACR_NAME} ===" +az acr login \ + --name "${ACR_NAME}" \ + --resource-group "${RESOURCE_GROUP}" + +for entry in "${COMPONENTS[@]}"; do + IFS='|' read -r img_name dockerfile context <<<"${entry}" + + dockerfile_path="${REPO_ROOT}/${dockerfile}" + context_path="${REPO_ROOT}/${context}" + + if [[ ! 
-f "${dockerfile_path}" ]]; then + echo "WARN: Dockerfile not found: ${dockerfile_path}" >&2 + echo " Skipping ${img_name}" + continue + fi + + remote_tag="${ACR_LOGIN}/${img_name}:${IMAGE_TAG}" + echo "=== Building ${img_name} (tag: ${IMAGE_TAG}) ===" + + if docker build \ + -t "${remote_tag}" \ + -f "${dockerfile_path}" \ + "${context_path}"; then + echo "=== Pushing ${remote_tag} ===" + docker push "${remote_tag}" + ((build_count++)) + else + echo "ERROR: Build failed for ${img_name}" >&2 + ((fail_count++)) + fi +done + +echo "" +echo "=== Build Summary ===" +echo " Succeeded: ${build_count}" +echo " Failed: ${fail_count}" + +if ((fail_count > 0)); then + exit 1 +fi + +echo "=== All images built and pushed successfully ===" diff --git a/src/501-ci-cd/scripts/deploy-leak-detection-apps.sh b/src/501-ci-cd/scripts/deploy-leak-detection-apps.sh new file mode 100755 index 00000000..06889a5e --- /dev/null +++ b/src/501-ci-cd/scripts/deploy-leak-detection-apps.sh @@ -0,0 +1,253 @@ +#!/bin/bash +set -euo pipefail + +########################################################################### +# Deploy Leak Detection Applications to Kubernetes +########################################################################### +# +# Deploys the leak-detection vision pipeline workloads to a Kubernetes +# cluster after Terraform infrastructure provisioning completes. This +# script handles the full set of edge application deployments for the +# leak-detection scenario: +# +# - ai-edge-inference ONNX inference deployment + service +# - sse-server SSE connector deployment + service +# - media-capture-service Video capture deployment + service +# +# Usage: +# ./deploy-leak-detection-apps.sh --kubeconfig \ +# --acr-login-server [--namespace ] [--dry-run] +# +########################################################################### + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +readonly SCRIPT_DIR +REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." 
&& pwd)"
readonly REPO_ROOT

# Print usage text, then exit with the status code given in $1 (default 0).
usage() {
  cat <<EOF
Usage: $(basename "$0") --kubeconfig <path> --acr-login-server <server>
           [--namespace <ns>] [--tag <tag>] [--dry-run]

Options:
  --kubeconfig <path>           Path to the cluster kubeconfig (required)
  --acr-login-server <server>   ACR login server, e.g. myacr.azurecr.io (required)
  --namespace <ns>              Target namespace (default: default)
  --tag <tag>                   Image tag to deploy (default: latest)
  --dry-run                     Client-side dry run; nothing is applied
  -h, --help                    Show this help text
EOF
  exit "${1:-0}"
}

KUBECONFIG_PATH=""
ACR_LOGIN_SERVER=""
NAMESPACE="default" # NOTE(review): original default lost in transit — confirm
IMAGE_TAG="latest"  # NOTE(review): used below; original default lost — confirm
DRY_RUN=false

# NOTE(review): the option parser was reconstructed from the surviving
# "usage 1 ;; esac done" fragment; flag names follow the Usage text above —
# confirm against the original script.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --kubeconfig)
      KUBECONFIG_PATH="$2"
      shift 2
      ;;
    --acr-login-server)
      ACR_LOGIN_SERVER="$2"
      shift 2
      ;;
    --namespace)
      NAMESPACE="$2"
      shift 2
      ;;
    --tag)
      IMAGE_TAG="$2"
      shift 2
      ;;
    --dry-run)
      DRY_RUN=true
      shift
      ;;
    -h | --help)
      usage 0
      ;;
    *)
      echo "ERROR: Unknown option: $1" >&2
      usage 1
      ;;
  esac
done

if [[ -z "${KUBECONFIG_PATH}" || -z "${ACR_LOGIN_SERVER}" ]]; then
  echo "ERROR: --kubeconfig and --acr-login-server required" >&2
  usage 1
fi

export KUBECONFIG="${KUBECONFIG_PATH}"

dry_run_flag=""
if [[ "${DRY_RUN}" == true ]]; then
  dry_run_flag="--dry-run=client"
  echo "=== DRY RUN MODE ==="
fi

# Verify cluster connectivity before attempting any apply.
echo "=== Verifying cluster connectivity ==="
if ! kubectl cluster-info &>/dev/null; then
  echo "ERROR: Cannot connect to cluster" >&2
  echo "       kubeconfig: ${KUBECONFIG_PATH}" >&2
  exit 1
fi
echo "  Cluster reachable"

# Ensure namespace exists (idempotent: AlreadyExists is swallowed).
echo "=== Ensuring namespace: ${NAMESPACE} ==="
kubectl create namespace "${NAMESPACE}" \
  ${dry_run_flag} \
  --save-config 2>/dev/null || true

# App paths
readonly APP_509="${REPO_ROOT}/src/500-application/509-sse-connector"
readonly APP_508="${REPO_ROOT}/src/500-application/508-media-connector"
readonly APP_507="${REPO_ROOT}/src/500-application/507-ai-inference"
readonly APP_503="${REPO_ROOT}/src/500-application/503-media-capture-service"

deploy_count=0
skip_count=0

# Apply the kustomize overlay under <app_path>/charts for one component.
# $1 = image name passed to gen-patch.sh, $2 = application root directory.
deploy_kustomize() {
  local name="$1"
  local app_path="$2"
  local charts_dir="${app_path}/charts"

  if [[ ! -d "${charts_dir}" ]]; then
    echo "  SKIP: No charts/ directory found"
    # Not ((skip_count++)): post-increment from 0 returns status 1 and,
    # under `set -e`, aborts the whole script.
    skip_count=$((skip_count + 1))
    return
  fi

  # Generate patches if gen-patch.sh exists
  if [[ -x "${charts_dir}/gen-patch.sh" ]]; then
    "${charts_dir}/gen-patch.sh" \
      --acr-name "${ACR_LOGIN_SERVER%%.*}" \
      --image-name "${name}" \
      --image-version "${IMAGE_TAG}" \
      --namespace "${NAMESPACE}"
  fi

  kubectl apply -k "${charts_dir}" \
    --namespace "${NAMESPACE}" \
    ${dry_run_flag}
  deploy_count=$((deploy_count + 1))
}

# Install or upgrade one Helm release.
# $1 = release name, $2 = chart path, $3 = image repository name in ACR.
deploy_helm() {
  local release="$1"
  local chart_path="$2"
  local image_name="$3"

  if [[ ! -d "${chart_path}" ]]; then
    echo "  SKIP: Helm chart not found at ${chart_path}"
    skip_count=$((skip_count + 1))
    return
  fi

  local -a helm_args=(
    upgrade --install "${release}" "${chart_path}"
    --namespace "${NAMESPACE}"
    --set "image.repository=${ACR_LOGIN_SERVER}/${image_name}"
    --set "image.tag=${IMAGE_TAG}"
  )

  if [[ "${DRY_RUN}" == true ]]; then
    helm_args+=(--dry-run)
  fi

  helm "${helm_args[@]}"
  deploy_count=$((deploy_count + 1))
}

# Apply a single raw manifest file. $1 = manifest path.
deploy_yaml() {
  local manifest="$1"

  if [[ ! -f "${manifest}" ]]; then
    echo "  SKIP: Manifest not found: ${manifest}"
    skip_count=$((skip_count + 1))
    return
  fi

  kubectl apply -f "${manifest}" \
    --namespace "${NAMESPACE}" \
    ${dry_run_flag}
  deploy_count=$((deploy_count + 1))
}

# Deployment order follows dependency chain:
#   509 (event ingestion) → 508 (media connector) →
#   507 (AI inference) → 503 (media capture)

echo ""
echo "=== Step 1: Deploying 509-sse-connector ==="
deploy_kustomize "sse-server" "${APP_509}"

echo ""
echo "=== Step 2: Deploying 508-media-connector ==="
if [[ -d "${APP_508}/kubernetes" ]]; then
  # If the glob matches nothing it stays literal and deploy_yaml's -f
  # check records a skip, so nullglob is not required here.
  for manifest in "${APP_508}"/kubernetes/*.yaml; do
    deploy_yaml "${manifest}"
  done
else
  echo "  SKIP: No kubernetes/ directory"
  skip_count=$((skip_count + 1))
fi

echo ""
echo "=== Step 3: Deploying 507-ai-inference ==="
deploy_kustomize "ai-edge-inference" "${APP_507}"

# Deploy model-downloader job if present (best-effort: failures ignored).
model_job="${APP_507}/charts/model-downloader-job.yaml"
if [[ -f "${model_job}" ]]; then
  echo "  Applying model-downloader job"
  kubectl apply -f "${model_job}" \
    --namespace "${NAMESPACE}" \
    ${dry_run_flag} 2>/dev/null || true
fi

echo ""
echo "=== Step 4: Deploying 503-media-capture-service ==="
deploy_helm \
  "media-capture-service" \
  "${APP_503}/charts/media-capture-service" \
  "media-capture-service"

# Wait for rollouts (skip in dry-run); timeouts are per-deployment and a
# failed rollout is reported by kubectl but does not abort the script.
if [[ "${DRY_RUN}" != true ]]; then
  echo ""
  echo "=== Waiting for rollouts ==="

  readonly -a DEPLOYMENTS=(
    "sse-server|120"
    "ai-edge-inference|300"
    "media-capture-service|300"
  )

  for entry in "${DEPLOYMENTS[@]}"; do
    IFS='|' read -r dep_name timeout <<<"${entry}"
    echo "  Waiting for ${dep_name}..."
    kubectl rollout status "deployment/${dep_name}" \
      -n "${NAMESPACE}" \
      --timeout="${timeout}s" || true
  done
fi

echo ""
echo "=== Deployment Summary ==="
echo "  Deployed: ${deploy_count}"
echo "  Skipped:  ${skip_count}"
echo "  Dry run:  ${DRY_RUN}"
echo "=== Done ==="