From 36648318f354e38d8223b1fa328841fb6304a6d6 Mon Sep 17 00:00:00 2001 From: Benoit Moussaud Date: Fri, 26 Sep 2025 15:28:36 +0000 Subject: [PATCH] feat: Add storageBlobDataContributor role assignment for deployer in Bicep and JSON templates - Introduced a new role assignment for the storageBlobDataContributor role in both Bicep and JSON deployment templates. - Updated the Bicep file to create a role assignment on the storage account for the principal running the deployment (az.deployer().objectId). - Modified the JSON template to include the storageBlobDataContributor role ID and added the corresponding role assignment resource. - Switched the notebook's Blob Storage client from connection-string authentication to DefaultAzureCredential, which is why the deployer needs the storageBlobDataContributor role. - Enhanced the document processing Jupyter notebook with detailed logging and output messages for various stages of processing, including document uploads and crash report generation. --- challenge-0/iac/azuredeploy.bicep | 10 + challenge-0/iac/azuredeploy.json | 14 + challenge-1/1.document-processing.ipynb | 573 +++++++++++++++++++++++- 3 files changed, 575 insertions(+), 22 deletions(-) diff --git a/challenge-0/iac/azuredeploy.bicep b/challenge-0/iac/azuredeploy.bicep index 1f533cf..59dccdd 100644 --- a/challenge-0/iac/azuredeploy.bicep +++ b/challenge-0/iac/azuredeploy.bicep @@ -300,6 +300,16 @@ resource embeddingDeployment 'Microsoft.CognitiveServices/accounts/deployments@2 Create RBAC assignments for the AI Hub and Project managed identities */ +var storageBlobDataContributorRoleId = subscriptionResourceId('Microsoft.Authorization/roleDefinitions', 'ba92f5b4-2d11-453d-a403-e96b0029c9fe') +resource storageBlobDataContributorRoleAssignement 'Microsoft.Authorization/roleAssignments@2022-04-01'= { + scope: storageAccount + name: guid(resourceGroup().id, storageAccount.id, 'Deployer', storageBlobDataContributorRoleId) + properties: { + roleDefinitionId: storageBlobDataContributorRoleId + principalId: az.deployer().objectId + } +} + // Get the Cognitive Services User role definition var cognitiveServicesUserRoleId = 
subscriptionResourceId('Microsoft.Authorization/roleDefinitions', 'a97b65f3-24c7-4388-baec-2e87135dc908') diff --git a/challenge-0/iac/azuredeploy.json b/challenge-0/iac/azuredeploy.json index 5075d9c..91bd38e 100644 --- a/challenge-0/iac/azuredeploy.json +++ b/challenge-0/iac/azuredeploy.json @@ -51,6 +51,7 @@ "cosmosDbAccountName": "[format('{0}-cosmos-{1}', variables('prefix'), variables('suffix'))]", "cognitiveServicesUserRoleId": "[subscriptionResourceId('Microsoft.Authorization/roleDefinitions', 'a97b65f3-24c7-4388-baec-2e87135dc908')]", "searchServiceContributorRoleId": "[subscriptionResourceId('Microsoft.Authorization/roleDefinitions', '7ca78c08-252a-4471-8644-bb5ff32d4ba0')]", + "storageBlobDataContributorRoleId": "[subscriptionResourceId('Microsoft.Authorization/roleDefinitions', 'ba92f5b4-2d11-453d-a403-e96b0029c9fe')]", "aiDeveloperRoleId": "[subscriptionResourceId('Microsoft.Authorization/roleDefinitions', '64702f94-c441-49e6-a78b-ef80e0188fee')]", "contributorRoleId": "[subscriptionResourceId('Microsoft.Authorization/roleDefinitions', 'b24988ac-6180-42a0-ab88-20f7382dd24c')]" }, @@ -260,6 +261,19 @@ "[resourceId('Microsoft.CognitiveServices/accounts/projects', variables('aiFoundryName'), variables('aiProjectName'))]" ] }, + { + "type": "Microsoft.Authorization/roleAssignments", + "apiVersion": "2022-04-01", + "scope": "[format('Microsoft.Storage/storageAccounts/{0}', variables('storageAccountName'))]", + "name": "[guid(resourceGroup().id, resourceId('Microsoft.Storage/storageAccounts', variables('storageAccountName')), 'Deployer', variables('storageBlobDataContributorRoleId'))]", + "properties": { + "roleDefinitionId": "[variables('storageBlobDataContributorRoleId')]", + "principalId": "[deployer().objectId]" + }, + "dependsOn": [ + "[resourceId('Microsoft.Storage/storageAccounts', variables('storageAccountName'))]" + ] + }, { "type": "Microsoft.Authorization/roleAssignments", "apiVersion": "2022-04-01", diff --git 
a/challenge-1/1.document-processing.ipynb b/challenge-1/1.document-processing.ipynb index 57a98fe..293587e 100644 --- a/challenge-1/1.document-processing.ipynb +++ b/challenge-1/1.document-processing.ipynb @@ -35,9 +35,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ All imports successful!\n" + ] + } + ], "source": [ "import os\n", "import json\n", @@ -55,6 +63,7 @@ "from azure.ai.projects.aio import AIProjectClient as AsyncAIProjectClient\n", "from azure.identity import DefaultAzureCredential\n", "from azure.ai.agents.models import MessageRole, ListSortOrder, AzureAISearchTool, AzureAISearchQueryType\n", + "from azure.identity import DefaultAzureCredential\n", "# OpenAI imports\n", "from openai import AzureOpenAI\n", "\n", @@ -74,9 +83,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Configuration loaded successfully!\n", + "📁 Policies directory: data/policies\n", + "📁 Statements directory: data/statements\n", + "📁 Claims directory: data/claims\n", + "🤖 OpenAI Deployment: gpt-4.1-mini\n" + ] + } + ], "source": [ "# Configuration\n", "class Config:\n", @@ -145,17 +166,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " account url https://msagthacksazhryzauqnocb4.blob.core.windows.net\n", + "✅ Azure clients initialized successfully!\n" + ] + } + ], "source": [ "# Initialize Azure clients\n", "def initialize_clients():\n", " \"\"\"Initialize Azure service clients\"\"\"\n", " try:\n", " # Blob Storage client\n", - " blob_service_client = BlobServiceClient.from_connection_string(\n", - " Config.AZURE_STORAGE_CONNECTION_STRING\n", 
+ " credential = DefaultAzureCredential()\n", + " account_url = f\"https://{Config.AZURE_STORAGE_ACCOUNT_NAME}.blob.core.windows.net\"\n", + " print(f\" account url {account_url}\")\n", + " blob_service_client = BlobServiceClient(\n", + " account_url=account_url,\n", + " credential=credential\n", " )\n", " \n", " # Azure OpenAI client\n", @@ -184,9 +218,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "🚀 Running enhanced container creation...\n", + "🔍 Testing storage account connection...\n", + "✅ Connected to storage account successfully\n", + " Account kind: StorageV2\n", + " SKU name: Standard_LRS\n", + "\n", + "🔍 Checking existing containers...\n", + "✅ Found 0 existing containers: []\n", + "🔨 Creating container 'policies'...\n", + "✅ Container 'policies' created successfully\n", + "🔨 Creating container 'claims'...\n", + "✅ Container 'claims' created successfully\n", + "🔨 Creating container 'statements'...\n", + "✅ Container 'statements' created successfully\n", + "🔨 Creating container 'processed-documents'...\n", + "✅ Container 'processed-documents' created successfully\n", + "\n", + "📊 Container Creation Summary:\n", + " Successful: 4 - ['policies', 'claims', 'statements', 'processed-documents']\n", + " Failed: 0 - []\n" + ] + } + ], "source": [ "# Enhanced container creation with multiple authentication methods and diagnostics\n", "def create_containers_enhanced(blob_service_client):\n", @@ -281,9 +342,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Document uploader initialized!\n" + ] + } + ], "source": [ "class DocumentUploader:\n", " def __init__(self, blob_service_client):\n", @@ -362,9 +431,172 @@ }, { "cell_type": "code", - "execution_count": null, + 
"execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "📄 Uploading Policy Documents...\n", + "==================================================\n", + "📤 Uploading 5 files from data/policies to policies...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Uploading files: 0%| | 0/5 [00:00