diff --git a/Final PS Script.txt b/Final PS Script.txt
index 096a278..8ffb117 100644
--- a/Final PS Script.txt
+++ b/Final PS Script.txt
@@ -1879,6 +1879,393 @@ if (Test-Path -Path $sourceFilePath) {
 
 Write-Output "Dataflow Backup and Detail Extraction Process Completed. Excel output saved to $combinedExcelOutputPath"
 
+
+
+
+#### Start of Notebook Backup and Detail Extract ####
+
+# Normalizes a workspace or notebook display name for file names and labels:
+# "[" and "]" become "(" and ")", every character outside a-z, A-Z, 0-9,
+# "(", ")", "&", ",", "." and "-" becomes a space, and leading whitespace is trimmed.
+function ConvertTo-NotebookSafeName {
+    param (
+        [string]$Name
+    )
+
+    $cleanName = $Name -replace '\[', '(' -replace '\]', ')'
+    $cleanName = $cleanName -replace "[^a-zA-Z0-9\(\)&,.-]", " "
+    return $cleanName.TrimStart()
+}
+
+# Function to download and extract Fabric Notebooks
+#
+# Calls the Fabric getDefinition endpoint for one notebook, decodes every
+# InlineBase64 part into a temp folder, and copies the .ipynb part to
+# "<OutputPath>\<clean workspace name> ~ <clean notebook name>.ipynb".
+#
+# Parameters:
+#   WorkspaceId, NotebookId     - IDs used to build the getDefinition URL.
+#   WorkspaceName, NotebookName - display names used for the file name and messages.
+#   OutputPath                  - folder that receives the .ipynb backup.
+#   AccessToken                 - bearer token sent in the Authorization header.
+#
+# Returns a hashtable with Content (notebook text) and FilePath (backup path),
+# or $null when the request fails, the response has no definition parts, or
+# no .ipynb part is present.
+#
+# Progress messages use Write-Host rather than Write-Output: inside a function
+# Write-Output adds the text to the return value, so a failed export
+# (return $null) still looked like a non-empty result to the caller.
+function Export-FabricNotebook {
+    param (
+        [string]$WorkspaceId,
+        [string]$NotebookId,
+        [string]$WorkspaceName,
+        [string]$NotebookName,
+        [string]$OutputPath,
+        [string]$AccessToken
+    )
+
+    $cleanWorkspaceName = ConvertTo-NotebookSafeName -Name $WorkspaceName
+    $cleanNotebookName = ConvertTo-NotebookSafeName -Name $NotebookName
+
+    $fabricApiBaseUri = "https://api.fabric.microsoft.com/v1/workspaces"
+    # format=ipynb requests the Jupyter (.ipynb) representation of the definition
+    # (see the Fabric "Get Notebook Definition" reference).
+    $endpoint = "$fabricApiBaseUri/$WorkspaceId/notebooks/$NotebookId/getDefinition?format=ipynb"
+
+    $headers = @{
+        "Authorization" = "Bearer $AccessToken"
+        "Content-Type"  = "application/json"
+    }
+
+    $tempDir = $null
+
+    try {
+        Write-Host "Calling Fabric API: $endpoint"
+        # NOTE(review): getDefinition is documented as a long-running operation. A
+        # 202 Accepted reply carries no definition body and currently ends up in the
+        # "No definition parts" branch below - confirm whether polling is required.
+        $response = Invoke-RestMethod -Uri $endpoint -Method POST -Headers $headers -Body "{}"
+
+        if (-not $response.definition.parts) {
+            Write-Warning "No definition parts found in response for notebook '$NotebookName'"
+            return $null
+        }
+
+        # @() keeps .Count reliable when the definition has exactly one part.
+        $parts = @($response.definition.parts)
+        Write-Host "Found $($parts.Count) part(s) in notebook definition"
+
+        # Create temp directory for extraction
+        $tempDir = Join-Path $env:TEMP "FabricNotebook_$NotebookId"
+        if (Test-Path $tempDir) {
+            Remove-Item -Path $tempDir -Recurse -Force
+        }
+        New-Item -Path $tempDir -ItemType Directory | Out-Null
+
+        # Extract files (only InlineBase64 payloads are decoded)
+        foreach ($part in $parts) {
+            if ($part.payloadType -ne "InlineBase64") {
+                continue
+            }
+
+            $filePath = Join-Path $tempDir $part.path
+            $fileDir = Split-Path $filePath
+            if (-not (Test-Path $fileDir)) {
+                New-Item -ItemType Directory -Path $fileDir -Force | Out-Null
+            }
+            $bytes = [System.Convert]::FromBase64String($part.payload)
+            [System.IO.File]::WriteAllBytes($filePath, $bytes)
+        }
+
+        # Prefer a part named notebook.ipynb, otherwise take any .ipynb part.
+        $notebookFile = Get-ChildItem -Path $tempDir -Filter "*.ipynb" -Recurse -File |
+            Sort-Object { $_.Name -ne "notebook.ipynb" } |
+            Select-Object -First 1
+
+        if (-not $notebookFile) {
+            Write-Warning "No .ipynb file found in extracted files for notebook '$NotebookName'"
+            Write-Host "Files found in temp directory:"
+            Get-ChildItem -Path $tempDir -Recurse | ForEach-Object { Write-Host "  $($_.FullName)" }
+            return $null
+        }
+
+        Write-Host "Found notebook file: $($notebookFile.FullName)"
+        $notebookContent = Get-Content -LiteralPath $notebookFile.FullName -Raw -Encoding UTF8
+
+        # Copy the decoded bytes as-is so the backup is not re-encoded: Out-File -Encoding UTF8
+        # in Windows PowerShell prepends a BOM and appends a newline.
+        $outputFileName = "$cleanWorkspaceName ~ $cleanNotebookName.ipynb"
+        $outputFilePath = Join-Path $OutputPath $outputFileName
+        Write-Host "Saving notebook to: $outputFilePath"
+        Copy-Item -LiteralPath $notebookFile.FullName -Destination $outputFilePath -Force
+        Write-Host "File saved successfully. File size: $((Get-Item -LiteralPath $outputFilePath).Length) bytes"
+
+        return @{
+            Content  = $notebookContent
+            FilePath = $outputFilePath
+        }
+    } catch {
+        Write-Warning "Failed to export notebook '$NotebookName' (ID: $NotebookId): $_"
+        return $null
+    } finally {
+        # Runs on every exit path, so the temp folder is also removed after a failure.
+        if ($tempDir -and (Test-Path -LiteralPath $tempDir)) {
+            Remove-Item -LiteralPath $tempDir -Recurse -Force -ErrorAction SilentlyContinue
+        }
+    }
+}
+
+# Function to parse notebook content and extract cells
+#
+# Parameters:
+#   Content       - notebook JSON text (.ipynb).
+#   NotebookId    - copied into the "Notebook ID" column.
+#   NotebookName  - copied into the "Notebook Name" column.
+#   WorkspaceName - combined with NotebookName for "Workspace Name - Notebook Name".
+#   ReportDate    - copied into the "Report Date" column.
+#
+# Returns one PSCustomObject per non-empty cell; "Cell Index" is the 1-based
+# position in the notebook, counting the empty cells that are skipped. Returns
+# nothing when the content is empty or cannot be parsed (a warning is written).
+function Parse-NotebookContent {
+    param (
+        [string]$Content,
+        [string]$NotebookId,
+        [string]$NotebookName,
+        [string]$WorkspaceName,
+        [datetime]$ReportDate
+    )
+
+    # A List avoids re-allocating the whole array on every "+=".
+    $cells = New-Object 'System.Collections.Generic.List[object]'
+
+    $cleanWorkspaceName = ConvertTo-NotebookSafeName -Name $WorkspaceName
+    $cleanNotebookName = ConvertTo-NotebookSafeName -Name $NotebookName
+
+    try {
+        if ([string]::IsNullOrWhiteSpace($Content)) {
+            throw "content is empty"
+        }
+
+        # Parse JSON content
+        $notebookJson = $Content | ConvertFrom-Json
+
+        # Extract cells from notebook
+        if ($notebookJson.cells) {
+            $cellIndex = 0
+            foreach ($cell in $notebookJson.cells) {
+                $cellIndex++
+
+                # Handle source as array or string
+                if ($cell.source -is [Array]) {
+                    $cellSource = $cell.source -join ""
+                } else {
+                    $cellSource = $cell.source
+                }
+
+                # Skip if empty
+                if ([string]::IsNullOrWhiteSpace($cellSource)) {
+                    continue
+                }
+
+                $cells.Add([PSCustomObject]@{
+                    "Notebook ID"                    = $NotebookId
+                    "Notebook Name"                  = $NotebookName
+                    "Cell Index"                     = $cellIndex
+                    "Cell Type"                      = $cell.cell_type
+                    "Cell Content"                   = $cellSource.Trim()
+                    "Report Date"                    = $ReportDate
+                    "Workspace Name - Notebook Name" = "$cleanWorkspaceName ~ $cleanNotebookName"
+                })
+            }
+        }
+    } catch {
+        # If parsing fails, return whatever was collected (normally nothing)
+        Write-Warning "Failed to parse notebook content for '$NotebookName': $_"
+    }
+
+    # Write-Host keeps this status line out of the returned cell collection.
+    Write-Host "Parsed $($cells.Count) cell(s) from notebook '$NotebookName'"
+    return $cells.ToArray()
+}
+
+
+
+
+Write-Output "Notebook Backup and Detail Extraction Process Started"
+
+# Define the notebook backups path
+$notebookBackupsPath = Join-Path -Path $baseFolderPath -ChildPath "Notebook Backups"
+
+# Check if the "Notebook Backups" folder exists, if not create it
+if (-not (Test-Path -Path $notebookBackupsPath)) {
+    New-Item -Path $notebookBackupsPath -ItemType Directory | Out-Null
+}
+
+# Create a new folder for the backups
+$notebook_new_date_folder = Join-Path -Path $notebookBackupsPath -ChildPath $date
+New-Item -Path $notebook_new_date_folder -ItemType Directory -Force | Out-Null
+
+# Set the base output file path
+$notebookBaseOutputFilePath = $notebook_new_date_folder
+
+# Get the latest folder by date. Names are validated as real yyyy-MM-dd dates with the
+# invariant culture, so a folder such as "2024-13-45" is ignored instead of breaking the
+# sort; for this format, sorting by name is the same as sorting by date.
+$notebookDateFormat = 'yyyy-MM-dd'
+$latestNotebookDateFolder = Get-ChildItem -Path $notebookBackupsPath -Directory |
+    Where-Object {
+        $parsedFolderDate = [datetime]::MinValue
+        [datetime]::TryParseExact($_.Name, $notebookDateFormat, [System.Globalization.CultureInfo]::InvariantCulture, [System.Globalization.DateTimeStyles]::None, [ref]$parsedFolderDate)
+    } |
+    Sort-Object -Property Name -Descending |
+    Select-Object -First 1
+
+if ($latestNotebookDateFolder) {
+    # Override with the latest-dated folder
+    $notebookFolderPath = $latestNotebookDateFolder.FullName
+    Write-Host "Using the latest dated folder: $notebookFolderPath"
+    # Get the current date from the folder name
+    $notebookCurrentDate = [datetime]::ParseExact($latestNotebookDateFolder.Name, $notebookDateFormat, [System.Globalization.CultureInfo]::InvariantCulture)
+} else {
+    # Use current folder if no previous folders found
+    $notebookFolderPath = $notebook_new_date_folder
+    Write-Host "No previous dated folders found. Using current folder: $notebookFolderPath"
+    # Fall back to the current date
+    $notebookCurrentDate = Get-Date
+}
+
+# Set the combined Excel output path
+$combinedNotebookExcelOutputPath = Join-Path -Path $notebook_new_date_folder -ChildPath "Notebook Detail.xlsx"
+
+# Define the headers
+$notebookHeaders = @("Notebook ID", "Notebook Name", "Cell Index", "Cell Type", "Cell Content", "Report Date", "Workspace Name - Notebook Name")
+
+# Initialize a combined DataTable with the specified headers
+$combinedNotebookDataTable = New-Object System.Data.DataTable
+foreach ($header in $notebookHeaders) {
+    # Columns.Add returns the new DataColumn; discard it so it is not written to the output.
+    [void]$combinedNotebookDataTable.Columns.Add($header, [System.String])
+}
+
+# Loop through all workspaces to fetch Notebooks (Fabric Items API)
+foreach ($workspace in $workspacesInfo) {
+    $workspaceName = $workspace.WorkspaceName
+    $workspaceId = $workspace.WorkspaceId
+
+    # Set the Fabric API URL for items
+    $fabricItemsUrl = "https://api.fabric.microsoft.com/v1/workspaces/$workspaceId/items"
+
+    try {
+        # Get the list of items in the workspace, following continuationUri so that
+        # workspaces with more than one page of items are not truncated. $notebooks is
+        # always an array, so .Count is also correct for a single notebook.
+        $notebooks = @()
+        $nextItemsUrl = $fabricItemsUrl
+        while ($nextItemsUrl) {
+            $fabricItemsResponse = Invoke-PowerBIRestMethod -Method GET -Url $nextItemsUrl | ConvertFrom-Json
+            $notebooks += @($fabricItemsResponse.value | Where-Object { $_.type -eq 'Notebook' })
+            $nextItemsUrl = $fabricItemsResponse.continuationUri
+        }
+
+        if ($notebooks.Count -gt 0) {
+            Write-Output "Found $($notebooks.Count) notebook(s) in workspace: $workspaceName"
+
+            # Iterate through the Notebooks
+            foreach ($notebook in $notebooks) {
+                $notebookId = $notebook.id
+                $notebookName = $notebook.displayName
+
+                Write-Output "Processing notebook: $notebookName"
+
+                # Get current access token
+                $currentAccessToken = Get-CurrentAccessToken
+
+                # Download and extract Fabric notebook
+                $fabricNotebookResult = Export-FabricNotebook -WorkspaceId $workspaceId `
+                                                              -NotebookId $notebookId `
+                                                              -WorkspaceName $workspaceName `
+                                                              -NotebookName $notebookName `
+                                                              -OutputPath $notebookBaseOutputFilePath `
+                                                              -AccessToken $currentAccessToken
+
+                if ($fabricNotebookResult) {
+                    Write-Output "Successfully exported notebook: $notebookName"
+                    # Parse the notebook content; @() keeps .Count correct for zero or one cell
+                    $parsedCells = @(Parse-NotebookContent -Content $fabricNotebookResult.Content `
+                                                           -NotebookId $notebookId `
+                                                           -NotebookName $notebookName `
+                                                           -WorkspaceName $workspaceName `
+                                                           -ReportDate $notebookCurrentDate)
+
+                    Write-Output "Adding $($parsedCells.Count) cell(s) to DataTable"
+                    # Add parsed cells to combined DataTable
+                    foreach ($cell in $parsedCells) {
+                        $row = $combinedNotebookDataTable.NewRow()
+                        $row["Notebook ID"] = $cell."Notebook ID"
+                        $row["Notebook Name"] = $cell."Notebook Name"
+                        $row["Cell Index"] = $cell."Cell Index"
+                        $row["Cell Type"] = $cell."Cell Type"
+                        $row["Cell Content"] = $cell."Cell Content"
+                        $row["Report Date"] = $cell."Report Date"
+                        $row["Workspace Name - Notebook Name"] = $cell."Workspace Name - Notebook Name"
+                        $combinedNotebookDataTable.Rows.Add($row)
+                    }
+                } else {
+                    Write-Warning "Failed to export notebook '$notebookName' - no result returned"
+                }
+            }
+        } else {
+            Write-Output "No notebooks found in workspace: $workspaceName"
+        }
+    } catch {
+        Write-Warning "Failed to fetch notebooks for workspace: $workspaceName. Error: $_"
+    }
+}
+
+# Check if the combined DataTable has any rows, if not add a dummy row with headers only
+if ($combinedNotebookDataTable.Rows.Count -eq 0) {
+    Write-Warning "No notebook cells were added to DataTable. Adding empty row."
+    $row = $combinedNotebookDataTable.NewRow()
+    foreach ($header in $notebookHeaders) {
+        $row[$header] = ""
+    }
+    $combinedNotebookDataTable.Rows.Add($row)
+} else {
+    Write-Output "Total rows in DataTable: $($combinedNotebookDataTable.Rows.Count)"
+}
+
+# Export the combined DataTable to an Excel file
+Write-Output "Exporting to Excel: $combinedNotebookExcelOutputPath"
+$combinedNotebookDataTable | Export-Excel -Path $combinedNotebookExcelOutputPath -AutoSize
+Write-Host "Data exported to $combinedNotebookExcelOutputPath"
+
+# Copy the Excel file to the base folder for easy access
+$notebookFileName = "Notebook Detail.xlsx"
+$notebookSourceFilePath = Join-Path -Path $notebook_new_date_folder -ChildPath $notebookFileName
+$notebookDestinationFilePath = Join-Path -Path $baseFolderPath -ChildPath $notebookFileName
+
+# Check if the source file exists
+if (Test-Path -Path $notebookSourceFilePath) {
+    # Remove the destination file if it already exists
+    if (Test-Path -Path $notebookDestinationFilePath) {
+        Remove-Item -Path $notebookDestinationFilePath -Force
+    }
+
+    # Copy the source file to the destination
+    Copy-Item -Path $notebookSourceFilePath -Destination $notebookDestinationFilePath
+    Write-Host "Copied Notebook Detail.xlsx to $baseFolderPath"
+} else {
+    Write-Warning "Source file not found: $notebookSourceFilePath"
+}
+
+Write-Output "Notebook Backup and Detail Extraction Process Completed. Excel output saved to $combinedNotebookExcelOutputPath"
+
+
+
+
 # Stop the background job after script completion
 Stop-Job -Name "TokenRefreshJob"
 Remove-Job -Name "TokenRefreshJob"
diff --git a/README.md b/README.md
index 7ea2e8a..2444335 100644
--- a/README.md
+++ b/README.md
@@ -10,13 +10,13 @@
 # Power BI Governance & Impact Analysis Solution
 ## What It Does
-This provides a quick and automated way to identify where and how specific fields, measures, and tables are used across Power BI reports in all workspaces by analyzing the visual object layer.
It also backs up and breaks down the details of your models, reports, and dataflows for easy review, giving you an all-in-one **Power BI Governance** solution. +This provides a quick and automated way to identify where and how specific fields, measures, and tables are used across Power BI reports in all workspaces by analyzing the visual object layer. It also backs up and breaks down the details of your models, reports, dataflows, and notebooks for easy review, giving you an all-in-one **Power BI Governance** solution. ### Key Features: - **Impact Analysis**: Fully understand the downstream impact of data model changes, ensuring you don’t accidentally break visuals or dashboards—especially when reports connected to a model span multiple workspaces. - **Used and Unused Objects**: Identify which tables, columns, and measures are actively used and where. Equally as important, see what isn't used and can be safely removed from your model to save space and complexity. -- **Comprehensive Environment Overview**: Gain a clear, detailed view of your entire Power BI environment, including complete breakdowns of your models, reports, and dataflows and their dependencies. -- **Backup Solution**: Automatically backs up every model, report, and dataflow for safekeeping. +- **Comprehensive Environment Overview**: Gain a clear, detailed view of your entire Power BI environment, including complete breakdowns of your models, reports, dataflows, and notebooks and their dependencies. +- **Backup Solution**: Automatically backs up every model, report, dataflow, and notebook for safekeeping. - **User-Friendly Output**: Results are presented in a Power BI model, making them easy to explore, analyze, and share with your team. . @@ -152,27 +152,35 @@ a popup will allow you to choose which workspaces you run this against. Select A - Must have edit rights on the related dataflow. 'Ownership' of the Dataflow is not required. Works with all Pro, Premium Capacity, Fabric Capacity workspaces. 
'My Workspace' also included. - image -### 5. Model Connection Details Metadata Extract +### 5. Notebook Backup and Metadata Extract +- Extracts Fabric Notebooks from workspaces, backing up the full notebook definition and extracting cell content. +- Leverages the Fabric REST API (Items list and Notebook getDefinition endpoints) to download notebook definitions and parse cell content. +- All backups are saved with the following format: Workspace Name ~ Notebook Name.ipynb in a "Notebook Backups" folder with dated subfolders. +- Extracts cell-level metadata (cell type, content, index) into a "Notebook Detail.xlsx" file similar to Dataflow Detail. +- Works with Fabric Capacity workspaces where Notebooks are available. +- Requires read and write permissions on the notebooks, because the Fabric getDefinition API requires both. + +### 6. Model Connection Details Metadata Extract - Leverages Power BI REST API to gather all model connection details. - Exports the extracted metadata into the same structured excel workbook as the Power BI Environment Information Extract - You must have read permissions on the related model. -### 6. Model Refresh History Metadata Extract +### 7. Model Refresh History Metadata Extract - Leverages Power BI REST API to gather all model refresh history (limited to the same history shown in the Service). - Exports the extracted metadata into the same structured excel workbook as the Power BI Environment Detail Extract - You must have read permissions on the related model. -### 7. Dataflow Connection Details Metadata Extract +### 8. Dataflow Connection Details Metadata Extract - Leverages Power BI REST API to gather all Dataflow connection details. - Exports the extracted metadata into the same structured excel workbook as the Power BI Environment Detail Extract - You must have read permissions on the related Dataflow. -### 8. Dataflow Refresh History Metadata Extract +### 9. Dataflow Refresh History Metadata Extract - Leverages Power BI REST API to gather all Dataflow refresh history (limited to the same history shown in the Service). 
- Exports the extracted metadata into the same structured excel workbook as the Power BI Environment Detail Extract - You must have read permissions on the related Dataflow. -### 9. Power BI Governance Model +### 10. Power BI Governance Model - Combines extracts into a Semantic Model to allow easy exploring, impact analysis, and governance of all Power BI Reports, Models, and Dataflows across all Workspaces - Works for anyone who runs the script and has at least 1 model and report. Dataflow not required. - Public example (limited due to no filter pane): https://app.powerbi.com/view?r=eyJrIjoiNmMxYWQ2ZTItZDM4ZS00MGM1LTlhMDQtN2I1OTMwMzI0OTg2IiwidCI6ImUyY2Y4N2QyLTYxMjktNGExYS1iZTczLTEzOGQyY2Y5OGJlMiJ9