From e99deb8f0dc8d3fead7baaaf849f2d8ce1d62e8c Mon Sep 17 00:00:00 2001 From: Miguel Soriano Date: Mon, 12 Jan 2026 18:58:38 +0100 Subject: [PATCH 1/4] feat: introduce azure rp registration controller in backend As part of it, we also introduce types to deserialize the azure runtime configuration file, as well as types to interact with the ResourceProviders azure client and read the FPA identity provided to the backend component. --- backend/azure_config_wiring.go | 87 +++++++ backend/fpa_wiring.go | 74 ++++++ backend/go.mod | 5 +- backend/go.sum | 4 + backend/main.go | 28 ++- .../config/v1/types_azure_runtime_config.go | 224 ++++++++++++++++++ .../v1/types_tls_certificates_config.go | 125 ++++++++++ .../pkg/azure/client/fpa_client_builder.go | 75 ++++++ .../azure/client/resource_providers_client.go | 39 +++ .../azure/config/azure_cloud_environment.go | 135 +++++++++++ backend/pkg/azure/config/azure_config.go | 33 +++ backend/pkg/azure/validation/resourceid.go | 201 ++++++++++++++++ backend/pkg/azure/validation/service_url.go | 45 ++++ .../azure_rp_registration_validation.go | 83 +++++++ 14 files changed, 1147 insertions(+), 11 deletions(-) create mode 100644 backend/azure_config_wiring.go create mode 100644 backend/fpa_wiring.go create mode 100644 backend/pkg/apis/config/v1/types_azure_runtime_config.go create mode 100644 backend/pkg/apis/config/v1/types_tls_certificates_config.go create mode 100644 backend/pkg/azure/client/fpa_client_builder.go create mode 100644 backend/pkg/azure/client/resource_providers_client.go create mode 100644 backend/pkg/azure/config/azure_cloud_environment.go create mode 100644 backend/pkg/azure/config/azure_config.go create mode 100644 backend/pkg/azure/validation/resourceid.go create mode 100644 backend/pkg/azure/validation/service_url.go create mode 100644 backend/pkg/controllers/validationcontrollers/validations/azure_rp_registration_validation.go diff --git a/backend/azure_config_wiring.go b/backend/azure_config_wiring.go new file 
mode 100644
index 0000000000..3159ba2e1f
--- /dev/null
+++ b/backend/azure_config_wiring.go
@@ -0,0 +1,96 @@
+// Copyright 2026 Microsoft Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"context"
+	"fmt"
+	"os"
+
+	"go.opentelemetry.io/otel/trace"
+
+	k8soperation "k8s.io/apimachinery/pkg/api/operation"
+
+	"sigs.k8s.io/yaml"
+
+	apisconfigv1 "github.com/Azure/ARO-HCP/backend/pkg/apis/config/v1"
+	azureconfig "github.com/Azure/ARO-HCP/backend/pkg/azure/config"
+)
+
+// loadAzureRuntimeConfig reads the YAML file at path, unmarshals it into an
+// AzureRuntimeConfig and validates it as a create operation. It returns an
+// error when path is empty or when reading, unmarshaling or validation fails.
+func loadAzureRuntimeConfig(ctx context.Context, path string) (*apisconfigv1.AzureRuntimeConfig, error) {
+	if len(path) == 0 {
+		return nil, fmt.Errorf("configuration path is required")
+	}
+
+	rawBytes, err := os.ReadFile(path)
+	if err != nil {
+		return nil, fmt.Errorf("error reading file %s: %w", path, err)
+	}
+
+	var config apisconfigv1.AzureRuntimeConfig
+	if err := yaml.Unmarshal(rawBytes, &config); err != nil {
+		return nil, fmt.Errorf("error unmarshaling file %s: %w", path, err)
+	}
+
+	validationErrors := config.Validate(ctx, k8soperation.Operation{Type: k8soperation.Create})
+	if len(validationErrors) > 0 {
+		return nil, fmt.Errorf("error validating file: %s: %w", path, validationErrors.ToAggregate())
+	}
+
+	return &config, nil
+}
+
+// buildAzureConfig combines the runtime configuration with the cloud
+// environment built from its CloudEnvironmentName and the tracer provider.
+func buildAzureConfig(azureRuntimeConfig *apisconfigv1.AzureRuntimeConfig, tracerProvider trace.TracerProvider) (*azureconfig.AzureConfig, error) {
+	if azureRuntimeConfig == nil {
+		return nil, fmt.Errorf("azure runtime configuration is required")
+	}
+
+	cloudEnvironment, err := azureconfig.NewAzureCloudEnvironment(azureRuntimeConfig.CloudEnvironmentName, tracerProvider)
+	if err != nil {
+		return nil, fmt.Errorf("error building azure cloud environment configuration: %w", err)
+	}
+
+	// err is known to be nil on this path, so return a literal nil.
+	return &azureconfig.AzureConfig{
+		CloudEnvironment:   cloudEnvironment,
+		AzureRuntimeConfig: azureRuntimeConfig,
+	}, nil
+}
+
+// getAzureConfig loads and validates the runtime configuration file and builds
+// the AzureConfig from it. It returns (nil, nil) when azureRuntimeConfigPath is
+// empty, so callers must handle a nil configuration.
+func getAzureConfig(ctx context.Context, azureRuntimeConfigPath string, tracerProvider trace.TracerProvider) (*azureconfig.AzureConfig, error) {
+	if len(azureRuntimeConfigPath) == 0 {
+		return nil, nil
+	}
+
+	azureRuntimeConfig, err := loadAzureRuntimeConfig(ctx, azureRuntimeConfigPath)
+	if err != nil {
+		return nil, fmt.Errorf("error loading azure runtime config: %w", err)
+	}
+
+	azureConfig, err := buildAzureConfig(azureRuntimeConfig, tracerProvider)
+	if err != nil {
+		return nil, fmt.Errorf("error building azure configuration: %w", err)
+	}
+
+	return azureConfig, nil
+}
diff --git a/backend/fpa_wiring.go b/backend/fpa_wiring.go
new file mode 100644
index 0000000000..cf7efb6059
--- /dev/null
+++ b/backend/fpa_wiring.go
@@ -0,0 +1,74 @@
+// Copyright 2026 Microsoft Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"context"
+	"fmt"
+	"log/slog"
+	"time"
+
+	"github.com/go-logr/logr"
+
+	azureclient "github.com/Azure/ARO-HCP/backend/pkg/azure/client"
+	azureconfig "github.com/Azure/ARO-HCP/backend/pkg/azure/config"
+	"github.com/Azure/ARO-HCP/internal/fpa"
+	"github.com/Azure/ARO-HCP/internal/utils"
+)
+
+// getFirstPartyApplicationClientBuilder returns a client builder backed by the
+// FPA certificate bundle at fpaCertBundlePath, reloaded every minute. It
+// returns (nil, nil) when either FPA argument is empty, and an error when FPA
+// is configured but azureConfig is nil.
+func getFirstPartyApplicationClientBuilder(
+	ctx context.Context, fpaCertBundlePath string, fpaClientID string,
+	azureConfig *azureconfig.AzureConfig,
+) (azureclient.FirstPartyApplicationClientBuilder, error) {
+	if len(fpaCertBundlePath) == 0 || len(fpaClientID) == 0 {
+		return nil, nil
+	}
+	if azureConfig == nil {
+		return nil, fmt.Errorf("azure configuration is required to build the FPA client builder")
+	}
+
+	// TODO temporary until internal FPA types have been updated to
+	// use logr.Logger or just receiving from context.
+	logrLogger := utils.LoggerFromContext(ctx)
+	slogLogger := slog.New(logr.ToSlogHandler(logrLogger))
+
+	// Create FPA TokenCredentials with watching
+	certReader, err := fpa.NewWatchingFileCertificateReader(ctx, fpaCertBundlePath, 1*time.Minute, slogLogger)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create certificate reader: %w", err)
+	}
+
+	// We create the FPA token credential retriever here. Then we pass it to the cluster inflights controller,
+	// which then is used to instantiate a validation that uses the FPA token credential retriever. And then the
+	// validation uses the retriever to retrieve a token credential based on the information associated to the
+	// cluster (the tenant of the cluster, the subscription id, ...)
+	fpaTokenCredRetriever, err := fpa.NewFirstPartyApplicationTokenCredentialRetriever(
+		slogLogger,
+		fpaClientID,
+		certReader,
+		*azureConfig.CloudEnvironment.AZCoreClientOptions(),
+	)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create FPA token credential retriever: %w", err)
+	}
+
+	fpaClientBuilder := azureclient.NewFirstPartyApplicationClientBuilder(fpaTokenCredRetriever, azureConfig.CloudEnvironment.ARMClientOptions())
+
+	return fpaClientBuilder, nil
+}
diff --git a/backend/go.mod b/backend/go.mod
index 41eafd2649..5d0fc5305d 100644
--- a/backend/go.mod
+++ b/backend/go.mod
@@ -6,6 +6,7 @@ require (
 	github.com/Azure/ARO-HCP/internal v0.0.0-00010101000000-000000000000
 	github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0
 	github.com/Azure/azure-sdk-for-go/sdk/data/azcosmos v1.4.1
+	github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.2.0
 	github.com/Azure/azure-sdk-for-go/sdk/tracing/azotel v0.4.0
 	github.com/go-logr/logr v1.4.3
 	github.com/openshift-online/ocm-sdk-go v0.1.480
@@ -21,6 +22,7 @@ require (
 	k8s.io/client-go v0.34.1
 	k8s.io/klog/v2 v2.130.1
 	k8s.io/utils v0.0.0-20250820121507-0af2bda4dd1d
+	sigs.k8s.io/yaml v1.6.0
 )
 
 require (
@@ -28,7 +30,7 @@ require (
 	github.com/Azure/azure-sdk-for-go v68.0.0+incompatible // indirect
 	github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1 // indirect
 	github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 // indirect
-	github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.2.0 // indirect
+	github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/managementgroups/armmanagementgroups v1.2.0 // indirect
 	github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 // indirect
 	github.com/aymerick/douceur v0.2.0 // indirect
 	github.com/beorn7/perks v1.0.1 // indirect
@@ -116,7 +118,6 @@ require (
 	sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect
 	sigs.k8s.io/randfill v1.0.0 // indirect
 	sigs.k8s.io/structured-merge-diff/v6 v6.3.0 //
indirect - sigs.k8s.io/yaml v1.6.0 // indirect ) replace github.com/Azure/ARO-HCP/internal => ../internal diff --git a/backend/go.sum b/backend/go.sum index da38110c70..33e8f02734 100644 --- a/backend/go.sum +++ b/backend/go.sum @@ -12,6 +12,10 @@ github.com/Azure/azure-sdk-for-go/sdk/data/azcosmos v1.4.1 h1:ToPLhnXvatKVN4Zkcx github.com/Azure/azure-sdk-for-go/sdk/data/azcosmos v1.4.1/go.mod h1:Krtog/7tz27z75TwM5cIS8bxEH4dcBUezcq+kGVeZEo= github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 h1:9iefClla7iYpfYWdzPCRDozdmndjTm8DXdpCzPajMgA= github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2/go.mod h1:XtLgD3ZD34DAaVIIAyG3objl5DynM3CQ/vMcbBNJZGI= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v2 v2.0.0 h1:PTFGRSlMKCQelWwxUyYVEUqseBJVemLyqWJjvMyt0do= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v2 v2.0.0/go.mod h1:LRr2FzBTQlONPPa5HREE5+RjSCTXl7BwOvYOaWTqCaI= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/managementgroups/armmanagementgroups v1.2.0 h1:akP6VpxJGgQRpDR1P462piz/8OhYLRCreDj48AyNabc= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/managementgroups/armmanagementgroups v1.2.0/go.mod h1:8wzvopPfyZYPaQUoKW87Zfdul7jmJMDfp/k7YY3oJyA= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.2.0 h1:Dd+RhdJn0OTtVGaeDLZpcumkIVCtA/3/Fo42+eoYvVM= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.2.0/go.mod h1:5kakwfW5CjC9KK+Q4wjXAg+ShuIm2mBMua0ZFj2C8PE= github.com/Azure/azure-sdk-for-go/sdk/tracing/azotel v0.4.0 h1:RTTsXUJWn0jumeX62Mb153wYXykqnrzYBYDeHp0kiuk= diff --git a/backend/main.go b/backend/main.go index f9591ac94c..b6078c59d6 100644 --- a/backend/main.go +++ b/backend/main.go @@ -42,10 +42,6 @@ import ( "k8s.io/klog/v2" utilsclock "k8s.io/utils/clock" - "github.com/Azure/azure-sdk-for-go/sdk/azcore" - "github.com/Azure/azure-sdk-for-go/sdk/azcore/cloud" - "github.com/Azure/azure-sdk-for-go/sdk/tracing/azotel" - ocmsdk 
"github.com/openshift-online/ocm-sdk-go" "github.com/Azure/ARO-HCP/backend/oldoperationscanner" @@ -203,15 +199,23 @@ func Run(cmd *cobra.Command, args []string) error { return fmt.Errorf("could not initialize opentelemetry sdk: %w", err) } + otelTracerProvider := otel.GetTracerProvider() + + azureConfig, err := getAzureConfig(ctx, argAzureRuntimeConfigPath, otelTracerProvider) + if err != nil { + return fmt.Errorf("error getting azure configuration: %w", err) + } + + fpaClientBuilder, err := getFirstPartyApplicationClientBuilder(ctx, argAzureFPACertBundlePath, argAzureFPAClientID, azureConfig) + if err != nil { + return fmt.Errorf("error configuring FPA client builder: %w", err) + } + // Create the database client. cosmosDatabaseClient, err := database.NewCosmosDatabaseClient( argCosmosURL, argCosmosName, - azcore.ClientOptions{ - // FIXME Cloud should be determined by other means. - Cloud: cloud.AzurePublic, - TracingProvider: azotel.NewTracingProvider(otel.GetTracerProvider(), nil), - }, + *azureConfig.CloudEnvironment.PolicyClientOptions(), ) if err != nil { return fmt.Errorf("failed to create the CosmosDB client: %w", err) @@ -390,6 +394,11 @@ func Run(cmd *cobra.Command, args []string) error { dbClient, subscriptionLister, ) + azureRPRegistrationValidationController = validationcontrollers.NewClusterValidationController( + validations.NewAzureResourceProvidersRegistrationValidation(fpaClientBuilder), + dbClient, + subscriptionLister, + ) ) le, err := leaderelection.NewLeaderElector(leaderelection.LeaderElectionConfig{ @@ -415,6 +424,7 @@ func Run(cmd *cobra.Command, args []string) error { go cosmosMatchingExternalAuthController.Run(ctx, 20) go cosmosMatchingClusterController.Run(ctx, 20) go alwaysSuccessClusterValidationController.Run(ctx, 20) + go azureRPRegistrationValidationController.Run(ctx, 20) }, OnStoppedLeading: func() { operationsScanner.LeaderGauge.Set(0) diff --git a/backend/pkg/apis/config/v1/types_azure_runtime_config.go 
b/backend/pkg/apis/config/v1/types_azure_runtime_config.go
new file mode 100644
index 0000000000..dbe642db6a
--- /dev/null
+++ b/backend/pkg/apis/config/v1/types_azure_runtime_config.go
@@ -0,0 +1,224 @@
+// Copyright 2026 Microsoft Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v1
+
+import (
+	"context"
+	"fmt"
+	"net/url"
+	"strings"
+
+	"k8s.io/apimachinery/pkg/api/operation"
+	"k8s.io/apimachinery/pkg/api/validate"
+	"k8s.io/apimachinery/pkg/util/sets"
+	"k8s.io/apimachinery/pkg/util/validation/field"
+
+	azcorearm "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
+
+	"github.com/Azure/ARO-HCP/backend/pkg/azure/validation"
+)
+
+// AzureRuntimeConfig represents user provided Azure related configuration for running the service
+type AzureRuntimeConfig struct {
+	// Cloud environment where the service is running on
+	CloudEnvironmentName CloudEnvironmentName `json:"cloudEnvironmentName"`
+	// The ID of the tenant where the service is running on
+	ServiceTenantID string `json:"tenantID"`
+	// Azure Container Registry containing OCP Images
+	OCPImagesACR AzureContainerRegistry `json:"ocpImagesACR"`
+	// Data plane identities OIDC configuration
+	DataPlaneIdentitiesOIDCConfiguration DataPlaneIdentitiesOIDCConfiguration `json:"dataPlaneIdentitiesOIDCConfiguration"`
+	// ManagedIdentitiesDataPlaneAudienceResource is the endpoint used to connect with the
+	// Managed Identities Resource Provider (MI RP). The scheme must be https.
+	// The system's certificate store is used to verify the OIDC issuer's certificate.
+	ManagedIdentitiesDataPlaneAudienceResource string `json:"managedIdentitiesDataPlaneAudienceResource"`
+	// TLSCertificatesConfig holds the configuration used to generate TLS
+	// certificates for user-facing apis, such as kube-apiserver and ingress.
+	// It is always validated: certificatesGenerationSource must be set to a
+	// supported value. With the AzureKeyVault source the certificates are
+	// generated in Azure Key Vault and an issuer is required; with the
+	// Hypershift source the default Hypershift generated certificates are
+	// used and an issuer must not be set.
+	TLSCertificatesConfig TLSCertificatesConfig `json:"tlsCertificatesConfig"`
+}
+
+// Validate performs validation on the AzureRuntimeConfig properties. Every
+// field path matches the JSON key of the property it reports on.
+func (c AzureRuntimeConfig) Validate(ctx context.Context, op operation.Operation) field.ErrorList {
+	errs := field.ErrorList{}
+
+	errs = append(errs, c.CloudEnvironmentName.Validate(ctx, op, field.NewPath("cloudEnvironmentName"))...)
+
+	errs = append(errs, validate.RequiredValue(ctx, op, field.NewPath("tenantID"), &c.ServiceTenantID, nil)...)
+
+	errs = append(errs, c.OCPImagesACR.Validate(ctx, op, field.NewPath("ocpImagesACR"))...)
+
+	errs = append(errs, c.DataPlaneIdentitiesOIDCConfiguration.Validate(ctx, op, field.NewPath("dataPlaneIdentitiesOIDCConfiguration"))...)
+
+	errs = append(errs, c.validateManagedIdentitiesDataPlaneAudienceResource(ctx, op, field.NewPath("managedIdentitiesDataPlaneAudienceResource"))...)
+
+	errs = append(errs, c.TLSCertificatesConfig.Validate(ctx, op, field.NewPath("tlsCertificatesConfig"))...)
+
+	return errs
+}
+
+// validateManagedIdentitiesDataPlaneAudienceResource requires the audience
+// resource to be set and, when it is, to parse as a URL whose scheme is
+// https.
+func (c AzureRuntimeConfig) validateManagedIdentitiesDataPlaneAudienceResource(ctx context.Context, op operation.Operation, fldPath *field.Path) field.ErrorList {
+	errs := field.ErrorList{}
+
+	errs = append(errs, validate.RequiredValue(ctx, op, fldPath, &c.ManagedIdentitiesDataPlaneAudienceResource, nil)...)
+
+	if audience := c.ManagedIdentitiesDataPlaneAudienceResource; len(audience) > 0 {
+		u, err := url.Parse(audience)
+		switch {
+		case err != nil:
+			errs = append(errs, field.Invalid(fldPath, audience, fmt.Sprintf("attribute is not a valid url: %v", err)))
+		case u.Scheme != "https":
+			errs = append(errs, field.Invalid(fldPath, audience, "attribute must have a 'https' scheme"))
+		}
+	}
+
+	return errs
+}
+
+// CloudEnvironmentName represents the cloud environment where the service is running on
+// Accepted values are:
+//   - AzureChinaCloud
+//   - AzurePublicCloud
+//   - AzureUSGovernmentCloud
+type CloudEnvironmentName string
+
+const (
+	AzureChinaCloud        CloudEnvironmentName = "AzureChinaCloud"
+	AzurePublicCloud       CloudEnvironmentName = "AzurePublicCloud"
+	AzureUSGovernmentCloud CloudEnvironmentName = "AzureUSGovernmentCloud"
+)
+
+var (
+	// validCloudEnvironmentNames is a set of valid cloud environment names. As of now,
+	// we have only verified AzurePublicCloud.
+	validCloudEnvironmentNames = sets.New[CloudEnvironmentName](
+		AzurePublicCloud,
+		AzureUSGovernmentCloud,
+		AzureChinaCloud,
+	)
+)
+
+// Validate reports an error when c is not one of validCloudEnvironmentNames.
+func (c CloudEnvironmentName) Validate(ctx context.Context, op operation.Operation, fldPath *field.Path) field.ErrorList {
+	return validate.Enum(ctx, op, fldPath, &c, nil, validCloudEnvironmentNames)
+}
+
+type DataPlaneIdentitiesOIDCConfiguration struct {
+	// Name of the storage account blob container
+	StorageAccountBlobContainerName string `json:"storageAccountBlobContainerName"`
+	// URL of the storage account blob service, e.g.
https://.blob.core.windows.net/
+	// The system's certificate store is used to verify the certificate.
+	StorageAccountBlobServiceURL string `json:"storageAccountBlobServiceURL"`
+	// OIDC base issuer URL, e.g. https://.z1.web.core.windows.net/
+	// The system's certificate store is used to verify the certificate.
+	OIDCIssuerBaseURL string `json:"oidcIssuerBaseURL"`
+}
+
+type AzureContainerRegistry struct {
+	// Resource Id of the Azure Container Registry
+	ResourceID *azcorearm.ResourceID `json:"resourceID"`
+	// Hostname of the Azure Container Registry.
+	// It should only contain the hostname, without any protocol, port or paths.
+	// The system's certificate store is used to verify the certificate.
+	Hostname string `json:"hostname"`
+	// Scope map name for the Azure Container Registry repository
+	ScopeMapName string `json:"scopeMapName"`
+}
+
+// validateACRHostname checks that Hostname is a bare host (no scheme, port or
+// path) whose first DNS label equals the name of the registry's ResourceID.
+func (r *AzureContainerRegistry) validateACRHostname(ctx context.Context, op operation.Operation, fldPath *field.Path) field.ErrorList {
+	errs := append(field.ErrorList{}, validate.RequiredValue(ctx, op, fldPath, &r.Hostname, nil)...)
+
+	if strings.Contains(r.Hostname, "://") {
+		errs = append(errs, field.Invalid(fldPath, r.Hostname, "url scheme is not allowed"))
+	}
+
+	// adds protocol for parsing to ensure that the host is set correctly when parsed, otherwise it is set as a
+	// path in the parsed url
+	parsedURL, err := url.Parse("http://" + r.Hostname)
+	if err != nil {
+		return append(errs, field.Invalid(fldPath, r.Hostname, fmt.Sprintf("url is not valid: %v", err)))
+	}
+
+	// the given acr url should be the same as the parsed url's hostname, which does not include any ports and paths
+	if parsedURL.Hostname() != r.Hostname {
+		errs = append(errs, field.Invalid(fldPath, r.Hostname, "cannot contain port or paths"))
+	}
+
+	// A nil ResourceID is reported as a missing required field by Validate, so
+	// the name comparison is skipped instead of dereferencing a nil pointer.
+	nameFromURL, _, _ := strings.Cut(r.Hostname, ".")
+	if r.ResourceID != nil && r.ResourceID.Name != nameFromURL {
+		errs = append(errs, field.Invalid(fldPath, r.Hostname, "contains incorrect resource name"))
+	}
+
+	return errs
+}
+
+// Validate checks the resource ID, hostname and scope map name of the registry.
+func (r AzureContainerRegistry) Validate(ctx context.Context, op operation.Operation, fldPath *field.Path) field.ErrorList {
+	errs := field.ErrorList{}
+
+	errs = append(errs, validate.RequiredPointer(ctx, op, fldPath.Child("resourceID"), r.ResourceID, nil)...)
+	errs = append(errs, validation.ValidateACRResourceID(ctx, op, fldPath.Child("resourceID"), r.ResourceID)...)
+	errs = append(errs, r.validateACRHostname(ctx, op, fldPath.Child("hostname"))...)
+	errs = append(errs, validate.RequiredValue(ctx, op, fldPath.Child("scopeMapName"), &r.ScopeMapName, nil)...)
+
+	return errs
+}
+
+// Validate - returns an error if the given data plane OIDC configuration was not specified or is not supported
+func (c DataPlaneIdentitiesOIDCConfiguration) Validate(ctx context.Context, op operation.Operation, fldPath *field.Path) field.ErrorList {
+	errs := field.ErrorList{}
+
+	errs = append(errs, validate.RequiredValue(ctx, op, fldPath.Child("storageAccountBlobContainerName"), &c.StorageAccountBlobContainerName, nil)...)
+
+	errs = append(errs, c.validateStorageAccountBlobServiceURL(ctx, op, fldPath.Child("storageAccountBlobServiceURL"))...)
+
+	errs = append(errs, c.validateOIDCIssuerBaseURL(ctx, op, fldPath.Child("oidcIssuerBaseURL"))...)
+
+	return errs
+}
+
+// validateStorageAccountBlobServiceURL requires the blob service URL and, when it is set, validates it as an Azure service URL.
+func (c DataPlaneIdentitiesOIDCConfiguration) validateStorageAccountBlobServiceURL(ctx context.Context, op operation.Operation, fldPath *field.Path) field.ErrorList {
+	blobServiceURL := c.StorageAccountBlobServiceURL
+	allErrs := append(field.ErrorList{}, validate.RequiredValue(ctx, op, fldPath, &blobServiceURL, nil)...)
+	if blobServiceURL != "" {
+		allErrs = append(allErrs, validation.ValidateAzureServiceURL(ctx, op, fldPath, blobServiceURL)...)
+	}
+
+	return allErrs
+}
+
+// validateOIDCIssuerBaseURL requires the OIDC issuer base URL and, when it is set, validates it as an Azure service URL.
+func (c DataPlaneIdentitiesOIDCConfiguration) validateOIDCIssuerBaseURL(ctx context.Context, op operation.Operation, fldPath *field.Path) field.ErrorList {
+	issuerBaseURL := c.OIDCIssuerBaseURL
+	allErrs := append(field.ErrorList{}, validate.RequiredValue(ctx, op, fldPath, &issuerBaseURL, nil)...)
+	if issuerBaseURL != "" {
+		allErrs = append(allErrs, validation.ValidateAzureServiceURL(ctx, op, fldPath, issuerBaseURL)...)
+	}
+
+	return allErrs
+}
diff --git a/backend/pkg/apis/config/v1/types_tls_certificates_config.go b/backend/pkg/apis/config/v1/types_tls_certificates_config.go
new file mode 100644
index 0000000000..3dc525bdea
--- /dev/null
+++ b/backend/pkg/apis/config/v1/types_tls_certificates_config.go
@@ -0,0 +1,125 @@
+// Copyright 2026 Microsoft Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package v1
+
+import (
+	"context"
+	"fmt"
+
+	"k8s.io/apimachinery/pkg/api/operation"
+	"k8s.io/apimachinery/pkg/api/validate"
+	"k8s.io/apimachinery/pkg/util/sets"
+	"k8s.io/apimachinery/pkg/util/validation/field"
+)
+
+// TLSCertificatesConfig holds the configuration used to generate TLS
+// certificates for user-facing apis, such as kube-apiserver and ingress.
+type TLSCertificatesConfig struct {
+	// Issuer holds the issuer used to generate the TLS certificates in
+	// Azure Key Vault. When CertificatesGenerationSource is AzureKeyVault,
+	// Issuer is required. Only used when CertificatesGenerationSource is
+	// AzureKeyVault.
+	Issuer TLSCertificateIssuerType `json:"issuer"`
+	// CertificatesGenerationSource indicates what is the source to be used to
+	// generate the TLS certificates. Required.
+	CertificatesGenerationSource CertificatesGenerationSource `json:"certificatesGenerationSource"`
+}
+
+// TLSCertificateIssuerType indicates the issuer used to generate the TLS
+// certificates in Azure Key Vault.
+type TLSCertificateIssuerType string
+
+const (
+	// TLSCertificateIssuerSelf generates TLS certificates with a self signed issuer
+	TLSCertificateIssuerSelf TLSCertificateIssuerType = "Self"
+	// TLSCertificateIssuerOneCert generates TLS certificates with Microsoft's
+	// OneCertV2-PublicCA issuer
+	TLSCertificateIssuerOneCert TLSCertificateIssuerType = "OneCertV2-PublicCA"
+)
+
+var (
+	// validIssuerTypes is a set of valid issuer types.
+	validIssuerTypes = sets.New[TLSCertificateIssuerType](
+		TLSCertificateIssuerSelf,
+		TLSCertificateIssuerOneCert,
+	)
+)
+
+// CertificatesGenerationSource indicates what is the source to be used to
+// generate the TLS certificates.
+type CertificatesGenerationSource string
+
+const (
+	// CertificatesGenerationSourceAzureKeyVault signals TLS certificates to be
+	// generated in Azure Key Vault.
+	CertificatesGenerationSourceAzureKeyVault CertificatesGenerationSource = "AzureKeyVault"
+	// CertificatesGenerationSourceHypershift signals TLS certificates to be
+	// generated using the default Hypershift generated TLS Certificates.
+	CertificatesGenerationSourceHypershift CertificatesGenerationSource = "NotSupportedMayRemove_Hypershift"
+)
+
+var (
+	// validCertificatesGenerationSources is a set of valid certificates generation sources.
+	validCertificatesGenerationSources = sets.New[CertificatesGenerationSource](
+		CertificatesGenerationSourceAzureKeyVault,
+		CertificatesGenerationSourceHypershift,
+	)
+)
+
+// Validate checks the certificates generation source and the issuer that goes with it.
+func (tlsConfig TLSCertificatesConfig) Validate(ctx context.Context, op operation.Operation, fldPath *field.Path) field.ErrorList {
+	errs := field.ErrorList{}
+
+	errs = append(errs, tlsConfig.validateCertificatesGenerationSource(ctx, op, fldPath.Child("certificatesGenerationSource"))...)
+
+	errs = append(errs, tlsConfig.validateIssuer(ctx, op, fldPath.Child("issuer"), fldPath.Child("certificatesGenerationSource"))...)
+
+	return errs
+}
+
+// validateCertificatesGenerationSource requires the source to be one of validCertificatesGenerationSources.
+func (tlsConfig TLSCertificatesConfig) validateCertificatesGenerationSource(
+	ctx context.Context, op operation.Operation, fldPath *field.Path,
+) field.ErrorList {
+	return validate.Enum(ctx, op, fldPath,
+		&tlsConfig.CertificatesGenerationSource, nil, validCertificatesGenerationSources,
+	)
+}
+
+// validateIssuer enforces that the issuer is a supported value when set, is
+// required for the AzureKeyVault source and is forbidden for the Hypershift one.
+func (tlsConfig TLSCertificatesConfig) validateIssuer(
+	ctx context.Context, op operation.Operation, fldPath *field.Path, certSourceFldPath *field.Path,
+) field.ErrorList {
+	var errs field.ErrorList
+
+	issuerSet := len(tlsConfig.Issuer) > 0
+	// An empty issuer is not an enum member; only check the enum when it is set
+	// so that the Hypershift source, which forbids an issuer, can pass validation.
+	if issuerSet {
+		errs = append(errs, validate.Enum(ctx, op, fldPath, &tlsConfig.Issuer, nil, validIssuerTypes)...)
+	}
+
+	switch source := tlsConfig.CertificatesGenerationSource; {
+	case source == CertificatesGenerationSourceHypershift && issuerSet:
+		errs = append(errs, field.Forbidden(fldPath,
+			fmt.Sprintf("attribute is not allowed when %s is %s", certSourceFldPath, source)))
+	case source == CertificatesGenerationSourceAzureKeyVault && !issuerSet:
+		errs = append(errs, field.Required(fldPath,
+			fmt.Sprintf("attribute is required when %s is %s", certSourceFldPath, source)))
+	}
+
+	return errs
+}
diff --git a/backend/pkg/azure/client/fpa_client_builder.go b/backend/pkg/azure/client/fpa_client_builder.go
new file mode 100644
index 0000000000..9a4481bc23
--- /dev/null
+++ b/backend/pkg/azure/client/fpa_client_builder.go
@@ -0,0 +1,75 @@
+// Copyright 2026 Microsoft Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package client
+
+import (
+	azcorearm "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
+	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources"
+
+	"github.com/Azure/ARO-HCP/internal/fpa"
+)
+
+// FirstPartyApplicationClientBuilderType is a type that represents the type of the FPAClientBuilder
+// interface.
It is used to ensure that this interface is incompatible
+// with other client builder interfaces that might have the same set of methods.
+type FirstPartyApplicationClientBuilderType string
+
+const (
+	// FirstPartyApplicationClientBuilderTypeValue is the value of the
+	// FirstPartyApplicationClientBuilderType type that represents the FPA client builder.
+	FirstPartyApplicationClientBuilderTypeValue FirstPartyApplicationClientBuilderType = "FPA"
+)
+
+type FirstPartyApplicationClientBuilder interface {
+	// BuilderType returns the type of the client builder. Its only purpose is
+	// to ensure that this interface is incompatible with other client builder
+	// interfaces that might have the same set of methods, so that they cannot
+	// be used interchangeably.
+	BuilderType() FirstPartyApplicationClientBuilderType
+	ResourceProvidersClient(tenantID string, subscriptionID string) (ResourceProvidersClient, error)
+}
+
+type firstPartyApplicationClientBuilder struct {
+	fpaTokenCredRetriever fpa.FirstPartyApplicationTokenCredentialRetriever
+	options               *azcorearm.ClientOptions
+}
+
+var _ FirstPartyApplicationClientBuilder = (*firstPartyApplicationClientBuilder)(nil)
+
+// NewFirstPartyApplicationClientBuilder instantiates a FPAClientBuilder. When clients are instantiated with it the FPA token credential
+// retriever is leveraged to get a FPA Token Credential, and the provided ARM client options.
+func NewFirstPartyApplicationClientBuilder(tokenCredRetriever fpa.FirstPartyApplicationTokenCredentialRetriever, options *azcorearm.ClientOptions) FirstPartyApplicationClientBuilder {
+	return &firstPartyApplicationClientBuilder{fpaTokenCredRetriever: tokenCredRetriever, options: options}
+}
+
+// ResourceProvidersClient builds a providers client for subscriptionID using
+// the FPA credential retrieved for tenantID.
+func (b *firstPartyApplicationClientBuilder) ResourceProvidersClient(tenantID string, subscriptionID string) (ResourceProvidersClient, error) {
+	creds, err := b.fpaTokenCredRetriever.RetrieveCredential(tenantID)
+	if err != nil {
+		return nil, err
+	}
+
+	client, err := armresources.NewProvidersClient(subscriptionID, creds, b.options)
+	if err != nil {
+		// Return a literal nil so the interface result is never a typed nil pointer.
+		return nil, err
+	}
+	return client, nil
+}
+
+func (b *firstPartyApplicationClientBuilder) BuilderType() FirstPartyApplicationClientBuilderType {
+	return FirstPartyApplicationClientBuilderTypeValue
+}
diff --git a/backend/pkg/azure/client/resource_providers_client.go b/backend/pkg/azure/client/resource_providers_client.go
new file mode 100644
index 0000000000..91f74876ae
--- /dev/null
+++ b/backend/pkg/azure/client/resource_providers_client.go
@@ -0,0 +1,39 @@
+// Copyright 2026 Microsoft Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +package client + +import ( + "context" + + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources" +) + +// ResourceProvidersClient is an interface that defines the methods that +// we want to use from the ProvidersClient type in the Azure Go SDK +// (https://github.com/Azure/azure-sdk-for-go/tree/main/sdk/resourcemanager/resources/armresources). +// The aim is to only contain methods that are defined in the Azure Go SDK +// ProvidersClient client. +// If you need to use a method provided by the Azure Go SDK ProvidersClient +// client but it is not defined in this interface then it has to be added here and all +// the types implementing this interface have to implement the new method. +type ResourceProvidersClient interface { + Get(ctx context.Context, resourceProviderNamespace string, + options *armresources.ProvidersClientGetOptions) (armresources.ProvidersClientGetResponse, error) +} + +// interface guard to ensure that all methods defined in the ResourceProvidersClient +// interface are implemented by the real Azure Go SDK ProvidersClient +// client. This interface guard should always compile +var _ ResourceProvidersClient = (*armresources.ProvidersClient)(nil) diff --git a/backend/pkg/azure/config/azure_cloud_environment.go b/backend/pkg/azure/config/azure_cloud_environment.go new file mode 100644 index 0000000000..3d597d78b8 --- /dev/null +++ b/backend/pkg/azure/config/azure_cloud_environment.go @@ -0,0 +1,135 @@ +// Copyright 2026 Microsoft Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package config + +import ( + "fmt" + + "go.opentelemetry.io/otel/trace" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore" + azcorearm "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/cloud" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" + "github.com/Azure/azure-sdk-for-go/sdk/tracing/azotel" + + apisconfigv1 "github.com/Azure/ARO-HCP/backend/pkg/apis/config/v1" +) + +// AzureCloudEnvironment represents an Azure cloud environment. +type AzureCloudEnvironment struct { + // Configuration of the cloud environment + configuration *cloud.Configuration + // RDBMS scope of the cloud environment + rdbmsScope string + // Check Access V2 environment of the cloud environment + checkAccessV2Environment *checkAccessV2Environment + // Options for the Azure clients. + clientOptions *policy.ClientOptions +} + +// checkAccessV2Environment represents the information associated to Microsoft's +// Check Access V2 API. +type checkAccessV2Environment struct { + // domainSuffix is the domain suffix used as part of the domain name + // of the endpoint of the Check Access V2 API. + domainSuffix string + // scope is the permission scope to be requested for the access token to + // communicate with the Check Access V2 API. 
+ scope string +} + +func NewAzureCloudEnvironment( + cloudEnvironmentName apisconfigv1.CloudEnvironmentName, tracerProvider trace.TracerProvider, +) (*AzureCloudEnvironment, error) { + if len(cloudEnvironmentName) == 0 { + return nil, fmt.Errorf("cloud environment cannot be empty") + } + + var azureCloudEnvironmentConfigurationMapping = map[apisconfigv1.CloudEnvironmentName]struct { + cloud cloud.Configuration + rdbmsScope string + checkAccessV2Environment checkAccessV2Environment + }{ + apisconfigv1.AzureChinaCloud: { + cloud: cloud.AzureChina, + rdbmsScope: "https://ossrdbms-aad.database.chinacloudapi.cn", + checkAccessV2Environment: checkAccessV2Environment{ + domainSuffix: "azure.cn", + scope: "https://authorization.azure.cn/.default", + }, + }, + apisconfigv1.AzurePublicCloud: { + cloud: cloud.AzurePublic, + rdbmsScope: "https://ossrdbms-aad.database.windows.net/.default", + checkAccessV2Environment: checkAccessV2Environment{ + domainSuffix: "azure.net", + scope: "https://authorization.azure.net/.default", + }, + }, + apisconfigv1.AzureUSGovernmentCloud: { + cloud: cloud.AzureGovernment, + rdbmsScope: "https://ossrdbms-aad.database.usgovcloudapi.net", + checkAccessV2Environment: checkAccessV2Environment{ + domainSuffix: "azure.us", + scope: "https://authorization.azure.us/.default", + }, + }, + } + + configuration, ok := azureCloudEnvironmentConfigurationMapping[cloudEnvironmentName] + if !ok { + return nil, + fmt.Errorf("cloud environment %q is not supported", cloudEnvironmentName) + } + + clientOptions := &policy.ClientOptions{ + Cloud: configuration.cloud, + } + if tracerProvider != nil { + clientOptions.TracingProvider = azotel.NewTracingProvider(tracerProvider, nil) + } + + return &AzureCloudEnvironment{ + configuration: &configuration.cloud, + rdbmsScope: configuration.rdbmsScope, + checkAccessV2Environment: &configuration.checkAccessV2Environment, + clientOptions: clientOptions, + }, nil +} + +// AZCoreClientOptions returns an azcore.ClientOptions 
instance from the current +// Azure Cloud environment. The method returns the same result as calling +// PolicyClientOptions() because azcore.ClientOptions is a type alias of +// policy.ClientOptions. +func (a AzureCloudEnvironment) AZCoreClientOptions() *azcore.ClientOptions { + return a.clientOptions +} + +// PolicyClientOptions returns a policy.ClientOptions instance from the current +// Azure Cloud environment. The method returns the same result as calling +// AZCoreClientOptions() because azcore.ClientOptions is a type alias of +// policy.ClientOptions. +func (a AzureCloudEnvironment) PolicyClientOptions() *policy.ClientOptions { + return a.clientOptions +} + +// ARMClientOptions returns an arm.ClientOptions instance from the current +// Azure Cloud environment. +func (a AzureCloudEnvironment) ARMClientOptions() *azcorearm.ClientOptions { + return &azcorearm.ClientOptions{ + ClientOptions: *a.clientOptions, + } +} diff --git a/backend/pkg/azure/config/azure_config.go b/backend/pkg/azure/config/azure_config.go new file mode 100644 index 0000000000..84842941c0 --- /dev/null +++ b/backend/pkg/azure/config/azure_config.go @@ -0,0 +1,33 @@ +// Copyright 2026 Microsoft Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +package config + +import ( + apisconfigv1 "github.com/Azure/ARO-HCP/backend/pkg/apis/config/v1" +) + +// AzureConfig represents Azure related configuration used by the service +type AzureConfig struct { + // Cloud environment where the service is running on + CloudEnvironment *AzureCloudEnvironment + // AzureRuntimeConfig holds additional serialized configuration provided + // to the service via a configuration file. This + // is useful for pulling direct values from it. + AzureRuntimeConfig *apisconfigv1.AzureRuntimeConfig + + // Other attributes in the future like the operators managed identities + // configuration + // OperatorsManagedIdentitiesConfig AzureOperatorsManagedIdentitiesConfig +} diff --git a/backend/pkg/azure/validation/resourceid.go b/backend/pkg/azure/validation/resourceid.go new file mode 100644 index 0000000000..0cb6e7a81d --- /dev/null +++ b/backend/pkg/azure/validation/resourceid.go @@ -0,0 +1,201 @@ +// Copyright 2026 Microsoft Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package validation + +import ( + "context" + "fmt" + "strings" + + "k8s.io/apimachinery/pkg/api/operation" + "k8s.io/apimachinery/pkg/util/validation/field" + + azcorearm "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm" + + "github.com/Azure/ARO-HCP/internal/api" +) + +var ( + subnetResourceType azcorearm.ResourceType = api.Must(azcorearm.ParseResourceType("Microsoft.Network/virtualNetworks/subnets")) + vnetResourceType azcorearm.ResourceType = api.Must(azcorearm.ParseResourceType("Microsoft.Network/virtualNetworks")) + nsgResourceType azcorearm.ResourceType = api.Must(azcorearm.ParseResourceType("Microsoft.Network/networkSecurityGroups")) + publicDNSZoneResourceType azcorearm.ResourceType = api.Must(azcorearm.ParseResourceType("Microsoft.Network/dnsZones")) + userAssignedManagedIdentityType azcorearm.ResourceType = api.Must(azcorearm.ParseResourceType("Microsoft.ManagedIdentity/userAssignedIdentities")) + acrResourceType azcorearm.ResourceType = api.Must(azcorearm.ParseResourceType("Microsoft.ContainerRegistry/registries")) + roleDefinitionResourceType azcorearm.ResourceType = api.Must(azcorearm.ParseResourceType("Microsoft.Authorization/roleDefinitions")) + resourceGroupResourceType azcorearm.ResourceType = api.Must(azcorearm.ParseResourceType("Microsoft.Resources/resourceGroups")) + diskEncryptionSetResourceType azcorearm.ResourceType = api.Must(azcorearm.ParseResourceType("Microsoft.Compute/diskEncryptionSets")) + keyVaultResourceType azcorearm.ResourceType = api.Must(azcorearm.ParseResourceType("Microsoft.KeyVault/vaults")) + containerServiceManagedClusterResourceType azcorearm.ResourceType = api.Must(azcorearm.ParseResourceType("Microsoft.ContainerService/managedClusters")) +) + +// ValidateResourceGroupScopedResourceID validates that the Azure Resource ID +// `resourceID` is a valid resource group scoped resource ID of the Azure Resource +// Type `resourceType`. It also validates that the +// resource ID has a name.
+// The validations that are performed are: +// - The resource type of the resourceID matches the provided resourceType +// - The Azure Subscription ID of resourceID can be parsed and it is not empty +// - The Azure Resource Group Name of resourceID can be parsed and it is not empty +// - The Azure Resource Name of resourceID can be parsed and it is not empty +func ValidateResourceGroupScopedResourceID(ctx context.Context, op operation.Operation, fldPath *field.Path, + resourceID *azcorearm.ResourceID, resourceType azcorearm.ResourceType, +) field.ErrorList { + errs := field.ErrorList{} + + if resourceID == nil { + return nil + } + + resourceResourceType := resourceID.ResourceType + if !strings.EqualFold(resourceResourceType.String(), resourceType.String()) { + errs = append(errs, field.Invalid(fldPath, resourceID.String(), fmt.Sprintf("'%s' is not a valid '%s' Resource ID", resourceID.String(), resourceType))) + } + + if len(resourceID.SubscriptionID) == 0 { + errs = append(errs, field.Invalid(fldPath, resourceID.String(), "subscription id could not be parsed")) + } + + if len(resourceID.ResourceGroupName) == 0 { + errs = append(errs, field.Invalid(fldPath, resourceID.String(), "resource group could not be parsed")) + } + + if len(resourceID.Name) == 0 { + errs = append(errs, field.Invalid(fldPath, resourceID.String(), "resource name could not be parsed")) + } + + return errs +} + +// ValidateSubnetResourceID validates that the Azure Subnet Resource ID +// specified in `resourceID` is a valid Azure Subnet Resource ID. +// The validations that are performed are: +// - The resource type of the subnet resource id is the expected one for Azure +// Subnets. 
+func ValidateSubnetResourceID(ctx context.Context, op operation.Operation, fldPath *field.Path, resourceID *azcorearm.ResourceID) field.ErrorList { + return ValidateResourceGroupScopedResourceID(ctx, op, fldPath, resourceID, subnetResourceType) +} + +// ValidateVnetResourceID validates that the Azure VNet Resource ID +// specified in `resourceID` is a valid Azure VNet Resource ID. +// The validations that are performed are: +// - The resource type of the vnet resource id is the expected one for Azure +// VNets. +func ValidateVnetResourceID(ctx context.Context, op operation.Operation, fldPath *field.Path, resourceID *azcorearm.ResourceID) field.ErrorList { + return ValidateResourceGroupScopedResourceID(ctx, op, fldPath, resourceID, vnetResourceType) +} + +// ValidateNetworkSecurityGroupResourceID validates that the Azure Network Security Group +// Resource ID string specified in `resourceID` is a valid Azure Network +// Security Group Resource ID. +// The validations that are performed are: +// - The resource type of the network security group resource id is the +// expected one for Azure Network Security Groups +func ValidateNetworkSecurityGroupResourceID(ctx context.Context, op operation.Operation, fldPath *field.Path, resourceID *azcorearm.ResourceID) field.ErrorList { + return ValidateResourceGroupScopedResourceID(ctx, op, fldPath, resourceID, nsgResourceType) +} + +// ValidatePublicDNSZoneResourceID validates that the Azure Public DNS Zone +// Resource ID string specified in `resourceID` is a valid Azure Public DNS Zone +// Resource ID. 
+// The validations that are performed are: +// - The resource type of the Public DNS Zone resource id is the +// expected one for Azure Public DNS Zones +func ValidatePublicDNSZoneResourceID(ctx context.Context, op operation.Operation, fldPath *field.Path, resourceID *azcorearm.ResourceID) field.ErrorList { + return ValidateResourceGroupScopedResourceID(ctx, op, fldPath, resourceID, publicDNSZoneResourceType) +} + +// ValidateUserAssignedManagedIdentity validates that the Azure User-Assigned Managed +// Identity `resourceID` is a valid Azure User-Assigned Managed Identity Resource ID. +// The validations that are performed are: +// - The resource type of the User-Assigned Managed Identity resource id is the +// expected one for Azure User-Assigned Managed Identities +func ValidateUserAssignedManagedIdentity(ctx context.Context, op operation.Operation, fldPath *field.Path, resourceID *azcorearm.ResourceID) field.ErrorList { + return ValidateResourceGroupScopedResourceID(ctx, op, fldPath, resourceID, userAssignedManagedIdentityType) +} + +// ValidateACRResourceID validates that the Azure Container Registry +// `resourceID` is a valid Azure Container Registry Resource ID. +// The validations that are performed are: +// - The resource type of the Azure Container Registry resource id is the +// expected one for Azure Container Registries +func ValidateACRResourceID(ctx context.Context, op operation.Operation, fldPath *field.Path, resourceID *azcorearm.ResourceID) field.ErrorList { + return ValidateResourceGroupScopedResourceID(ctx, op, fldPath, resourceID, acrResourceType) +} + +// ValidateRoleDefinitionResourceID validates that the Azure Role Definition +// `resourceID` is a valid Azure Role Definition Resource ID.
+// The validations that are performed are: +// - The resource type of the role definition resource id is the expected one +// for Azure Role Definitions +func ValidateRoleDefinitionResourceID(ctx context.Context, op operation.Operation, fldPath *field.Path, resourceID *azcorearm.ResourceID) field.ErrorList { + var errs field.ErrorList + + if resourceID == nil { + return nil + } + + if !strings.EqualFold(resourceID.ResourceType.String(), roleDefinitionResourceType.String()) { + errs = append(errs, field.Invalid(fldPath, resourceID.String(), fmt.Sprintf("'%s' is not a valid '%s' Resource ID", resourceID.String(), roleDefinitionResourceType))) + } + + return errs +} + +// ValidateResourceGroupResourceID validates that the Azure Resource Group Resource +// `resourceID` is a valid Azure Resource Group Resource ID. +// The validations that are performed are: +// - The resource type of the resource group resource id is the expected one +// for Azure Resource Groups. +func ValidateResourceGroupResourceID(ctx context.Context, op operation.Operation, fldPath *field.Path, resourceID *azcorearm.ResourceID) field.ErrorList { + var errs field.ErrorList + + if resourceID == nil { + return nil + } + + if !strings.EqualFold(resourceID.ResourceType.String(), resourceGroupResourceType.String()) { + errs = append(errs, field.Invalid(fldPath, resourceID.String(), fmt.Sprintf("'%s' is not a valid '%s' Resource ID", resourceID.String(), resourceGroupResourceType))) + } + + return errs +} + +// ValidateDiskEncryptionSetResourceID validates that the Azure Disk Encryption Set +// `resourceID` is a valid Azure Disk Encryption Set Resource ID. 
+// The validations that are performed are: +// - The resource type of the disk encryption set resource id is the expected one +// for Azure Disk Encryption Sets +func ValidateDiskEncryptionSetResourceID(ctx context.Context, op operation.Operation, fldPath *field.Path, resourceID *azcorearm.ResourceID) field.ErrorList { + return ValidateResourceGroupScopedResourceID(ctx, op, fldPath, resourceID, diskEncryptionSetResourceType) +} + +// ValidateKeyVaultResourceID validates that the Azure Key Vault Resource +// `resourceID` is a valid Azure Key Vault Resource ID. +// The validations that are performed are: +// - The resource type of the key vault resource id is the expected one +// for Azure Key Vaults. +func ValidateKeyVaultResourceID(ctx context.Context, op operation.Operation, fldPath *field.Path, resourceID *azcorearm.ResourceID) field.ErrorList { + return ValidateResourceGroupScopedResourceID(ctx, op, fldPath, resourceID, keyVaultResourceType) +} + +// ValidateContainerServiceManagedClusterResourceID validates that the Azure Container Service Managed Cluster, +// (also known as AKS Cluster) `resourceID` is a valid Azure Container Service Managed Cluster Resource ID. +// The validations that are performed are: +// - The resource type of the container service managed cluster resource id is the +// expected one for Azure Container Service Managed Clusters. 
+func ValidateContainerServiceManagedClusterResourceID(ctx context.Context, op operation.Operation, fldPath *field.Path, resourceID *azcorearm.ResourceID) field.ErrorList { + return ValidateResourceGroupScopedResourceID(ctx, op, fldPath, resourceID, containerServiceManagedClusterResourceType) +} diff --git a/backend/pkg/azure/validation/service_url.go b/backend/pkg/azure/validation/service_url.go new file mode 100644 index 0000000000..4c4b3f30cb --- /dev/null +++ b/backend/pkg/azure/validation/service_url.go @@ -0,0 +1,45 @@ +// Copyright 2026 Microsoft Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package validation + +import ( + "context" + "fmt" + "net/url" + + "k8s.io/apimachinery/pkg/api/operation" + "k8s.io/apimachinery/pkg/util/validation/field" +) + +// ValidateAzureServiceURL ensures the URL is parsable, with +// scheme "https", and the path is "/". 
+func ValidateAzureServiceURL(_ context.Context, _ operation.Operation, fldPath *field.Path, rawURL string) field.ErrorList { + errs := field.ErrorList{} + parsedURL, err := url.Parse(rawURL) + if err != nil { + errs = append(errs, field.Invalid(fldPath, rawURL, fmt.Sprintf("attribute is not a valid azure service url: %v", err))) + return errs + } + + if parsedURL.Scheme != "https" { + errs = append(errs, field.Invalid(fldPath, rawURL, "the URL is expected to be of scheme 'HTTPS'")) + } + + if parsedURL.Path != "/" { + errs = append(errs, field.Invalid(fldPath, rawURL, "the URL is expected to be with path '/'")) + } + + return errs +} diff --git a/backend/pkg/controllers/validationcontrollers/validations/azure_rp_registration_validation.go b/backend/pkg/controllers/validationcontrollers/validations/azure_rp_registration_validation.go new file mode 100644 index 0000000000..9a46e16b85 --- /dev/null +++ b/backend/pkg/controllers/validationcontrollers/validations/azure_rp_registration_validation.go @@ -0,0 +1,83 @@ +// Copyright 2026 Microsoft Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package validations + +import ( + "context" + "fmt" + "strings" + + azureclient "github.com/Azure/ARO-HCP/backend/pkg/azure/client" + "github.com/Azure/ARO-HCP/internal/api" + "github.com/Azure/ARO-HCP/internal/api/arm" + "github.com/Azure/ARO-HCP/internal/utils" +) + +// AzureResourceProvidersRegistrationValidation validates the registration state of several +// Azure Resource Providers in the subscription of the cluster being validated. +type AzureResourceProvidersRegistrationValidation struct { + azureFPAClientBuilder azureclient.FirstPartyApplicationClientBuilder +} + +func NewAzureResourceProvidersRegistrationValidation( + azureFPAClientBuilder azureclient.FirstPartyApplicationClientBuilder, +) *AzureResourceProvidersRegistrationValidation { + return &AzureResourceProvidersRegistrationValidation{ + azureFPAClientBuilder: azureFPAClientBuilder, + } +} + +func (v *AzureResourceProvidersRegistrationValidation) Name() string { + return "AzureResourceProvidersRegistrationValidation" +} + +func (v *AzureResourceProvidersRegistrationValidation) Validate( + ctx context.Context, clusterSubscription *arm.Subscription, cluster *api.HCPOpenShiftCluster, +) error { + resourceProvidersToCheck := []string{ + "Microsoft.Authorization", + "Microsoft.Compute", + "Microsoft.Network", + "Microsoft.Storage", + } + + missingResourcesProviders := []string{} + + rpClient, err := v.azureFPAClientBuilder.ResourceProvidersClient( + *clusterSubscription.Properties.TenantId, + cluster.ID.SubscriptionID, + ) + if err != nil { + return utils.TrackError(fmt.Errorf("failed to get resource providers client: %w", err)) + } + + for _, rp := range resourceProvidersToCheck { + providerResp, err := rpClient.Get(ctx, rp, nil) + if err != nil { + return err + } + if providerResp.RegistrationState == nil || + *providerResp.RegistrationState != "Registered" { + missingResourcesProviders = append(missingResourcesProviders, rp) + } + } + + if len(missingResourcesProviders) > 0 { + return 
utils.TrackError(fmt.Errorf("%v
of the resource providers are not registered, or their state is empty: %s", + len(missingResourcesProviders), strings.Join(missingResourcesProviders, ", "))) + } + + return nil +} From 09a526621438cd42c92a2b4f553ed566449a1145 Mon Sep 17 00:00:00 2001 From: Miguel Soriano Date: Fri, 16 Jan 2026 19:09:06 +0100 Subject: [PATCH 2/4] feat: prepare hcpcluster resource group existence inflight check logic We introduce inflight check that verifies that the Azure Resource Group part of the hcpcluster resourceid of the cluster being created exists. To create an hcpcluster the resource group where is to be created must exist beforehand. --- .golangci.yml | 2 + ...ter_resource_group_existence_validation.go | 52 +++++++++++++++++++ backend/pkg/azure/client/errors.go | 13 +++++ .../azure/client/resource_groups_client.go | 21 ++++++++ 4 files changed, 88 insertions(+) create mode 100644 backend/controllers/azure_hcp_cluster_resource_group_existence_validation.go create mode 100644 backend/pkg/azure/client/errors.go create mode 100644 backend/pkg/azure/client/resource_groups_client.go diff --git a/.golangci.yml b/.golangci.yml index b8f4e38e06..2a99ecacd5 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -33,6 +33,8 @@ linters: alias: $1$2 - pkg: github.com/Azure/ARO-HCP/test/sdk/v20240610preview/resourcemanager/redhatopenshifthcp/armredhatopenshifthcp" alias: hcpsdk20240610preview + - pkg: github.com/Azure/ARO-HCP/backend/pkg/azure/client + alias: azureclient staticcheck: dot-import-whitelist: - "github.com/onsi/ginkgo" diff --git a/backend/controllers/azure_hcp_cluster_resource_group_existence_validation.go b/backend/controllers/azure_hcp_cluster_resource_group_existence_validation.go new file mode 100644 index 0000000000..ea954ea55f --- /dev/null +++ b/backend/controllers/azure_hcp_cluster_resource_group_existence_validation.go @@ -0,0 +1,52 @@ +package controllers + +import ( + "context" + "fmt" + + azureclient "github.com/Azure/ARO-HCP/backend/pkg/azure/client" + 
"github.com/Azure/ARO-HCP/internal/api" + "github.com/Azure/ARO-HCP/internal/api/arm" + "github.com/Azure/ARO-HCP/internal/utils" +) + +// AzureHCPClusterResourceGroupExistenceValidation validates that the Azure Resource +// Group part of the HCP Cluster Resource ID being created exists beforehand. +type AzureHCPClusterResourceGroupExistenceValidation struct { + azureFPAClientBuilder azureclient.FPAClientBuilder +} + +func NewAzureHCPClusterResourceGroupExistenceValidation( + azureFPAClientBuilder azureclient.FPAClientBuilder, +) *AzureHCPClusterResourceGroupExistenceValidation { + return &AzureHCPClusterResourceGroupExistenceValidation{ + azureFPAClientBuilder: azureFPAClientBuilder, + } +} + +func (a *AzureHCPClusterResourceGroupExistenceValidation) Name() string { + return "azure-cluster-resource-group-existence-validation" +} + +func (a *AzureHCPClusterResourceGroupExistenceValidation) Validate( + ctx context.Context, clusterSubscription *arm.Subscription, cluster *api.HCPOpenShiftCluster, +) error { + rgClient, err := a.azureFPAClientBuilder.ResourceGroupsClient( + *clusterSubscription.Properties.TenantId, + cluster.ID.SubscriptionID, + ) + if err != nil { + return utils.TrackError(fmt.Errorf("failed to get resource groups client: %w", err)) + } + + _, err = rgClient.Get(ctx, cluster.ID.ResourceGroupName, nil) + if azureclient.IsResourceGroupNotFoundErr(err) { + return utils.TrackError(fmt.Errorf("resource group does not exist: %w", err)) + } + + if err != nil { + return utils.TrackError(fmt.Errorf("failed to get resource group: %w", err)) + } + + return nil +} diff --git a/backend/pkg/azure/client/errors.go b/backend/pkg/azure/client/errors.go new file mode 100644 index 0000000000..3006976bc0 --- /dev/null +++ b/backend/pkg/azure/client/errors.go @@ -0,0 +1,13 @@ +package client + +import ( + "errors" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore" +) + +// IsResourceGroupNotFoundErr is used to determine if we are failing to find a resource group within 
azure. +func IsResourceGroupNotFoundErr(err error) bool { + var azErr *azcore.ResponseError + return errors.As(err, &azErr) && azErr.ErrorCode == "ResourceGroupNotFound" +} diff --git a/backend/pkg/azure/client/resource_groups_client.go b/backend/pkg/azure/client/resource_groups_client.go new file mode 100644 index 0000000000..55921cc9c7 --- /dev/null +++ b/backend/pkg/azure/client/resource_groups_client.go @@ -0,0 +1,21 @@ +package client + +import ( + "context" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources" +) + +type ResourceGroupsClient interface { + CreateOrUpdate(ctx context.Context, resourceGroupName string, parameters armresources.ResourceGroup, + options *armresources.ResourceGroupsClientCreateOrUpdateOptions) ( + armresources.ResourceGroupsClientCreateOrUpdateResponse, error) + BeginDelete(ctx context.Context, resourceGroupName string, + options *armresources.ResourceGroupsClientBeginDeleteOptions) ( + *runtime.Poller[armresources.ResourceGroupsClientDeleteResponse], error) + Get(ctx context.Context, resourceGroupName string, options *armresources.ResourceGroupsClientGetOptions) ( + armresources.ResourceGroupsClientGetResponse, error) +} + +var _ ResourceGroupsClient = (*armresources.ResourceGroupsClient)(nil) From 3dd70daeb2277a38ca05c55406370b33a488686a Mon Sep 17 00:00:00 2001 From: Miguel Soriano Date: Fri, 16 Jan 2026 16:20:53 +0100 Subject: [PATCH 3/4] feat: add ARO-HCP Clusters Managed Identities existence cluster validation This commit also introduces the ability to authenticate as the cluster's service managed identity, which is required to be able to instantiate a user-assigned identities client that uses it, which has enough permissions to check the existence of the cluster-scoped user-provided azure managed identities associated to the cluster's operators. The service managed identity (SMI) is a cluster-scoped azure user-assigned managed identity. 
This identity is used to interact with Azure resources that are created and provided by the end-user. To be able to authenticate as the cluster's service managed identity, credentials need to be retrieved for it. To achieve that, this commit also introduces the ability to interact with Microsoft's Managed Identities Data Plane service. This service is only available in environments where Microsoft's First Party Application integration is available. For the environments where the First Party Application integration is not available we cannot communicate with the Managed Identities Data Plane service, so instead we use a mock implementation of the ManagedIdentitiesDataplaneClient that always returns a single Azure Service Principal identity representing a Managed Identity. This commit also introduces this mock client implementation. --- backend/fpa_wiring.go | 65 +++++++++- backend/go.mod | 7 ++ backend/go.sum | 16 +++ backend/main.go | 104 ++++++++++++--- backend/pkg/azure/client/errors.go | 9 ++ .../client/fpa_mi_dataplane_client_builder.go | 75 +++++++++++ ...dentity_fpa_mi_dataplane_client_builder.go | 38 ++++++ .../hardcoded_identity_mi_dataplane_client.go | 115 +++++++++++++++++ .../pkg/azure/client/mi_dataplane_client.go | 26 ++++ .../pkg/azure/client/smi_client_builder.go | 119 ++++++++++++++++++ .../client/user_assigned_identities_client.go | 38 ++++++ .../azure/config/azure_cloud_environment.go | 4 + .../azure_cluster_mis_existence_validation.go | 91 ++++++++++++++ 13 files changed, 689 insertions(+), 18 deletions(-) create mode 100644 backend/pkg/azure/client/fpa_mi_dataplane_client_builder.go create mode 100644 backend/pkg/azure/client/hardcoded_identity_fpa_mi_dataplane_client_builder.go create mode 100644 backend/pkg/azure/client/hardcoded_identity_mi_dataplane_client.go create mode 100644 backend/pkg/azure/client/mi_dataplane_client.go create mode 100644 backend/pkg/azure/client/smi_client_builder.go create mode 100644
backend/pkg/azure/client/user_assigned_identities_client.go create mode 100644 backend/pkg/controllers/validationcontrollers/validations/azure_cluster_mis_existence_validation.go diff --git a/backend/fpa_wiring.go b/backend/fpa_wiring.go index cf7efb6059..835cfb5d9d 100644 --- a/backend/fpa_wiring.go +++ b/backend/fpa_wiring.go @@ -16,8 +16,10 @@ package main import ( "context" + "encoding/base64" "fmt" "log/slog" + "os" "time" "github.com/go-logr/logr" @@ -28,10 +30,10 @@ import ( "github.com/Azure/ARO-HCP/internal/utils" ) -func getFirstPartyApplicationClientBuilder( - ctx context.Context, fpaCertBundlePath string, fpaClientID string, - azureConfig *azureconfig.AzureConfig, -) (azureclient.FirstPartyApplicationClientBuilder, error) { +func getFirstPartyApplicationTokenCredentialRetriever( + ctx context.Context, fpaCertBundlePath string, + fpaClientID string, azureConfig *azureconfig.AzureConfig, +) (fpa.FirstPartyApplicationTokenCredentialRetriever, error) { if len(fpaCertBundlePath) == 0 || len(fpaClientID) == 0 { return nil, nil } @@ -66,9 +68,64 @@ func getFirstPartyApplicationClientBuilder( return nil, fmt.Errorf("failed to create FPA token credential retriever: %w", err) } + return fpaTokenCredRetriever, nil +} + +func getFirstPartyApplicationClientBuilder( + fpaTokenCredRetriever fpa.FirstPartyApplicationTokenCredentialRetriever, azureConfig *azureconfig.AzureConfig, +) (azureclient.FirstPartyApplicationClientBuilder, error) { fpaClientBuilder := azureclient.NewFirstPartyApplicationClientBuilder( fpaTokenCredRetriever, azureConfig.CloudEnvironment.ARMClientOptions(), ) return fpaClientBuilder, nil } + +func getFirstPartyApplicationManagedIdentitiesDataplaneClientBuilder( + fpaTokenCredRetriever fpa.FirstPartyApplicationTokenCredentialRetriever, + azureMIMockSPCertBundlePath string, azureMIMockSPClientID string, azureMIMockSPPrincipalID string, azureMIMockSPTenantID string, + azureConfig *azureconfig.AzureConfig, +) (azureclient.FPAMIDataplaneClientBuilder, 
error) { + + if len(azureMIMockSPCertBundlePath) != 0 && len(azureMIMockSPClientID) != 0 && len(azureMIMockSPPrincipalID) != 0 { + // TODO if we want to support detecting when the cert bundle path content + // changes, we could use a file watcher similar to the one used in the + // fpa token credential retriever, and pass that retriever to the client + // builder. + bundle, err := os.ReadFile(azureMIMockSPCertBundlePath) + if err != nil { + return nil, fmt.Errorf("failed to read bundle file: %w", err) + } + bundleBase64Encoded := base64.StdEncoding.EncodeToString(bundle) + hardcodedIdentity := &azureclient.HardcodedIdentity{ + ClientID: azureMIMockSPClientID, + ClientSecret: bundleBase64Encoded, + PrincipalID: azureMIMockSPPrincipalID, + TenantID: azureMIMockSPTenantID, + } + hardcodedIdentityFPAMIDataplaneClientBuilder := azureclient.NewHardcodedIdentityFPAMIDataplaneClientBuilder( + azureConfig.CloudEnvironment.CloudConfiguration(), + hardcodedIdentity, + ) + return hardcodedIdentityFPAMIDataplaneClientBuilder, nil + } + + fpaMIdataplaneClientBuilder := azureclient.NewFPAMIDataplaneClientBuilder( + azureConfig.AzureRuntimeConfig.ServiceTenantID, + fpaTokenCredRetriever, + azureConfig.AzureRuntimeConfig.ManagedIdentitiesDataPlaneAudienceResource, + azureConfig.CloudEnvironment.AZCoreClientOptions(), + ) + + return fpaMIdataplaneClientBuilder, nil +} + +func getServiceManagedIdentityClientBuilderFactory( + fpaMIdataplaneClientBuilder azureclient.FPAMIDataplaneClientBuilder, + azureConfig *azureconfig.AzureConfig, +) azureclient.ServiceManagedIdentityClientBuilderFactory { + return azureclient.NewServiceManagedIdentityClientBuilderFactory( + fpaMIdataplaneClientBuilder, + azureConfig.CloudEnvironment.ARMClientOptions(), + ) +} diff --git a/backend/go.mod b/backend/go.mod index 5d0fc5305d..3304bfda46 100644 --- a/backend/go.mod +++ b/backend/go.mod @@ -6,8 +6,10 @@ require ( github.com/Azure/ARO-HCP/internal v0.0.0-00010101000000-000000000000 
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0 github.com/Azure/azure-sdk-for-go/sdk/data/azcosmos v1.4.1 + github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/msi/armmsi v1.3.0 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.2.0 github.com/Azure/azure-sdk-for-go/sdk/tracing/azotel v0.4.0 + github.com/Azure/msi-dataplane v0.4.3 github.com/go-logr/logr v1.4.3 github.com/openshift-online/ocm-sdk-go v0.1.480 github.com/prometheus/client_golang v1.23.2 @@ -31,7 +33,10 @@ require ( github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1 // indirect github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 // indirect github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/managementgroups/armmanagementgroups v1.2.0 // indirect + github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azsecrets v1.3.1 // indirect + github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.1.1 // indirect github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 // indirect + github.com/antlr4-go/antlr/v4 v4.13.1 // indirect github.com/aymerick/douceur v0.2.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect @@ -41,6 +46,7 @@ require ( github.com/emicklei/go-restful/v3 v3.12.2 // indirect github.com/evanphx/json-patch v5.9.11+incompatible // indirect github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/fsnotify/fsnotify v1.8.0 // indirect github.com/fxamacker/cbor/v2 v2.9.0 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-openapi/jsonpointer v0.21.1 // indirect @@ -100,6 +106,7 @@ require ( go.yaml.in/yaml/v2 v2.4.2 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect golang.org/x/crypto v0.46.0 // indirect + golang.org/x/exp v0.0.0-20250911091902-df9299821621 // indirect golang.org/x/net v0.47.0 // indirect golang.org/x/oauth2 v0.30.0 // indirect golang.org/x/sys v0.39.0 // indirect diff --git a/backend/go.sum b/backend/go.sum index 33e8f02734..2d8768de57 
100644 --- a/backend/go.sum +++ b/backend/go.sum @@ -14,16 +14,28 @@ github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 h1:9iefClla7iYpfYWdzPCRDo github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2/go.mod h1:XtLgD3ZD34DAaVIIAyG3objl5DynM3CQ/vMcbBNJZGI= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v2 v2.0.0 h1:PTFGRSlMKCQelWwxUyYVEUqseBJVemLyqWJjvMyt0do= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v2 v2.0.0/go.mod h1:LRr2FzBTQlONPPa5HREE5+RjSCTXl7BwOvYOaWTqCaI= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v3 v3.1.0 h1:2qsIIvxVT+uE6yrNldntJKlLRgxGbZ85kgtz5SNBhMw= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v3 v3.1.0/go.mod h1:AW8VEadnhw9xox+VaVd9sP7NjzOAnaZBLRH6Tq3cJ38= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/managementgroups/armmanagementgroups v1.2.0 h1:akP6VpxJGgQRpDR1P462piz/8OhYLRCreDj48AyNabc= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/managementgroups/armmanagementgroups v1.2.0/go.mod h1:8wzvopPfyZYPaQUoKW87Zfdul7jmJMDfp/k7YY3oJyA= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/msi/armmsi v1.3.0 h1:L7G3dExHBgUxsO3qpTGhk/P2dgnYyW48yn7AO33Tbek= +github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/msi/armmsi v1.3.0/go.mod h1:Ms6gYEy0+A2knfKrwdatsggTXYA2+ICKug8w7STorFw= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.2.0 h1:Dd+RhdJn0OTtVGaeDLZpcumkIVCtA/3/Fo42+eoYvVM= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armresources v1.2.0/go.mod h1:5kakwfW5CjC9KK+Q4wjXAg+ShuIm2mBMua0ZFj2C8PE= +github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azsecrets v1.3.1 h1:mrkDCdkMsD4l9wjFGhofFHFrV43Y3c53RSLKOCJ5+Ow= +github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azsecrets v1.3.1/go.mod h1:hPv41DbqMmnxcGralanA/kVlfdH5jv3T4LxGku2E1BY= +github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.1.1 h1:bFWuoEKg+gImo7pvkiQEFAc8ocibADgXeiLAxWhWmkI= 
+github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.1.1/go.mod h1:Vih/3yc6yac2JzU4hzpaDupBJP0Flaia9rXXrU8xyww= github.com/Azure/azure-sdk-for-go/sdk/tracing/azotel v0.4.0 h1:RTTsXUJWn0jumeX62Mb153wYXykqnrzYBYDeHp0kiuk= github.com/Azure/azure-sdk-for-go/sdk/tracing/azotel v0.4.0/go.mod h1:k4MMjrPHIEK+umaMGk1GNLgjEybJZ9mHSRDZ+sDFv3Y= +github.com/Azure/msi-dataplane v0.4.3 h1:dWPWzY4b54tLIR9T1Q014Xxd/1DxOsMIp6EjRFAJlQY= +github.com/Azure/msi-dataplane v0.4.3/go.mod h1:yAfxdJyvcnvSDfSyOFV9qm4fReEQDl+nZLGeH2ZWSmw= github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1 h1:WJTmL004Abzc5wDB5VtZG2PJk5ndYDgVacGqfirKxjM= github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1/go.mod h1:tCcJZ0uHAmvjsVYzEFivsRTN00oz5BEsRgQHu5JZ9WE= github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 h1:XRzhVemXdgvJqCH0sFfrBUTnUJSBrBf7++ypk+twtRs= github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0/go.mod h1:HKpQxkWaGLJ+D/5H8QRpyQXA1eKjxkFlOMwck5+33Jk= +github.com/antlr4-go/antlr/v4 v4.13.1 h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYWrPrQ= +github.com/antlr4-go/antlr/v4 v4.13.1/go.mod h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw= github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk= github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -47,6 +59,8 @@ github.com/evanphx/json-patch/v5 v5.9.11 h1:/8HVnzMq13/3x9TPvjG08wUGqBTmZBsCWzjT github.com/evanphx/json-patch/v5 v5.9.11/go.mod h1:3j+LviiESTElxA4p3EMKAB9HXj3/XEtnUf6OZxqIQTM= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M= +github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= 
github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= @@ -246,6 +260,8 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU= golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0= +golang.org/x/exp v0.0.0-20250911091902-df9299821621 h1:2id6c1/gto0kaHYyrixvknJ8tUK/Qs5IsmBtrc+FtgU= +golang.org/x/exp v0.0.0-20250911091902-df9299821621/go.mod h1:TwQYMMnGpvZyc+JpB/UAuTNIsVJifOlSkrZkhcvpVUk= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= diff --git a/backend/main.go b/backend/main.go index b6078c59d6..f200ceabfb 100644 --- a/backend/main.go +++ b/backend/main.go @@ -69,18 +69,22 @@ const ( ) var ( - argKubeconfig string - argNamespace string - argLocation string - argCosmosName string - argCosmosURL string - argClustersServiceURL string - argInsecure bool - argMetricsListenAddress string - argPortListenAddress string - argAzureRuntimeConfigPath string - argAzureFPACertBundlePath string - argAzureFPAClientID string + argKubeconfig string + argNamespace string + argLocation string + argCosmosName string + argCosmosURL string + argClustersServiceURL string + argInsecure bool + argMetricsListenAddress string + argPortListenAddress string + argAzureRuntimeConfigPath string + argAzureFPACertBundlePath string + argAzureFPAClientID string + argAzureMIMockSPCertBundlePath string + argAzureMIMockSPClientID string + 
argAzureMIMockSPPrincipalID string + argAzureMIMockTenantID string processName = filepath.Base(os.Args[0]) @@ -133,8 +137,58 @@ func init() { "The client id of the first party application identity", ) + rootCmd.Flags().StringVar( + &argAzureMIMockSPCertBundlePath, + "azure-mi-mock-certificate-bundle-path", + "", + "Path to a file containing an X.509 Certificate based client certificate, consisting of a private key and "+ + "certificate chain, in a PEM or PKCS#12 format for authenticating clients with the msi mock identity, which is "+ + "a common Azure Service Principal identity. This flag should only be set in environments where "+ + "Microsoft's MI Dataplane service is not available. "+ + "When set, it must be set in combination with the '--azure-mi-mock-client-id' and "+ + "'--azure-mi-mock-service-principal-id' and '--azure-mi-mock-tenant-id' flags.", + ) + + rootCmd.Flags().StringVar( + &argAzureMIMockSPClientID, + "azure-mi-mock-client-id", + "", + "The client id of the ARO-HCP Clusters Managed Identities (MI) mock identity, which is a common Azure Service Principal identity. "+ + "This flag should only be set in environments where Microsoft's MI Dataplane service is not available. "+ + "When set, it must be set in combination with the '--azure-mi-mock-certificate-bundle-path' and "+ + "'--azure-mi-mock-service-principal-id' and '--azure-mi-mock-tenant-id' flags.", + ) + + rootCmd.Flags().StringVar( + &argAzureMIMockSPPrincipalID, + "azure-mi-mock-service-principal-id", + "", + "The principal id of the ARO-HCP Clusters Managed Identities (MI) mock identity, which is a common Azure Service Principal identity. "+ + "This flag should only be set in environments where Microsoft's MI Dataplane service is not available. 
"+ + "When set, it must be set in combination with the '--azure-mi-mock-certificate-bundle-path' and "+ + "'--azure-mi-mock-client-id' and '--azure-mi-mock-tenant-id' flags.", + ) + + rootCmd.Flags().StringVar( + &argAzureMIMockTenantID, + "azure-mi-mock-tenant-id", + "", + "The tenant id of the ARO-HCP Clusters Managed Identities (MI) mock identity, which is a common Azure Service Principal identity. "+ + "This flag should only be set in environments where Microsoft's MI Dataplane service is not available. "+ + "When set, it must be set in combination with the '--azure-mi-mock-certificate-bundle-path', "+ + "'--azure-mi-mock-client-id' and '--azure-mi-mock-service-principal-id' flags.", + ) + rootCmd.MarkFlagsRequiredTogether("cosmos-name", "cosmos-url") + // We require that if one of the mi mock service principal flags is set, all of them must be set together. + rootCmd.MarkFlagsRequiredTogether( + "azure-mi-mock-certificate-bundle-path", + "azure-mi-mock-client-id", + "azure-mi-mock-service-principal-id", + "azure-mi-mock-tenant-id", + ) + rootCmd.Version = version.CommitSHA } @@ -206,11 +260,26 @@ func Run(cmd *cobra.Command, args []string) error { return fmt.Errorf("error getting azure configuration: %w", err) } - fpaClientBuilder, err := getFirstPartyApplicationClientBuilder(ctx, argAzureFPACertBundlePath, argAzureFPAClientID, azureConfig) + fpaTokenCredRetriever, err := getFirstPartyApplicationTokenCredentialRetriever(ctx, argAzureFPACertBundlePath, argAzureFPAClientID, azureConfig) if err != nil { - return fmt.Errorf("error configuring FPA client builder: %w", err) + return fmt.Errorf("error getting FPA token credential retriever: %w", err) } + fpaClientBuilder, err := getFirstPartyApplicationClientBuilder(fpaTokenCredRetriever, azureConfig) + if err != nil { + return fmt.Errorf("error getting FPA client builder: %w", err) + } + + fpaMIDataplaneClientBuilder, err := getFirstPartyApplicationManagedIdentitiesDataplaneClientBuilder( + fpaTokenCredRetriever, 
+ argAzureMIMockSPCertBundlePath, argAzureMIMockSPClientID, argAzureMIMockSPPrincipalID, argAzureMIMockTenantID, + azureConfig, + ) + if err != nil { + return fmt.Errorf("error getting FPA MI dataplane client builder: %w", err) + } + smiClientBuilderFactory := getServiceManagedIdentityClientBuilderFactory(fpaMIDataplaneClientBuilder, azureConfig) + // Create the database client. cosmosDatabaseClient, err := database.NewCosmosDatabaseClient( argCosmosURL, @@ -399,6 +468,12 @@ func Run(cmd *cobra.Command, args []string) error { dbClient, subscriptionLister, ) + + azureClusterManagedIdentitiesExistenceValidationController = validationcontrollers.NewClusterValidationController( + validations.NewAzureClusterManagedIdetitiesExistenceValidation(smiClientBuilderFactory), + dbClient, + subscriptionLister, + ) ) le, err := leaderelection.NewLeaderElector(leaderelection.LeaderElectionConfig{ @@ -425,6 +500,7 @@ func Run(cmd *cobra.Command, args []string) error { go cosmosMatchingClusterController.Run(ctx, 20) go alwaysSuccessClusterValidationController.Run(ctx, 20) go azureRPRegistrationValidationController.Run(ctx, 20) + go azureClusterManagedIdentitiesExistenceValidationController.Run(ctx, 20) }, OnStoppedLeading: func() { operationsScanner.LeaderGauge.Set(0) diff --git a/backend/pkg/azure/client/errors.go b/backend/pkg/azure/client/errors.go index 3006976bc0..0bd504a639 100644 --- a/backend/pkg/azure/client/errors.go +++ b/backend/pkg/azure/client/errors.go @@ -11,3 +11,12 @@ func IsResourceGroupNotFoundErr(err error) bool { var azErr *azcore.ResponseError return errors.As(err, &azErr) && azErr.ErrorCode == "ResourceGroupNotFound" } + +// IsResourceNotFoundErr is used to determine if we are failing to find a resource within azure. 
+// *WARNING* Not all azure API operations return the `ResourceNotFound` error code when the resource +// is not found, and more specific error codes are returned for some of them e.g. `RoleAssignmentNotFound` +// is returned when a role assignment is not found. +func IsResourceNotFoundErr(err error) bool { + var azErr *azcore.ResponseError + return errors.As(err, &azErr) && azErr.ErrorCode == "ResourceNotFound" +} diff --git a/backend/pkg/azure/client/fpa_mi_dataplane_client_builder.go b/backend/pkg/azure/client/fpa_mi_dataplane_client_builder.go new file mode 100644 index 0000000000..c3210c1a2f --- /dev/null +++ b/backend/pkg/azure/client/fpa_mi_dataplane_client_builder.go @@ -0,0 +1,75 @@ +package client + +import ( + "github.com/Azure/ARO-HCP/internal/fpa" + "github.com/Azure/azure-sdk-for-go/sdk/azcore" + "github.com/Azure/msi-dataplane/pkg/dataplane" +) + +// FPAMIDataplaneClientBuilderType is a type that represents the type of the FPAMIDataplaneClientBuilder +// interface. It is used to ensure that that interface is incompatible +// with other client builder interfaces that might have the same set of +// methods. +type FPAMIDataplaneClientBuilderType string + +const ( + // FPAMIDataplaneClientBuilderTypeValue is the value of the FPAMIDataplaneClientBuilderType type that + // represents the FPA MI dataplane client builder. + FPAMIDataplaneClientBuilderTypeValue FPAMIDataplaneClientBuilderType = "FPA-MIDP" +) + +// FPAMIDataplaneClientBuilder offers the ability to create Managed Identity Data Plane clients +// authenticating as the First Party Application (FPA) identity. +type FPAMIDataplaneClientBuilder interface { + BuilderType() FPAMIDataplaneClientBuilderType + // ManagedIdentitiesDataplane returns a new Managed Identity Data Plane client using the given identity URL. 
+ ManagedIdentitiesDataplane(identityURL string) (ManagedIdentitiesDataplaneClient, error) +} + +type fpaMIdataplaneClientBuilder struct { + serviceTenantID string + audience string + fpaTokenCredRetriever fpa.FirstPartyApplicationTokenCredentialRetriever + options *azcore.ClientOptions +} + +var _ FPAMIDataplaneClientBuilder = (*fpaMIdataplaneClientBuilder)(nil) + +func (b *fpaMIdataplaneClientBuilder) BuilderType() FPAMIDataplaneClientBuilderType { + return FPAMIDataplaneClientBuilderTypeValue +} + +func (b *fpaMIdataplaneClientBuilder) ManagedIdentitiesDataplane(identityURL string) (ManagedIdentitiesDataplaneClient, error) { + creds, err := b.fpaTokenCredRetriever.RetrieveCredential( + b.serviceTenantID, + // The MI dataplane client receives the tenant from the bearer challenge, we use a wildcard * so as + // to not limit the allowed tenants in the credential. This was taken from + // https://github.com/Azure/ARO-RP/blob/9719391dd5d2213abb1b895e9b9471925f5aec0d/pkg/cluster/cluster.go#L329 + // which was added as part of needed fixes to make Managed Identity work in MSFT Canary env + // in https://github.com/Azure/ARO-RP/pull/3957 + "*", + ) + if err != nil { + return nil, err + } + + dpClientFactory := dataplane.NewClientFactory(creds, b.audience, b.options) + return dpClientFactory.NewClient(identityURL) +} + +// NewFPAMIDataplaneClientBuilder provides a new instance of +// FPAMIDataplaneClientBuilder that allows to retrieve Managed Identities Data Plane clients +// authenticating as the First Party Application (FPA) identity. 
+func NewFPAMIDataplaneClientBuilder( + serviceTenantID string, + fpaTokenCredRetriever fpa.FirstPartyApplicationTokenCredentialRetriever, + audience string, options *azcore.ClientOptions, +) FPAMIDataplaneClientBuilder { + + return &fpaMIdataplaneClientBuilder{ + serviceTenantID: serviceTenantID, + fpaTokenCredRetriever: fpaTokenCredRetriever, + audience: audience, + options: options, + } +} diff --git a/backend/pkg/azure/client/hardcoded_identity_fpa_mi_dataplane_client_builder.go b/backend/pkg/azure/client/hardcoded_identity_fpa_mi_dataplane_client_builder.go new file mode 100644 index 0000000000..321fba1f0e --- /dev/null +++ b/backend/pkg/azure/client/hardcoded_identity_fpa_mi_dataplane_client_builder.go @@ -0,0 +1,38 @@ +package client + +import "github.com/Azure/azure-sdk-for-go/sdk/azcore/cloud" + +// hardcodedIdentityFPAMIDataplaneClientBuilder is used to +// create Managed Identity Data Plane clients based on the +// hardcoded identity implementation of the Managed Identities +// Data Plane client hardcodedIdentityManagedIdentitiesDataplaneClient. +type hardcodedIdentityFPAMIDataplaneClientBuilder struct { + cloudConfiguration *cloud.Configuration + hardcodedIdentity *HardcodedIdentity +} + +var _ FPAMIDataplaneClientBuilder = (*hardcodedIdentityFPAMIDataplaneClientBuilder)(nil) + +func (b *hardcodedIdentityFPAMIDataplaneClientBuilder) BuilderType() FPAMIDataplaneClientBuilderType { + return FPAMIDataplaneClientBuilderTypeValue +} + +// ManagedIdentitiesDataplane returns a new Managed Identity Data Plane client +// based on the hardcoded identity implementation of the Managed Identities +// Data Plane client hardcodedIdentityManagedIdentitiesDataplaneClient. +// The identity URL parameter is not used in the hardcoded identity implementation +// of the managed identities dataplane client, so we ignore it. 
+func (b *hardcodedIdentityFPAMIDataplaneClientBuilder) ManagedIdentitiesDataplane(_ string) (ManagedIdentitiesDataplaneClient, error) { + return newHardcodedIdentityManagedIdentitiesDataPlaneClient(b.cloudConfiguration, b.hardcodedIdentity), nil +} + +// NewHardcodedIdentityFPAMIDataplaneClientBuilder provides a new instance of +// FPAMIDataplaneClientBuilder that allows to retrieve Managed Identities Data Plane clients +// based on the hardcoded identity implementation of the Managed Identities Data Plane client +// hardcodedIdentityManagedIdentitiesDataplaneClient. +func NewHardcodedIdentityFPAMIDataplaneClientBuilder(cloudConfiguration *cloud.Configuration, hardcodedIdentity *HardcodedIdentity) FPAMIDataplaneClientBuilder { + return &hardcodedIdentityFPAMIDataplaneClientBuilder{ + cloudConfiguration: cloudConfiguration, + hardcodedIdentity: hardcodedIdentity, + } +} diff --git a/backend/pkg/azure/client/hardcoded_identity_mi_dataplane_client.go b/backend/pkg/azure/client/hardcoded_identity_mi_dataplane_client.go new file mode 100644 index 0000000000..b507fc9a8a --- /dev/null +++ b/backend/pkg/azure/client/hardcoded_identity_mi_dataplane_client.go @@ -0,0 +1,115 @@ +package client + +import ( + "context" + "time" + + "k8s.io/utils/ptr" + + "github.com/Azure/azure-sdk-for-go/sdk/azcore/cloud" + "github.com/Azure/msi-dataplane/pkg/dataplane" +) + +// HardcodedIdentity represents the information of an Azure identity +// that will be returned when leveraging the hardcodedIdentityManagedIdentitiesDataPlaneClient +// Data Plane client. HardcodedIdentity support is limited to the following +// Azure identities +// - Azure User-Assigned Managed Identity +// - Azure Service Principal +// It is often the case that what we really want to hardcode is the information +// of an Azure Service Principal instead of a User-Assigned Managed Identity. +type HardcodedIdentity struct { + // ClientID is the Client ID of a valid identity. 
+ ClientID string + // ClientSecret is The base64 encoded bundle + // certificate (public + private key) of the identity. + // The identity is a valid identity credential associated to the identity + // identified by ClientID. + ClientSecret string + // PrincipalID is the Principal ID of the identity identified by ClientID. + // For User-Assigned Managed identities this is the Object (principal) ID + // of the Managed Identity. This is, the Object ID of the service principal + // backing the Managed Identity. + // For Service Principals it is the Object ID of the service principal. + PrincipalID string + // TenantID is the Tenant ID of the identity identified by ClientID. + TenantID string +} + +// hardcodedIdentityManagedIdentitiesDataplaneClient is a mock implementation of the +// ManagedIdentitiesDataplaneClient interface. The Managed Identities Data +// Plane service is only available in Azure tenants where Microsoft's +// First Party Application (FPA) integration is available. For the environments +// where the FPA integration is not enabled we cannot communicate with the +// Managed Identities Data Plane service so instead we use this mock implementation +// of the client, where all requests made with it return a single +// Azure Service Principal identity, disguised as a Managed Identity from the +// point of view of the consumers of the client. We commonly refer to this +// identity as the "mock MSI" (also known as mi mock) identity. +type hardcodedIdentityManagedIdentitiesDataplaneClient struct { + cloudConfiguration *cloud.Configuration + + // hardcodedIdentity represents part of the identity information that will + // be hardcoded and returned in all responses provided by the client. 
+ hardcodedIdentity *HardcodedIdentity +} + +var _ ManagedIdentitiesDataplaneClient = (*hardcodedIdentityManagedIdentitiesDataplaneClient)(nil) + +// GetUserAssignedIdentitiesCredentials returns credentials for each identity ID in the request. +// The returned results will have the stubbed data provided during construction of the client +// for the client id, client secret, tenant id and principal id attributes. +func (c *hardcodedIdentityManagedIdentitiesDataplaneClient) GetUserAssignedIdentitiesCredentials(ctx context.Context, + request dataplane.UserAssignedIdentitiesRequest) (*dataplane.ManagedIdentityCredentials, error) { + now := time.Now().UTC() + aHundredYearsFromNow := now.AddDate(100, 0, 0).Format(time.RFC3339) + aDayAgo := now.AddDate(0, 0, -1).Format(time.RFC3339) + managedIdentityCredentials := dataplane.ManagedIdentityCredentials{ + AuthenticationEndpoint: ptr.To(c.cloudConfiguration.ActiveDirectoryAuthorityHost), + NotBefore: ptr.To(aDayAgo), + CannotRenewAfter: ptr.To(aHundredYearsFromNow), + RenewAfter: ptr.To(aHundredYearsFromNow), + NotAfter: ptr.To(aHundredYearsFromNow), + } + + placeholder := "placeholder" + identities := make([]dataplane.UserAssignedIdentityCredentials, len(request.IdentityIDs)) + for i, miResourceID := range request.IdentityIDs { + identity := dataplane.UserAssignedIdentityCredentials{ + ClientID: ptr.To(c.hardcodedIdentity.ClientID), + ClientSecret: ptr.To(c.hardcodedIdentity.ClientSecret), + TenantID: ptr.To(c.hardcodedIdentity.TenantID), + ResourceID: ptr.To(miResourceID), + AuthenticationEndpoint: ptr.To(c.cloudConfiguration.ActiveDirectoryAuthorityHost), + ClientSecretURL: &placeholder, + MtlsAuthenticationEndpoint: &placeholder, + NotBefore: ptr.To(aDayAgo), + CannotRenewAfter: ptr.To(aHundredYearsFromNow), + RenewAfter: ptr.To(aHundredYearsFromNow), + NotAfter: ptr.To(aHundredYearsFromNow), + CustomClaims: &dataplane.CustomClaims{ + XMSAzNwperimid: []string{placeholder}, + XMSAzTm: &placeholder, + }, + // In this specific context Object ID 
is equivalent to Principal ID + ObjectID: ptr.To(c.hardcodedIdentity.PrincipalID), + } + + identities[i] = identity + } + + managedIdentityCredentials.ExplicitIdentities = identities + return &managedIdentityCredentials, nil +} + +// newHardcodedIdentityManagedIdentitiesDataPlaneClient provides a new instance of +// ManagedIdentitiesDataplaneClient based on the hardcoded identity implementation +// of the Managed Identities Data Plane client hardcodedIdentityManagedIdentitiesDataplaneClient. +func newHardcodedIdentityManagedIdentitiesDataPlaneClient( + cloudConfiguration *cloud.Configuration, hardcodedIdentity *HardcodedIdentity, +) ManagedIdentitiesDataplaneClient { + return &hardcodedIdentityManagedIdentitiesDataplaneClient{ + cloudConfiguration: cloudConfiguration, + hardcodedIdentity: hardcodedIdentity, + } +} diff --git a/backend/pkg/azure/client/mi_dataplane_client.go b/backend/pkg/azure/client/mi_dataplane_client.go new file mode 100644 index 0000000000..0ffc0b04eb --- /dev/null +++ b/backend/pkg/azure/client/mi_dataplane_client.go @@ -0,0 +1,26 @@ +package client + +import ( + "context" + + "github.com/Azure/msi-dataplane/pkg/dataplane" +) + +// ManagedIdentitiesDataplaneClient is the interface to interact with Azure's Managed Identity +// Data Plane service. The Managed Identities Data Plane service is only +// available in Azure tenants where Microsoft's First Party Application (FPA) +// integration is available. For the environments where the FPA integration is not available +// we cannot communicate with the Managed Identities Data Plane service, so +// instead we use a mock implementation of the ManagedIdentitiesDataplaneClient that +// always returns a single Azure Service Principal identity representing +// a Managed Identity. This mock implementation and details on it can be found +// in the hardcodedIdentityManagedIdentitiesDataplaneClient Go type. 
+// This client is different from Azure Go SDK's armmsi.UserAssignedIdentitiesClient/armmsi.SystemAssignedIdentitiesClient +// clients, which are used to interact with the control plane side of the Managed Identities service. +type ManagedIdentitiesDataplaneClient interface { + GetUserAssignedIdentitiesCredentials( + ctx context.Context, request dataplane.UserAssignedIdentitiesRequest, + ) (*dataplane.ManagedIdentityCredentials, error) +} + +var _ ManagedIdentitiesDataplaneClient = (dataplane.Client)(nil) diff --git a/backend/pkg/azure/client/smi_client_builder.go b/backend/pkg/azure/client/smi_client_builder.go new file mode 100644 index 0000000000..712dd770a3 --- /dev/null +++ b/backend/pkg/azure/client/smi_client_builder.go @@ -0,0 +1,119 @@ +package client + +import ( + "context" + "fmt" + + "github.com/Azure/ARO-HCP/internal/utils" + azcorearm "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/msi/armmsi" + "github.com/Azure/msi-dataplane/pkg/dataplane" +) + +// ServiceManagedIdentityClientBuilderType is a type that represents the type of the +// ServiceManagedIdentityClientBuilder interface. It is used to ensure that +// that interface is incompatible with other client builder interfaces that +// might have the same set of methods. +type ServiceManagedIdentityClientBuilderType string + +const ( + // ServiceManagedIdentityClientBuilderTypeValue is the value of the ServiceManagedIdentityClientBuilderType type that + // represents the SMI client builder. + ServiceManagedIdentityClientBuilderTypeValue ServiceManagedIdentityClientBuilderType = "SMI" +) + +// ServiceManagedIdentityClientBuilder offers the ability to create Azure clients +// authenticating as the Cluster's Service Managed Identity, which is +// a cluster-scoped identity. 
+type ServiceManagedIdentityClientBuilder interface { + BuilderType() ServiceManagedIdentityClientBuilderType + // UserAssignedIdentitiesClient returns a new User Assigned Identities client. + UserAssignedIdentitiesClient(ctx context.Context, subscriptionID string) (UserAssignedIdentitiesClient, error) +} + +type serviceManagedIdentityClientBuilder struct { + clusterIdentityURL string + smiResourceID *azcorearm.ResourceID + fpaMIdataplaneClientBuilder FPAMIDataplaneClientBuilder + azCoreARMClientOptions *azcorearm.ClientOptions +} + +var _ ServiceManagedIdentityClientBuilder = (*serviceManagedIdentityClientBuilder)(nil) + +func (b *serviceManagedIdentityClientBuilder) BuilderType() ServiceManagedIdentityClientBuilderType { + return ServiceManagedIdentityClientBuilderTypeValue +} + +func (b *serviceManagedIdentityClientBuilder) UserAssignedIdentitiesClient(ctx context.Context, subscriptionID string) (UserAssignedIdentitiesClient, error) { + // We obtain the Managed Identity Data Plane client using the Cluster's Identity URL. + miDataplaneClient, err := b.fpaMIdataplaneClientBuilder.ManagedIdentitiesDataplane(b.clusterIdentityURL) + if err != nil { + return nil, err + } + + // We then use the Managed Identity Data Plane client to get + // credentials associated to the Cluster's Service Managed Identity. + dataplaneRequest := dataplane.UserAssignedIdentitiesRequest{ + IdentityIDs: []string{b.smiResourceID.String()}, + } + resp, err := miDataplaneClient.GetUserAssignedIdentitiesCredentials(ctx, dataplaneRequest) + if err != nil { + return nil, err + } + if len(resp.ExplicitIdentities) == 0 { + return nil, + utils.TrackError(fmt.Errorf("managed identities data plane returned no credentials for the cluster's service managed identity '%s'", b.smiResourceID.String())) + } + + // We convert the received UserAssignedIdentityCredentials result into + // an azidentity.ClientCertificateCredential, which the Azure Go SDK uses + // to instantiate a UserAssignedIdentitiesClient. 
+ userAssignedIdentityCredential := resp.ExplicitIdentities[0] + creds, err := dataplane.GetCredential(b.azCoreARMClientOptions.ClientOptions, userAssignedIdentityCredential) + if err != nil { + return nil, err + } + + // We finally instantiate the UserAssignedIdentitiesClient using the + // credentials we obtained from the Managed Identities Data Plane Service. + return armmsi.NewUserAssignedIdentitiesClient(subscriptionID, creds, b.azCoreARMClientOptions) +} + +// ServiceManagedIdentityClientBuilderFactory offers the ability to create ServiceManagedIdentityClientBuilder instances. +type ServiceManagedIdentityClientBuilderFactory interface { + // NewServiceManagedIdentityClientBuilder creates a new ServiceManagedIdentityClientBuilder instance where + // all the clients returned from it will use the Cluster's Service + // Managed Identity represented by smiResourceID. The credentials associated + // to the cluster's Service Managed Identity are retrieved from the Managed + // Identities Data Plane Service using the Cluster's Identity URL clusterIdentityURL. 
+ NewServiceManagedIdentityClientBuilder(clusterIdentityURL string, smiResourceID *azcorearm.ResourceID) ServiceManagedIdentityClientBuilder +} + +type serviceManagedIdentityClientBuilderFactory struct { + fpaMIdataplaneClientBuilder FPAMIDataplaneClientBuilder + options *azcorearm.ClientOptions +} + +var _ ServiceManagedIdentityClientBuilderFactory = (*serviceManagedIdentityClientBuilderFactory)(nil) + +func (f *serviceManagedIdentityClientBuilderFactory) NewServiceManagedIdentityClientBuilder( + clusterIdentityURL string, smiResourceID *azcorearm.ResourceID, +) ServiceManagedIdentityClientBuilder { + return &serviceManagedIdentityClientBuilder{ + clusterIdentityURL: clusterIdentityURL, + smiResourceID: smiResourceID, + fpaMIdataplaneClientBuilder: f.fpaMIdataplaneClientBuilder, + azCoreARMClientOptions: f.options, + } +} + +// NewServiceManagedIdentityClientBuilderFactory instantiates a ServiceManagedIdentityClientBuilderFactory, +// which allows to create ServiceManagedIdentityClientBuilder instances. 
+func NewServiceManagedIdentityClientBuilderFactory(
+	fpaMIdataplaneClientBuilder FPAMIDataplaneClientBuilder, options *azcorearm.ClientOptions,
+) ServiceManagedIdentityClientBuilderFactory {
+	factory := new(serviceManagedIdentityClientBuilderFactory)
+	factory.fpaMIdataplaneClientBuilder = fpaMIdataplaneClientBuilder
+	factory.options = options
+	return factory
+}
diff --git a/backend/pkg/azure/client/user_assigned_identities_client.go b/backend/pkg/azure/client/user_assigned_identities_client.go
new file mode 100644
index 0000000000..b769ce3d25
--- /dev/null
+++ b/backend/pkg/azure/client/user_assigned_identities_client.go
@@ -0,0 +1,38 @@
+package client
+
+import (
+	"context"
+
+	"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/msi/armmsi"
+)
+
+// UserAssignedIdentitiesClient is an interface that defines the
+// methods that we want to use from the UserAssignedIdentitiesClient type in
+// the Azure Go SDK (https://github.com/Azure/azure-sdk-for-go/tree/main/sdk/resourcemanager/msi/armmsi).
+// The aim is to only contain methods that are defined in the Azure Go SDK
+// UserAssignedIdentitiesClient client.
+// If you need to use a method provided by the Azure Go SDK UserAssignedIdentitiesClient
+// client but it is not defined in this interface then it has to be added here and all
+// the types implementing this interface have to implement the new method.
+type UserAssignedIdentitiesClient interface {
+	// CreateOrUpdate has the same signature as the Azure Go SDK
+	// UserAssignedIdentitiesClient.CreateOrUpdate method.
+	CreateOrUpdate(ctx context.Context,
+		resourceGroupName string, resourceName string,
+		parameters armmsi.Identity,
+		options *armmsi.UserAssignedIdentitiesClientCreateOrUpdateOptions,
+	) (armmsi.UserAssignedIdentitiesClientCreateOrUpdateResponse, error)
+
+	// Delete has the same signature as the Azure Go SDK
+	// UserAssignedIdentitiesClient.Delete method.
+	Delete(ctx context.Context,
+		resourceGroupName string, resourceName string,
+		options *armmsi.UserAssignedIdentitiesClientDeleteOptions,
+	) (armmsi.UserAssignedIdentitiesClientDeleteResponse, error)
+
+	// Get has the same signature as the Azure Go SDK
+	// UserAssignedIdentitiesClient.Get method.
+	Get(ctx context.Context,
+		resourceGroupName string, resourceName string,
+		options *armmsi.UserAssignedIdentitiesClientGetOptions,
+	) (armmsi.UserAssignedIdentitiesClientGetResponse, error)
+}
+
+// interface guard to ensure that all methods defined in the UserAssignedIdentitiesClient
+// interface are implemented by the real Azure Go SDK UserAssignedIdentitiesClient
+// client. This interface guard should always compile
+var _ UserAssignedIdentitiesClient = (*armmsi.UserAssignedIdentitiesClient)(nil)
diff --git a/backend/pkg/azure/config/azure_cloud_environment.go b/backend/pkg/azure/config/azure_cloud_environment.go
index 3d597d78b8..8f80daa2a4 100644
--- a/backend/pkg/azure/config/azure_cloud_environment.go
+++ b/backend/pkg/azure/config/azure_cloud_environment.go
@@ -133,3 +133,7 @@ func (a AzureCloudEnvironment) ARMClientOptions() *azcorearm.ClientOptions {
 		ClientOptions: *a.clientOptions,
 	}
 }
+
+// CloudConfiguration returns the cloud configuration stored in the
+// environment. The stored pointer is returned as is, not a copy.
+func (a AzureCloudEnvironment) CloudConfiguration() *cloud.Configuration {
+	return a.configuration
+}
diff --git a/backend/pkg/controllers/validationcontrollers/validations/azure_cluster_mis_existence_validation.go b/backend/pkg/controllers/validationcontrollers/validations/azure_cluster_mis_existence_validation.go
new file mode 100644
index 0000000000..5e952801f5
--- /dev/null
+++ b/backend/pkg/controllers/validationcontrollers/validations/azure_cluster_mis_existence_validation.go
@@ -0,0 +1,91 @@
+package validations
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	"github.com/Azure/ARO-HCP/internal/api"
+	"github.com/Azure/ARO-HCP/internal/api/arm"
+	"github.com/Azure/ARO-HCP/internal/utils"
+	azcorearm "github.com/Azure/azure-sdk-for-go/sdk/azcore/arm"
+
+	azureclient "github.com/Azure/ARO-HCP/backend/pkg/azure/client"
+)
+
+// AzureClusterManagedIdentitiesExistenceValidation validates the existence of all managed identities defined in the cluster.
+// It assumes all identities present are for recognized operators.
+type AzureClusterManagedIdentitiesExistenceValidation struct {
+	// smiClientBuilderFactory creates the client builder that authenticates
+	// as the cluster's Service Managed Identity.
+	smiClientBuilderFactory azureclient.ServiceManagedIdentityClientBuilderFactory
+}
+
+// NewAzureClusterManagedIdetitiesExistenceValidation returns a validation that
+// uses smiClientBuilderFactory to obtain its Azure clients.
+func NewAzureClusterManagedIdetitiesExistenceValidation(
+	smiClientBuilderFactory azureclient.ServiceManagedIdentityClientBuilderFactory,
+) *AzureClusterManagedIdentitiesExistenceValidation {
+	return &AzureClusterManagedIdentitiesExistenceValidation{
+		smiClientBuilderFactory: smiClientBuilderFactory,
+	}
+}
+
+// Name returns the name of the validation.
+func (v *AzureClusterManagedIdentitiesExistenceValidation) Name() string {
+	return "AzureClusterManagedIdentitiesExistenceValidation"
+}
+
+// Validate checks that the cluster's control plane and data plane operators
+// managed identities exist. It obtains a User Assigned Identities client with
+// the cluster's Service Managed Identity and performs a Get for every
+// operator identity. All identities that are not found are reported together
+// in a single error; any other failure aborts the validation immediately.
+// clusterSubscription is not used.
+func (v *AzureClusterManagedIdentitiesExistenceValidation) Validate(ctx context.Context, clusterSubscription *arm.Subscription, cluster *api.HCPOpenShiftCluster) error {
+	smiResourceID := cluster.CustomerProperties.Platform.OperatorsAuthentication.UserAssignedIdentities.ServiceManagedIdentity
+	if smiResourceID == nil {
+		// Without it there is no identity to authenticate with, and the
+		// client builder would dereference the nil resource ID.
+		return utils.TrackError(fmt.Errorf("cluster has no service managed identity defined"))
+	}
+	// TODO get the cluster identity URL from the cluster. It originally comes
+	// from the x-ms-identity-url header provided from ARM when the initial
+	// cluster creation request is made. Right now we do not have access to it.
+	// This should be available once https://github.com/Azure/ARO-HCP/pull/3838 is merged.
+	clusterIdentityURL := "TODO"
+
+	smiClientBuilder := v.smiClientBuilderFactory.NewServiceManagedIdentityClientBuilder(clusterIdentityURL, smiResourceID)
+	// We check the existence of the Cluster's Service Managed Identity by
+	// attempting to retrieve the user assigned identities client using the
+	// service managed identity's identity credentials, which we obtain by
+	// requesting them via the Managed Identities Data Plane Service. If the
+	// service managed identity does not exist the request will fail.
+	uaisClient, err := smiClientBuilder.UserAssignedIdentitiesClient(ctx, cluster.ID.SubscriptionID)
+	if err != nil {
+		return utils.TrackError(fmt.Errorf("failed to get user assigned identities client: %w", err))
+	}
+
+	clusterUAIsProfile := &cluster.CustomerProperties.Platform.OperatorsAuthentication.UserAssignedIdentities
+	clusterOperatorsMIsResourceIDs, err := v.clusterOperatorsManagedIdentities(clusterUAIsProfile)
+	if err != nil {
+		return utils.TrackError(fmt.Errorf("failed to get cluster managed identities resource IDs: %w", err))
+	}
+
+	var notFoundMIsStrs []string
+	for _, resourceID := range clusterOperatorsMIsResourceIDs {
+		_, err := uaisClient.Get(ctx, resourceID.ResourceGroupName, resourceID.Name, nil)
+		if err == nil {
+			continue
+		}
+		if azureclient.IsResourceNotFoundErr(err) {
+			// Record the missing identity and keep going so that every
+			// missing identity is reported in the aggregated error below.
+			notFoundMIsStrs = append(notFoundMIsStrs, resourceID.String())
+			continue
+		}
+		// TODO is it ok to error when one of them fails to get when the error is not a resource not found error?
+		return utils.TrackError(fmt.Errorf("failed to get managed identity '%s': %w", resourceID, err))
+	}
+
+	if len(notFoundMIsStrs) > 0 {
+		return utils.TrackError(fmt.Errorf("managed identities not found: %s", strings.Join(notFoundMIsStrs, ", ")))
+	}
+
+	return nil
+}
+
+// clusterOperatorsManagedIdentities returns a list of the control and data plane identities defined in the cluster.
+func (v *AzureClusterManagedIdentitiesExistenceValidation) clusterOperatorsManagedIdentities(
+	clusterUAIsProfile *api.UserAssignedIdentitiesProfile) ([]*azcorearm.ResourceID, error) {
+	var operatorIdentities []*azcorearm.ResourceID
+
+	// Control plane operators identities are collected first, then the data plane ones.
+	for _, identityID := range clusterUAIsProfile.ControlPlaneOperators {
+		operatorIdentities = append(operatorIdentities, identityID)
+	}
+	for _, identityID := range clusterUAIsProfile.DataPlaneOperators {
+		operatorIdentities = append(operatorIdentities, identityID)
+	}
+
+	// The error result is always nil.
+	return operatorIdentities, nil
+}
From 83e208ab20c46816a256f852c17b5d66c9dd79f9 Mon Sep 17 00:00:00 2001
From: Miguel Soriano
Date: Tue, 24 Feb 2026 13:58:21 +0100
Subject: [PATCH 4/4] feat: add controller that retrieves information about MSI based identities

---
 backend/main.go | 7 +
 .../controllers/fetch_msi_identities_info.go | 197 ++++++++++++++++++
 2 files changed, 204 insertions(+)
 create mode 100644 backend/pkg/controllers/fetch_msi_identities_info.go

diff --git a/backend/main.go b/backend/main.go
index f200ceabfb..342a6ba592 100644
--- a/backend/main.go
+++ b/backend/main.go
@@ -474,6 +474,12 @@ func Run(cmd *cobra.Command, args []string) error {
 			dbClient,
 			subscriptionLister,
 		)
+		fetchMSIIdentitiesInfoController = controllers.NewFetchMSIIdentitiesInfoController(
+			dbClient,
+			subscriptionLister,
+			clusterServiceClient,
+			fpaMIDataplaneClientBuilder,
+		)
 	)
 
 	le, err := leaderelection.NewLeaderElector(leaderelection.LeaderElectionConfig{
@@ -501,6 +507,7 @@ func Run(cmd *cobra.Command, args []string) error {
 				go alwaysSuccessClusterValidationController.Run(ctx, 20)
 				go azureRPRegistrationValidationController.Run(ctx, 20)
 				go azureClusterManagedIdentitiesExistenceValidationController.Run(ctx, 20)
+				go fetchMSIIdentitiesInfoController.Run(ctx, 20)
 			},
 			OnStoppedLeading: func() {
 				operationsScanner.LeaderGauge.Set(0)
diff --git a/backend/pkg/controllers/fetch_msi_identities_info.go b/backend/pkg/controllers/fetch_msi_identities_info.go
new file mode 100644
index 0000000000..d230993c10
--- /dev/null
+++ b/backend/pkg/controllers/fetch_msi_identities_info.go
@@ -0,0 +1,197 @@
+// Copyright 2026 Microsoft Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package controllers
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"net/http"
+	"time"
+
+	"k8s.io/apimachinery/pkg/api/equality"
+
+	"github.com/Azure/msi-dataplane/pkg/dataplane"
+
+	"github.com/Azure/ARO-HCP/backend/pkg/controllers/controllerutils"
+	"github.com/Azure/ARO-HCP/backend/pkg/listers"
+	"github.com/Azure/ARO-HCP/internal/api/arm"
+	"github.com/Azure/ARO-HCP/internal/database"
+	"github.com/Azure/ARO-HCP/internal/ocm"
+	"github.com/Azure/ARO-HCP/internal/utils"
+
+	azureclient "github.com/Azure/ARO-HCP/backend/pkg/azure/client"
+)
+
+// fetchMSIIdentitiesInfoSyncer is a controller that fills in the client ID and the principal ID of the
+// user assigned identities listed in the Identity section of the HCPCluster resource.
+// For the identities that are missing any of those values it requests their information from the
+// Managed Identities Data Plane Service, using the Managed Identities Data Plane Identity URL that is
+// read from the Cluster Service cluster, and persists the retrieved values in Cosmos.
+// The information of all the identities of a cluster is requested in a single Managed Identities Data Plane call.
+// It is not responsible for creating the managed identities themselves.
+type fetchMSIIdentitiesInfoSyncer struct {
+	// cooldownChecker is built from the active operation lister in
+	// NewFetchMSIIdentitiesInfoController.
+	cooldownChecker controllerutils.CooldownChecker
+
+	activeOperationLister listers.ActiveOperationLister
+
+	// cosmosClient is used to read the HCPCluster document and to persist
+	// the retrieved identities information back into it.
+	cosmosClient database.DBClient
+
+	// clusterServiceClient is used to read the Cluster Service cluster, from
+	// which the Managed Identities Data Plane Identity URL is taken.
+	clusterServiceClient ocm.ClusterServiceClientSpec
+
+	// fpaMIdataplaneClientBuilder builds the Managed Identities Data Plane
+	// client for a given identity URL.
+	fpaMIdataplaneClientBuilder azureclient.FPAMIDataplaneClientBuilder
+}
+
+var _ controllerutils.ClusterSyncer = (*fetchMSIIdentitiesInfoSyncer)(nil)
+
+// NewFetchMSIIdentitiesInfoController returns a cluster watching controller
+// that runs a fetchMSIIdentitiesInfoSyncer, passing a one minute interval to
+// controllerutils.NewClusterWatchingController.
+//
+// NOTE(review): clusterInformer is neither a parameter nor a local of this
+// function; confirm where it is expected to come from. Also, the call site
+// added to backend/main.go passes (dbClient, subscriptionLister, ...), which
+// does not line up with this parameter list; confirm the intended signature.
+func NewFetchMSIIdentitiesInfoController(
+	activeOperationLister listers.ActiveOperationLister,
+	cosmosClient database.DBClient,
+	clusterServiceClient ocm.ClusterServiceClientSpec,
+	fpaMIdataplaneClientBuilder azureclient.FPAMIDataplaneClientBuilder,
+) controllerutils.Controller {
+
+	syncer := &fetchMSIIdentitiesInfoSyncer{
+		cooldownChecker:             controllerutils.DefaultActiveOperationPrioritizingCooldown(activeOperationLister),
+		cosmosClient:                cosmosClient,
+		clusterServiceClient:        clusterServiceClient,
+		activeOperationLister:       activeOperationLister,
+		fpaMIdataplaneClientBuilder: fpaMIdataplaneClientBuilder,
+	}
+
+	controller := controllerutils.NewClusterWatchingController(
+		// The name identifies this controller; it previously carried the
+		// name of the Maestro readonly bundles controller it was copied from.
+		"FetchMSIIdentitiesInfo",
+		cosmosClient,
+		clusterInformer,
+		1*time.Minute,
+		syncer,
+	)
+
+	return controller
+}
+
+// SyncOnce looks up the cluster identified by key and, for every user assigned
+// identity in the cluster's Identity section that lacks a client ID or a
+// principal ID, retrieves those values from the Managed Identities Data Plane
+// Service and persists them in Cosmos. It returns nil when the cluster does
+// not exist or when there is nothing to sync. Errors found while processing
+// individual identities are collected and returned joined together.
+func (c *fetchMSIIdentitiesInfoSyncer) SyncOnce(ctx context.Context, key controllerutils.HCPClusterKey) error {
+	existingCluster, err := c.cosmosClient.HCPClusters(key.SubscriptionID, key.ResourceGroupName).Get(ctx, key.HCPClusterName)
+	if database.IsResponseError(err, http.StatusNotFound) {
+		return nil // cluster doesn't exist, no work to do
+	}
+	if err != nil {
+		return utils.TrackError(fmt.Errorf("failed to get Cluster: %w", err))
+	}
+
+	// We do not know whether Frontend guarantees that the Identity section is
+	// set once the cluster is stored in Cosmos, so a missing section is
+	// treated as "nothing to sync" instead of dereferencing a nil pointer.
+	if existingCluster.Identity == nil {
+		return nil
+	}
+
+	// Each identity is listed at most once, even when both its client ID and
+	// its principal ID are missing. Listing it twice would duplicate it in
+	// the request and break the count check on the response below.
+	var identitiesToSync []string
+	for identityResourceIDStr, identity := range existingCluster.Identity.UserAssignedIdentities {
+		missingClientID := identity == nil || identity.ClientID == nil || len(*identity.ClientID) == 0
+		missingPrincipalID := identity == nil || identity.PrincipalID == nil || len(*identity.PrincipalID) == 0
+		if missingClientID || missingPrincipalID {
+			identitiesToSync = append(identitiesToSync, identityResourceIDStr)
+		}
+	}
+
+	if len(identitiesToSync) == 0 {
+		return nil
+	}
+
+	// TODO for now we get the Managed Identities Data Plane Identity URL from the Cluster Service Cluster. In the future
+	// we are going to calculate it from the RP and store it in Cosmos.
+	csCluster, err := c.clusterServiceClient.GetCluster(ctx, existingCluster.ServiceProviderProperties.ClusterServiceID)
+	if err != nil {
+		return utils.TrackError(fmt.Errorf("failed to get Cluster Service Cluster: %w", err))
+	}
+
+	// As a relevant note, on environments where the real Managed Identities Data Plane service is not available a
+	// fake implementation of the Managed Identities Data Plane client is used, which always returns the information and
+	// same set of credentials for all requests. The returned information is the information associated to the "mock MSI" identity.
+	fpaMIDataplaneClient, err := c.fpaMIdataplaneClientBuilder.ManagedIdentitiesDataplane(csCluster.Azure().OperatorsAuthentication().ManagedIdentities().ManagedIdentitiesDataPlaneIdentityUrl())
+	if err != nil {
+		return utils.TrackError(fmt.Errorf("failed to get Managed Identities Data Plane Client: %w", err))
+	}
+
+	// We get all the Managed Identities information in a single Managed Identities Data Plane Credentials request because
+	// we have been told to minimize calls to the Managed Identities Data Plane Service.
+	fpaMIDataplaneCredentialsRequest := dataplane.UserAssignedIdentitiesRequest{
+		IdentityIDs: identitiesToSync,
+	}
+	fpaMIDataplaneCredentials, err := fpaMIDataplaneClient.GetUserAssignedIdentitiesCredentials(ctx, fpaMIDataplaneCredentialsRequest)
+	if err != nil {
+		return utils.TrackError(fmt.Errorf("failed to get Managed Identities Data Plane Credentials: %w", err))
+	}
+
+	// TODO at some point we will also have to implement logic that retrieves the initial set of credentials for the
+	// control plane operators managed identities and for the service managed identity and store it in the Managed
+	// Identities Key Vault (a Management Cluster scoped resource). Do we want to do it here at the same time because
+	// we are already calling the Managed Identities Data Plane Service and getting credentials here? As relevant context,
+	// these set of initial credentials should be stored in the Managed Identities Key Vault before creating the HostedCluster
+	// and those credentials have a limited lifespan (unknown which without investigating further).
+
+	if len(fpaMIDataplaneCredentials.ExplicitIdentities) == 0 {
+		return utils.TrackError(fmt.Errorf("returned number of Managed Identities Data Plane Credentials is 0"))
+	}
+
+	if len(fpaMIDataplaneCredentials.ExplicitIdentities) != len(identitiesToSync) {
+		return utils.TrackError(fmt.Errorf("unexpected number of Managed Identities Data Plane Credentials. Expected: %d, Received: %d", len(identitiesToSync), len(fpaMIDataplaneCredentials.ExplicitIdentities)))
+	}
+
+	desiredMSIIdentities := make(map[string]*arm.UserAssignedIdentity)
+	var syncErrors []error
+	for i, fpaMIDataplaneCredential := range fpaMIDataplaneCredentials.ExplicitIdentities {
+		if fpaMIDataplaneCredential.ResourceID == nil || len(*fpaMIDataplaneCredential.ResourceID) == 0 {
+			// The credential cannot identify itself, so the request entry at the
+			// same position is the only label available (the index is in range
+			// because of the length check above).
+			syncErrors = append(syncErrors, utils.TrackError(fmt.Errorf("unexpected Managed Identities Data Plane Credential %s Resource ID is nil or empty", identitiesToSync[i])))
+			continue
+		}
+		// From here on the credential is labelled with its own resource ID, so
+		// that error messages do not depend on the response being ordered like
+		// the request.
+		credentialResourceID := *fpaMIDataplaneCredential.ResourceID
+		currentDesiredMSIIdentity := &arm.UserAssignedIdentity{}
+		desiredMSIIdentities[credentialResourceID] = currentDesiredMSIIdentity
+
+		if fpaMIDataplaneCredential.ClientID != nil && len(*fpaMIDataplaneCredential.ClientID) > 0 {
+			currentDesiredMSIIdentity.ClientID = fpaMIDataplaneCredential.ClientID
+		} else {
+			syncErrors = append(syncErrors, utils.TrackError(fmt.Errorf("unexpected Managed Identities Data Plane Credential %s Client ID is nil or empty", credentialResourceID)))
+		}
+
+		if fpaMIDataplaneCredential.ObjectID != nil && len(*fpaMIDataplaneCredential.ObjectID) > 0 {
+			currentDesiredMSIIdentity.PrincipalID = fpaMIDataplaneCredential.ObjectID
+		} else {
+			syncErrors = append(syncErrors, utils.TrackError(fmt.Errorf("unexpected Managed Identities Data Plane Credential %s Principal ID is nil or empty", credentialResourceID)))
+		}
+	}
+
+	// TODO are we ok with storing this directly in the HCPCluster resource, as it needs to be set anyway because it
+	// is returned as part of the API of the Cluster to end-users?
+	if !equality.Semantic.DeepEqual(existingCluster.Identity.UserAssignedIdentities, desiredMSIIdentities) && len(desiredMSIIdentities) > 0 {
+		updated := false
+		for desiredIdentityResourceIDStr, desiredIdentity := range desiredMSIIdentities {
+			existingIdentity, found := existingCluster.Identity.UserAssignedIdentities[desiredIdentityResourceIDStr]
+			if !found {
+				// The data plane returned a resource ID that is not a key of the
+				// cluster's identities map. Report it instead of dereferencing a
+				// missing map entry.
+				syncErrors = append(syncErrors, utils.TrackError(fmt.Errorf("unexpected Managed Identities Data Plane Credential %s does not match any of the cluster's user assigned identities", desiredIdentityResourceIDStr)))
+				continue
+			}
+			if existingIdentity == nil {
+				existingIdentity = &arm.UserAssignedIdentity{}
+				existingCluster.Identity.UserAssignedIdentities[desiredIdentityResourceIDStr] = existingIdentity
+			}
+
+			if desiredIdentity.ClientID != nil && len(*desiredIdentity.ClientID) > 0 {
+				existingIdentity.ClientID = desiredIdentity.ClientID
+				updated = true
+			}
+			if desiredIdentity.PrincipalID != nil && len(*desiredIdentity.PrincipalID) > 0 {
+				existingIdentity.PrincipalID = desiredIdentity.PrincipalID
+				updated = true
+			}
+		}
+
+		// Only write to Cosmos when at least one value was actually set.
+		if updated {
+			_, err := c.cosmosClient.HCPClusters(existingCluster.ID.SubscriptionID, existingCluster.ID.ResourceGroupName).Replace(ctx, existingCluster, nil)
+			if err != nil {
+				syncErrors = append(syncErrors, utils.TrackError(fmt.Errorf("failed to replace HCPCluster: %w", err)))
+			}
+		}
+	}
+
+	return errors.Join(syncErrors...)
+}