diff --git a/test/cmd/aro-hcp-tests/custom-link-tools/cmd_test.go b/test/cmd/aro-hcp-tests/custom-link-tools/cmd_test.go index 1f956353c5..1b61503524 100644 --- a/test/cmd/aro-hcp-tests/custom-link-tools/cmd_test.go +++ b/test/cmd/aro-hcp-tests/custom-link-tools/cmd_test.go @@ -25,7 +25,6 @@ import ( clocktesting "k8s.io/utils/clock/testing" "github.com/Azure/ARO-HCP/test/util/testutil" - "github.com/Azure/ARO-HCP/tooling/templatize/pkg/pipeline" ) func TestGeneratedHTML(t *testing.T) { @@ -39,31 +38,14 @@ func TestGeneratedHTML(t *testing.T) { tmpdir := t.TempDir() opts := Options{ completedOptions: &completedOptions{ - TimingInputDir: "../testdata/output", - Steps: []pipeline.NodeInfo{ - { - Identifier: pipeline.Identifier{ - ServiceGroup: "Microsoft.Azure.ARO.HCP.Service.Infra", - ResourceGroup: "service", - Step: "cluster", - }, - Details: &pipeline.ExecutionDetails{ - ARM: &pipeline.ARMExecutionDetails{ - Operations: []pipeline.Operation{ - { - OperationType: "Create", - Resource: &pipeline.Resource{ - ResourceType: "Microsoft.ContainerService/managedClusters", - Name: "hcp-underlay-prow-usw3j688-svc-1", - ResourceGroup: "hcp-underlay-prow-usw3j688-svc-1", - }, - }, - }, - }, - }, - }, + TimingInputDir: "../testdata/output", + ADXClusterName: "hcp-dev-us.westus3", + ClusterNames: ClusterNames{ + Service: "hcp-underlay-prow-usw3j688-svc-1", + Management: "hcp-underlay-prow-usw3j688-mgmt-1", }, - OutputDir: tmpdir, + GlobalLogStartTime: fakeTime.Add(-6 * time.Hour), + OutputDir: tmpdir, }, } err = opts.Run(ctx) diff --git a/test/cmd/aro-hcp-tests/custom-link-tools/options.go b/test/cmd/aro-hcp-tests/custom-link-tools/options.go index 0e3f463a5e..7e0fa1ecec 100644 --- a/test/cmd/aro-hcp-tests/custom-link-tools/options.go +++ b/test/cmd/aro-hcp-tests/custom-link-tools/options.go @@ -35,6 +35,7 @@ import ( "k8s.io/utils/clock" + configtypes "github.com/Azure/ARO-Tools/pkg/config/types" "sigs.k8s.io/yaml" "github.com/Azure/ARO-HCP/internal/utils" @@ -47,13 +48,11 @@ var templatesFS embed.FS var endGracePeriodDuration = 45 * time.Minute -var ( - serviceClusterStepID = pipeline.Identifier{ - ServiceGroup: "Microsoft.Azure.ARO.HCP.Service.Infra", - ResourceGroup: "service", - Step: "cluster", - } -) +// ClusterNames holds the names of the service and management AKS clusters. +type ClusterNames struct { + Service string + Management string +} func mustReadArtifact(name string) []byte { ret, err := templatesFS.ReadFile("artifacts/" + name) @@ -71,13 +70,15 @@ func DefaultOptions() *RawOptions { func BindOptions(opts *RawOptions, cmd *cobra.Command) error { cmd.Flags().StringVar(&opts.TimingInputDir, "timing-input", opts.TimingInputDir, "Path to the directory holding timing outputs from an end-to-end test run.") cmd.Flags().StringVar(&opts.OutputDir, "output", opts.OutputDir, "Path to the directory where html will be written.") + cmd.Flags().StringVar(&opts.RenderedConfigPath, "rendered-config", opts.RenderedConfigPath, "Path to a rendered config.yaml file for the target environment.") return nil } type RawOptions struct { - TimingInputDir string - OutputDir string + TimingInputDir string + OutputDir string + RenderedConfigPath string } // validatedOptions is a private wrapper that enforces a call of Validate() before Complete() can be invoked. @@ -92,9 +93,11 @@ type ValidatedOptions struct { // completedOptions is a private wrapper that enforces a call of Complete() before config generation can be invoked. type completedOptions struct { - TimingInputDir string - OutputDir string - Steps []pipeline.NodeInfo + TimingInputDir string + OutputDir string + ClusterNames ClusterNames + ADXClusterName string + GlobalLogStartTime time.Time } type Options struct { @@ -110,6 +113,7 @@ func (o *RawOptions) Validate() (*ValidatedOptions, error) { }{ {flag: "timing-input", name: "timing input dir", value: &o.TimingInputDir}, {flag: "output", name: "output dir", value: &o.OutputDir}, + {flag: "rendered-config", name: "rendered config path", value: &o.RenderedConfigPath}, } { if item.value == nil || *item.value == "" { return nil, fmt.Errorf("the %s must be provided with --%s", item.name, item.flag) @@ -124,43 +128,62 @@ func (o *RawOptions) Validate() (*ValidatedOptions, error) { } func (o *ValidatedOptions) Complete(logger logr.Logger) (*Options, error) { - // we consume steps.yaml (output of templatize and stored for us by the visualization) to determine the cluster name - // Try to read compressed file first, then fall back to uncompressed - var stepsYamlBytes []byte - - compressedPath := path.Join(o.TimingInputDir, "steps.yaml.gz") - uncompressedPath := path.Join(o.TimingInputDir, "steps.yaml") - - // Try compressed file first - compressedData, err := os.ReadFile(compressedPath) - if err == nil { - gzipReader, err := gzip.NewReader(bytes.NewReader(compressedData)) - if err != nil { - return nil, fmt.Errorf("failed to create gzip reader for %s: %w", compressedPath, err) - } - defer gzipReader.Close() + // Read and parse the rendered config.yaml. + configData, err := os.ReadFile(o.RenderedConfigPath) + if err != nil { + return nil, fmt.Errorf("failed to read rendered config %s: %w", o.RenderedConfigPath, err) + } + var cfg configtypes.Configuration + if err := yaml.Unmarshal(configData, &cfg); err != nil { + return nil, fmt.Errorf("failed to unmarshal rendered config %s: %w", o.RenderedConfigPath, err) + } - stepsYamlBytes, err = io.ReadAll(gzipReader) - if err != nil { - return nil, fmt.Errorf("failed to decompress %s: %w", compressedPath, err) - } - } else { - // Fall back to uncompressed file - stepsYamlBytes, err = os.ReadFile(uncompressedPath) - if err != nil { - return nil, utils.TrackError(err) - } + // Extract ADX cluster name: kustoName + "." + region (e.g. "hcp-int-uk.uksouth"). + kustoNameAny, err := cfg.GetByPath("kusto.kustoName") + if err != nil { + return nil, fmt.Errorf("failed to get kusto.kustoName from config: %w", err) + } + kustoName, ok := kustoNameAny.(string) + if !ok { + return nil, fmt.Errorf("kusto.kustoName is not a string in config") + } + regionAny, err := cfg.GetByPath("region") + if err != nil { + return nil, fmt.Errorf("failed to get region from config: %w", err) } + region, ok := regionAny.(string) + if !ok { + return nil, fmt.Errorf("region is not a string in config") + } + adxClusterName := kustoName + "." + region - var steps []pipeline.NodeInfo - if err := yaml.Unmarshal(stepsYamlBytes, &steps); err != nil { - return nil, fmt.Errorf("failed to unmarshal timing input file: %w", err) + // Extract cluster names. + svcNameAny, err := cfg.GetByPath("svc.aks.name") + if err != nil { + return nil, fmt.Errorf("failed to get svc.aks.name from config: %w", err) + } + svcName, ok := svcNameAny.(string) + if !ok { + return nil, fmt.Errorf("svc.aks.name is not a string in config") } + mgmtNameAny, err := cfg.GetByPath("mgmt.aks.name") + if err != nil { + return nil, fmt.Errorf("failed to get mgmt.aks.name from config: %w", err) + } + mgmtName, ok := mgmtNameAny.(string) + if !ok { + return nil, fmt.Errorf("mgmt.aks.name is not a string in config") + } + + globalLogStartTime := computeGlobalLogStartTime(logger, o.TimingInputDir) + return &Options{ completedOptions: &completedOptions{ - Steps: steps, - OutputDir: o.OutputDir, - TimingInputDir: o.TimingInputDir, + ClusterNames: ClusterNames{Service: svcName, Management: mgmtName}, + ADXClusterName: adxClusterName, + GlobalLogStartTime: globalLogStartTime, + OutputDir: o.OutputDir, + TimingInputDir: o.TimingInputDir, }, }, nil } @@ -184,6 +207,7 @@ type QueryInfo struct { EndTime string ClusterName string Database string + ADXClusterName string } type TimingInfo struct { @@ -202,7 +226,7 @@ func createQueryURL(templatePath string, info QueryInfo) string { currURL := url.URL{ Scheme: "https", Host: "dataexplorer.azure.com", - Path: fmt.Sprintf("clusters/hcp-dev-us.westus3/databases/%s", info.Database), + Path: fmt.Sprintf("clusters/%s/databases/%s", info.ADXClusterName, info.Database), } urlQuery := currURL.Query() template, err := template.New("custom-link-tools").Parse(string(mustReadArtifact(templatePath))) @@ -267,12 +291,14 @@ func (o Options) Run(ctx context.Context) error { Database: "HostedControlPlaneLogs", StartTime: timing.StartTime.Format(time.RFC3339), EndTime: timing.EndTime.Format(time.RFC3339), + ADXClusterName: o.ADXClusterName, }), createLinkForTest("Service Logs", "service-logs.kql.tmpl", QueryInfo{ ResourceGroupName: rg, Database: "ServiceLogs", StartTime: timing.StartTime.Format(time.RFC3339), EndTime: timing.EndTime.Format(time.RFC3339), + ADXClusterName: o.ADXClusterName, }), }, Database: "HostedControlPlaneLogs", @@ -296,7 +322,10 @@ func (o Options) Run(ctx context.Context) error { return utils.TrackError(err) } - serviceLogLinks, err := getServiceLogLinks(o.Steps) + endTime := localClock.Now().Add(1 * time.Hour) + serviceLogLinks, err := getServiceLogLinksFromClusterNamesWithWindow( + o.ClusterNames, o.ADXClusterName, o.GlobalLogStartTime, endTime, + ) if err != nil { return utils.TrackError(err) } @@ -413,44 +442,102 @@ func loadAllTestTimingInfo(timingInputDir string) (map[string]TimingInfo, error) var localClock clock.PassiveClock = clock.RealClock{} -func getServiceLogLinks(steps []pipeline.NodeInfo) ([]LinkDetails, error) { - allLinks := []LinkDetails{} +// computeGlobalLogStartTime determines the start time for global component log links +// using a three-tier fallback: steps.yaml, timing-metadata files, or 6 hours ago. +func computeGlobalLogStartTime(logger logr.Logger, timingInputDir string) time.Time { + if t, ok := extractStartTimeFromStepsYaml(logger, timingInputDir); ok { + return t + } + if t, ok := extractEarliestStartTimeFromTimingMetadata(logger, timingInputDir); ok { + return t + } + return localClock.Now().Add(-6 * time.Hour) +} + +// extractStartTimeFromStepsYaml reads steps.yaml(.gz) and returns the earliest StartedAt time. +// Returns false on any error (soft failure). +func extractStartTimeFromStepsYaml(logger logr.Logger, timingInputDir string) (time.Time, bool) { + var stepsYamlBytes []byte + + compressedPath := path.Join(timingInputDir, "steps.yaml.gz") + uncompressedPath := path.Join(timingInputDir, "steps.yaml") + + compressedData, err := os.ReadFile(compressedPath) + if err == nil { + gzipReader, gzErr := gzip.NewReader(bytes.NewReader(compressedData)) + if gzErr != nil { + logger.V(1).Info("failed to create gzip reader for steps.yaml.gz, skipping", "error", gzErr) + return time.Time{}, false + } + defer gzipReader.Close() + + stepsYamlBytes, err = io.ReadAll(gzipReader) + if err != nil { + logger.V(1).Info("failed to decompress steps.yaml.gz, skipping", "error", err) + return time.Time{}, false + } + } else { + stepsYamlBytes, err = os.ReadFile(uncompressedPath) + if err != nil { + logger.V(1).Info("steps.yaml not found, skipping", "error", err) + return time.Time{}, false + } + } + + var steps []pipeline.NodeInfo + if err := yaml.Unmarshal(stepsYamlBytes, &steps); err != nil { + logger.V(1).Info("failed to unmarshal steps.yaml, skipping", "error", err) + return time.Time{}, false + } - earliestStartTime := time.Time{} - allClusterNames := []string{} + earliest := time.Time{} for _, step := range steps { if len(step.Info.StartedAt) > 0 { startTime, err := time.Parse(time.RFC3339, step.Info.StartedAt) if err != nil { - return nil, fmt.Errorf("failed to parse started at: %w", err) - } - if earliestStartTime.IsZero() || startTime.Before(earliestStartTime) { - earliestStartTime = startTime + logger.V(1).Info("failed to parse StartedAt in steps.yaml, skipping", "error", err) + return time.Time{}, false } - } - - // we're looking for the service cluster's step to make a query for backend and frontend - // forming like this so that we can easily add more steps (like the management cluster) that we want queries for - if step.Identifier == serviceClusterStepID { - if step.Details != nil && step.Details.ARM != nil { - for _, operation := range step.Details.ARM.Operations { - allClusterNames = append(allClusterNames, locateAllServiceClusters(operation)...) - } + if earliest.IsZero() || startTime.Before(earliest) { + earliest = startTime } } } - if earliestStartTime.IsZero() { - earliestStartTime = localClock.Now().Add(-6 * time.Hour) // lots longer than default timeouts, but still shorter than forever + if earliest.IsZero() { + logger.V(1).Info("no StartedAt times found in steps.yaml") + return time.Time{}, false } + return earliest, true +} - if len(allClusterNames) != 1 { - return nil, fmt.Errorf("expecting only one service cluster, found %d: %s", len(allClusterNames), strings.Join(allClusterNames, ", ")) +// extractEarliestStartTimeFromTimingMetadata returns the earliest StartTime from timing-metadata files. +// Returns false on any error (soft failure). +func extractEarliestStartTimeFromTimingMetadata(logger logr.Logger, timingInputDir string) (time.Time, bool) { + timingInfo, err := loadAllTestTimingInfo(timingInputDir) + if err != nil { + logger.V(1).Info("failed to load timing metadata, skipping", "error", err) + return time.Time{}, false + } + if len(timingInfo) == 0 { + logger.V(1).Info("no timing metadata found") + return time.Time{}, false } - endTime := localClock.Now().Add(1 * time.Hour) // we need to include all cleanup, this is a good bet. + earliest := time.Time{} + for _, info := range timingInfo { + if earliest.IsZero() || info.StartTime.Before(earliest) { + earliest = info.StartTime + } + } + return earliest, true +} - // Define all components and their log query templates - components := []struct { +// getServiceLogLinksFromClusterNamesWithWindow generates service log links for all components +// using the provided cluster names and time window. +// Frontend, Backend, Cluster Service, and Maestro logs use the service cluster. +// Hypershift and ACM logs use the management cluster. +func getServiceLogLinksFromClusterNamesWithWindow(clusterNames ClusterNames, adxClusterName string, startTime, endTime time.Time) ([]LinkDetails, error) { + svcComponents := []struct { component string template string }{ @@ -458,39 +545,38 @@ func getServiceLogLinks(steps []pipeline.NodeInfo) ([]LinkDetails, error) { {"Frontend Logs", "frontend-logs.kql.tmpl"}, {"Clusters Service Logs", "clusters-service-logs.kql.tmpl"}, {"Maestro Logs", "maestro-logs.kql.tmpl"}, + } + + mgmtComponents := []struct { + component string + template string + }{ {"Hypershift Logs", "hypershift-logs.kql.tmpl"}, {"ACM Logs", "acm-logs.kql.tmpl"}, } - // Generate links for each component and cluster - for _, clusterName := range allClusterNames { - for _, comp := range components { - allLinks = append(allLinks, createLinkForTest(comp.component, comp.template, QueryInfo{ - ResourceGroupName: clusterName, - Database: "ServiceLogs", - ClusterName: clusterName, - StartTime: earliestStartTime.Format(time.RFC3339), - EndTime: endTime.Format(time.RFC3339), - })) - } + var links []LinkDetails + for _, comp := range svcComponents { + links = append(links, createLinkForTest(comp.component, comp.template, QueryInfo{ + ResourceGroupName: clusterNames.Service, + Database: "ServiceLogs", + ClusterName: clusterNames.Service, + ADXClusterName: adxClusterName, + StartTime: startTime.Format(time.RFC3339), + EndTime: endTime.Format(time.RFC3339), + })) } - return allLinks, nil -} - -func locateAllServiceClusters(operation pipeline.Operation) []string { - allClusterNames := []string{} - for _, currChild := range operation.Children { - currClusterNames := locateAllServiceClusters(currChild) - if len(currClusterNames) > 0 { - allClusterNames = append(allClusterNames, currClusterNames...) - } - } - if operation.Resource == nil { - return allClusterNames + for _, comp := range mgmtComponents { + links = append(links, createLinkForTest(comp.component, comp.template, QueryInfo{ + ResourceGroupName: clusterNames.Management, + Database: "ServiceLogs", + ClusterName: clusterNames.Management, + ADXClusterName: adxClusterName, + StartTime: startTime.Format(time.RFC3339), + EndTime: endTime.Format(time.RFC3339), + })) } - if operation.OperationType == "Create" && operation.Resource.ResourceType == "Microsoft.ContainerService/managedClusters" { - allClusterNames = append(allClusterNames, operation.Resource.Name) - } - return allClusterNames + + return links, nil } diff --git a/test/cmd/testdata/zz_fixture_TestGeneratedHTMLcustom_link_tools.html b/test/cmd/testdata/zz_fixture_TestGeneratedHTMLcustom_link_tools.html index 68666c2ef0..ea8c9597cb 100644 --- a/test/cmd/testdata/zz_fixture_TestGeneratedHTMLcustom_link_tools.html +++ b/test/cmd/testdata/zz_fixture_TestGeneratedHTMLcustom_link_tools.html @@ -56,13 +56,13 @@
  • - + Hypershift Logs
  • - + ACM Logs