diff --git a/README.md b/README.md index 054f5bfb1..37506e447 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,7 @@ See each command's linked README for more details, or click on [šŸ“š](https://do #### `stepfunctions` - `instrument`: Instrument [AWS Step Function](src/commands/stepfunctions) with Datadog to get logs and traces. [šŸ“š](https://docs.datadoghq.com/serverless/step_functions/installation/?tab=datadogcli) - `uninstrument`: Uninstrument [AWS Step Function](src/commands/stepfunctions). [šŸ“š](https://docs.datadoghq.com/serverless/step_functions/installation/?tab=datadogcli) +- `flare`: Gather [AWS Step Function](src/commands/stepfunctions) configuration, execution history, and logs for Datadog support. [šŸ“š](src/commands/stepfunctions/README.md#flare) #### `synthetics` - `run-tests`: Run [Continuous Testing tests](src/commands/synthetics) from the CI. [šŸ“š](https://docs.datadoghq.com/continuous_testing/) diff --git a/src/commands/stepfunctions/README.md b/src/commands/stepfunctions/README.md index bcb1f0559..516f38bb7 100644 --- a/src/commands/stepfunctions/README.md +++ b/src/commands/stepfunctions/README.md @@ -1,6 +1,10 @@ # stepfunctions commands -You can use the `stepfunctions instrument` command to instrument your Step Functions with Datadog. This command enables instrumentation by subscribing Step Function logs to a [Datadog Forwarder](https://docs.datadoghq.com/logs/guide/forwarder/). +The Step Functions commands allow you to manage Datadog instrumentation and troubleshooting for your AWS Step Functions: + +- Use the `stepfunctions instrument` command to instrument your Step Functions with Datadog. This command enables instrumentation by subscribing Step Function logs to a [Datadog Forwarder](https://docs.datadoghq.com/logs/guide/forwarder/). +- Use the `stepfunctions uninstrument` command to remove Datadog instrumentation from your Step Functions. 
+- Use the `stepfunctions flare` command to collect diagnostic information for troubleshooting with Datadog support. You can also add the `stepfunctions instrument` command to your CI/CD pipelines to enable Datadog instrumentation for all of your Step Functions. Run the command after your normal serverless application deployment, so that changes made by this command do not get overridden by changes in the CI/CD pipeline. @@ -23,6 +27,38 @@ Run the `uninstrument` command to unsubscribe a Step Function log group from the datadog-ci stepfunctions uninstrument --step-function --forwarder [--dry-run] ``` +### `flare` +Run the `flare` command to gather state machine configuration, execution history, logs, and project files for Datadog support troubleshooting. This command collects diagnostic information about your Step Functions and creates a flare file that can be shared with Datadog support. + +```bash +datadog-ci stepfunctions flare --state-machine --case-id --email [--with-logs] [--start] [--end] [--max-executions] [--dry-run] +``` + +Example: +```bash +# Basic flare collection +datadog-ci stepfunctions flare \ + --state-machine arn:aws:states:us-east-1:123456789012:stateMachine:MyStateMachine \ + --case-id 12345 \ + --email support@example.com + +# Include logs from the last 7 days +datadog-ci stepfunctions flare \ + --state-machine arn:aws:states:us-east-1:123456789012:stateMachine:MyStateMachine \ + --case-id 12345 \ + --email support@example.com \ + --with-logs \ + --start "2025-06-11" \ + --end "2025-06-18" + +# Dry run to preview without sending +datadog-ci stepfunctions flare \ + --state-machine arn:aws:states:us-east-1:123456789012:stateMachine:MyStateMachine \ + --case-id 12345 \ + --email support@example.com \ + --dry-run +``` + ## Arguments ### instrument @@ -44,6 +80,19 @@ datadog-ci stepfunctions uninstrument --step-function --forw | `--forwarder` | | :white_check_mark: | The ARN of the [Datadog 
Forwarder](https://docs.datadoghq.com/logs/guide/forwarder/) to subscribe Step Function log groups. | | | `--dry-run` | `-d` | | Preview changes without applying them. | `false` | +### flare + +| Argument | Shorthand | Required | Description | Default | +| ----------------- | --------- | ------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | +| `--state-machine` | `-s` | :white_check_mark: | The ARN of the Step Functions state machine to collect diagnostic information from. | | +| `--case-id` | `-c` | :white_check_mark: | The Datadog support case ID to associate with this flare. | | +| `--email` | `-e` | :white_check_mark: | The email address associated with the support case. | | +| `--with-logs` | | | Include CloudWatch logs from the state machine's log group in the flare. | `false` | +| `--start` | | | Start time for log collection (ISO 8601 format). Only used with `--with-logs`. | | +| `--end` | | | End time for log collection (ISO 8601 format). Only used with `--with-logs`. | | +| `--max-executions`| | | Maximum number of recent executions to include in the flare. | `10` | +| `--dry-run` | `-d` | | Preview the flare collection without creating or sending files. | `false` | + ## Installation 1. 
Install the Datadog CLI diff --git a/src/commands/stepfunctions/__tests__/fixtures/stepfunctions-flare.ts b/src/commands/stepfunctions/__tests__/fixtures/stepfunctions-flare.ts new file mode 100644 index 000000000..be98e0a9f --- /dev/null +++ b/src/commands/stepfunctions/__tests__/fixtures/stepfunctions-flare.ts @@ -0,0 +1,227 @@ +import {SubscriptionFilter, OutputLogEvent} from '@aws-sdk/client-cloudwatch-logs' +import {DescribeStateMachineCommandOutput, ExecutionListItem, HistoryEvent, Tag} from '@aws-sdk/client-sfn' + +export const stateMachineConfigFixture = ( + props: Partial<DescribeStateMachineCommandOutput> = {} +): DescribeStateMachineCommandOutput => { + const defaults: DescribeStateMachineCommandOutput = { + $metadata: {}, + stateMachineArn: 'arn:aws:states:us-east-1:123456789012:stateMachine:MyWorkflow', + name: 'MyWorkflow', + status: 'ACTIVE', + definition: JSON.stringify({ + StartAt: 'HelloWorld', + States: { + HelloWorld: { + Type: 'Pass', + Result: 'Hello World!', + End: true, + }, + }, + }), + roleArn: 'arn:aws:iam::123456789012:role/MyRole', + type: 'STANDARD', + creationDate: new Date('2024-01-01'), + loggingConfiguration: { + level: 'ALL', + includeExecutionData: true, + destinations: [ + { + cloudWatchLogsLogGroup: { + logGroupArn: 'arn:aws:logs:us-east-1:123456789012:log-group:/aws/vendedlogs/states/MyWorkflow-Logs', + }, + }, + ], + }, + } + + return {...defaults, ...props} +} + +export const sensitiveStateMachineConfigFixture = (): DescribeStateMachineCommandOutput => { + return stateMachineConfigFixture({ + definition: JSON.stringify({ + StartAt: 'ProcessPayment', + States: { + ProcessPayment: { + Type: 'Task', + Resource: 'arn:aws:lambda:us-east-1:123456789012:function:ProcessPayment', + Parameters: { + 'ApiKey.$': '$.credentials.apiKey', + SecretToken: 'secret-12345-token', + DatabasePassword: 'super-secret-password', + }, + End: true, + }, + }, + }), + description: 'Payment processing workflow with sensitive data', + }) +} + +export const executionsFixture = (): 
ExecutionListItem[] => { + return [ + { + executionArn: 'arn:aws:states:us-east-1:123456789012:execution:MyWorkflow:execution1', + stateMachineArn: 'arn:aws:states:us-east-1:123456789012:stateMachine:MyWorkflow', + name: 'execution1', + status: 'SUCCEEDED', + startDate: new Date('2024-01-01T10:00:00Z'), + stopDate: new Date('2024-01-01T10:01:00Z'), + }, + { + executionArn: 'arn:aws:states:us-east-1:123456789012:execution:MyWorkflow:execution2', + stateMachineArn: 'arn:aws:states:us-east-1:123456789012:stateMachine:MyWorkflow', + name: 'execution2', + status: 'FAILED', + startDate: new Date('2024-01-01T09:00:00Z'), + stopDate: new Date('2024-01-01T09:01:00Z'), + }, + ] +} + +interface SensitiveExecution { + executionArn: string + stateMachineArn: string + name: string + status: string + startDate: Date + stopDate: Date + input: string + output: string +} + +export const sensitiveExecutionFixture = (): SensitiveExecution => { + return { + executionArn: 'arn:aws:states:us-east-1:123456789012:execution:MyWorkflow:execution1', + stateMachineArn: 'arn:aws:states:us-east-1:123456789012:stateMachine:MyWorkflow', + name: 'execution1', + status: 'SUCCEEDED', + startDate: new Date('2024-01-01T10:00:00Z'), + stopDate: new Date('2024-01-01T10:01:00Z'), + input: '{"creditCard": "4111-1111-1111-1111", "cvv": "123", "amount": 100}', + output: '{"transactionId": "secret-transaction-id", "authToken": "Bearer secret-token"}', + } +} + +export const executionHistoryFixture = (): HistoryEvent[] => { + return [ + { + timestamp: new Date('2024-01-01T10:00:00Z'), + type: 'ExecutionStarted', + id: 1, + previousEventId: 0, + executionStartedEventDetails: { + input: '{"orderId": "12345"}', + roleArn: 'arn:aws:iam::123456789012:role/MyRole', + }, + }, + { + timestamp: new Date('2024-01-01T10:00:01Z'), + type: 'TaskStateEntered', + id: 2, + previousEventId: 1, + stateEnteredEventDetails: { + name: 'ProcessPayment', + input: '{"orderId": "12345", "amount": 100}', + }, + }, + { + timestamp: new 
Date('2024-01-01T10:00:59Z'), + type: 'TaskStateExited', + id: 3, + previousEventId: 2, + stateExitedEventDetails: { + name: 'ProcessPayment', + output: '{"success": true, "transactionId": "tx-12345"}', + }, + }, + { + timestamp: new Date('2024-01-01T10:01:00Z'), + type: 'ExecutionSucceeded', + id: 4, + previousEventId: 3, + executionSucceededEventDetails: { + output: '{"result": "completed"}', + }, + }, + ] +} + +export const stepFunctionTagsFixture = (): Tag[] => { + return [ + {key: 'Environment', value: 'test'}, + {key: 'Service', value: 'payment-processor'}, + {key: 'Team', value: 'platform'}, + ] +} + +export const logSubscriptionFiltersFixture = (): SubscriptionFilter[] => { + return [ + { + filterName: 'datadog-forwarder', + destinationArn: 'arn:aws:lambda:us-east-1:123456789012:function:DatadogForwarder', + filterPattern: '', + logGroupName: '/aws/vendedlogs/states/MyWorkflow-Logs', + }, + ] +} + +export const cloudWatchLogsFixture = (): OutputLogEvent[] => { + return [ + { + timestamp: 1704106800000, + message: 'Execution started', + ingestionTime: 1704106801000, + }, + { + timestamp: 1704106801000, + message: 'Processing payment for order 12345', + ingestionTime: 1704106802000, + }, + { + timestamp: 1704106859000, + message: 'Payment processed successfully', + ingestionTime: 1704106860000, + }, + { + timestamp: 1704106860000, + message: 'Execution completed', + ingestionTime: 1704106861000, + }, + ] +} + +export const MOCK_STATE_MACHINE_ARN = 'arn:aws:states:us-east-1:123456789012:stateMachine:MyWorkflow' +export const MOCK_REGION = 'us-east-1' +export const MOCK_CASE_ID = 'case-123456' +export const MOCK_EMAIL = 'test@example.com' +export const MOCK_API_KEY = 'test-api-key-1234' + +export const MOCK_AWS_CREDENTIALS = { + accessKeyId: 'AKIAIOSFODNN7EXAMPLE', + secretAccessKey: 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY', + sessionToken: undefined, +} + +export const MOCK_FRAMEWORK = 'Serverless Framework' + +export const MOCK_OUTPUT_DIR = 
'.datadog-ci/flare/stepfunctions-MyWorkflow-1704106800000' + +export const MOCK_INSIGHTS_CONTENT = `# Step Functions Flare Insights + +Generated: 2024-01-01T10:00:00.000Z + +## State Machine Configuration +- Name: MyWorkflow +- ARN: arn:aws:states:us-east-1:123456789012:stateMachine:MyWorkflow +- Type: STANDARD +- Status: ACTIVE + +## Framework +Serverless Framework + +## Environment +- Region: us-east-1 +- CLI Version: 1.0.0 +` diff --git a/src/commands/stepfunctions/__tests__/flare.test.ts b/src/commands/stepfunctions/__tests__/flare.test.ts new file mode 100644 index 000000000..ca9ab3f6c --- /dev/null +++ b/src/commands/stepfunctions/__tests__/flare.test.ts @@ -0,0 +1,664 @@ +import fs from 'fs' + +import { + CloudWatchLogsClient, + DescribeSubscriptionFiltersCommand, + GetLogEventsCommand, + DescribeLogStreamsCommand, +} from '@aws-sdk/client-cloudwatch-logs' +import { + SFNClient, + DescribeStateMachineCommand, + ListTagsForResourceCommand, + ListExecutionsCommand, + GetExecutionHistoryCommand, + DescribeExecutionCommand, + ExecutionStatus, +} from '@aws-sdk/client-sfn' +import {mockClient} from 'aws-sdk-client-mock' +import upath from 'upath' +import 'aws-sdk-client-mock-jest' + +import {API_KEY_ENV_VAR, CI_API_KEY_ENV_VAR, FLARE_OUTPUT_DIRECTORY} from '../../../constants' +import {deleteFolder} from '../../../helpers/fs' + +import {getAWSCredentials} from '../../lambda/functions/commons' + +import {StepFunctionsFlareCommand} from '../flare' + +import { + stateMachineConfigFixture, + sensitiveStateMachineConfigFixture, + executionsFixture, + sensitiveExecutionFixture, + executionHistoryFixture, + stepFunctionTagsFixture, + logSubscriptionFiltersFixture, + cloudWatchLogsFixture, + MOCK_STATE_MACHINE_ARN, + MOCK_REGION, + MOCK_CASE_ID, + MOCK_EMAIL, + MOCK_API_KEY, + MOCK_OUTPUT_DIR, +} from './fixtures/stepfunctions-flare' + +// Mock the AWS SDK clients +const sfnClientMock = mockClient(SFNClient) +const cloudWatchLogsClientMock = 
mockClient(CloudWatchLogsClient) + +// Mock the helpers +jest.mock('../../../helpers/flare') +jest.mock('../../../helpers/prompt') +jest.mock('../../lambda/functions/commons') + +describe('StepFunctionsFlareCommand', () => { + let command: StepFunctionsFlareCommand + + // Helper function to set up command with values for unit testing + // This simulates what Clipanion does when parsing command line arguments + const setupCommand = (options: {stateMachineArn?: string; caseId?: string; email?: string; region?: string}) => { + const cmd = new StepFunctionsFlareCommand() + // Override the Option objects with actual values for testing + ;(cmd as any).stateMachineArn = options.stateMachineArn + ;(cmd as any).caseId = options.caseId + ;(cmd as any).email = options.email + ;(cmd as any).region = options.region + // Set up context for commands that use stdout/stderr + cmd.context = { + stdout: {write: jest.fn()}, + stderr: {write: jest.fn()}, + } as any + + return cmd + } + + beforeEach(() => { + // Reset all mocks + jest.resetAllMocks() + sfnClientMock.reset() + cloudWatchLogsClientMock.reset() + + // Set up environment + process.env[CI_API_KEY_ENV_VAR] = MOCK_API_KEY + + // Create command instance for unit tests + command = new StepFunctionsFlareCommand() + }) + + afterEach(() => { + delete process.env[CI_API_KEY_ENV_VAR] + }) + + describe('validateInputs', () => { + it('should return 1 when state machine ARN is missing', async () => { + const cmd = setupCommand({}) + const result = await cmd['validateInputs']() + expect(result).toBe(1) + }) + + it('should return 1 when case ID is missing', async () => { + const cmd = setupCommand({stateMachineArn: MOCK_STATE_MACHINE_ARN}) + const result = await cmd['validateInputs']() + expect(result).toBe(1) + }) + + it('should return 1 when email is missing', async () => { + const cmd = setupCommand({ + stateMachineArn: MOCK_STATE_MACHINE_ARN, + caseId: MOCK_CASE_ID, + }) + const result = await cmd['validateInputs']() + 
expect(result).toBe(1) + }) + + it('should return 1 when API key is missing', async () => { + delete process.env[CI_API_KEY_ENV_VAR] + delete process.env[API_KEY_ENV_VAR] + const cmd = setupCommand({ + stateMachineArn: MOCK_STATE_MACHINE_ARN, + caseId: MOCK_CASE_ID, + email: MOCK_EMAIL, + }) + const result = await cmd['validateInputs']() + expect(result).toBe(1) + }) + + it('should return 1 when state machine ARN is invalid', async () => { + const cmd = setupCommand({ + stateMachineArn: 'invalid-arn', + caseId: MOCK_CASE_ID, + email: MOCK_EMAIL, + }) + const result = await cmd['validateInputs']() + expect(result).toBe(1) + }) + + it('should return 0 when all required inputs are valid', async () => { + const cmd = setupCommand({ + stateMachineArn: MOCK_STATE_MACHINE_ARN, + caseId: MOCK_CASE_ID, + email: MOCK_EMAIL, + region: MOCK_REGION, + }) + const result = await cmd['validateInputs']() + expect(result).toBe(0) + }) + }) + + describe('getStateMachineConfiguration', () => { + it('should fetch state machine configuration', async () => { + const mockConfig = stateMachineConfigFixture() + sfnClientMock.on(DescribeStateMachineCommand).resolves(mockConfig) + + const sfnClient = new SFNClient({region: MOCK_REGION}) + const result = await command['getStateMachineConfiguration'](sfnClient, MOCK_STATE_MACHINE_ARN) + + expect(result).toEqual(mockConfig) + expect(sfnClientMock).toHaveReceivedCommandWith(DescribeStateMachineCommand, { + stateMachineArn: MOCK_STATE_MACHINE_ARN, + includedData: 'ALL_DATA', + }) + }) + + it('should handle errors when fetching configuration', async () => { + sfnClientMock.on(DescribeStateMachineCommand).rejects(new Error('State machine not found')) + + const sfnClient = new SFNClient({region: MOCK_REGION}) + + await expect(command['getStateMachineConfiguration'](sfnClient, MOCK_STATE_MACHINE_ARN)).rejects.toThrow( + 'State machine not found' + ) + }) + }) + + describe('getStateMachineTags', () => { + it('should fetch and format state machine 
tags', async () => { + const mockTags = stepFunctionTagsFixture() + sfnClientMock.on(ListTagsForResourceCommand).resolves({tags: mockTags}) + + const sfnClient = new SFNClient({region: MOCK_REGION}) + const result = await command['getStateMachineTags'](sfnClient, MOCK_STATE_MACHINE_ARN) + + expect(result).toEqual({ + Environment: 'test', + Service: 'payment-processor', + Team: 'platform', + }) + expect(sfnClientMock).toHaveReceivedCommandWith(ListTagsForResourceCommand, { + resourceArn: MOCK_STATE_MACHINE_ARN, + }) + }) + + it('should return empty object when no tags exist', async () => { + sfnClientMock.on(ListTagsForResourceCommand).resolves({tags: []}) + + const sfnClient = new SFNClient({region: MOCK_REGION}) + const result = await command['getStateMachineTags'](sfnClient, MOCK_STATE_MACHINE_ARN) + + expect(result).toEqual({}) + }) + }) + + describe('getRecentExecutions', () => { + it('should fetch recent executions with default limit', async () => { + const mockExecutions = executionsFixture() + sfnClientMock.on(ListExecutionsCommand).resolves({executions: mockExecutions}) + + const sfnClient = new SFNClient({region: MOCK_REGION}) + const result = await command['getRecentExecutions'](sfnClient, MOCK_STATE_MACHINE_ARN) + + expect(result).toEqual(mockExecutions) + expect(sfnClientMock).toHaveReceivedCommandWith(ListExecutionsCommand, { + stateMachineArn: MOCK_STATE_MACHINE_ARN, + maxResults: 10, + }) + }) + + it('should respect custom maxExecutions parameter', async () => { + command['maxExecutions'] = '5' + const mockExecutions = executionsFixture() + sfnClientMock.on(ListExecutionsCommand).resolves({executions: mockExecutions}) + + const sfnClient = new SFNClient({region: MOCK_REGION}) + await command['getRecentExecutions'](sfnClient, MOCK_STATE_MACHINE_ARN) + + expect(sfnClientMock).toHaveReceivedCommandWith(ListExecutionsCommand, { + stateMachineArn: MOCK_STATE_MACHINE_ARN, + maxResults: 5, + }) + }) + }) + + describe('getExecutionHistory', () => { + 
it('should fetch execution history events', async () => { + const mockHistory = executionHistoryFixture() + sfnClientMock.on(GetExecutionHistoryCommand).resolves({events: mockHistory}) + + const sfnClient = new SFNClient({region: MOCK_REGION}) + const executionArn = 'arn:aws:states:us-east-1:123456789012:execution:MyWorkflow:execution1' + const result = await command['getExecutionHistory'](sfnClient, executionArn) + + expect(result).toEqual(mockHistory) + expect(sfnClientMock).toHaveReceivedCommandWith(GetExecutionHistoryCommand, { + executionArn, + includeExecutionData: true, + maxResults: 500, + }) + }) + }) + + describe('getLogSubscriptions', () => { + it('should fetch log subscription filters', async () => { + const mockFilters = logSubscriptionFiltersFixture() + cloudWatchLogsClientMock.on(DescribeSubscriptionFiltersCommand).resolves({ + subscriptionFilters: mockFilters, + }) + + const cwClient = new CloudWatchLogsClient({region: MOCK_REGION}) + const logGroupName = '/aws/vendedlogs/states/MyWorkflow-Logs' + const result = await command['getLogSubscriptions'](cwClient, logGroupName) + + expect(result).toEqual({filters: mockFilters, exists: true}) + expect(cloudWatchLogsClientMock).toHaveReceivedCommandWith(DescribeSubscriptionFiltersCommand, { + logGroupName, + }) + }) + + it('should return empty array when log group does not exist', async () => { + cloudWatchLogsClientMock.on(DescribeSubscriptionFiltersCommand).rejects(new Error('ResourceNotFoundException')) + + const cwClient = new CloudWatchLogsClient({region: MOCK_REGION}) + const logGroupName = '/aws/vendedlogs/states/MyWorkflow-Logs' + const result = await command['getLogSubscriptions'](cwClient, logGroupName) + + expect(result).toEqual({filters: [], exists: false}) + }) + }) + + describe('getCloudWatchLogs', () => { + it('should fetch and organize CloudWatch logs', async () => { + const mockLogs = cloudWatchLogsFixture() + cloudWatchLogsClientMock + .on(DescribeLogStreamsCommand) + .resolves({ + 
logStreams: [{logStreamName: 'stream1'}, {logStreamName: 'stream2'}], + }) + .on(GetLogEventsCommand) + .resolves({events: mockLogs}) + + const cwClient = new CloudWatchLogsClient({region: MOCK_REGION}) + const logGroupName = '/aws/vendedlogs/states/MyWorkflow-Logs' + const result = await command['getCloudWatchLogs'](cwClient, logGroupName) + + expect(result).toBeInstanceOf(Map) + expect(result.size).toBeGreaterThan(0) + }) + }) + + describe('maskStateMachineConfig', () => { + it('should mask sensitive data in state machine configuration', () => { + const sensitiveConfig = sensitiveStateMachineConfigFixture() + const maskedConfig = command['maskStateMachineConfig'](sensitiveConfig) + + // Verify that sensitive data is masked + const maskedDefinition = JSON.parse(maskedConfig.definition!) + expect(maskedDefinition.States.ProcessPayment.Parameters.SecretToken).not.toBe('secret-12345-token') + expect(maskedDefinition.States.ProcessPayment.Parameters.DatabasePassword).not.toBe('super-secret-password') + }) + }) + + describe('maskExecutionData', () => { + it('should mask sensitive execution input/output', () => { + const sensitiveExecution = sensitiveExecutionFixture() + const maskedExecution = command['maskExecutionData'](sensitiveExecution) + + // Verify that input and output are masked + expect(maskedExecution.input).not.toContain('4111-1111-1111-1111') + expect(maskedExecution.output).not.toContain('Bearer secret-token') + }) + }) + + describe('generateInsightsFile', () => { + it('should generate insights file with correct content', () => { + const mockConfig = stateMachineConfigFixture() + const filePath = upath.join(MOCK_OUTPUT_DIR, 'INSIGHTS.md') + + // Create the directory if it doesn't exist + if (!fs.existsSync(MOCK_OUTPUT_DIR)) { + fs.mkdirSync(MOCK_OUTPUT_DIR, {recursive: true}) + } + + // Set up command with state machine ARN for region extraction + command = setupCommand({ + stateMachineArn: 'arn:aws:states:us-east-1:123456789012:stateMachine:MyWorkflow', 
+ }) + + const mockTags = { + DD_TRACE_ENABLED: 'true', + DD_TRACE_SAMPLE_RATE: '1', + Environment: 'production', + Team: 'platform', + } + + command['generateInsightsFile'](filePath, false, mockConfig, mockTags, undefined, true) + + // Read the file and check its content + const content = fs.readFileSync(filePath, 'utf8') + expect(content).toContain('Step Functions Flare Insights') + expect(content).toContain('MyWorkflow') + + // Clean up + deleteFolder(MOCK_OUTPUT_DIR) + }) + + it('should detect configuration issues', () => { + // Create a config with issues + const mockConfig = { + ...stateMachineConfigFixture(), + loggingConfiguration: { + level: 'ERROR' as any, // Should be 'ALL' + includeExecutionData: false, // Should be true + destinations: [], + }, + tracingConfiguration: { + enabled: true, // X-Ray is duplicative + }, + } + + const mockTags = { + DD_TRACE_ENABLED: 'false', // Should be 'true' + DD_TRACE_SAMPLE_RATE: '2.5', // Invalid - should be 0-1 + } + + const filePath = upath.join(MOCK_OUTPUT_DIR, 'INSIGHTS_ISSUES.md') + + // Create the directory if it doesn't exist + if (!fs.existsSync(MOCK_OUTPUT_DIR)) { + fs.mkdirSync(MOCK_OUTPUT_DIR, {recursive: true}) + } + + command = setupCommand({ + stateMachineArn: 'arn:aws:states:us-east-1:123456789012:stateMachine:MyWorkflow', + }) + + command['generateInsightsFile'](filePath, false, mockConfig, mockTags, [], true) + + const content = fs.readFileSync(filePath, 'utf8') + + // Check for configuration issues + expect(content).toContain('Configuration Analysis') + expect(content).toContain('Configuration Issues') + expect(content).toContain('Log level must be set to "ALL"') + expect(content).toContain('Include Execution Data must be enabled') + expect(content).toContain('Missing Datadog log integration') + expect(content).toContain('DD_TRACE_ENABLED must be set to true') + + // Check for warnings + expect(content).toContain('Warnings') + expect(content).toContain('X-Ray tracing is enabled') + 
expect(content).toContain('DD_TRACE_SAMPLE_RATE has invalid value') + + // Clean up + deleteFolder(MOCK_OUTPUT_DIR) + }) + + it('should detect when log group does not exist', () => { + const mockConfig = { + ...stateMachineConfigFixture(), + loggingConfiguration: { + level: 'ALL' as any, + includeExecutionData: true, + destinations: [ + { + cloudWatchLogsLogGroup: { + logGroupArn: 'arn:aws:logs:us-east-1:123456789012:log-group:/aws/vendedlogs/states/TestStateMachine:*', + }, + }, + ], + }, + } + + const mockTags = { + DD_TRACE_ENABLED: 'true', + } + + const filePath = upath.join(MOCK_OUTPUT_DIR, 'INSIGHTS_NO_LOG_GROUP.md') + + if (!fs.existsSync(MOCK_OUTPUT_DIR)) { + fs.mkdirSync(MOCK_OUTPUT_DIR, {recursive: true}) + } + + command = setupCommand({ + stateMachineArn: 'arn:aws:states:us-east-1:123456789012:stateMachine:MyWorkflow', + }) + + // Call with logGroupExists = false + command['generateInsightsFile'](filePath, false, mockConfig, mockTags, [], false) + + const content = fs.readFileSync(filePath, 'utf8') + + // Check for log group not found issue + expect(content).toContain('Configuration Issues') + expect(content).toContain('Log group does not exist') + expect(content).toContain('/aws/vendedlogs/states/TestStateMachine') + expect(content).not.toContain('Missing Datadog log integration') + + // Clean up + deleteFolder(MOCK_OUTPUT_DIR) + }) + }) + + describe('getFramework', () => { + it('should detect frameworks based on files', () => { + // Since getFramework reads from process.cwd(), we can't easily test it + // without mocking. 
Let's just test that it returns a string + const framework = command['getFramework']() + expect(typeof framework).toBe('string') + }) + }) + + describe('createOutputDirectory', () => { + it('should create output directory structure', async () => { + // Set up command with stateMachineArn + const cmd = setupCommand({ + stateMachineArn: MOCK_STATE_MACHINE_ARN, + }) + + const outputDir = await cmd['createOutputDirectory']() + + expect(outputDir).toContain(FLARE_OUTPUT_DIRECTORY) + expect(outputDir).toContain('stepfunctions-MyWorkflow-') + expect(fs.existsSync(outputDir)).toBe(true) + + // Clean up + deleteFolder(FLARE_OUTPUT_DIRECTORY) + }) + }) + + describe('writeOutputFiles', () => { + it('should write all output files', async () => { + const mockData = { + config: stateMachineConfigFixture(), + tags: {Environment: 'test'}, + executions: executionsFixture(), + subscriptionFilters: logSubscriptionFiltersFixture(), + logs: new Map([['stream1', cloudWatchLogsFixture()]]), + } + + // Create test directory + if (!fs.existsSync(MOCK_OUTPUT_DIR)) { + fs.mkdirSync(MOCK_OUTPUT_DIR, {recursive: true}) + } + + await command['writeOutputFiles'](MOCK_OUTPUT_DIR, mockData) + + // Check that files were created + expect(fs.existsSync(upath.join(MOCK_OUTPUT_DIR, 'state_machine_config.json'))).toBe(true) + expect(fs.existsSync(upath.join(MOCK_OUTPUT_DIR, 'tags.json'))).toBe(true) + expect(fs.existsSync(upath.join(MOCK_OUTPUT_DIR, 'recent_executions.json'))).toBe(true) + expect(fs.existsSync(upath.join(MOCK_OUTPUT_DIR, 'log_subscription_filters.json'))).toBe(true) + expect(fs.existsSync(upath.join(MOCK_OUTPUT_DIR, 'logs'))).toBe(true) + + // Clean up + deleteFolder(MOCK_OUTPUT_DIR) + }) + }) + + describe('parseStateMachineArn', () => { + it('should correctly parse state machine ARN', () => { + const parsed = command['parseStateMachineArn'](MOCK_STATE_MACHINE_ARN) + + expect(parsed).toEqual({ + region: 'us-east-1', + name: 'MyWorkflow', + }) + }) + }) + + describe('getLogGroupName', () 
=> { + it('should extract log group name from configuration', () => { + const mockConfig = stateMachineConfigFixture() + const logGroupName = command['getLogGroupName'](mockConfig) + + expect(logGroupName).toBe('/aws/vendedlogs/states/MyWorkflow-Logs') + }) + + it('should return undefined when no logging configuration', () => { + const mockConfig = stateMachineConfigFixture() + mockConfig.loggingConfiguration = undefined + + const logGroupName = command['getLogGroupName'](mockConfig) + + expect(logGroupName).toBeUndefined() + }) + }) + + describe('maskAslDefinition', () => { + it('should mask sensitive fields in ASL definition', () => { + const sensitiveAsl = JSON.stringify({ + States: { + ProcessPayment: { + Parameters: { + ApiKey: 'secret-api-key', + Password: 'secret-password', + }, + }, + }, + }) + + const maskedAsl = command['maskAslDefinition'](sensitiveAsl) + const parsed = JSON.parse(maskedAsl) + + expect(parsed.States.ProcessPayment.Parameters.ApiKey).not.toBe('secret-api-key') + expect(parsed.States.ProcessPayment.Parameters.Password).not.toBe('secret-password') + }) + }) + + describe('getExecutionDetails', () => { + it('should fetch detailed execution information', async () => { + const mockExecutionDetails = { + executionArn: 'arn:aws:states:us-east-1:123456789012:execution:MyWorkflow:execution1', + status: ExecutionStatus.SUCCEEDED, + input: '{"orderId": "12345"}', + output: '{"result": "success"}', + } + + sfnClientMock.on(DescribeExecutionCommand).resolves(mockExecutionDetails) + + const sfnClient = new SFNClient({region: MOCK_REGION}) + const result = await command['getExecutionDetails']( + sfnClient, + 'arn:aws:states:us-east-1:123456789012:execution:MyWorkflow:execution1' + ) + + expect(result).toEqual(mockExecutionDetails) + }) + }) + + describe('execute', () => { + let context: any + + beforeEach(() => { + // Create command with context + context = { + stdout: {write: jest.fn()}, + stderr: {write: jest.fn()}, + } + command.context = context + + 
// Set command options + ;(command as any).stateMachineArn = MOCK_STATE_MACHINE_ARN + ;(command as any).caseId = MOCK_CASE_ID + ;(command as any).email = MOCK_EMAIL + ;(command as any).region = MOCK_REGION + ;(command as any).isDryRun = true + ;(command as any).withLogs = false + }) + + it('should successfully execute in dry run mode', async () => { + // Mock AWS credentials + ;(getAWSCredentials as jest.Mock).mockResolvedValue({ + accessKeyId: 'test-access-key', + secretAccessKey: 'test-secret-key', + }) + + // Mock AWS responses + sfnClientMock.on(DescribeStateMachineCommand).resolves(stateMachineConfigFixture()) + sfnClientMock.on(ListTagsForResourceCommand).resolves({tags: stepFunctionTagsFixture()}) + sfnClientMock.on(ListExecutionsCommand).resolves({executions: executionsFixture()}) + sfnClientMock.on(DescribeExecutionCommand).resolves({ + executionArn: 'arn:aws:states:us-east-1:123456789012:execution:MyWorkflow:execution1', + status: 'SUCCEEDED', + input: '{"orderId": "12345"}', + output: '{"result": "success"}', + }) + sfnClientMock.on(GetExecutionHistoryCommand).resolves({events: executionHistoryFixture()}) + + // Mock CloudWatch logs responses + cloudWatchLogsClientMock.on(DescribeSubscriptionFiltersCommand).resolves({ + subscriptionFilters: logSubscriptionFiltersFixture(), + }) + + // No need to mock fs anymore + + const result = await command.execute() + + expect(result).toBe(0) + expect(context.stdout.write).toHaveBeenCalledWith(expect.stringContaining('Collecting Step Functions flare data')) + expect(context.stdout.write).toHaveBeenCalledWith( + expect.stringContaining('The flare files were not sent because the command was executed in dry run mode') + ) + expect(context.stdout.write).toHaveBeenCalledWith(expect.stringContaining('Your output files are located at')) + + // Clean up + if (fs.existsSync(FLARE_OUTPUT_DIRECTORY)) { + deleteFolder(FLARE_OUTPUT_DIRECTORY) + } + }) + + it('should handle missing required parameters', async () => { + ;(command 
as any).stateMachineArn = undefined + + const result = await command.execute() + + expect(result).toBe(1) + expect(context.stderr.write).toHaveBeenCalledWith(expect.stringContaining('No state machine ARN specified')) + }) + + it('should handle AWS API errors gracefully', async () => { + // Mock AWS credentials + ;(getAWSCredentials as jest.Mock).mockResolvedValue({ + accessKeyId: 'test-access-key', + secretAccessKey: 'test-secret-key', + }) + + sfnClientMock.on(DescribeStateMachineCommand).rejects(new Error('State machine not found')) + + const result = await command.execute() + + expect(result).toBe(1) + expect(context.stderr.write).toHaveBeenCalledWith(expect.stringContaining('Error collecting flare data')) + }) + }) +}) diff --git a/src/commands/stepfunctions/cli.ts b/src/commands/stepfunctions/cli.ts index 145f02b15..34e32bbaa 100644 --- a/src/commands/stepfunctions/cli.ts +++ b/src/commands/stepfunctions/cli.ts @@ -1,4 +1,5 @@ +import {StepFunctionsFlareCommand} from './flare' import {InstrumentStepFunctionsCommand} from './instrument' import {UninstrumentStepFunctionsCommand} from './uninstrument' -module.exports = [InstrumentStepFunctionsCommand, UninstrumentStepFunctionsCommand] +module.exports = [InstrumentStepFunctionsCommand, UninstrumentStepFunctionsCommand, StepFunctionsFlareCommand] diff --git a/src/commands/stepfunctions/flare.ts b/src/commands/stepfunctions/flare.ts new file mode 100644 index 000000000..961c76c46 --- /dev/null +++ b/src/commands/stepfunctions/flare.ts @@ -0,0 +1,996 @@ +import * as fs from 'fs' + +import { + CloudWatchLogsClient, + DescribeLogStreamsCommand, + DescribeSubscriptionFiltersCommand, + GetLogEventsCommand, + OutputLogEvent, + SubscriptionFilter, +} from '@aws-sdk/client-cloudwatch-logs' +import { + DescribeExecutionCommand, + DescribeStateMachineCommand, + DescribeStateMachineCommandOutput, + ExecutionListItem, + GetExecutionHistoryCommand, + HistoryEvent, + ListExecutionsCommand, + ListTagsForResourceCommand, + 
SFNClient, +} from '@aws-sdk/client-sfn' +import {AwsCredentialIdentity} from '@aws-sdk/types' +import chalk from 'chalk' +import {Command, Option} from 'clipanion' + +import { + API_KEY_ENV_VAR, + CI_API_KEY_ENV_VAR, + FIPS_ENV_VAR, + FIPS_IGNORE_ERROR_ENV_VAR, + FLARE_OUTPUT_DIRECTORY, +} from '../../constants' +import {toBoolean} from '../../helpers/env' +import {enableFips} from '../../helpers/fips' +import {sendToDatadog} from '../../helpers/flare' +import {createDirectories, deleteFolder, writeFile, zipContents} from '../../helpers/fs' +import {requestConfirmation} from '../../helpers/prompt' +import * as helpersRenderer from '../../helpers/renderer' +import {version} from '../../helpers/version' + +import {getAWSCredentials} from '../lambda/functions/commons' + +export class StepFunctionsFlareCommand extends Command { + public static paths = [['stepfunctions', 'flare']] + + public static usage = Command.Usage({ + category: 'Serverless', + description: + 'Gather state machine configuration, execution history, logs, and project files for Datadog support troubleshooting.', + }) + + // CLI Options + private isDryRun = Option.Boolean('-d,--dry,--dry-run', false) + private withLogs = Option.Boolean('--with-logs', false) + private stateMachineArn = Option.String('-s,--state-machine') + private caseId = Option.String('-c,--case-id') + private email = Option.String('-e,--email') + private start = Option.String('--start') + private end = Option.String('--end') + private maxExecutions = Option.String('--max-executions', '10') + + private apiKey?: string + private credentials?: AwsCredentialIdentity + + private fips = Option.Boolean('--fips', false) + private fipsIgnoreError = Option.Boolean('--fips-ignore-error', false) + private config = { + fips: toBoolean(process.env[FIPS_ENV_VAR]) ?? false, + fipsIgnoreError: toBoolean(process.env[FIPS_IGNORE_ERROR_ENV_VAR]) ?? false, + } + + /** + * Entry point for the `stepfunctions flare` command. 
+ * Gathers state machine configuration, execution history, logs, and project files + * for Datadog support troubleshooting. + * @returns 0 if the command ran successfully, 1 otherwise. + */ + public async execute(): Promise<0 | 1> { + // Enable FIPS if configured + enableFips(this.fips || this.config.fips, this.fipsIgnoreError || this.config.fipsIgnoreError) + + this.context.stdout.write(helpersRenderer.renderFlareHeader('Step Functions', this.isDryRun)) + + // Validate inputs + const validationResult = await this.validateInputs() + if (validationResult !== 0) { + return validationResult + } + + try { + // Get AWS credentials + this.context.stdout.write(chalk.bold('\nšŸ”‘ Getting AWS credentials...\n')) + try { + this.credentials = await getAWSCredentials() + } catch (err) { + if (err instanceof Error) { + this.context.stderr.write(helpersRenderer.renderError(err.message)) + } + + return 1 + } + + // Parse ARN to get region + const {region} = this.parseStateMachineArn(this.stateMachineArn!) + + // Create AWS clients + const sfnClient = new SFNClient({region, credentials: this.credentials}) + const cloudWatchLogsClient = new CloudWatchLogsClient({region, credentials: this.credentials}) + + this.context.stdout.write(chalk.bold('\nšŸ” Collecting Step Functions flare data...\n')) + + // 1. Get state machine configuration + this.context.stdout.write('šŸ“‹ Fetching state machine configuration...\n') + const stateMachineConfig = await this.getStateMachineConfiguration(sfnClient, this.stateMachineArn!) + const maskedConfig = this.maskStateMachineConfig(stateMachineConfig) + + // 2. Get state machine tags + this.context.stdout.write('šŸ”– Getting resource tags...\n') + const tags = await this.getStateMachineTags(sfnClient, this.stateMachineArn!) + + // 3. Get recent executions + this.context.stdout.write('šŸ“Š Fetching recent executions...\n') + + const executions = await this.getRecentExecutions(sfnClient, this.stateMachineArn!) 
+
+      // Mask sensitive data in executions
+      const maskedExecutions = executions.map((exec) => this.maskExecutionData(exec))
+
+      // 4. Get execution details and history for each execution
+      this.context.stdout.write('šŸ“œ Fetching execution details and history...\n')
+
+      for (const execution of executions.slice(0, 5)) {
+        // Limit to 5 most recent
+        if (execution.executionArn) {
+          const details = await this.getExecutionDetails(sfnClient, execution.executionArn)
+          const maskedDetails = this.maskExecutionData(details)
+          // Add details to execution object
+          Object.assign(execution, maskedDetails)
+
+          // Get execution history
+          const history = await this.getExecutionHistory(sfnClient, execution.executionArn)
+          ;(execution as any).history = history
+        }
+      }
+
+      // 5. Get log subscription filters (always collected)
+      let subscriptionFilters: SubscriptionFilter[] | undefined
+      let logGroupExists = true
+      const logGroupName = this.getLogGroupName(stateMachineConfig)
+      if (logGroupName) {
+        this.context.stdout.write('šŸ” Getting log subscription filters...\n')
+        const result = await this.getLogSubscriptions(cloudWatchLogsClient, logGroupName)
+        subscriptionFilters = result.filters
+        logGroupExists = result.exists
+      }
+
+      // 6. Get CloudWatch logs if enabled
+      let logs: Map<string, OutputLogEvent[]> | undefined
+      if (this.withLogs && logGroupName) {
+        this.context.stdout.write('ā˜ļø Getting CloudWatch logs...\n')
+        const startTime = this.start ? new Date(this.start).getTime() : undefined
+        const endTime = this.end ? new Date(this.end).getTime() : undefined
+        logs = await this.getCloudWatchLogs(cloudWatchLogsClient, logGroupName, startTime, endTime)
+        if (!logs || logs.size === 0) {
+          this.context.stdout.write(' No logs found in the specified time range\n')
+        } else {
+          this.context.stdout.write(` Found logs from ${logs.size} log streams\n`)
+        }
+      }
+
+      // 7. 
Create output directory + this.context.stdout.write(chalk.bold('\nšŸ’¾ Saving files...\n')) + const outputDir = await this.createOutputDirectory() + + // 8. Generate insights file + const insightsPath = `${outputDir}/INSIGHTS.md` + this.generateInsightsFile(insightsPath, this.isDryRun, maskedConfig, tags, subscriptionFilters, logGroupExists) + + // 9. Write all output files + await this.writeOutputFiles(outputDir, { + config: maskedConfig, + tags, + executions: maskedExecutions, + subscriptionFilters, + logs, + }) + + // 10. Create zip archive + const zipPath = `${outputDir}.zip` + await zipContents(outputDir, zipPath) + + // 11. Send to Datadog or show dry-run message + if (this.isDryRun) { + this.context.stdout.write( + '\n🚫 The flare files were not sent because the command was executed in dry run mode.\n' + ) + this.context.stdout.write(`\nšŸ“ Your output files are located at: ${outputDir}\n`) + this.context.stdout.write(`šŸ“¦ Zip file created at: ${zipPath}\n`) + + return 0 + } + + // Confirm before sending + this.context.stdout.write('\n') + const confirmSendFiles = await requestConfirmation( + 'Are you sure you want to send the flare file to Datadog Support?', + false + ) + + if (!confirmSendFiles) { + this.context.stdout.write('\n🚫 The flare files were not sent based on your selection.') + this.context.stdout.write(`\nšŸ“ Your output files are located at: ${outputDir}\n`) + this.context.stdout.write(`šŸ“¦ Zip file created at: ${zipPath}\n`) + + return 0 + } + + // Send to Datadog + this.context.stdout.write(chalk.bold('\nšŸš€ Sending to Datadog Support...\n')) + await sendToDatadog(zipPath, this.caseId!, this.email!, this.apiKey!, outputDir) + this.context.stdout.write(chalk.bold('\nāœ… Successfully sent flare file to Datadog Support!\n')) + + // Delete contents + deleteFolder(outputDir) + fs.unlinkSync(zipPath) + + return 0 + } catch (error) { + this.context.stderr.write( + `\nError collecting flare data: ${error instanceof Error ? 
error.message : String(error)}\n` + ) + + return 1 + } + } + + /** + * Validates required inputs for the flare command + * @returns 0 if all inputs are valid, 1 otherwise + */ + private async validateInputs(): Promise<0 | 1> { + const errorMessages: string[] = [] + + // Validate state machine ARN + if (this.stateMachineArn === undefined) { + errorMessages.push(helpersRenderer.renderError('No state machine ARN specified. [-s,--state-machine]')) + } else { + // Validate ARN format + const arnPattern = /^arn:aws:states:[a-z0-9-]+:\d{12}:stateMachine:[a-zA-Z0-9-_]+$/ + if (!arnPattern.test(this.stateMachineArn)) { + errorMessages.push(helpersRenderer.renderError('Invalid state machine ARN format.')) + } + } + + // Validate case ID + if (this.caseId === undefined) { + errorMessages.push(helpersRenderer.renderError('No case ID specified. [-c,--case-id]')) + } + + // Validate email + if (this.email === undefined) { + errorMessages.push(helpersRenderer.renderError('No email specified. [-e,--email]')) + } + + // Validate API key + this.apiKey = process.env[CI_API_KEY_ENV_VAR] ?? process.env[API_KEY_ENV_VAR] + if (this.apiKey === undefined) { + errorMessages.push( + helpersRenderer.renderError( + 'No Datadog API key specified. Set an API key with the DATADOG_API_KEY environment variable.' 
+        )
+      )
+    }
+
+    if (errorMessages.length > 0) {
+      for (const message of errorMessages) {
+        this.context.stderr.write(message)
+      }
+
+      return 1
+    }
+
+    return 0
+  }
+
+  /**
+   * Fetches the state machine configuration from AWS
+   * @param sfnClient Step Functions client
+   * @param stateMachineArn ARN of the state machine
+   * @returns State machine configuration
+   */
+  private async getStateMachineConfiguration(
+    sfnClient: SFNClient,
+    stateMachineArn: string
+  ): Promise<DescribeStateMachineCommandOutput> {
+    const command = new DescribeStateMachineCommand({
+      stateMachineArn,
+      includedData: 'ALL_DATA',
+    })
+
+    return sfnClient.send(command)
+  }
+
+  /**
+   * Fetches tags associated with the state machine
+   * @param sfnClient Step Functions client
+   * @param stateMachineArn ARN of the state machine
+   * @returns Map of tag keys to values
+   */
+  private async getStateMachineTags(sfnClient: SFNClient, stateMachineArn: string): Promise<Record<string, string>> {
+    const command = new ListTagsForResourceCommand({
+      resourceArn: stateMachineArn,
+    })
+    const response = await sfnClient.send(command)
+    const tags: Record<string, string> = {}
+    if (response.tags) {
+      for (const tag of response.tags) {
+        if (tag.key && tag.value) {
+          tags[tag.key] = tag.value
+        }
+      }
+    }
+
+    return tags
+  }
+
+  /**
+   * Fetches recent executions of the state machine
+   * @param sfnClient Step Functions client
+   * @param stateMachineArn ARN of the state machine
+   * @returns List of recent executions
+   */
+  private async getRecentExecutions(sfnClient: SFNClient, stateMachineArn: string): Promise<ExecutionListItem[]> {
+    // Handle both direct string values (from tests) and Option objects (from CLI)
+    const maxExecutionsValue = typeof this.maxExecutions === 'string' ? this.maxExecutions : '10'
+    const maxResults = parseInt(maxExecutionsValue, 10)
+    const command = new ListExecutionsCommand({
+      stateMachineArn,
+      maxResults,
+    })
+    const response = await sfnClient.send(command)
+
+    return response.executions ?? []
+  }
+
+  /**
+   * Fetches the execution history for a specific execution
+   * @param sfnClient Step Functions client
+   * @param executionArn ARN of the execution
+   * @returns List of history events
+   */
+  private async getExecutionHistory(sfnClient: SFNClient, executionArn: string): Promise<HistoryEvent[]> {
+    const command = new GetExecutionHistoryCommand({
+      executionArn,
+      includeExecutionData: true,
+      maxResults: 500,
+    })
+    const response = await sfnClient.send(command)
+
+    return response.events ?? []
+  }
+
+  /**
+   * Fetches CloudWatch log subscription filters for a log group
+   * @param cloudWatchLogsClient CloudWatch Logs client
+   * @param logGroupName Name of the log group
+   * @returns Object with subscription filters and whether the log group exists
+   */
+  private async getLogSubscriptions(
+    cloudWatchLogsClient: CloudWatchLogsClient,
+    logGroupName: string
+  ): Promise<{filters: SubscriptionFilter[]; exists: boolean}> {
+    try {
+      const command = new DescribeSubscriptionFiltersCommand({
+        logGroupName,
+      })
+      const response = await cloudWatchLogsClient.send(command)
+
+      return {
+        filters: response.subscriptionFilters ?? [],
+        exists: true,
+      }
+    } catch (error) {
+      // If log group doesn't exist, return empty array with exists=false
+      if (
+        error instanceof Error &&
+        (error.message.includes('ResourceNotFoundException') ||
+          error.message.includes('specified log group does not exist'))
+      ) {
+        return {
+          filters: [],
+          exists: false,
+        }
+      }
+      throw error
+    }
+  }
+
+  /**
+   * Fetches CloudWatch logs from a log group
+   * @param cloudWatchLogsClient CloudWatch Logs client
+   * @param logGroupName Name of the log group
+   * @param startTime Start time in milliseconds (optional)
+   * @param endTime End time in milliseconds (optional)
+   * @returns Map of log stream names to their log events
+   */
+  private async getCloudWatchLogs(
+    cloudWatchLogsClient: CloudWatchLogsClient,
+    logGroupName: string,
+    startTime?: number,
+    endTime?: number
+  ): Promise<Map<string, OutputLogEvent[]>> {
+    const logs = new Map<string, OutputLogEvent[]>()
+
+    try {
+      // Get log streams
+      const describeStreamsCommand = new DescribeLogStreamsCommand({
+        logGroupName,
+        orderBy: 'LastEventTime',
+        descending: true,
+        limit: 50,
+      })
+      const streamsResponse = await cloudWatchLogsClient.send(describeStreamsCommand)
+      const logStreams = streamsResponse.logStreams ?? 
[] + + // Get logs from each stream + for (const stream of logStreams) { + if (!stream.logStreamName) { + continue + } + + const getLogsCommand = new GetLogEventsCommand({ + logGroupName, + logStreamName: stream.logStreamName, + startTime, + endTime, + limit: 1000, + }) + + const logsResponse = await cloudWatchLogsClient.send(getLogsCommand) + if (logsResponse.events && logsResponse.events.length > 0) { + logs.set(stream.logStreamName, logsResponse.events) + } + } + + return logs + } catch (error) { + // If log group doesn't exist, return empty map + if ( + error instanceof Error && + (error.message.includes('ResourceNotFoundException') || + error.message.includes('specified log group does not exist')) + ) { + return logs + } + throw error + } + } + + /** + * Masks sensitive data in state machine configuration + * @param config State machine configuration + * @returns Configuration with sensitive data masked + */ + private maskStateMachineConfig(config: DescribeStateMachineCommandOutput): DescribeStateMachineCommandOutput { + const maskedConfig = {...config} + + if (maskedConfig.definition) { + maskedConfig.definition = this.maskAslDefinition(maskedConfig.definition) + } + + return maskedConfig + } + + /** + * Masks sensitive data in execution data + * @param execution Execution data object + * @returns Execution data with sensitive fields masked + */ + private maskExecutionData(execution: any): any { + const maskedExecution = {...execution} + + // Mask sensitive data in input and output + if (maskedExecution.input) { + maskedExecution.input = this.maskJsonString(maskedExecution.input) + } + + if (maskedExecution.output) { + maskedExecution.output = this.maskJsonString(maskedExecution.output) + } + + return maskedExecution + } + + private maskJsonString(jsonString: string): string { + try { + const data = JSON.parse(jsonString) + const masked = this.maskSensitiveData(data) + + return JSON.stringify(masked, undefined, 2) + } catch { + // If not valid JSON, return 
as-is
+      return jsonString
+    }
+  }
+
+  /**
+   * Recursively masks values under sensitive-looking keys; arrays and nested
+   * objects are walked, primitives are returned unchanged.
+   */
+  private maskSensitiveData(data: any): any {
+    // Must check null explicitly: typeof null === 'object', and letting null through
+    // makes Object.entries(null) throw — maskJsonString then swallows the error and
+    // falls back to returning the ORIGINAL UNMASKED string, leaking secrets.
+    if (typeof data !== 'object' || data === null) {
+      return data
+    }
+
+    if (Array.isArray(data)) {
+      return data.map((item) => this.maskSensitiveData(item))
+    }
+
+    const masked: any = {}
+    const sensitiveKeys = [
+      'password',
+      'secret',
+      'token',
+      'key',
+      'apikey',
+      'api_key',
+      'access_token',
+      'refresh_token',
+      'private_key',
+      'credential',
+      'creditcard',
+      'credit_card',
+      'ssn',
+      'cvv',
+      'pin',
+    ]
+
+    for (const [key, value] of Object.entries(data)) {
+      const lowerKey = key.toLowerCase()
+      if (sensitiveKeys.some((sensitive) => lowerKey.includes(sensitive))) {
+        masked[key] = '[REDACTED]'
+      } else {
+        masked[key] = this.maskSensitiveData(value)
+      }
+    }
+
+    return masked
+  }
+
+  /**
+   * Generates the insights markdown file with state machine information
+   * @param filePath Path to write the insights file
+   * @param isDryRun Whether this is a dry run
+   * @param config State machine configuration
+   * @param tags State machine tags
+   * @param subscriptionFilters CloudWatch log subscription filters (optional)
+   */
+  private generateInsightsFile(
+    filePath: string,
+    isDryRun: boolean,
+    config: DescribeStateMachineCommandOutput,
+    tags: Record<string, string>,
+    subscriptionFilters?: SubscriptionFilter[],
+    logGroupExists = true
+  ): void {
+    const lines: string[] = []
+
+    // Header
+    lines.push('# Step Functions Flare Insights')
+    lines.push('\n_Autogenerated file from `stepfunctions flare`_ ')
+    if (isDryRun) {
+      lines.push('_This command was run in dry mode._')
+    }
+
+    // Configuration Analysis - FIRST SECTION
+    lines.push('\n## Configuration Analysis')
+    const issues: string[] = []
+    const warnings: string[] = []
+    const recommendations: string[] = []
+
+    // Check log level
+    if (!config.loggingConfiguration || config.loggingConfiguration.level !== 'ALL') {
+      issues.push('**Log level must be set to "ALL"** for complete observability')
+    }
+
+    // Check includeExecutionData
+    if 
(!config.loggingConfiguration || !config.loggingConfiguration.includeExecutionData) { + issues.push('**Include Execution Data must be enabled** to capture input/output in logs') + } + + // Check X-Ray tracing + if (config.tracingConfiguration?.enabled) { + warnings.push('**X-Ray tracing is enabled** - This is duplicative with Datadog tracing and will increase costs') + } + + // Check log subscriptions + const logGroupName = this.getLogGroupName(config) + if (logGroupName && !logGroupExists) { + issues.push( + `**Log group does not exist** - The configured log group \`${logGroupName}\` was not found. Create the log group or update the state machine logging configuration.` + ) + } else { + let hasDatadogIntegration = false + if (subscriptionFilters && subscriptionFilters.length > 0) { + for (const filter of subscriptionFilters) { + if (filter.destinationArn) { + // Check for Lambda forwarder + if ( + filter.destinationArn.includes(':lambda:') && + (filter.destinationArn.includes('datadog') || filter.destinationArn.includes('Datadog')) + ) { + hasDatadogIntegration = true + } + // Check for Kinesis Firehose + if ( + filter.destinationArn.includes(':firehose:') && + (filter.destinationArn.includes('datadog') || filter.destinationArn.includes('Datadog')) + ) { + hasDatadogIntegration = true + } + } + } + } + + if (logGroupExists && !hasDatadogIntegration) { + issues.push( + '**Missing Datadog log integration** - No log subscription filter found for Datadog Lambda forwarder or Kinesis Firehose' + ) + } + } + + // Check Datadog tags + const ddTraceEnabled = tags['DD_TRACE_ENABLED'] || tags['dd_trace_enabled'] + const ddTraceSampleRate = tags['DD_TRACE_SAMPLE_RATE'] || tags['dd_trace_sample_rate'] + + if (!ddTraceEnabled || (ddTraceEnabled.toLowerCase() !== 'true' && ddTraceEnabled !== '1')) { + issues.push('**DD_TRACE_ENABLED must be set to true** on either the Step Function tags or the Datadog forwarder') + } + + if (ddTraceSampleRate) { + const sampleRate = 
parseFloat(ddTraceSampleRate) + if (isNaN(sampleRate) || sampleRate < 0 || sampleRate > 1) { + warnings.push( + `**DD_TRACE_SAMPLE_RATE has invalid value: "${ddTraceSampleRate}"** - Must be a decimal between 0 and 1` + ) + } else if (sampleRate < 1) { + recommendations.push( + `**Consider setting DD_TRACE_SAMPLE_RATE to 1** for troubleshooting (current: ${sampleRate})` + ) + } + } + // Note: Not mentioning DD_TRACE_SAMPLE_RATE if absent since default is 1 + + // Output analysis results + if (issues.length > 0) { + lines.push('\n### Configuration Issues') + for (const issue of issues) { + lines.push(`- ${issue}`) + } + } + + if (warnings.length > 0) { + lines.push('\n### Warnings') + for (const warning of warnings) { + lines.push(`- ${warning}`) + } + } + + if (recommendations.length > 0) { + lines.push('\n### Recommendations') + for (const rec of recommendations) { + lines.push(`- ${rec}`) + } + } + + if (issues.length === 0 && warnings.length === 0) { + lines.push('\n**Configuration looks good!** All requirements are met.') + } + + // State Machine Configuration + lines.push('\n## State Machine Configuration') + lines.push(`**Name**: \`${config.name || 'Unknown'}\` `) + lines.push(`**ARN**: \`${config.stateMachineArn || 'Unknown'}\` `) + lines.push(`**Type**: \`${config.type || 'Unknown'}\` `) + lines.push(`**Status**: \`${config.status || 'Unknown'}\` `) + lines.push(`**Role ARN**: \`${config.roleArn || 'Not specified'}\` `) + lines.push(`**Creation Date**: \`${config.creationDate?.toISOString() || 'Unknown'}\` `) + + // Logging Configuration + lines.push('\n**Logging Configuration**:') + if (config.loggingConfiguration) { + lines.push(`- Level: \`${config.loggingConfiguration.level || 'Not specified'}\``) + lines.push(`- Include Execution Data: \`${config.loggingConfiguration.includeExecutionData || false}\``) + if (config.loggingConfiguration.destinations?.length) { + lines.push('- Destinations:') + for (const dest of config.loggingConfiguration.destinations) { 
+ if (dest.cloudWatchLogsLogGroup?.logGroupArn) { + lines.push(` - CloudWatch Logs: \`${dest.cloudWatchLogsLogGroup.logGroupArn}\``) + } + } + } + } else { + lines.push('- Logging not configured') + } + + // Tracing Configuration + lines.push('\n**Tracing Configuration**:') + lines.push(`- X-Ray Tracing: \`${config.tracingConfiguration?.enabled ? 'Enabled' : 'Disabled'}\``) + + // Tags Section - As a subsection of State Machine Configuration + lines.push('\n### Tags') + const tagKeys = Object.keys(tags).sort() + if (tagKeys.length > 0) { + lines.push(`**Total Tags**: ${tagKeys.length}`) + lines.push('') + + // Separate Datadog tags from other tags + const ddTags = tagKeys.filter((key) => key.toUpperCase().startsWith('DD_')) + const otherTags = tagKeys.filter((key) => !key.toUpperCase().startsWith('DD_')) + + if (ddTags.length > 0) { + lines.push('### Datadog Tags') + for (const key of ddTags) { + lines.push(`- **${key}**: \`${tags[key]}\``) + } + } + + if (otherTags.length > 0) { + lines.push(ddTags.length > 0 ? '\n### Other Tags' : '### All Tags') + for (const key of otherTags) { + lines.push(`- **${key}**: \`${tags[key]}\``) + } + } + } else { + lines.push('No tags found on this state machine.') + } + + // CLI Information + lines.push('\n## CLI Information') + lines.push(`**Run Location**: \`${process.cwd()}\` `) + lines.push(`**CLI Version**: \`${version}\` `) + const timeString = new Date().toISOString().replace('T', ' ').replace('Z', '') + ' UTC' + lines.push(`**Timestamp**: \`${timeString}\` `) + lines.push(`**Framework**: \`${this.getFramework()}\``) + + // Command Options + lines.push('\n## Command Options') + const {region} = this.parseStateMachineArn(this.stateMachineArn!) + lines.push(`**Region**: \`${region}\` `) + lines.push(`**Max Executions**: \`${typeof this.maxExecutions === 'string' ? this.maxExecutions : '10'}\` `) + lines.push(`**With Logs**: \`${this.withLogs ? 
'Yes' : 'No'}\` `)
+    if (this.start || this.end) {
+      lines.push(`**Time Range**: \`${this.start || 'Any'}\` to \`${this.end || 'Now'}\` `)
+    }
+
+    // Log Subscription Filters
+    if (subscriptionFilters && subscriptionFilters.length > 0) {
+      lines.push('\n## Log Subscription Filters')
+      lines.push(`**Total Filters**: ${subscriptionFilters.length}`)
+      lines.push('')
+
+      for (const filter of subscriptionFilters) {
+        lines.push(`### ${filter.filterName || 'Unnamed Filter'}`)
+        lines.push(`**Destination ARN**: \`${filter.destinationArn || 'Not specified'}\` `)
+        lines.push(`**Filter Pattern**: \`${filter.filterPattern || 'No pattern (all logs)'}\` `)
+
+        // Check if it might be a Datadog forwarder based on the destination ARN
+        if (filter.destinationArn && filter.destinationArn.includes('datadog')) {
+          lines.push('**Note**: This appears to be a Datadog forwarder')
+        }
+
+        if (filter.roleArn) {
+          lines.push(`**Role ARN**: \`${filter.roleArn}\` `)
+        }
+
+        lines.push('')
+      }
+    }
+
+    writeFile(filePath, lines.join('\n'))
+  }
+
+  /**
+   * Detects the deployment framework used in the current directory
+   * @returns Framework name or 'Unknown'
+   */
+  private getFramework(): string {
+    const files = fs.readdirSync(process.cwd())
+
+    // Check for Serverless Framework
+    if (files.includes('serverless.yml') || files.includes('serverless.yaml') || files.includes('serverless.json')) {
+      return 'Serverless Framework'
+    }
+
+    // Check for AWS SAM
+    if (files.includes('template.yaml') || files.includes('template.yml') || files.includes('samconfig.toml')) {
+      return 'AWS SAM'
+    }
+
+    // Check for AWS CDK
+    if (files.includes('cdk.json')) {
+      return 'AWS CDK'
+    }
+
+    // Check for Terraform
+    if (files.some((f) => f.endsWith('.tf'))) {
+      return 'Terraform'
+    }
+
+    return 'Unknown'
+  }
+
+  /**
+   * Creates the output directory structure for flare files
+   * @returns Path to the created output directory
+   */
+  private async createOutputDirectory(): Promise<string> {
+    const timestamp = Date.now()
+    const stateMachineName = this.parseStateMachineArn(this.stateMachineArn!).name
+    const outputDirName = `stepfunctions-${stateMachineName}-${timestamp}`
+    const rootDir = FLARE_OUTPUT_DIRECTORY
+    const outputDir = `${rootDir}/${outputDirName}`
+
+    // Create root directory if it doesn't exist
+    if (!fs.existsSync(rootDir)) {
+      fs.mkdirSync(rootDir)
+    }
+
+    // Clean up old stepfunctions flare directories and zip files
+    const files = fs.readdirSync(rootDir)
+    for (const file of files) {
+      if (file.startsWith('stepfunctions-')) {
+        const filePath = `${rootDir}/${file}`
+        const stat = fs.statSync(filePath)
+        if (stat.isDirectory()) {
+          deleteFolder(filePath)
+        } else if (file.endsWith('.zip')) {
+          fs.unlinkSync(filePath)
+        }
+      }
+    }
+
+    // Create the new directory
+    createDirectories(outputDir, [])
+
+    return outputDir
+  }
+
+  /**
+   * Writes all collected data to output files
+   * @param outputDir Directory to write files to
+   * @param data Collected data to write
+   */
+  private async writeOutputFiles(
+    outputDir: string,
+    data: {
+      config: DescribeStateMachineCommandOutput
+      tags: Record<string, string>
+      executions: ExecutionListItem[]
+      subscriptionFilters?: SubscriptionFilter[]
+      logs?: Map<string, OutputLogEvent[]>
+    }
+  ): Promise<void> {
+    // Write state machine configuration
+    const configPath = `${outputDir}/state_machine_config.json`
+    writeFile(configPath, JSON.stringify(data.config, undefined, 2))
+
+    // Write tags
+    const tagsPath = `${outputDir}/tags.json`
+    writeFile(tagsPath, JSON.stringify(data.tags, undefined, 2))
+
+    // Write recent executions
+    const executionsPath = `${outputDir}/recent_executions.json`
+    writeFile(executionsPath, JSON.stringify(data.executions, undefined, 2))
+
+    // Write subscription filters if present
+    if (data.subscriptionFilters) {
+      const filtersPath = `${outputDir}/log_subscription_filters.json`
+      writeFile(filtersPath, JSON.stringify(data.subscriptionFilters, undefined, 2))
+    }
+
+    // Write logs if present
+    if (data.logs && data.logs.size > 0) {
+      const logsDir = 
`${outputDir}/logs` + createDirectories(logsDir, []) + + for (const [streamName, events] of data.logs) { + const safeStreamName = streamName.replace(/[^a-zA-Z0-9-_]/g, '_') + const logPath = `${logsDir}/${safeStreamName}.json` + writeFile(logPath, JSON.stringify(events, undefined, 2)) + } + } + } + + /** + * Parses a state machine ARN to extract region and name + * @param arn State machine ARN + * @returns Object with region and name + * @throws Error if ARN format is invalid + */ + private parseStateMachineArn(arn: string): {region: string; name: string} { + // ARN format: arn:aws:states:region:account:stateMachine:name + const parts = arn.split(':') + if (parts.length !== 7 || parts[0] !== 'arn' || parts[1] !== 'aws' || parts[2] !== 'states') { + throw new Error('Invalid state machine ARN format') + } + + return { + region: parts[3], + name: parts[6], + } + } + + /** + * Extracts CloudWatch log group name from state machine configuration + * @param config State machine configuration + * @returns Log group name or undefined if not configured + */ + private getLogGroupName(config: DescribeStateMachineCommandOutput): string | undefined { + if (!config.loggingConfiguration || !config.loggingConfiguration.destinations) { + return undefined + } + + for (const destination of config.loggingConfiguration.destinations) { + if (destination.cloudWatchLogsLogGroup && destination.cloudWatchLogsLogGroup.logGroupArn) { + // Extract log group name from ARN + // ARN format: arn:aws:logs:region:account:log-group:name:* + const arnParts = destination.cloudWatchLogsLogGroup.logGroupArn.split(':') + if (arnParts.length >= 7) { + // The log group name is at index 6, and index 7 might be '*' + return arnParts[6] + } + } + } + + return undefined + } + + /** + * Masks sensitive data in Amazon States Language definition + * @param definition ASL definition as JSON string + * @returns Masked ASL definition + */ + private maskAslDefinition(definition: string): string { + try { + const asl = 
JSON.parse(definition) + const maskedAsl = this.maskAslObject(asl) + + return JSON.stringify(maskedAsl, undefined, 2) + } catch { + // If not valid JSON, return as-is + return definition + } + } + + private maskAslObject(obj: any): any { + if (typeof obj !== 'object' || obj === undefined) { + return obj + } + + if (Array.isArray(obj)) { + return obj.map((item) => this.maskAslObject(item)) + } + + const masked: any = {} + const sensitiveKeys = ['ApiKey', 'SecretToken', 'Password', 'DatabasePassword', 'Token', 'Secret'] + + for (const [key, value] of Object.entries(obj)) { + // Check if key contains sensitive data + if (sensitiveKeys.some((sensitive) => key.includes(sensitive))) { + masked[key] = '[REDACTED]' + } else if (key === 'States' && typeof value === 'object') { + // Recursively mask states + masked[key] = this.maskAslObject(value) + } else if (key === 'Parameters' && typeof value === 'object') { + // Mask parameters object + masked[key] = this.maskAslObject(value) + } else { + masked[key] = this.maskAslObject(value) + } + } + + return masked + } + + /** + * Fetches detailed information about a specific execution + * @param sfnClient Step Functions client + * @param executionArn ARN of the execution + * @returns Execution details + */ + private async getExecutionDetails(sfnClient: SFNClient, executionArn: string): Promise { + const command = new DescribeExecutionCommand({ + executionArn, + }) + + return sfnClient.send(command) + } +}