From bc4807f7a5aebb045daae35ef307d1a413681994 Mon Sep 17 00:00:00 2001
From: hitesh
Date: Tue, 28 Jan 2025 16:08:19 +0530
Subject: [PATCH] scripts to simulate ec2 resources and test cloudwatch alarms

---
 ops-scripts/check_cloudwatch_logs.sh                | 113 ++++++++
 ops-scripts/simulate_volume_load.sh                 | 221 +++++++++++++++
 ops-scripts/stress_ec2_with_alert_check.sh          | 252 ++++++++++++++++++
 ops-scripts/trigger_and_verify_cloudwatch_alarms.sh | 166 ++++++++++++
 4 files changed, 752 insertions(+)
 create mode 100755 ops-scripts/check_cloudwatch_logs.sh
 create mode 100755 ops-scripts/simulate_volume_load.sh
 create mode 100755 ops-scripts/stress_ec2_with_alert_check.sh
 create mode 100755 ops-scripts/trigger_and_verify_cloudwatch_alarms.sh

diff --git a/ops-scripts/check_cloudwatch_logs.sh b/ops-scripts/check_cloudwatch_logs.sh
new file mode 100755
index 0000000..601dfe4
--- /dev/null
+++ b/ops-scripts/check_cloudwatch_logs.sh
#!/bin/bash
#
################################################################################
# Script Name: check_cloudwatch_logs.sh
# Description: Checks whether logs are still arriving in a set of AWS
#              CloudWatch Logs streams across multiple log groups. For each
#              configured group/stream pair the latest log event is fetched
#              and its age is compared against a caller-supplied window
#              (in minutes).
# Usage:       ./check_cloudwatch_logs.sh <LOG_WINDOW_PERIOD_MINUTES>
# Requires:    aws CLI, jq; AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY exported.
# Author: Hitesh Bhati
# Email: hitesh.bhati@napses.com
# Version: 1.6
# Date: 2025-01-16
################################################################################

# LOG_WINDOW_PERIOD must be supplied...
if [ -z "$1" ]; then
    echo "Error: LOG_WINDOW_PERIOD (in minutes) argument is required."
    exit 1
fi
# ...and must be a positive integer, since it feeds shell arithmetic below.
case "$1" in
    *[!0-9]*)
        echo "Error: LOG_WINDOW_PERIOD must be a positive integer (minutes)."
        exit 1
        ;;
esac

# Convert the LOG_WINDOW_PERIOD from minutes to seconds
LOG_WINDOW_PERIOD_MINUTES=$1
LOG_WINDOW_PERIOD_SECONDS=$((LOG_WINDOW_PERIOD_MINUTES * 60))

# AWS region: override via environment variable, default to ap-south-1.
AWS_REGION=${AWS_REGION:-"ap-south-1"}

# Credentials must be provided through environment variables.
if [ -z "$AWS_ACCESS_KEY_ID" ] || [ -z "$AWS_SECRET_ACCESS_KEY" ]; then
    echo "Error: AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables must be set."
    exit 1
fi

# Log groups and their log streams, one JSON object per array entry.
log_groups_and_streams=(
    '{"nidana": ["nidana"]}'
    '{"deon": ["deon-qa"]}'
    '{"magically": ["magically-qa"]}'
    '{"circle": ["circle-qa", "circle-prod"]}'
    # Add more log groups and streams as needed
)

# Identify the caller. Test the command directly instead of inspecting $?
# after the assignment (clearer and immune to intervening statements).
if CALLER_ID=$(aws sts get-caller-identity --region "$AWS_REGION" --output text --query 'Arn' 2>&1); then
    echo "Using AWS identity: $CALLER_ID"
else
    echo "No role found, using provided access key and secret key."
fi

# Current time in seconds since the Unix epoch (for age comparison).
CURRENT_TIME=$(date +%s)

RED='\033[0;31m'
NC='\033[0m' # No Color

for entry in "${log_groups_and_streams[@]}"; do
    # Extract the log group name and its streams with jq.
    # Quote "$entry" — unquoted expansion would word-split the JSON.
    LOG_GROUP=$(echo "$entry" | jq -r 'keys[0]')
    LOG_STREAMS=$(echo "$entry" | jq -r '.[keys[0]][]')

    echo "Checking logs in log group: $LOG_GROUP"

    for LOG_STREAM in $LOG_STREAMS; do
        echo "  Checking log stream: $LOG_STREAM"

        # Newest event timestamp (milliseconds since epoch). With the default
        # tail-first ordering, --limit 1 yields the most recent event.
        if ! LATEST_LOG_EVENT=$(aws logs get-log-events \
                --log-group-name "$LOG_GROUP" \
                --log-stream-name "$LOG_STREAM" \
                --limit 1 \
                --region "$AWS_REGION" \
                --query 'events[0].timestamp' \
                --output text 2>&1); then
            echo "  Error: Failed to retrieve log events. Details: $LATEST_LOG_EVENT"
            continue
        fi

        # The CLI prints the literal string "None" when the stream is empty.
        if [ "$LATEST_LOG_EVENT" == "None" ]; then
            echo "  No logs found in the stream '$LOG_STREAM' under the log group '$LOG_GROUP'."
            continue
        fi

        # CloudWatch timestamps are milliseconds; convert to seconds.
        LATEST_LOG_TIME=$((LATEST_LOG_EVENT / 1000))
        TIME_DIFF=$((CURRENT_TIME - LATEST_LOG_TIME))

        # NOTE: 'date -d @<epoch>' is GNU date; on BSD/macOS use 'date -r'.
        echo "  Last log received at $(date -d "@$LATEST_LOG_TIME")"

        if [ "$TIME_DIFF" -le "$LOG_WINDOW_PERIOD_SECONDS" ]; then
            echo "  Logs are being received within the last $LOG_WINDOW_PERIOD_MINUTES minutes."
        else
            # Highlight the stale-stream case in red.
            echo -e "  ${RED}No logs received in the last $LOG_WINDOW_PERIOD_MINUTES minutes.${NC}"
        fi
    done
done
diff --git a/ops-scripts/simulate_volume_load.sh b/ops-scripts/simulate_volume_load.sh
new file mode 100755
index 0000000..57d21d8
--- /dev/null
+++ b/ops-scripts/simulate_volume_load.sh
#!/bin/bash

##################################################################################
# Script Name: simulate_volume_load.sh
# Description: Connects to EC2 instances in a specified AWS region, simulates a
#              disk load by creating a temporary file that raises root-volume
#              usage to roughly 93% of total capacity, then removes the file
#              again, reporting disk usage before and after. The simulation can
#              run on one instance or on all instances in parallel.
#
# Arguments:
#   --aws-profile  AWS CLI profile to use for accessing AWS resources (required).
#   --region       AWS region where the EC2 instances are located (required).
#   --parallel     Optionally run the disk load simulation on all instances in parallel.
#
# Author: Hitesh Bhati
# Version: 1.1
# Contact: hitesh.bhati@napses.com
#
# Usage Example:
#   ./simulate_volume_load.sh --aws-profile 474888828713_AdministratorAccess --region ap-southeast-1
##################################################################################

# BUGFIX: LOG_FILE must be defined before argument parsing — log_error()
# appends to it, and a parsing error would otherwise hit an ambiguous
# redirect (>> "").
LOG_FILE="volume_simulation.log"

# Print usage and exit 0.
help() {
    echo "Usage: $0 --aws-profile <profile> --region <region> [--parallel]"
    echo "This script simulates a disk load on EC2 instances by creating a temporary file to simulate load."
    echo "You can run the simulation on one instance or all instances in parallel."
    echo
    echo "Mandatory Arguments:"
    echo "  --aws-profile   AWS CLI profile to use."
    echo "  --region        AWS region where the EC2 instances are located."
    echo
    echo "Optional Arguments:"
    echo "  --parallel      Run the disk load simulation on all instances in parallel."
    echo
    echo "Example:"
    echo "  $0 --aws-profile 474888828713_AdministratorAccess --region ap-southeast-1"
    echo "  $0 --aws-profile 474888828713_AdministratorAccess --region ap-southeast-1 --parallel"
    echo
    exit 0
}

# Log a timestamped message to both console and log file.
log() {
    local message="$1"
    echo "[$(date +'%Y-%m-%d %H:%M:%S')] $message" | tee -a "$LOG_FILE"
}

# Log a timestamped error — red on the console, plain in the log file.
log_error() {
    local message="$1"
    echo -e "\e[31m[$(date +'%Y-%m-%d %H:%M:%S')] ERROR: $message\e[0m"
    echo "[$(date +'%Y-%m-%d %H:%M:%S')] ERROR: $message" >> "$LOG_FILE"
}

# Print the IDs of all running EC2 instances for the given profile/region.
fetch_instance_ids() {
    local aws_profile="$1"
    local aws_region="$2"
    aws ec2 describe-instances \
        --profile "$aws_profile" \
        --region "$aws_region" \
        --filters "Name=instance-state-name,Values=running" \
        --query "Reservations[*].Instances[*].InstanceId" \
        --output text
}

# Validate and parse command-line arguments
if [[ $# -eq 0 ]]; then
    help
fi

while [[ $# -gt 0 ]]; do
    case "$1" in
        --aws-profile)
            AWS_PROFILE="$2"
            shift 2
            ;;
        --region)
            AWS_REGION="$2"
            shift 2
            ;;
        --parallel)
            PARALLEL=true
            shift
            ;;
        --help)
            help
            ;;
        *)
            log_error "Unknown argument: $1"
            help
            exit 1
            ;;
    esac
done

# Validate mandatory arguments
if [[ -z "$AWS_PROFILE" || -z "$AWS_REGION" ]]; then
    log_error "Both --aws-profile and --region are mandatory arguments."
    help
    exit 1
fi

# Default variables
IDENTITY_PATH="/home/hitesh/napses/deon/scripts/config/deon.pem"  # SSH key path
REMOTE_USER="ubuntu"                                              # Remote SSH user for EC2 (Ubuntu default)
START_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")                       # Start time for CloudWatch metrics (optional)

# Initialize (truncate) the log file.
echo "Script execution started at $(date)" > "$LOG_FILE"

log "Fetching EC2 instances in region: $AWS_REGION"
INSTANCE_IDS=$(fetch_instance_ids "$AWS_PROFILE" "$AWS_REGION")

if [[ -z "$INSTANCE_IDS" ]]; then
    log_error "No running EC2 instances found in region $AWS_REGION."
    exit 1
fi

log "Found EC2 instances: $INSTANCE_IDS"

# Run the disk-fill simulation on one instance over SSH, tunneled through
# EC2 Instance Connect.
simulate_disk_load() {
    local instance_id="$1"

    log "Connecting to instance $instance_id and starting disk load simulation..."

    ssh -i "$IDENTITY_PATH" "$REMOTE_USER@$instance_id" \
        -o ProxyCommand="aws ec2-instance-connect open-tunnel --instance-id $instance_id --profile $AWS_PROFILE --region $AWS_REGION" << 'EOF'

    echo "=============================================================="
    echo "Starting disk load simulation on EC2 instance..."
    echo "=============================================================="

    # Step 1: Show current disk usage in human-readable format.
    echo "Step 1: Checking current disk usage..."
    df -h /
    echo "=============================================================="

    # Step 2: Read total/used/available 1K blocks for the root volume.
    df_output=$(df /)
    total_kb=$(echo "$df_output" | awk 'NR==2 {print $2}')
    used_kb=$(echo "$df_output" | awk 'NR==2 {print $3}')
    available_kb=$(echo "$df_output" | awk 'NR==2 {print $4}')

    # Step 3: BUGFIX — the goal is 93% *total* usage. The original computed
    # 93% of the *available* space (and then capped at 90% of total), which
    # lands at a different, lower usage level. Target size is the gap between
    # current usage and 93% of the volume.
    target_kb=$((total_kb * 93 / 100 - used_kb))

    # Never try to write more than is actually available.
    if [ "$target_kb" -gt "$available_kb" ]; then
        target_kb=$available_kb
    fi

    if [ "$target_kb" -le 0 ]; then
        echo "Disk usage is already at or above 93%; nothing to do."
    else
        # Convert to MB for dd (1MB = 1024*1024 bytes; target_kb is in KB).
        desired_file_size_mb=$((target_kb / 1024))

        # Step 4: Show the calculation result.
        echo "Step 4: Calculating the size of the temporary file..."
        echo "Desired file size to bring disk usage to 93%: ${desired_file_size_mb}MB"
        echo "=============================================================="

        # Step 5: Create the temporary file of the desired size.
        echo "Step 5: Creating a temporary file with the calculated size..."
        dd if=/dev/zero of=/tmp/tempfile bs=1M count=$desired_file_size_mb status=progress
        echo "Temporary file created. Size: ${desired_file_size_mb}MB"
        echo "=============================================================="

        # Step 6: Show disk usage again after the temporary file is created.
        echo "Step 6: Checking disk usage after creating the temporary file..."
        df -h /
        echo "=============================================================="

        # Step 7: Remove the temporary file to free up space.
        echo "Step 7: Removing the temporary file to free up space..."
        rm /tmp/tempfile
        echo "Temporary file removed."
        echo "=============================================================="

        # Step 8: Show disk usage again after the temporary file is removed.
        echo "Step 8: Checking disk usage after removing the temporary file..."
        df -h /
        echo "=============================================================="
    fi

    echo "Disk load simulation complete."

EOF
}

# Run the disk load simulation
if [[ "$PARALLEL" == true ]]; then
    log "Running the disk load simulation on all instances in parallel..."
    for INSTANCE_ID in $INSTANCE_IDS; do
        simulate_disk_load "$INSTANCE_ID" &
    done
    wait  # Wait for all background jobs to finish
else
    # BUGFIX: pick the instance *before* logging it — the original logged
    # $INSTANCE_ID while it was still unset.
    FIRST_INSTANCE_ID=$(echo "$INSTANCE_IDS" | head -n 1)
    log "Running the disk load simulation on a single instance ($FIRST_INSTANCE_ID)..."
    simulate_disk_load "$FIRST_INSTANCE_ID"
fi

log "Script execution completed at $(date)."
diff --git a/ops-scripts/stress_ec2_with_alert_check.sh b/ops-scripts/stress_ec2_with_alert_check.sh
new file mode 100755
index 0000000..4173d3c
--- /dev/null
+++ b/ops-scripts/stress_ec2_with_alert_check.sh
#!/bin/bash

##################################################################################
# Script Name: stress_ec2_with_alert_check.sh
# Description: Performs a stress test on EC2 instances by simulating high CPU
#              load, then checks whether the CloudWatch CPU-utilization alarms
#              associated with those instances were triggered. Instances can be
#              stressed one at a time or all in parallel.
#
# The script performs the following:
#   1. Fetches a list of EC2 instances that are in the 'running' state.
#   2. Executes stress tests to simulate high CPU utilization (above 80%).
#   3. Waits 5 minutes to allow alarms to trigger.
#   4. Checks CloudWatch alarm history for OK -> ALARM transitions since start.
#
# Author: Hitesh Bhati
# Version: 2.8
# Date Created: 2025-01-17
# Contact: hitesh.bhati@napses.com
##################################################################################

# Print usage and exit 0.
help() {
    echo "Usage: $0 [OPTIONS] AWS_PROFILE AWS_REGION"
    echo "Perform a stress test on EC2 instances and check for CloudWatch CPU utilization alarms."
    echo
    echo "Mandatory Arguments:"
    echo "  AWS_PROFILE        AWS CLI profile to use."
    echo "  AWS_REGION         AWS region where instances are located."
    echo
    echo "Optional Arguments:"
    echo "  -a, --alarm-name   CloudWatch alarm name to check (default: 'UI-EC2-CPU-Utilization-High')."
    echo "  -p, --parallel     Stress all instances in parallel (default: 'no')."
    echo "  -h, --help         Show this help message and exit."
    echo
    echo "Examples:"
    echo "  $0 my-aws-profile ap-southeast-1"
    echo "  $0 my-aws-profile ap-southeast-1 -a MyAlarmName -p yes"
    echo
    exit 0
}

# Default values
AWS_PROFILE=""
AWS_REGION=""
ALARM_NAME="UI-EC2-CPU-Utilization-High"
STRESS_ALL_PARALLEL="no"
IDENTITY_PATH="/home/hitesh/napses/deon/scripts/config/deon.pem"  # SSH key path
REMOTE_USER="ubuntu"                                              # Remote SSH user for EC2 (Ubuntu default)

# Parse command-line arguments (positional: profile then region).
while [[ $# -gt 0 ]]; do
    case "$1" in
        -h|--help)
            help
            ;;
        -a|--alarm-name)
            ALARM_NAME="$2"
            shift 2
            ;;
        -p|--parallel)
            STRESS_ALL_PARALLEL="$2"
            shift 2
            ;;
        *)
            if [[ -z "$AWS_PROFILE" ]]; then
                AWS_PROFILE="$1"
            elif [[ -z "$AWS_REGION" ]]; then
                AWS_REGION="$1"
            else
                echo "Error: Unknown argument '$1'."
                help  # help exits; the original's trailing 'exit 1' was unreachable
            fi
            shift
            ;;
    esac
done

# Validate mandatory arguments
if [[ -z "$AWS_PROFILE" || -z "$AWS_REGION" ]]; then
    echo "Error: AWS_PROFILE and AWS_REGION are mandatory arguments."
    help
fi

# Variables
INSTANCE_IDS=""                                              # Fetched instance IDs
STRESSED_INSTANCE_IDS=""                                     # Instances that were stressed
START_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")                  # Start time for CloudWatch metrics
REPORT_FILE="stress_test_report_$(date +'%Y%m%d_%H%M%S').log"

# Log a timestamped message to console and report file.
log() {
    local message="$1"
    echo "[$(date +'%Y-%m-%d %H:%M:%S')] $message" | tee -a "$REPORT_FILE"
}

# Populate INSTANCE_IDS with all running instances; exit if none.
fetch_instance_ids() {
    log "Fetching instance IDs in region: $AWS_REGION"
    INSTANCE_IDS=$(aws ec2 describe-instances \
        --profile "$AWS_PROFILE" \
        --region "$AWS_REGION" \
        --filters "Name=instance-state-name,Values=running" \
        --query "Reservations[*].Instances[*].InstanceId" \
        --output text)

    if [ -z "$INSTANCE_IDS" ]; then
        log "Error: No running instances found."
        exit 1
    fi
    log "Fetched instance IDs: $INSTANCE_IDS"
}

# Return 0 if the instance ID exists, 1 otherwise.
validate_instance_id() {
    local instance_id="$1"
    log "Validating instance ID: $instance_id"
    if ! aws ec2 describe-instances \
            --profile "$AWS_PROFILE" \
            --region "$AWS_REGION" \
            --instance-ids "$instance_id" &> /dev/null; then
        log "Error: Instance ID $instance_id is invalid or does not exist."
        return 1
    fi
    log "Instance ID $instance_id is valid."
}

# Populate ALARMS with the CloudWatch alarms dimensioned on this instance.
get_alarms_for_instance() {
    local instance_id="$1"
    log "Fetching CloudWatch alarms associated with instance: $instance_id"
    ALARMS=$(aws cloudwatch describe-alarms \
        --profile "$AWS_PROFILE" \
        --region "$AWS_REGION" \
        --query "MetricAlarms[?Dimensions[?Name=='InstanceId' && Value=='$instance_id']].AlarmName" \
        --output text)

    if [ -z "$ALARMS" ]; then
        log "No alarms found for instance: $instance_id."
    else
        log "Found alarms: $ALARMS"
    fi
}

# Report whether the alarm transitioned OK -> ALARM since START_TIME.
get_alarm_triggered_history() {
    local alarm_name="$1"
    local instance_id="$2"
    local end_time
    end_time=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

    log "Checking CloudWatch alarm history for alarm: $alarm_name on instance: $instance_id"
    latest_timestamp=$(aws cloudwatch describe-alarm-history \
        --profile "$AWS_PROFILE" \
        --region "$AWS_REGION" \
        --alarm-name "$alarm_name" \
        --start-date "$START_TIME" \
        --end-date "$end_time" \
        --history-item-type "StateUpdate" \
        --query "AlarmHistoryItems[?HistorySummary=='Alarm updated from OK to ALARM'] | [0].Timestamp" \
        --output text)

    # Lexicographic '>' is valid here because both strings are ISO-8601 UTC.
    if [[ "$latest_timestamp" != "None" && "$latest_timestamp" > "$START_TIME" ]]; then
        log "Alarm triggered for instance $instance_id at $latest_timestamp."
    else
        log "No matching alarm history found for instance $instance_id."
    fi
}

# Run the CPU stress test on one instance over SSH (EC2 Instance Connect tunnel).
run_stress_test() {
    local instance_id="$1"
    log "Starting stress test on instance: $instance_id"

    ssh -i "$IDENTITY_PATH" "$REMOTE_USER@$instance_id" \
        -o ProxyCommand="aws ec2-instance-connect open-tunnel --instance-id $instance_id --profile $AWS_PROFILE --region $AWS_REGION" \
        -t << 'EOF'
    # Install dependencies if not present
    if ! command -v stress-ng &> /dev/null; then
        echo "Installing stress-ng..."
        sudo apt-get update -y || { echo "Failed to update package list"; exit 1; }
        sudo apt-get install -y stress-ng || { echo "Failed to install stress-ng"; exit 1; }
    fi

    # Run stress-ng to push CPU load above 80% for a sustained period
    echo "Simulating high CPU load (over 80%)..."

    sudo stress-ng --cpu 16 --cpu-method fft --timeout 300s --metrics-brief || { echo "Failed to run stress-ng"; exit 1; }
    sudo stress-ng --cpu 16 --cpu-method matrixprod --timeout 300s --metrics-brief || { echo "Failed to run stress-ng"; exit 1; }
EOF

    if [ $? -ne 0 ]; then
        log "Error: Stress test failed on instance $instance_id."
        return 1
    fi
    log "Stress test completed on instance $instance_id."
}

# Main driver: fetch instances, stress them, then verify alarms.
main() {
    log "Starting script execution."
    fetch_instance_ids

    if [[ "$STRESS_ALL_PARALLEL" == "yes" ]]; then
        log "Stressing all instances in parallel."
        for INSTANCE_ID in $INSTANCE_IDS; do
            if validate_instance_id "$INSTANCE_ID"; then
                run_stress_test "$INSTANCE_ID" &
                # BUGFIX: record the instance in the parent shell. The original
                # appended inside the backgrounded function, i.e. in a subshell,
                # so STRESSED_INSTANCE_IDS stayed empty and no alarms were
                # ever checked in parallel mode.
                STRESSED_INSTANCE_IDS="$STRESSED_INSTANCE_IDS $INSTANCE_ID"
            fi
        done
        # Wait for all background processes to complete
        wait
    else
        log "Stressing only one instance."
        INSTANCE_ID=$(echo "$INSTANCE_IDS" | head -n 1)  # First instance from the list
        if validate_instance_id "$INSTANCE_ID"; then
            run_stress_test "$INSTANCE_ID" && \
                STRESSED_INSTANCE_IDS="$STRESSED_INSTANCE_IDS $INSTANCE_ID"
        fi
    fi

    # BUGFIX: wait once for CloudWatch to evaluate, instead of sleeping 5
    # minutes inside the per-alarm loop (which multiplied the wait by the
    # number of alarms).
    log "Stress tests completed. Waiting 5 minutes for CloudWatch alarms to evaluate..."
    sleep 5m

    for INSTANCE_ID in $STRESSED_INSTANCE_IDS; do
        log "Checking alarms for instance: $INSTANCE_ID"
        get_alarms_for_instance "$INSTANCE_ID"

        if [ -n "$ALARMS" ]; then
            # BUGFIX: use a dedicated loop variable; the original reused
            # ALARM_NAME and clobbered the value supplied via --alarm-name.
            for alarm in $ALARMS; do
                get_alarm_triggered_history "$alarm" "$INSTANCE_ID"
            done
        fi
    done

    log "Script execution completed. Report saved to $REPORT_FILE."
}

# Execute the main function
main
diff --git a/ops-scripts/trigger_and_verify_cloudwatch_alarms.sh b/ops-scripts/trigger_and_verify_cloudwatch_alarms.sh
new file mode 100755
index 0000000..3b1f3b2
--- /dev/null
+++ b/ops-scripts/trigger_and_verify_cloudwatch_alarms.sh
#!/bin/bash

##################################################################################
# Script Name: trigger_and_verify_cloudwatch_alarms.sh
# Description: Manually triggers CloudWatch alarms associated with running EC2
#              instances in a specific region, and verifies whether the alarms
#              were triggered successfully.
#
# The script performs the following steps:
#   1. Fetches a list of running EC2 instances in the specified region.
#   2. Retrieves the CloudWatch alarms associated with each EC2 instance.
#   3. Manually triggers each alarm by setting the state to 'ALARM'.
#   4. Verifies the trigger by checking CloudWatch alarm history.
#   5. Logs all actions and results to a log file for reference.
#
# Author: Hitesh Bhati
# Version: 1.3
# Date Created: 2025-01-12
# Contact: hitesh.bhati@napses.com
##################################################################################
# (BUGFIX: the header comment block above was previously left unterminated.)

# BUGFIX: LOG_FILE must exist before log()/log_error() run — both are called
# during argument parsing below, and 'tee -a ""' would fail.
LOG_FILE="cloudwatch_alarm_trigger_$(date +'%Y%m%d_%H%M%S').log"
touch "$LOG_FILE"

# Print usage and exit 0.
help() {
    echo "Usage: $0 --aws-profile <profile> --region <region>"
    echo "Manually trigger CloudWatch alarms associated with running EC2 instances and verify their triggering."
    echo
    echo "Mandatory Arguments:"
    echo "  --aws-profile   AWS CLI profile to use."
    echo "  --region        AWS region where instances are located."
    echo
    echo "Example:"
    echo "  $0 --aws-profile 474888828713_AdministratorAccess --region ap-southeast-1"
    echo
    exit 0
}

# Log a timestamped message to console and log file.
log() {
    local message="$1"
    echo "[$(date +'%Y-%m-%d %H:%M:%S')] $message" | tee -a "$LOG_FILE"
}

# Log a timestamped error in red to console and log file.
log_error() {
    local message="$1"
    echo -e "\e[31m[$(date +'%Y-%m-%d %H:%M:%S')] ERROR: $message\e[0m" | tee -a "$LOG_FILE"
}

# Validate and parse command-line arguments
if [[ $# -eq 0 ]]; then
    help
fi

while [[ $# -gt 0 ]]; do
    case "$1" in
        --aws-profile)
            AWS_PROFILE="$2"
            shift 2
            ;;
        --region)
            AWS_REGION="$2"
            shift 2
            ;;
        --help)
            help
            ;;
        *)
            log_error "Unknown argument: $1"
            help
            exit 1
            ;;
    esac
done

# Validate mandatory arguments
if [[ -z "$AWS_PROFILE" || -z "$AWS_REGION" ]]; then
    log_error "Both --aws-profile and --region are mandatory arguments."
    help
    exit 1
fi

# Script start time (ISO-8601 UTC, used as the alarm-history window start).
START_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
log "Script start time: $START_TIME"

# Get all running instances
log "Fetching running instances in region: $AWS_REGION"
INSTANCE_IDS=$(aws ec2 describe-instances \
    --profile "$AWS_PROFILE" \
    --region "$AWS_REGION" \
    --filters "Name=instance-state-name,Values=running" \
    --query "Reservations[*].Instances[*].InstanceId" \
    --output text)

if [[ -z "$INSTANCE_IDS" ]]; then
    log_error "No running instances found."
    exit 1
fi
log "Found running instances: $INSTANCE_IDS"

# Iterate through each instance and trigger associated alarms
for INSTANCE_ID in $INSTANCE_IDS; do
    log "Processing instance: $INSTANCE_ID"

    # Get alarms dimensioned on this instance.
    ALARM_NAMES=$(aws cloudwatch describe-alarms \
        --profile "$AWS_PROFILE" \
        --region "$AWS_REGION" \
        --query "MetricAlarms[?Dimensions[?Name=='InstanceId' && Value=='$INSTANCE_ID']].AlarmName" \
        --output text)

    if [[ -z "$ALARM_NAMES" ]]; then
        log "No CloudWatch alarms found for instance: $INSTANCE_ID"
        continue
    fi

    log "Found alarms for instance $INSTANCE_ID: $ALARM_NAMES"

    # Trigger each alarm
    for ALARM_NAME in $ALARM_NAMES; do
        log "Triggering alarm: $ALARM_NAME"
        if ! aws cloudwatch set-alarm-state \
                --alarm-name "$ALARM_NAME" \
                --state-value ALARM \
                --state-reason "Manually setting alarm state for testing" \
                --profile "$AWS_PROFILE" \
                --region "$AWS_REGION"; then
            log_error "Failed to trigger alarm: $ALARM_NAME"
            continue
        fi

        log "Successfully triggered alarm: $ALARM_NAME"

        # Verify if the alarm was triggered
        END_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
        log "Verifying alarm triggering for: $ALARM_NAME"
        LAST_TRIGGERED_TIME=$(aws cloudwatch describe-alarm-history \
            --profile "$AWS_PROFILE" \
            --region "$AWS_REGION" \
            --alarm-name "$ALARM_NAME" \
            --start-date "$START_TIME" \
            --end-date "$END_TIME" \
            --history-item-type "StateUpdate" \
            --query "AlarmHistoryItems[?HistorySummary=='Alarm updated from OK to ALARM'] | [0].Timestamp" \
            --output text)

        # BUGFIX: with --output text the AWS CLI prints the literal string
        # "None" (not an empty string) when no history item matches, so the
        # original '-z' test never detected the failure case.
        if [[ -z "$LAST_TRIGGERED_TIME" || "$LAST_TRIGGERED_TIME" == "None" ]]; then
            log_error "Alarm $ALARM_NAME was not triggered."
        else
            log "Alarm $ALARM_NAME was last triggered at: $LAST_TRIGGERED_TIME"
            # Lexicographic '>' is valid for same-format ISO-8601 UTC strings.
            if [[ "$LAST_TRIGGERED_TIME" > "$START_TIME" ]]; then
                log "Verification successful: Alarm $ALARM_NAME was triggered after the script started."
            else
                log_error "Verification failed: Alarm $ALARM_NAME was not triggered after the script started."
            fi
        fi
    done
done

log "Script execution completed. Logs saved to: $LOG_FILE"