Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 113 additions & 0 deletions ops-scripts/check_cloudwatch_logs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#!/bin/bash
#
#################################################################################
# Script Name: check_cloudwatch_logs.sh
# Description: This script checks if logs are being received in specified
# AWS CloudWatch Logs streams across multiple log groups. It retrieves
# the latest log event for each stream and checks if the last log
# was received within the specified window period in minutes.
# Author: Hitesh Bhati
# Email: hitesh.bhati@napses.com
# Version: 1.5
# Date: 2025-01-16
################################################################################

# Check if LOG_WINDOW_PERIOD argument is passed
if [ -z "$1" ]; then
echo "Error: LOG_WINDOW_PERIOD (in minutes) argument is required."
exit 1
fi

# Convert the LOG_WINDOW_PERIOD from minutes to seconds
LOG_WINDOW_PERIOD_MINUTES=$1
LOG_WINDOW_PERIOD_SECONDS=$((LOG_WINDOW_PERIOD_MINUTES * 60))

# Define AWS region (optional, can also be passed via environment variable)
AWS_REGION=${AWS_REGION:-"ap-south-1"} # Default to ap-south-1 if not set

# Check if AWS credentials are provided through environment variables
if [ -z "$AWS_ACCESS_KEY_ID" ] || [ -z "$AWS_SECRET_ACCESS_KEY" ]; then
echo "Error: AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables must be set."
exit 1
fi

# Define an array of log groups and their corresponding log streams
log_groups_and_streams=(
'{"nidana": ["nidana"]}'
'{"deon": ["deon-qa"]}'
'{"magically": ["magically-qa"]}'
'{"circle": ["circle-qa", "circle-prod"]}'
# Add more log groups and streams as needed
)

# Fetch the caller identity to get user details associated with the provided credentials
CALLER_ID=$(aws sts get-caller-identity --region "$AWS_REGION" --output text --query 'Arn' 2>&1)

# Check if the `aws sts get-caller-identity` command was successful
if [ $? -eq 0 ]; then
# If successful, we print the identity (IAM user or role) associated with the credentials
echo "Using AWS identity: $CALLER_ID"
else
# If it fails (no role or user), use the provided access key and secret key directly
echo "No role found, using provided access key and secret key."
fi

# Get the current time in seconds since Unix epoch (for comparison later)
CURRENT_TIME=$(date +%s)

# Red color code
RED='\033[0;31m'
# Reset color code
NC='\033[0m' # No Color

# Loop through each log group and its associated log streams
for entry in "${log_groups_and_streams[@]}"; do
# Extract the log group name and the log streams using `jq`
LOG_GROUP=$(echo $entry | jq -r 'keys[0]')
LOG_STREAMS=$(echo $entry | jq -r '.[keys[0]][]') # Use [] to extract elements from the array

echo "Checking logs in log group: $LOG_GROUP"

# Loop through each log stream for the current log group
for LOG_STREAM in $LOG_STREAMS; do
echo " Checking log stream: $LOG_STREAM"

# Get the latest log event from the stream
LATEST_LOG_EVENT=$(aws logs get-log-events \
--log-group-name "$LOG_GROUP" \
--log-stream-name "$LOG_STREAM" \
--limit 1 \
--region "$AWS_REGION" \
--query 'events[0].timestamp' \
--output text 2>&1)

# Check if the AWS command was successful
if [ $? -ne 0 ]; then
echo " Error: Failed to retrieve log events. Details: $LATEST_LOG_EVENT"
continue
fi

# Check if the log stream has any events
if [ "$LATEST_LOG_EVENT" == "None" ]; then
echo " No logs found in the stream '$LOG_STREAM' under the log group '$LOG_GROUP'."
continue
fi

# Convert the latest log event timestamp into seconds
LATEST_LOG_TIME=$(($LATEST_LOG_EVENT / 1000))

# Calculate the time difference between the current time and the last log received
TIME_DIFF=$((CURRENT_TIME - LATEST_LOG_TIME))

# Print the time when the last log was received
echo " Last log received at $(date -d @$LATEST_LOG_TIME)"

# Check if the last log was received within the specified window period
if [ "$TIME_DIFF" -le "$LOG_WINDOW_PERIOD_SECONDS" ]; then
echo " Logs are being received within the last $LOG_WINDOW_PERIOD_MINUTES minutes."
else
# Print the "No logs received" message in red
echo -e " ${RED}No logs received in the last $LOG_WINDOW_PERIOD_MINUTES minutes.${NC}"
fi
done
done
221 changes: 221 additions & 0 deletions ops-scripts/simulate_volume_load.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
#!/bin/bash

##################################################################################
# Script Name: simulate_volume_load.sh
# Description: This script connects to EC2 instances in a specified AWS region,
# simulates a disk load by creating a temporary file to fill up
# the disk to 93% capacity, and then monitors the disk usage before
# and after the operation. The simulation can be executed on one or
# all instances in parallel.
#
# Arguments:
# - --aws-profile: AWS CLI profile to use for accessing AWS resources (required).
# - --region: AWS region where the EC2 instances are located (required).
# - --parallel: Optionally, run the disk load simulation on all instances in parallel.
#
# Author: [Your Name/Your Company]
# Version: 1.0
# Date Created: [Date]
# Last Modified: [Date]
# Contact: [Your Contact Information]
#
# Usage Example:
# ./simulate_volume_load.sh --aws-profile 474888828713_AdministratorAccess --region ap-southeast-1
#
# License: [Your License Information, e.g., MIT License]
##################################################################################

# Help function
help() {
echo "Usage: $0 --aws-profile <AWS_PROFILE> --region <AWS_REGION> [--parallel]"
echo "This script simulates a disk load on EC2 instances by creating a temporary file to simulate load."
echo "You can run the simulation on one instance or all instances in parallel."
echo
echo "Mandatory Arguments:"
echo " --aws-profile AWS CLI profile to use."
echo " --region AWS region where the EC2 instances are located."
echo
echo "Optional Arguments:"
echo " --parallel Run the disk load simulation on all instances in parallel."
echo
echo "Example:"
echo " $0 --aws-profile 474888828713_AdministratorAccess --region ap-southeast-1"
echo " $0 --aws-profile 474888828713_AdministratorAccess --region ap-southeast-1 --parallel"
echo
exit 0
}

# Function to log messages with timestamp (to both console and log file)
log() {
local message="$1"
echo "[$(date +'%Y-%m-%d %H:%M:%S')] $message"
echo "[$(date +'%Y-%m-%d %H:%M:%S')] $message" >> "$LOG_FILE"
}

# Function to log errors in red color (to both console and log file)
log_error() {
local message="$1"
echo -e "\e[31m[$(date +'%Y-%m-%d %H:%M:%S')] ERROR: $message\e[0m"
echo "[$(date +'%Y-%m-%d %H:%M:%S')] ERROR: $message" >> "$LOG_FILE"
}

# Function to fetch all EC2 instance IDs in the specified region
fetch_instance_ids() {
local aws_profile="$1"
local aws_region="$2"
instance_ids=$(aws ec2 describe-instances \
--profile "$aws_profile" \
--region "$aws_region" \
--filters "Name=instance-state-name,Values=running" \
--query "Reservations[*].Instances[*].InstanceId" \
--output text)

echo "$instance_ids"
}

# Validate and parse command-line arguments
if [[ $# -eq 0 ]]; then
help
fi

while [[ $# -gt 0 ]]; do
case "$1" in
--aws-profile)
AWS_PROFILE="$2"
shift 2
;;
--region)
AWS_REGION="$2"
shift 2
;;
--parallel)
PARALLEL=true
shift
;;
--help)
help
;;
*)
log_error "Unknown argument: $1"
help
exit 1
;;
esac
done

# Validate mandatory arguments
if [[ -z "$AWS_PROFILE" || -z "$AWS_REGION" ]]; then
log_error "Both --aws-profile and --region are mandatory arguments."
help
exit 1
fi

# Default variables
IDENTITY_PATH="/home/hitesh/napses/deon/scripts/config/deon.pem" # SSH key path
REMOTE_USER="ubuntu" # Remote SSH user for EC2 (Ubuntu default)
START_TIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ") # Start time for CloudWatch metrics (optional for alarm checking)
LOG_FILE="volume_simulation.log" # Log file name

# Initialize the log file
echo "Script execution started at $(date)" > "$LOG_FILE"

# Fetch the EC2 instances
log "Fetching EC2 instances in region: $AWS_REGION"
INSTANCE_IDS=$(fetch_instance_ids "$AWS_PROFILE" "$AWS_REGION")

if [[ -z "$INSTANCE_IDS" ]]; then
log_error "No running EC2 instances found in region $AWS_REGION."
exit 1
fi

log "Found EC2 instances: $INSTANCE_IDS"

# Function to simulate the disk load on an individual instance
simulate_disk_load() {
local instance_id="$1"

log "Connecting to instance $instance_id and starting disk load simulation..."

ssh -i "$IDENTITY_PATH" "$REMOTE_USER@$instance_id" \
-o ProxyCommand="aws ec2-instance-connect open-tunnel --instance-id $instance_id --profile $AWS_PROFILE --region $AWS_REGION" << 'EOF'

# Decorative Header
echo "=============================================================="
echo "Starting disk load simulation on EC2 instance..."
echo "=============================================================="

# Step 1: Show current disk usage in human-readable format
echo "Step 1: Checking current disk usage..."
df -h /
echo "=============================================================="

# Step 2: Execute df command and store total and available space into variables
df_output=$(df /) # Get the disk usage for the root volume
total_space=$(echo "$df_output" | awk 'NR==2 {print $2}') # Total space in 1K blocks
available_space=$(echo "$df_output" | awk 'NR==2 {print $4}') # Available space in 1K blocks

# Convert from 1K blocks to bytes for easier calculations
total_space_bytes=$((total_space * 1024)) # Total space in bytes
available_space_bytes=$((available_space * 1024)) # Available space in bytes

# Step 3: Calculate the desired output file size to bring volume usage to 93%
desired_file_size_bytes=$((available_space_bytes * 93 / 100))

# Make sure we don’t exceed 90% of total space
max_file_size_bytes=$((total_space_bytes * 90 / 100))

# Use the minimum of the calculated size and the 90% of total space
if [ $desired_file_size_bytes -gt $max_file_size_bytes ]; then
desired_file_size_bytes=$max_file_size_bytes
fi

# Convert the file size back to MB for the dd command (1MB = 1024*1024 bytes)
desired_file_size_mb=$((desired_file_size_bytes / 1024 / 1024))

# Step 4: Show the calculation result
echo "Step 4: Calculating the size of the temporary file..."
echo "Desired file size to bring disk usage to 93%: ${desired_file_size_mb}MB"
echo "=============================================================="

# Step 5: Execute dd command to create the file of the desired size
echo "Step 5: Creating a temporary file with the calculated size..."
dd if=/dev/zero of=/tmp/tempfile bs=1M count=$desired_file_size_mb status=progress
echo "Temporary file created. Size: ${desired_file_size_mb}MB"
echo "=============================================================="

# Step 6: Show disk usage again after the temporary file is created
echo "Step 6: Checking disk usage after creating the temporary file..."
df -h /
echo "=============================================================="

# Step 7: Remove the temporary file to free up space
echo "Step 7: Removing the temporary file to free up space..."
rm /tmp/tempfile
echo "Temporary file removed."
echo "=============================================================="

# Step 8: Show disk usage again after the temporary file is removed
echo "Step 8: Checking disk usage after removing the temporary file..."
df -h /
echo "=============================================================="

echo "Disk load simulation complete."

EOF
}

# Run the disk load simulation
if [[ "$PARALLEL" == true ]]; then
log "Running the disk load simulation on all instances in parallel..."
for INSTANCE_ID in $INSTANCE_IDS; do
simulate_disk_load "$INSTANCE_ID" &
done
wait # Wait for all background jobs to finish
else
log "Running the disk load simulation on a single instance ($INSTANCE_ID)..."
# Default to the first instance if running sequentially
FIRST_INSTANCE_ID=$(echo "$INSTANCE_IDS" | head -n 1)
simulate_disk_load "$FIRST_INSTANCE_ID"
fi

log "Script execution completed at $(date)."
Loading