π Monitoring & Alerting #52
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow identity and triggers. The workflow starts after the upstream
# pipeline completes, on a 15-minute schedule, and on manual dispatch.
name: π Monitoring & Alerting

on:
  # Run after deployments
  workflow_run:
    # NOTE(review): this string must equal the `name:` of the upstream workflow
    # exactly. The leading symbol looks mis-encoded in this copy of the file;
    # confirm it against the upstream workflow before relying on this trigger.
    workflows: ["π CI/CD Pipeline"]
    types:
      - completed

  # Scheduled health monitoring
  schedule:
    - cron: '*/15 * * * *'  # Every 15 minutes

  # Manual monitoring trigger
  workflow_dispatch:
    inputs:
      # Read by the job's step conditions to skip either the health probes
      # ('performance-only') or the latency probes ('health-only').
      monitoring_type:
        description: 'Type of monitoring to perform'
        required: false
        default: 'full'
        type: choice
        options:
          - 'full'
          - 'health-only'
          - 'performance-only'
jobs:
  # Single monitoring job: records how the run was triggered, probes the
  # deployed application for availability and latency, then logs alerts and a
  # summary. Alert steps annotate the run but never change the job result.
  monitor-deployment:
    runs-on: ubuntu-latest
    # Upper bound so a stuck probe cannot hold the runner indefinitely.
    timeout-minutes: 10
    # The job only checks out the repository; no write scopes are needed.
    permissions:
      contents: read
    # Skipped only when the triggering pipeline run was itself skipped;
    # scheduled and manual runs always proceed.
    if: >-
      github.event.workflow_run.conclusion != 'skipped' ||
      github.event_name == 'workflow_dispatch' ||
      github.event_name == 'schedule'
    env:
      # Endpoints probed by the health and performance steps.
      MAIN_URL: 'https://local-loop.vercel.app'
      HEALTH_URL: 'https://local-loop.vercel.app/api/health'
      # Per-request ceiling (seconds) passed to curl --max-time.
      CURL_MAX_TIME: '30'
    steps:
      - name: π₯ Checkout code
        uses: actions/checkout@v4

      # Outputs: deployment_status, triggered_by, deployment_failed.
      - name: π Check Deployment Status
        id: deployment-status
        env:
          # Context values are passed through the environment instead of being
          # expanded into the script text.
          EVENT_NAME: ${{ github.event_name }}
          WORKFLOW_CONCLUSION: ${{ github.event.workflow_run.conclusion }}
        run: |
          if [ "$EVENT_NAME" = "workflow_run" ]; then
            echo "deployment_status=$WORKFLOW_CONCLUSION" >> "$GITHUB_OUTPUT"
            echo "triggered_by=workflow_run" >> "$GITHUB_OUTPUT"

            case "$WORKFLOW_CONCLUSION" in
              failure)
                echo "π¨ Deployment failed - CI/CD Pipeline concluded with failure"
                echo "deployment_failed=true" >> "$GITHUB_OUTPUT"
                ;;
              success)
                echo "β Deployment completed successfully"
                echo "deployment_failed=false" >> "$GITHUB_OUTPUT"
                ;;
              *)
                # cancelled, timed_out, neutral, ...: not a success, but not
                # escalated as a deployment failure either.
                echo "CI/CD Pipeline concluded with '$WORKFLOW_CONCLUSION' - not treated as a deployment failure"
                echo "deployment_failed=false" >> "$GITHUB_OUTPUT"
                ;;
            esac
          else
            # schedule or workflow_dispatch: there is no upstream run to inspect.
            if [ "$EVENT_NAME" = "schedule" ]; then
              echo "deployment_status=scheduled_check" >> "$GITHUB_OUTPUT"
            else
              echo "deployment_status=manual_check" >> "$GITHUB_OUTPUT"
            fi
            echo "triggered_by=$EVENT_NAME" >> "$GITHUB_OUTPUT"
            echo "deployment_failed=false" >> "$GITHUB_OUTPUT"
          fi

      # Outputs: overall_status (healthy | degraded | unhealthy),
      # health_http_code, main_http_code, events_http_code and, when any check
      # failed, issues_found (one "- ..." line per issue).
      - name: π₯ Application Health Monitoring
        id: health-monitoring
        if: github.event.inputs.monitoring_type != 'performance-only'
        run: |
          echo "π₯ Starting comprehensive health monitoring..."

          # Print the HTTP status of a GET to $1 and store the body in $2.
          # Prints 000 when the request fails or exceeds CURL_MAX_TIME. The
          # body goes to a file so the status can never be mixed with it.
          fetch_status() {
            local status
            status=$(curl -s --max-time "$CURL_MAX_TIME" -o "$2" -w '%{http_code}' "$1") || status="000"
            echo "$status"
          }

          # Initialize status tracking
          OVERALL_STATUS="healthy"
          ISSUES=()
          HEALTH_BODY_FILE=$(mktemp)

          # Test health endpoint
          echo "Testing health endpoint..."
          HEALTH_HTTP_CODE=$(fetch_status "$HEALTH_URL" "$HEALTH_BODY_FILE")
          echo "Health endpoint HTTP code: $HEALTH_HTTP_CODE"

          if [ "$HEALTH_HTTP_CODE" = "200" ]; then
            echo "β Health endpoint responding correctly"

            # Parse health check details
            if jq -e '.status == "healthy"' "$HEALTH_BODY_FILE" > /dev/null 2>&1; then
              echo "β Application reports healthy status"
            else
              echo "β οΈ Application reports non-healthy status"
              OVERALL_STATUS="degraded"
              ISSUES+=("- Application health status not healthy")
            fi
          else
            echo "β Health endpoint failed with HTTP $HEALTH_HTTP_CODE"
            OVERALL_STATUS="unhealthy"
            ISSUES+=("- Health endpoint returning HTTP $HEALTH_HTTP_CODE")
          fi
          rm -f "$HEALTH_BODY_FILE"

          # Test main application endpoint
          echo "Testing main application..."
          MAIN_HTTP_CODE=$(fetch_status "$MAIN_URL" /dev/null)
          echo "Main app HTTP code: $MAIN_HTTP_CODE"

          if [ "$MAIN_HTTP_CODE" = "200" ]; then
            echo "β Main application responding correctly"
          else
            echo "β Main application failed with HTTP $MAIN_HTTP_CODE"
            OVERALL_STATUS="unhealthy"
            ISSUES+=("- Main application returning HTTP $MAIN_HTTP_CODE")
          fi

          # Test critical API endpoints
          echo "Testing critical API endpoints..."
          EVENTS_HTTP_CODE=$(fetch_status "$MAIN_URL/api/events" /dev/null)

          if [ "$EVENTS_HTTP_CODE" = "200" ]; then
            echo "β Events API responding correctly"
          else
            echo "β οΈ Events API returned HTTP $EVENTS_HTTP_CODE"
            # An API failure alone only downgrades a healthy result; it never
            # overrides an earlier "unhealthy".
            if [ "$OVERALL_STATUS" = "healthy" ]; then
              OVERALL_STATUS="degraded"
            fi
            ISSUES+=("- Events API returning HTTP $EVENTS_HTTP_CODE")
          fi

          # Set outputs
          {
            echo "overall_status=$OVERALL_STATUS"
            echo "health_http_code=$HEALTH_HTTP_CODE"
            echo "main_http_code=$MAIN_HTTP_CODE"
            echo "events_http_code=$EVENTS_HTTP_CODE"
            if [ "${#ISSUES[@]}" -gt 0 ]; then
              echo "issues_found<<EOF"
              printf '%s\n' "${ISSUES[@]}"
              echo "EOF"
            fi
          } >> "$GITHUB_OUTPUT"

      # Outputs: main_response_time, health_response_time, api_response_time
      # (seconds; "0" when the request failed) and, when any probe was slow or
      # failed, performance_issues (one "- ..." line per issue).
      - name: β‘ Performance Monitoring
        id: performance-monitoring
        if: github.event.inputs.monitoring_type != 'health-only'
        run: |
          echo "β‘ Starting performance monitoring..."

          PERFORMANCE_ISSUES=()
          LAST_TIME="0"

          # Time one GET request and compare it with a threshold.
          #   $1 = label used in log and issue lines
          #   $2 = URL
          #   $3 = threshold in seconds
          # Leaves the measured time in LAST_TIME. A failed request is
          # recorded as an issue and reported as "0" rather than passing as a
          # fast response.
          probe_time() {
            local label="$1" url="$2" threshold="$3"

            if ! LAST_TIME=$(curl -o /dev/null -s --max-time "$CURL_MAX_TIME" -w '%{time_total}' "$url"); then
              echo "${label} request failed"
              PERFORMANCE_ISSUES+=("- ${label} request failed")
              LAST_TIME="0"
              return 0
            fi

            echo "${label} response time: ${LAST_TIME}s"

            # awk performs the floating-point comparison; "+ 0" forces both
            # operands to be compared as numbers.
            if awk -v t="$LAST_TIME" -v limit="$threshold" 'BEGIN { exit !(t + 0 > limit + 0) }'; then
              echo "β οΈ ${label} response time exceeds threshold (${LAST_TIME}s > ${threshold}s)"
              PERFORMANCE_ISSUES+=("- ${label} slow: ${LAST_TIME}s")
            fi
          }

          # Measure response times against the thresholds (in seconds)
          echo "Measuring response times..."

          probe_time "Main page" "$MAIN_URL" 3.0
          MAIN_TIME="$LAST_TIME"

          probe_time "Health endpoint" "$HEALTH_URL" 1.0
          HEALTH_TIME="$LAST_TIME"

          probe_time "API" "$MAIN_URL/api/events" 2.0
          API_TIME="$LAST_TIME"

          # Set outputs
          {
            echo "main_response_time=$MAIN_TIME"
            echo "health_response_time=$HEALTH_TIME"
            echo "api_response_time=$API_TIME"
            if [ "${#PERFORMANCE_ISSUES[@]}" -gt 0 ]; then
              echo "performance_issues<<EOF"
              printf '%s\n' "${PERFORMANCE_ISSUES[@]}"
              echo "EOF"
            fi
          } >> "$GITHUB_OUTPUT"

      # Runs on a failed deployment, an unhealthy application, or a degraded
      # application right after a pipeline run. Logs the details and adds run
      # annotations; it does not fail the job.
      - name: π¨ Alert on Critical Issues
        if: >-
          steps.deployment-status.outputs.deployment_failed == 'true' ||
          steps.health-monitoring.outputs.overall_status == 'unhealthy' ||
          (steps.health-monitoring.outputs.overall_status == 'degraded' && github.event_name == 'workflow_run')
        env:
          # Step outputs are passed as environment variables so their content
          # is never parsed as part of the script.
          TRIGGERED_BY: ${{ steps.deployment-status.outputs.triggered_by }}
          DEPLOYMENT_FAILED: ${{ steps.deployment-status.outputs.deployment_failed }}
          OVERALL_STATUS: ${{ steps.health-monitoring.outputs.overall_status }}
          ISSUES_FOUND: ${{ steps.health-monitoring.outputs.issues_found }}
          REPOSITORY: ${{ github.repository }}
          UPSTREAM_RUN_ID: ${{ github.event.workflow_run.id }}
        run: |
          echo "π¨ CRITICAL ALERT: Production issues detected!"
          echo "=========================="
          echo "Timestamp: $(date -u +%Y-%m-%dT%H:%M:%S.%3NZ)"
          echo "Trigger: $TRIGGERED_BY"

          if [ "$DEPLOYMENT_FAILED" = "true" ]; then
            echo "::error title=Deployment failure::CI/CD Pipeline failed"
            echo "π΄ DEPLOYMENT FAILURE"
            echo "- CI/CD Pipeline failed"
            echo "- Check workflow: https://github.com/$REPOSITORY/actions/runs/$UPSTREAM_RUN_ID"
          fi

          if [ "$OVERALL_STATUS" = "unhealthy" ]; then
            echo "::error title=Application unhealthy::Health checks failed"
            echo "π΄ APPLICATION UNHEALTHY"
            echo "Issues found:"
            echo "$ISSUES_FOUND"
          elif [ "$OVERALL_STATUS" = "degraded" ]; then
            echo "::warning title=Application degraded::Some health checks failed"
            echo "π‘ APPLICATION DEGRADED"
            echo "Issues found:"
            echo "$ISSUES_FOUND"
          fi

          echo ""
          echo "π Monitor dashboard: https://github.com/$REPOSITORY/actions"
          echo "π₯ Health check: $HEALTH_URL"
          echo "π Application: $MAIN_URL"

      # Runs only when the performance step reported at least one issue.
      - name: β οΈ Alert on Performance Issues
        if: steps.performance-monitoring.outputs.performance_issues != ''
        env:
          PERFORMANCE_ISSUES: ${{ steps.performance-monitoring.outputs.performance_issues }}
          MAIN_RESPONSE_TIME: ${{ steps.performance-monitoring.outputs.main_response_time }}
          HEALTH_RESPONSE_TIME: ${{ steps.performance-monitoring.outputs.health_response_time }}
          API_RESPONSE_TIME: ${{ steps.performance-monitoring.outputs.api_response_time }}
        run: |
          echo "::warning title=Performance alert::Slow or failed responses detected"
          echo "β οΈ PERFORMANCE ALERT: Slow response times detected"
          echo "=========================="
          echo "Timestamp: $(date -u +%Y-%m-%dT%H:%M:%S.%3NZ)"
          echo ""
          echo "Performance issues:"
          echo "$PERFORMANCE_ISSUES"
          echo ""
          echo "Current response times:"
          echo "- Main page: ${MAIN_RESPONSE_TIME}s"
          echo "- Health endpoint: ${HEALTH_RESPONSE_TIME}s"
          echo "- API: ${API_RESPONSE_TIME}s"

      # Always runs, so a summary is logged even if an earlier step failed.
      - name: π Log Monitoring Summary
        if: always()
        env:
          MONITORING_TYPE: ${{ github.event.inputs.monitoring_type }}
          TRIGGERED_BY: ${{ steps.deployment-status.outputs.triggered_by }}
          DEPLOYMENT_STATUS: ${{ steps.deployment-status.outputs.deployment_status }}
          OVERALL_STATUS: ${{ steps.health-monitoring.outputs.overall_status }}
          HEALTH_HTTP_CODE: ${{ steps.health-monitoring.outputs.health_http_code }}
          MAIN_HTTP_CODE: ${{ steps.health-monitoring.outputs.main_http_code }}
          EVENTS_HTTP_CODE: ${{ steps.health-monitoring.outputs.events_http_code }}
          MAIN_RESPONSE_TIME: ${{ steps.performance-monitoring.outputs.main_response_time }}
          HEALTH_RESPONSE_TIME: ${{ steps.performance-monitoring.outputs.health_response_time }}
          API_RESPONSE_TIME: ${{ steps.performance-monitoring.outputs.api_response_time }}
        run: |
          echo "π Monitoring Summary"
          echo "===================="
          echo "Timestamp: $(date -u +%Y-%m-%dT%H:%M:%S.%3NZ)"
          echo "Trigger: $TRIGGERED_BY"
          echo "Deployment Status: $DEPLOYMENT_STATUS"

          # MONITORING_TYPE is empty for non-manual triggers, so both
          # sections are printed in that case.
          if [ "$MONITORING_TYPE" != "performance-only" ]; then
            echo "Health Status: $OVERALL_STATUS"
            echo "Health Endpoint: HTTP $HEALTH_HTTP_CODE"
            echo "Main App: HTTP $MAIN_HTTP_CODE"
            echo "Events API: HTTP $EVENTS_HTTP_CODE"
          fi

          if [ "$MONITORING_TYPE" != "health-only" ]; then
            echo "Main Response Time: ${MAIN_RESPONSE_TIME}s"
            echo "Health Response Time: ${HEALTH_RESPONSE_TIME}s"
            echo "API Response Time: ${API_RESPONSE_TIME}s"
          fi