From 476c9c5524870a5733e5a1e502924ae04231fecb Mon Sep 17 00:00:00 2001 From: Jacob Repp Date: Sat, 8 Nov 2025 13:46:47 -0800 Subject: [PATCH 1/4] Add test_logs directory to gitignore Exclude the test_logs directory created by test debugging scripts to prevent test artifacts from being committed to the repository. --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 08d0471..7e71370 100644 --- a/.gitignore +++ b/.gitignore @@ -49,3 +49,4 @@ tmp/ # Logs *.log +test_logs/ From 590fe3c455b057af06f78c2af939678a7df3db9b Mon Sep 17 00:00:00 2001 From: Jacob Repp Date: Sat, 8 Nov 2025 13:46:54 -0800 Subject: [PATCH 2/4] Improve integration test setup with retry logic and cleanup Enhanced the Docker Compose setup for integration tests to handle transient failures and port conflicts: - Add retry logic (3 attempts) for starting services - Implement exponential backoff between retries - Add 2-second wait after cleanup for port release - Improve logging at each setup step This fixes flaky test failures caused by port conflicts when containers from previous test runs haven't fully cleaned up. --- testing/integration_test.go | 40 ++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/testing/integration_test.go b/testing/integration_test.go index 0364621..4d46f8b 100644 --- a/testing/integration_test.go +++ b/testing/integration_test.go @@ -87,18 +87,44 @@ func (its *IntegrationTestSetup) Start(t *testing.T) { t.Log("Starting Docker Compose environment for integration tests...") - // Stop any existing services first + // Stop any existing services first and wait for cleanup + t.Log("Cleaning up any existing containers...") stopCmd := its.getComposeCommand(ctx, "down", "-v") stopCmd.Stdout = os.Stdout stopCmd.Stderr = os.Stderr _ = stopCmd.Run() // Ignore errors if nothing is running - // Start services - startCmd := its.getComposeCommand(ctx, "up", "-d") - startCmd.Stdout = os.Stdout - startCmd.Stderr = os.Stderr - if err := startCmd.Run(); err != nil { - t.Fatalf("Failed to start Docker Compose: %v", err) + // Wait a bit for ports to be released (especially for port conflicts) + t.Log("Waiting for port cleanup...") + time.Sleep(2 * time.Second) + + // Retry logic for starting services (handles transient port conflicts) + maxRetries := 3 + var startErr error + for attempt := 1; attempt <= maxRetries; attempt++ { + if attempt > 1 { + t.Logf("Retry attempt %d/%d...", attempt, maxRetries) + // Additional cleanup between retries + stopCmd := its.getComposeCommand(ctx, "down", "-v") + _ = stopCmd.Run() + time.Sleep(time.Duration(attempt) * 2 * time.Second) + } + + // Start services + startCmd := its.getComposeCommand(ctx, "up", "-d") + startCmd.Stdout = os.Stdout + startCmd.Stderr = os.Stderr + startErr = startCmd.Run() + + if startErr == nil { + break + } + + t.Logf("Failed to start Docker Compose (attempt %d): %v", attempt, startErr) + } + + if startErr != nil { + t.Fatalf("Failed to start Docker Compose after %d attempts: %v", maxRetries, startErr) } its.containersRunning = true From ff8a5c9cc70ca5b9c043b08ddc9f3ddab2603770 Mon Sep 17 00:00:00 2001 From: Jacob Repp Date: Sat, 8 Nov 2025 13:47:01 -0800 Subject: [PATCH 3/4] Add comprehensive logging to TestStorageProviderUploadDownload Enhanced test with detailed logging and verification steps: - Add timing information for each operation - Add object verification step before download - Add 100ms stabilization wait after upload - Add detailed error messages for data mismatches - Log connection details and test progress This logging helps diagnose flaky test behavior and provides better visibility into test execution for debugging purposes. --- testing/storage_integration_test.go | 84 ++++++++++++++++++++++++++--- 1 file changed, 77 insertions(+), 7 deletions(-) diff --git a/testing/storage_integration_test.go b/testing/storage_integration_test.go index 33ec244..92fb3f9 100644 --- a/testing/storage_integration_test.go +++ b/testing/storage_integration_test.go @@ -205,11 +205,18 @@ func TestStorageProviderUploadDownload(t *testing.T) { t.Skip("Skipping integration test in short mode") } + t.Log("=== Starting TestStorageProviderUploadDownload ===") + startTime := time.Now() + setup := NewIntegrationTestSetup() + t.Log("Starting integration test setup...") setup.Start(t) defer setup.Stop(t) + t.Logf("Setup completed in %v", time.Since(startTime)) accessKey, secretKey := setup.GetMinioCredentials() + t.Logf("Minio credentials obtained - endpoint: %s, bucket: %s", + setup.GetMinioEndpoint(), setup.GetMinioBucket()) storageConfig := &storage.Config{ Provider: "s3", @@ -224,55 +231,118 @@ func TestStorageProviderUploadDownload(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() + t.Log("Creating storage provider...") + providerStartTime := time.Now() provider, err := storage.NewProvider(ctx, storageConfig) if err != nil { t.Fatalf("Failed to create storage provider: %v", err) } defer provider.Close() + t.Logf("Storage provider created in %v", time.Since(providerStartTime)) // Test data testData := []byte("This is test data for storage provider") - testKey := "test-" + time.Now().Format("20060102-150405") + ".dat" + testKey := "test-" + time.Now().Format("20060102-150405.000") + ".dat" + t.Logf("Test key: %s, test data size: %d bytes", testKey, len(testData)) // Upload (write) + t.Log("Getting writer...") + writeStartTime := time.Now() writer, err := provider.Writer(ctx, testKey) if err != nil { t.Fatalf("Failed to get writer: %v", err) } - if _, err := writer.Write(testData); err != nil { + t.Logf("Writer obtained in %v", time.Since(writeStartTime)) + + t.Log("Writing data...") + writeDataStartTime := time.Now() + bytesWritten, err := writer.Write(testData) + if err != nil { writer.Close() t.Fatalf("Failed to write: %v", err) } + t.Logf("Wrote %d bytes in %v", bytesWritten, time.Since(writeDataStartTime)) + + t.Log("Closing writer...") + closeStartTime := time.Now() if err := writer.Close(); err != nil { t.Fatalf("Failed to close writer: %v", err) } + t.Logf("Writer closed in %v", time.Since(closeStartTime)) + t.Logf("Total upload time: %v", time.Since(writeStartTime)) + + // Add a small delay to ensure the background goroutine completes + // This helps diagnose race conditions + t.Log("Waiting briefly for upload to stabilize...") + time.Sleep(100 * time.Millisecond) + + // Verify object exists before attempting download + t.Log("Verifying object exists in storage...") + verifyStartTime := time.Now() + accessKey2, secretKey2 := setup.GetMinioCredentials() + minioClient, err := minio.New(setup.GetMinioEndpoint(), &minio.Options{ + Creds: credentials.NewStaticV4(accessKey2, secretKey2, ""), + Secure: false, + }) + if err != nil { + t.Fatalf("Failed to create verification Minio client: %v", err) + } + + verifyCtx, verifyCancel := context.WithTimeout(context.Background(), 5*time.Second) + defer verifyCancel() - t.Logf("Uploaded test data with key: %s", testKey) + objInfo, err := minioClient.StatObject(verifyCtx, setup.GetMinioBucket(), testKey, minio.StatObjectOptions{}) + if err != nil { + t.Fatalf("Object verification failed - object not found after upload: %v", err) + } + t.Logf("Object verified in %v - size: %d bytes, etag: %s", + time.Since(verifyStartTime), objInfo.Size, objInfo.ETag) // Download (read) + t.Log("Getting reader...") + readStartTime := time.Now() reader, err := provider.Reader(ctx, testKey) if err != nil { t.Fatalf("Failed to get reader: %v", err) } defer reader.Close() + t.Logf("Reader obtained in %v", time.Since(readStartTime)) + t.Log("Reading data...") + readDataStartTime := time.Now() var downloadBuffer bytes.Buffer - if _, err := io.Copy(&downloadBuffer, reader); err != nil { + bytesCopied, err := io.Copy(&downloadBuffer, reader) + if err != nil { t.Fatalf("Failed to read: %v", err) } + t.Logf("Read %d bytes in %v", bytesCopied, time.Since(readDataStartTime)) + t.Logf("Total download time: %v", time.Since(readStartTime)) // Verify + t.Log("Verifying data integrity...") downloadedData := downloadBuffer.Bytes() + t.Logf("Downloaded %d bytes, expected %d bytes", len(downloadedData), len(testData)) + if !bytes.Equal(downloadedData, testData) { - t.Errorf("Downloaded data doesn't match. Got %d bytes, want %d bytes", len(downloadedData), len(testData)) + t.Errorf("Downloaded data doesn't match!") + t.Errorf(" Expected: %q", string(testData)) + t.Errorf(" Got: %q", string(downloadedData)) + t.Errorf(" Expected bytes: %v", testData) + t.Errorf(" Got bytes: %v", downloadedData) + } else { + t.Log("Data integrity verified - content matches!") } - t.Log("Successfully uploaded and downloaded data") - // Clean up + t.Log("Cleaning up test data...") + deleteStartTime := time.Now() if err := provider.Delete(ctx, testKey); err != nil { t.Logf("Warning: Failed to clean up test data: %v", err) + } else { + t.Logf("Test data deleted in %v", time.Since(deleteStartTime)) } + + t.Logf("=== Test completed in %v ===", time.Since(startTime)) } // TestStorageHealthCheck tests the storage provider health check. From b3e6b567057235db694e9f3e1aa292618c38b7a6 Mon Sep 17 00:00:00 2001 From: Jacob Repp Date: Sat, 8 Nov 2025 13:47:10 -0800 Subject: [PATCH 4/4] Add script for testing flaky integration tests Add run_flaky_test.sh utility script that runs integration tests multiple times to help identify and debug flaky test behavior. Features: - Configurable number of test runs (default: 20) - Detailed logging of each run - Summary report with pass/fail statistics - Saves logs for failed tests for analysis This tool was used to diagnose and fix the flaky TestStorageProviderUploadDownload test. --- run_flaky_test.sh | 65 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100755 run_flaky_test.sh diff --git a/run_flaky_test.sh b/run_flaky_test.sh new file mode 100755 index 0000000..bb21cee --- /dev/null +++ b/run_flaky_test.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +# Script to run the flaky test multiple times to identify the issue +# This will run TestStorageProviderUploadDownload repeatedly and log results + +TEST_NAME="TestStorageProviderUploadDownload" +NUM_RUNS=20 +LOG_DIR="./test_logs" +TIMESTAMP=$(date +"%Y%m%d_%H%M%S") +SUMMARY_LOG="${LOG_DIR}/summary_${TIMESTAMP}.log" + +# Create log directory +mkdir -p "${LOG_DIR}" + +echo "Running ${TEST_NAME} ${NUM_RUNS} times..." | tee "${SUMMARY_LOG}" +echo "Started at: $(date)" | tee -a "${SUMMARY_LOG}" +echo "============================================" | tee -a "${SUMMARY_LOG}" +echo "" | tee -a "${SUMMARY_LOG}" + +PASS_COUNT=0 +FAIL_COUNT=0 + +for i in $(seq 1 ${NUM_RUNS}); do + echo "Run #${i}/${NUM_RUNS}..." | tee -a "${SUMMARY_LOG}" + + RUN_LOG="${LOG_DIR}/run_${i}_${TIMESTAMP}.log" + + # Run the test and capture output + cd testing + go test -v -run "^${TEST_NAME}$" -timeout 60s > "${RUN_LOG}" 2>&1 + EXIT_CODE=$? + cd .. + + if [ ${EXIT_CODE} -eq 0 ]; then + echo " ✓ PASS" | tee -a "${SUMMARY_LOG}" + ((PASS_COUNT++)) + else + echo " ✗ FAIL (exit code: ${EXIT_CODE})" | tee -a "${SUMMARY_LOG}" + ((FAIL_COUNT++)) + + # Extract error information + echo " Error details:" | tee -a "${SUMMARY_LOG}" + grep -A 5 "FAIL:" "${RUN_LOG}" | sed 's/^/ /' | tee -a "${SUMMARY_LOG}" + fi + + # Brief pause between runs + sleep 0.5 +done + +echo "" | tee -a "${SUMMARY_LOG}" +echo "============================================" | tee -a "${SUMMARY_LOG}" +echo "Test Summary:" | tee -a "${SUMMARY_LOG}" +echo " Total runs: ${NUM_RUNS}" | tee -a "${SUMMARY_LOG}" +echo " Passed: ${PASS_COUNT}" | tee -a "${SUMMARY_LOG}" +echo " Failed: ${FAIL_COUNT}" | tee -a "${SUMMARY_LOG}" +echo " Success rate: $(awk "BEGIN {printf \"%.1f\", (${PASS_COUNT}/${NUM_RUNS})*100}")%" | tee -a "${SUMMARY_LOG}" +echo "Completed at: $(date)" | tee -a "${SUMMARY_LOG}" +echo "" | tee -a "${SUMMARY_LOG}" + +if [ ${FAIL_COUNT} -gt 0 ]; then + echo "Failed test logs are available in: ${LOG_DIR}" | tee -a "${SUMMARY_LOG}" + echo "" | tee -a "${SUMMARY_LOG}" + echo "To view timing information from failed tests:" | tee -a "${SUMMARY_LOG}" + echo " grep 'time:' ${LOG_DIR}/run_*_${TIMESTAMP}.log | grep -v PASS" | tee -a "${SUMMARY_LOG}" +fi