From 476c9c5524870a5733e5a1e502924ae04231fecb Mon Sep 17 00:00:00 2001
From: Jacob Repp <jacobrepp@gmail.com>
Date: Sat, 8 Nov 2025 13:46:47 -0800
Subject: [PATCH 1/4] Add test_logs directory to gitignore

Exclude the test_logs directory created by test debugging scripts
to prevent test artifacts from being committed to the repository.
---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 08d0471..7e71370 100644
--- a/.gitignore
+++ b/.gitignore
@@ -49,3 +49,4 @@ tmp/
 
 # Logs
 *.log
+test_logs/

From 590fe3c455b057af06f78c2af939678a7df3db9b Mon Sep 17 00:00:00 2001
From: Jacob Repp <jacobrepp@gmail.com>
Date: Sat, 8 Nov 2025 13:46:54 -0800
Subject: [PATCH 2/4] Improve integration test setup with retry logic and
 cleanup

Enhanced the Docker Compose setup for integration tests to handle
transient failures and port conflicts:

- Add retry logic (3 attempts) for starting services
- Implement exponential backoff between retries
- Add 2-second wait after cleanup for port release
- Improve logging at each setup step

This fixes flaky test failures caused by port conflicts when
containers from previous test runs haven't fully cleaned up.
---
 testing/integration_test.go | 40 ++++++++++++++++++++++++++++++-------
 1 file changed, 33 insertions(+), 7 deletions(-)

diff --git a/testing/integration_test.go b/testing/integration_test.go
index 0364621..4d46f8b 100644
--- a/testing/integration_test.go
+++ b/testing/integration_test.go
@@ -87,18 +87,44 @@ func (its *IntegrationTestSetup) Start(t *testing.T) {
 
 	t.Log("Starting Docker Compose environment for integration tests...")
 
-	// Stop any existing services first
+	// Stop any existing services first and wait for cleanup
+	t.Log("Cleaning up any existing containers...")
 	stopCmd := its.getComposeCommand(ctx, "down", "-v")
 	stopCmd.Stdout = os.Stdout
 	stopCmd.Stderr = os.Stderr
 	_ = stopCmd.Run() // Ignore errors if nothing is running
 
-	// Start services
-	startCmd := its.getComposeCommand(ctx, "up", "-d")
-	startCmd.Stdout = os.Stdout
-	startCmd.Stderr = os.Stderr
-	if err := startCmd.Run(); err != nil {
-		t.Fatalf("Failed to start Docker Compose: %v", err)
+	// Wait a bit for ports to be released (especially for port conflicts)
+	t.Log("Waiting for port cleanup...")
+	time.Sleep(2 * time.Second)
+
+	// Retry logic for starting services (handles transient port conflicts)
+	maxRetries := 3
+	var startErr error
+	for attempt := 1; attempt <= maxRetries; attempt++ {
+		if attempt > 1 {
+			t.Logf("Retry attempt %d/%d...", attempt, maxRetries)
+			// Additional cleanup between retries
+			stopCmd := its.getComposeCommand(ctx, "down", "-v")
+			_ = stopCmd.Run()
+			time.Sleep(time.Duration(attempt) * 2 * time.Second)
+		}
+
+		// Start services
+		startCmd := its.getComposeCommand(ctx, "up", "-d")
+		startCmd.Stdout = os.Stdout
+		startCmd.Stderr = os.Stderr
+		startErr = startCmd.Run()
+
+		if startErr == nil {
+			break
+		}
+
+		t.Logf("Failed to start Docker Compose (attempt %d): %v", attempt, startErr)
+	}
+
+	if startErr != nil {
+		t.Fatalf("Failed to start Docker Compose after %d attempts: %v", maxRetries, startErr)
 	}
 
 	its.containersRunning = true

From ff8a5c9cc70ca5b9c043b08ddc9f3ddab2603770 Mon Sep 17 00:00:00 2001
From: Jacob Repp <jacobrepp@gmail.com>
Date: Sat, 8 Nov 2025 13:47:01 -0800
Subject: [PATCH 3/4] Add comprehensive logging to
 TestStorageProviderUploadDownload

Enhanced test with detailed logging and verification steps:

- Add timing information for each operation
- Add object verification step before download
- Add 100ms stabilization wait after upload
- Add detailed error messages for data mismatches
- Log connection details and test progress

This logging helps diagnose flaky test behavior and provides
better visibility into test execution for debugging purposes.
---
 testing/storage_integration_test.go | 84 ++++++++++++++++++++++++++---
 1 file changed, 77 insertions(+), 7 deletions(-)

diff --git a/testing/storage_integration_test.go b/testing/storage_integration_test.go
index 33ec244..92fb3f9 100644
--- a/testing/storage_integration_test.go
+++ b/testing/storage_integration_test.go
@@ -205,11 +205,18 @@ func TestStorageProviderUploadDownload(t *testing.T) {
 		t.Skip("Skipping integration test in short mode")
 	}
 
+	t.Log("=== Starting TestStorageProviderUploadDownload ===")
+	startTime := time.Now()
+
 	setup := NewIntegrationTestSetup()
+	t.Log("Starting integration test setup...")
 	setup.Start(t)
 	defer setup.Stop(t)
+	t.Logf("Setup completed in %v", time.Since(startTime))
 
 	accessKey, secretKey := setup.GetMinioCredentials()
+	t.Logf("Minio credentials obtained - endpoint: %s, bucket: %s",
+		setup.GetMinioEndpoint(), setup.GetMinioBucket())
 
 	storageConfig := &storage.Config{
 		Provider:          "s3",
@@ -224,55 +231,118 @@ func TestStorageProviderUploadDownload(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
 	defer cancel()
 
+	t.Log("Creating storage provider...")
+	providerStartTime := time.Now()
 	provider, err := storage.NewProvider(ctx, storageConfig)
 	if err != nil {
 		t.Fatalf("Failed to create storage provider: %v", err)
 	}
 	defer provider.Close()
+	t.Logf("Storage provider created in %v", time.Since(providerStartTime))
 
 	// Test data
 	testData := []byte("This is test data for storage provider")
-	testKey := "test-" + time.Now().Format("20060102-150405") + ".dat"
+	testKey := "test-" + time.Now().Format("20060102-150405.000") + ".dat"
+	t.Logf("Test key: %s, test data size: %d bytes", testKey, len(testData))
 
 	// Upload (write)
+	t.Log("Getting writer...")
+	writeStartTime := time.Now()
 	writer, err := provider.Writer(ctx, testKey)
 	if err != nil {
 		t.Fatalf("Failed to get writer: %v", err)
 	}
-	if _, err := writer.Write(testData); err != nil {
+	t.Logf("Writer obtained in %v", time.Since(writeStartTime))
+
+	t.Log("Writing data...")
+	writeDataStartTime := time.Now()
+	bytesWritten, err := writer.Write(testData)
+	if err != nil {
 		writer.Close()
 		t.Fatalf("Failed to write: %v", err)
 	}
+	t.Logf("Wrote %d bytes in %v", bytesWritten, time.Since(writeDataStartTime))
+
+	t.Log("Closing writer...")
+	closeStartTime := time.Now()
 	if err := writer.Close(); err != nil {
 		t.Fatalf("Failed to close writer: %v", err)
 	}
+	t.Logf("Writer closed in %v", time.Since(closeStartTime))
+	t.Logf("Total upload time: %v", time.Since(writeStartTime))
+
+	// Add a small delay to ensure the background goroutine completes
+	// This helps diagnose race conditions
+	t.Log("Waiting briefly for upload to stabilize...")
+	time.Sleep(100 * time.Millisecond)
+
+	// Verify object exists before attempting download
+	t.Log("Verifying object exists in storage...")
+	verifyStartTime := time.Now()
+	accessKey2, secretKey2 := setup.GetMinioCredentials()
+	minioClient, err := minio.New(setup.GetMinioEndpoint(), &minio.Options{
+		Creds:  credentials.NewStaticV4(accessKey2, secretKey2, ""),
+		Secure: false,
+	})
+	if err != nil {
+		t.Fatalf("Failed to create verification Minio client: %v", err)
+	}
+
+	verifyCtx, verifyCancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer verifyCancel()
 
-	t.Logf("Uploaded test data with key: %s", testKey)
+	objInfo, err := minioClient.StatObject(verifyCtx, setup.GetMinioBucket(), testKey, minio.StatObjectOptions{})
+	if err != nil {
+		t.Fatalf("Object verification failed - object not found after upload: %v", err)
+	}
+	t.Logf("Object verified in %v - size: %d bytes, etag: %s",
+		time.Since(verifyStartTime), objInfo.Size, objInfo.ETag)
 
 	// Download (read)
+	t.Log("Getting reader...")
+	readStartTime := time.Now()
 	reader, err := provider.Reader(ctx, testKey)
 	if err != nil {
 		t.Fatalf("Failed to get reader: %v", err)
 	}
 	defer reader.Close()
+	t.Logf("Reader obtained in %v", time.Since(readStartTime))
 
+	t.Log("Reading data...")
+	readDataStartTime := time.Now()
 	var downloadBuffer bytes.Buffer
-	if _, err := io.Copy(&downloadBuffer, reader); err != nil {
+	bytesCopied, err := io.Copy(&downloadBuffer, reader)
+	if err != nil {
 		t.Fatalf("Failed to read: %v", err)
 	}
+	t.Logf("Read %d bytes in %v", bytesCopied, time.Since(readDataStartTime))
+	t.Logf("Total download time: %v", time.Since(readStartTime))
 
 	// Verify
+	t.Log("Verifying data integrity...")
 	downloadedData := downloadBuffer.Bytes()
+	t.Logf("Downloaded %d bytes, expected %d bytes", len(downloadedData), len(testData))
+
 	if !bytes.Equal(downloadedData, testData) {
-		t.Errorf("Downloaded data doesn't match. Got %d bytes, want %d bytes", len(downloadedData), len(testData))
+		t.Errorf("Downloaded data doesn't match!")
+		t.Errorf("  Expected: %q", string(testData))
+		t.Errorf("  Got:      %q", string(downloadedData))
+		t.Errorf("  Expected bytes: %v", testData)
+		t.Errorf("  Got bytes:      %v", downloadedData)
+	} else {
+		t.Log("Data integrity verified - content matches!")
 	}
 
-	t.Log("Successfully uploaded and downloaded data")
-
 	// Clean up
+	t.Log("Cleaning up test data...")
+	deleteStartTime := time.Now()
 	if err := provider.Delete(ctx, testKey); err != nil {
 		t.Logf("Warning: Failed to clean up test data: %v", err)
+	} else {
+		t.Logf("Test data deleted in %v", time.Since(deleteStartTime))
 	}
+
+	t.Logf("=== Test completed in %v ===", time.Since(startTime))
 }
 
 // TestStorageHealthCheck tests the storage provider health check.

From b3e6b567057235db694e9f3e1aa292618c38b7a6 Mon Sep 17 00:00:00 2001
From: Jacob Repp <jacobrepp@gmail.com>
Date: Sat, 8 Nov 2025 13:47:10 -0800
Subject: [PATCH 4/4] Add script for testing flaky integration tests

Add run_flaky_test.sh utility script that runs integration tests
multiple times to help identify and debug flaky test behavior.

Features:
- Configurable number of test runs (default: 20)
- Detailed logging of each run
- Summary report with pass/fail statistics
- Saves logs for failed tests for analysis

This tool was used to diagnose and fix the flaky
TestStorageProviderUploadDownload test.
---
 run_flaky_test.sh | 65 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)
 create mode 100755 run_flaky_test.sh

diff --git a/run_flaky_test.sh b/run_flaky_test.sh
new file mode 100755
index 0000000..bb21cee
--- /dev/null
+++ b/run_flaky_test.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+# Script to run the flaky test multiple times to identify the issue
+# This will run TestStorageProviderUploadDownload repeatedly and log results
+
+TEST_NAME="TestStorageProviderUploadDownload"
+NUM_RUNS=20
+LOG_DIR="./test_logs"
+TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
+SUMMARY_LOG="${LOG_DIR}/summary_${TIMESTAMP}.log"
+
+# Create log directory
+mkdir -p "${LOG_DIR}"
+
+echo "Running ${TEST_NAME} ${NUM_RUNS} times..." | tee "${SUMMARY_LOG}"
+echo "Started at: $(date)" | tee -a "${SUMMARY_LOG}"
+echo "============================================" | tee -a "${SUMMARY_LOG}"
+echo "" | tee -a "${SUMMARY_LOG}"
+
+PASS_COUNT=0
+FAIL_COUNT=0
+
+for i in $(seq 1 ${NUM_RUNS}); do
+    echo "Run #${i}/${NUM_RUNS}..." | tee -a "${SUMMARY_LOG}"
+
+    RUN_LOG="${LOG_DIR}/run_${i}_${TIMESTAMP}.log"
+
+    # Run the test and capture output
+    cd testing
+    go test -v -run "^${TEST_NAME}$" -timeout 60s > "${RUN_LOG}" 2>&1
+    EXIT_CODE=$?
+    cd ..
+
+    if [ ${EXIT_CODE} -eq 0 ]; then
+        echo "  ✓ PASS" | tee -a "${SUMMARY_LOG}"
+        ((PASS_COUNT++))
+    else
+        echo "  ✗ FAIL (exit code: ${EXIT_CODE})" | tee -a "${SUMMARY_LOG}"
+        ((FAIL_COUNT++))
+
+        # Extract error information
+        echo "  Error details:" | tee -a "${SUMMARY_LOG}"
+        grep -A 5 "FAIL:" "${RUN_LOG}" | sed 's/^/    /' | tee -a "${SUMMARY_LOG}"
+    fi
+
+    # Brief pause between runs
+    sleep 0.5
+done
+
+echo "" | tee -a "${SUMMARY_LOG}"
+echo "============================================" | tee -a "${SUMMARY_LOG}"
+echo "Test Summary:" | tee -a "${SUMMARY_LOG}"
+echo "  Total runs: ${NUM_RUNS}" | tee -a "${SUMMARY_LOG}"
+echo "  Passed: ${PASS_COUNT}" | tee -a "${SUMMARY_LOG}"
+echo "  Failed: ${FAIL_COUNT}" | tee -a "${SUMMARY_LOG}"
+echo "  Success rate: $(awk "BEGIN {printf \"%.1f\", (${PASS_COUNT}/${NUM_RUNS})*100}")%" | tee -a "${SUMMARY_LOG}"
+echo "Completed at: $(date)" | tee -a "${SUMMARY_LOG}"
+echo "" | tee -a "${SUMMARY_LOG}"
+
+if [ ${FAIL_COUNT} -gt 0 ]; then
+    echo "Failed test logs are available in: ${LOG_DIR}" | tee -a "${SUMMARY_LOG}"
+    echo "" | tee -a "${SUMMARY_LOG}"
+    echo "To view timing information from failed tests:" | tee -a "${SUMMARY_LOG}"
+    echo "  grep 'time:' ${LOG_DIR}/run_*_${TIMESTAMP}.log | grep -v PASS" | tee -a "${SUMMARY_LOG}"
+fi