diff --git a/.github/prompts/run-integration-testing-guide.prompt.md b/.github/prompts/run-integration-testing-guide.prompt.md new file mode 100644 index 0000000..dafa2bf --- /dev/null +++ b/.github/prompts/run-integration-testing-guide.prompt.md @@ -0,0 +1,153 @@ +--- +mode: agent +--- + +# Integration Testing Guide Execution Instructions + +As an expert system administrator, you will execute the **complete integration testing process** following the [Integration Testing Guide](../../docs/guides/integration-testing-guide.md). + +## 📋 Overview + +This guide performs a **full end-to-end integration test** that includes: + +1. **Clean existing state** (VM, application data, certificates) +2. **Deploy fresh infrastructure** (VM with Ubuntu 24.04) +3. **Wait for cloud-init completion** (system provisioning) +4. **Run comprehensive integration tests** (services, connectivity, functionality) +5. **Perform smoke testing** (external validation with official client tools) +6. **Clean up resources** (return to clean state) + +**Expected Duration**: ~8-12 minutes total +**Prerequisites**: Must have completed initial setup (`make test-prereq`) + +## 🎯 Execution Requirements + +### CRITICAL Rules to Follow: + +1. **Sequential Execution**: Follow steps in exact order - do NOT skip or reorder +2. **No Command Modifications**: Execute commands exactly as written in the guide +3. **Working Directory**: Always run from project root directory +4. **Error Handling**: Document any failures or deviations immediately +5. 
**Complete Process**: Execute the entire guide from start to finish + +### What Gets Cleaned (Destructive Operations): + +- **Virtual Machine**: Complete VM destruction and recreation +- **Application Storage**: Database, SSL certificates, configuration files +- **OpenTofu State**: Infrastructure state reset +- **libvirt Resources**: VM disks, cloud-init ISOs, network configurations + +## 📝 Step-by-Step Instructions + +### Phase 1: Preparation and Cleanup + +- **Step 1.1-1.8**: Clean existing infrastructure and application state +- **Critical**: Step 1.8 (Clean Application Storage) is destructive but recommended +- **Outcome**: Clean slate for fresh deployment + +### Phase 2: Infrastructure Deployment + +- **Step 2.1-2.4**: Deploy VM with OpenTofu/Terraform +- **Critical**: Wait for cloud-init completion (Step 3) +- **Outcome**: Provisioned VM with Torrust Tracker ready + +### Phase 3: Integration Testing + +- **Step 4**: Run comprehensive integration tests +- **Step 5**: Optional manual verification +- **Step 6**: Optional performance testing +- **Outcome**: Validated working system + +### Phase 4: External Validation + +- **Step 7**: External smoke testing with official client tools +- **Reference**: Use [Smoke Testing Guide](../../docs/guides/smoke-testing-guide.md) for details +- **Outcome**: Black-box validation of tracker functionality + +### Phase 5: Cleanup + +- **Step 8**: Clean up all resources +- **Step 9**: Review insights and best practices +- **Outcome**: Return to clean state + +## 🚨 Important Notes + +### SSH Key Configuration + +- **Required**: Must configure SSH keys before deployment +- **Location**: `infrastructure/terraform/local.tfvars` +- **Template**: Available in `infrastructure/terraform/terraform.tfvars.example` + +### Cloud-Init Wait Time + +- **Critical**: DO NOT skip Step 3 (cloud-init completion) +- **Duration**: 2-3 minutes typically +- **Failure Mode**: SSH connection failures if rushed + +### Error Documentation + +- **Immediate**: 
Document any command failures or unexpected outputs +- **Location**: Add issues directly to the integration testing guide +- **Format**: Include error messages, context, and resolution steps + +### Non-Standard Commands + +- **Approval Required**: Only execute commands not in the guide if absolutely necessary +- **Documentation**: Clearly indicate when deviating from guide +- **Justification**: Explain why the deviation was needed + +## 🔧 Troubleshooting Guidance + +### Common Issues and Solutions: + +1. **"Command not found"**: Verify you're in project root directory +2. **SSH connection failures**: Ensure cloud-init has completed +3. **libvirt permission errors**: Check user is in libvirt group +4. **VM deployment timeouts**: Normal during cloud-init, wait longer +5. **Storage volume conflicts**: Run manual cleanup steps from guide + +### When to Deviate from Guide: + +- **System-specific issues**: Different Linux distributions may need adjustments +- **Network configuration**: Firewall or DNS issues requiring resolution +- **Permission problems**: User/group configuration fixes +- **Always document**: Any deviations with full explanation + +## 📊 Success Criteria + +### Integration Test Success Indicators: + +- ✅ All services start successfully (Docker Compose) +- ✅ Tracker responds to UDP/HTTP requests +- ✅ API endpoints return expected data +- ✅ Grafana dashboards display metrics +- ✅ MySQL database is accessible and functional + +### Smoke Test Success Indicators: + +- ✅ UDP tracker clients receive responses +- ✅ HTTP tracker clients receive responses +- ✅ API health checks return "Ok" +- ✅ Statistics endpoints return valid data +- ✅ Metrics endpoints return Prometheus data + +## 🎯 Final Deliverables + +Upon completion, you should have: + +1. **Executed Complete Guide**: All steps from 1.1 through 9 +2. **Documented Issues**: Any problems encountered and how they were resolved +3. **Validated Functionality**: Both integration and smoke tests passed +4. 
**Clean State**: All resources cleaned up and ready for next test +5. **Updated Documentation**: Any guide improvements or corrections needed + +## 📖 Additional Resources + +- **Integration Testing Guide**: [docs/guides/integration-testing-guide.md](../../docs/guides/integration-testing-guide.md) +- **Smoke Testing Guide**: [docs/guides/smoke-testing-guide.md](../../docs/guides/smoke-testing-guide.md) +- **Quick Start Guide**: [docs/infrastructure/quick-start.md](../../docs/infrastructure/quick-start.md) +- **Troubleshooting**: See infrastructure documentation for libvirt and OpenTofu issues + +--- + +**Remember**: This is a comprehensive test that validates the entire deployment pipeline. Take your time, follow each step carefully, and document everything for future improvements. diff --git a/.yamllint-ci.yml b/.yamllint-ci.yml index c700212..c10a1bc 100644 --- a/.yamllint-ci.yml +++ b/.yamllint-ci.yml @@ -1,5 +1,8 @@ extends: default +ignore: | + application/storage/ + rules: line-length: max: 120 # More reasonable for infrastructure code diff --git a/Makefile b/Makefile index e6cd2e3..66251d4 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # Makefile for Torrust Tracker Local Testing Infrastructure -.PHONY: help init plan apply destroy test clean status refresh-state ssh install-deps console vm-console lint lint-yaml lint-shell lint-markdown +.PHONY: help init plan apply destroy test clean status refresh-state ssh install-deps console vm-console lint lint-yaml lint-shell lint-markdown configure-local configure-production validate-config validate-config-production deploy-local deploy-production start-services stop-services # Default variables VM_NAME ?= torrust-tracker-demo @@ -189,7 +189,23 @@ clean-and-fix: ## Clean up all VMs and fix libvirt permissions @cd $(TERRAFORM_DIR) && rm -f terraform.tfstate terraform.tfstate.backup .terraform.lock.hcl 2>/dev/null || true @echo "3. 
Cleaning libvirt images:" @sudo rm -f /var/lib/libvirt/images/torrust-tracker-demo* /var/lib/libvirt/images/ubuntu-24.04-base.qcow2 2>/dev/null || true - @echo "4. Fixing libvirt setup:" + @echo "4. Cleaning application storage (generated configuration files):" + @if [ -d "application/storage" ]; then \ + echo " WARNING: This will delete all generated configuration files in application/storage/"; \ + echo " This includes nginx configs, tracker configs, and any cached data."; \ + echo " These files will be regenerated when you run 'make configure-local'."; \ + read -p " Do you want to delete application/storage? (y/N): " confirm; \ + if [ "$$confirm" = "y" ] || [ "$$confirm" = "Y" ]; then \ + echo " Removing application/storage..."; \ + rm -rf application/storage; \ + echo " ✓ Application storage cleaned"; \ + else \ + echo " Skipping application/storage cleanup"; \ + fi; \ + else \ + echo " No application/storage directory found"; \ + fi + @echo "5. Fixing libvirt setup:" @$(MAKE) fix-libvirt @echo "✓ Clean up complete. You can now run 'make apply' safely." @@ -342,3 +358,61 @@ vm-console: ## Access VM graphical console (GUI) echo "virt-viewer not found. Please install it:"; \ echo " sudo apt install virt-viewer"; \ fi + +# Configuration Management Targets +configure-local: ## Generate local environment configuration + @echo "Generating local environment configuration..." + @infrastructure/scripts/configure-env.sh local + +configure-production: ## Generate production environment configuration (requires secrets) + @echo "Generating production environment configuration..." + @infrastructure/scripts/configure-env.sh production + +validate-config: ## Validate generated configuration files + @echo "Validating configuration files..." + @infrastructure/scripts/validate-config.sh local + +validate-config-production: ## Validate production configuration files + @echo "Validating production configuration files..." 
+ @infrastructure/scripts/validate-config.sh production + +# Deployment workflow targets +deploy-local: configure-local ## Deploy VM and configure for local environment + @echo "Deploying local environment..." + @$(MAKE) apply + @echo "Waiting for VM to be ready..." + @sleep 30 + @echo "Starting application services..." + @$(MAKE) start-services + +deploy-production: configure-production ## Deploy and configure for production environment (requires secrets) + @echo "Deploying production environment..." + @$(MAKE) apply + @echo "Waiting for VM to be ready..." + @sleep 30 + @echo "Starting application services..." + @$(MAKE) start-services + +start-services: ## Start Docker Compose services in the VM + @echo "Starting Docker Compose services..." + @VM_IP=$$(cd $(TERRAFORM_DIR) && tofu output -raw vm_ip 2>/dev/null) || \ + VM_IP=$$(virsh domifaddr $(VM_NAME) | grep ipv4 | awk '{print $$4}' | cut -d'/' -f1); \ + if [ -n "$$VM_IP" ]; then \ + echo "Starting services on $$VM_IP..."; \ + ssh -o StrictHostKeyChecking=no torrust@$$VM_IP 'cd /home/torrust/github/torrust/torrust-tracker-demo/application && docker compose up -d'; \ + else \ + echo "Could not get VM IP. Is the VM deployed?"; \ + exit 1; \ + fi + +stop-services: ## Stop Docker Compose services in the VM + @echo "Stopping Docker Compose services..." + @VM_IP=$$(cd $(TERRAFORM_DIR) && tofu output -raw vm_ip 2>/dev/null) || \ + VM_IP=$$(virsh domifaddr $(VM_NAME) | grep ipv4 | awk '{print $$4}' | cut -d'/' -f1); \ + if [ -n "$$VM_IP" ]; then \ + echo "Stopping services on $$VM_IP..."; \ + ssh -o StrictHostKeyChecking=no torrust@$$VM_IP 'cd /home/torrust/github/torrust/torrust-tracker-demo/application && docker compose down'; \ + else \ + echo "Could not get VM IP. 
Is the VM deployed?"; \ + exit 1; \ + fi diff --git a/application/.env.production b/application/.env.production deleted file mode 100644 index 45f573c..0000000 --- a/application/.env.production +++ /dev/null @@ -1,24 +0,0 @@ -# Torrust Tracker Demo - Production Environment Configuration -# -# This configuration uses MySQL as the default database backend. -# Make sure to change the default passwords before deployment! - -USER_ID=1000 - -# Database Configuration (MySQL) -MYSQL_ROOT_PASSWORD=secure_root_password_change_me -MYSQL_DATABASE=torrust_tracker -MYSQL_USER=torrust -MYSQL_PASSWORD=secure_password_change_me - -# Tracker Database Configuration -TORRUST_TRACKER_CONFIG_OVERRIDE_CORE__DATABASE__DRIVER=mysql -TORRUST_TRACKER_CONFIG_OVERRIDE_CORE__DATABASE__PATH=mysql://torrust:${MYSQL_PASSWORD}@mysql:3306/torrust_tracker - -# Tracker -TORRUST_TRACKER_CONFIG_TOML= -TORRUST_TRACKER_CONFIG_OVERRIDE_HTTP_API__ACCESS_TOKENS__ADMIN='MyAccessToken' - -# Grafana -GF_SECURITY_ADMIN_USER=admin -GF_SECURITY_ADMIN_PASSWORD=admin diff --git a/application/compose.yaml b/application/compose.yaml index a0ce3d1..1b3afad 100644 --- a/application/compose.yaml +++ b/application/compose.yaml @@ -108,11 +108,12 @@ services: tty: true restart: unless-stopped environment: - - USER_ID=${USER_ID} - - TORRUST_TRACKER_DATABASE=${TORRUST_TRACKER_DATABASE:-mysql} - - TORRUST_TRACKER_CONFIG_OVERRIDE_CORE__DATABASE__DRIVER=${TORRUST_TRACKER_CONFIG_OVERRIDE_CORE__DATABASE__DRIVER:-mysql} - - TORRUST_TRACKER_CONFIG_OVERRIDE_CORE__DATABASE__PATH=${TORRUST_TRACKER_CONFIG_OVERRIDE_CORE__DATABASE__PATH:-mysql://torrust:password@mysql:3306/torrust_tracker} - - TORRUST_TRACKER_CONFIG_OVERRIDE_HTTP_API__ACCESS_TOKENS__ADMIN=${TORRUST_TRACKER_CONFIG_OVERRIDE_HTTP_API__ACCESS_TOKENS__ADMIN:-MyAccessToken} + - USER_ID=${USER_ID:-1000} + # Database connection for tracker (using Figment override pattern) + - TORRUST_TRACKER_CONFIG_OVERRIDE_CORE__DATABASE__DRIVER=mysql + - 
TORRUST_TRACKER_CONFIG_OVERRIDE_CORE__DATABASE__PATH=mysql://${MYSQL_USER}:${MYSQL_PASSWORD}@mysql:3306/${MYSQL_DATABASE} + # Admin API token for tracker (using Figment override pattern) + - TORRUST_TRACKER_CONFIG_OVERRIDE_HTTP_API__ACCESS_TOKENS__ADMIN=${TRACKER_ADMIN_TOKEN} networks: - backend_network ports: diff --git a/application/share/bin/install.sh b/application/share/bin/install.sh index 2df8a6a..80e2413 100755 --- a/application/share/bin/install.sh +++ b/application/share/bin/install.sh @@ -1,21 +1,42 @@ #!/bin/bash +# Torrust Tracker Demo Installation Script +# This script creates the required directory structure for the application. +# Following 12-factor principles, it expects .env to be provided by the infrastructure layer. + if ! [ -f "./.env" ]; then - echo "Creating compose .env './.env'" - cp .env.production .env + echo "ERROR: Environment file './.env' not found!" + echo "The .env file must be provided by the infrastructure configuration system." + echo "Expected location: $(pwd)/.env" + echo "" + echo "To generate the .env file, run:" + echo " make configure-local # For local development" + echo " make configure-production # For production deployment" + exit 1 fi +echo "Found environment file: ./.env" + ## Proxy mkdir -p ./storage/proxy/etc/nginx-conf mkdir -p ./storage/proxy/webroot mkdir -p ./storage/dhparam +# Verify nginx configuration exists (should be provided by infrastructure) if ! [ -f "./storage/proxy/etc/nginx-conf/nginx.conf" ]; then - echo "Creating proxy config file: './storage/proxy/etc/nginx-conf/nginx.conf'" - cp ./share/container/default/config/nginx.conf ./storage/proxy/etc/nginx-conf/nginx.conf + echo "ERROR: Nginx configuration file './storage/proxy/etc/nginx-conf/nginx.conf' not found!" + echo "This file should be generated by the infrastructure configuration system." 
+ echo "Expected location: $(pwd)/storage/proxy/etc/nginx-conf/nginx.conf" + echo "" + echo "To generate the configuration file, run:" + echo " make configure-local # For local development" + echo " make configure-production # For production deployment" + exit 1 fi +echo "Found nginx configuration: ./storage/proxy/etc/nginx-conf/nginx.conf" + ## Certbot mkdir -p ./storage/certbot/etc @@ -33,16 +54,37 @@ fi mkdir -p ./storage/tracker/etc -if ! [ -f "./storage/tracker/etc/tracker.prod.container.sqlite3.toml" ]; then - echo "Creating tracker configuration: './storage/tracker/etc/tracker.toml'" - cp ./share/container/default/config/tracker.prod.container.sqlite3.toml ./storage/tracker/etc/tracker.toml +# Verify tracker configuration exists (should be provided by infrastructure) +if ! [ -f "./storage/tracker/etc/tracker.toml" ]; then + echo "ERROR: Tracker configuration file './storage/tracker/etc/tracker.toml' not found!" + echo "This file should be generated by the infrastructure configuration system." + echo "Expected location: $(pwd)/storage/tracker/etc/tracker.toml" + echo "" + echo "To generate the configuration file, run:" + echo " make configure-local # For local development" + echo " make configure-production # For production deployment" + exit 1 fi +echo "Found tracker configuration: ./storage/tracker/etc/tracker.toml" + ## Prometheus mkdir -p ./storage/prometheus/etc +# Verify prometheus configuration exists (should be provided by infrastructure) if ! [ -f "./storage/prometheus/etc/prometheus.yml" ]; then - echo "Creating prometheus config file: './storage/prometheus/etc/prometheus.yml'" - cp ./share/container/default/config/prometheus.yml ./storage/prometheus/etc/prometheus.yml + echo "ERROR: Prometheus configuration file './storage/prometheus/etc/prometheus.yml' not found!" + echo "This file should be generated by the infrastructure configuration system." 
+ echo "Expected location: $(pwd)/storage/prometheus/etc/prometheus.yml" + echo "" + echo "To generate the configuration file, run:" + echo " make configure-local # For local development" + echo " make configure-production # For production deployment" + exit 1 fi + +echo "Found prometheus configuration: ./storage/prometheus/etc/prometheus.yml" + +echo "Installation completed successfully!" +echo "All required directories created and configuration files verified." diff --git a/application/share/container/default/config/prometheus.yml b/application/share/container/default/config/prometheus.yml deleted file mode 100644 index 3240137..0000000 --- a/application/share/container/default/config/prometheus.yml +++ /dev/null @@ -1,20 +0,0 @@ ---- -global: - scrape_interval: 15s # How often to scrape metrics - -scrape_configs: - - job_name: "tracker_stats" - metrics_path: "/api/v1/stats" - params: - token: ["MyAccessToken"] - format: ["prometheus"] - static_configs: - - targets: ["tracker:1212"] - - - job_name: "tracker_metrics" - metrics_path: "/api/v1/metrics" - params: - token: ["MyAccessToken"] - format: ["prometheus"] - static_configs: - - targets: ["tracker:1212"] diff --git a/application/share/container/default/config/tracker.prod.container.sqlite3.toml b/application/share/container/default/config/tracker.prod.container.sqlite3.toml deleted file mode 100644 index d91af30..0000000 --- a/application/share/container/default/config/tracker.prod.container.sqlite3.toml +++ /dev/null @@ -1,62 +0,0 @@ -[metadata] -app = "torrust-tracker" -purpose = "configuration" -schema_version = "2.0.0" - -[logging] -#threshold = "trace" -threshold = "info" - -[core] -listed = false -private = false - -[core.tracker_policy] -persistent_torrent_completed_stat = true - -[core.announce_policy] -interval = 300 -interval_min = 300 - -[core.net] -on_reverse_proxy = true - -[core.database] -driver = "sqlite3" -path = "/var/lib/torrust/tracker/database/sqlite3.db" - -# UDP Tracker Configuration -# 
Two UDP ports are configured for different purposes: - -# Port 6868: Internal testing UDP tracker -# - Not listed on public tracker lists (like newtrackon.org) -# - Used for internal testing and development -# - Provides alternative endpoint when port 6969 is under heavy load -# - Guarantees developers can make requests without timeouts -[[udp_trackers]] -bind_address = "0.0.0.0:6868" - -# Port 6969: Official public UDP tracker -# - Primary UDP tracker port listed on public tracker lists -# - Always under heavy usage in production -# - Standard BitTorrent UDP announce endpoint -[[udp_trackers]] -bind_address = "0.0.0.0:6969" - -# HTTP Tracker Configuration -# Port 7070: Internal HTTP tracker (HTTP-only, no HTTPS) -# - Not directly accessible from internet (internal/private network only) -# - Accessed through Nginx reverse proxy which provides HTTPS termination -# - Used for HTTP-based tracker announces -[[http_trackers]] -bind_address = "0.0.0.0:7070" - -# API and Metrics Configuration -# Port 1212: Tracker API and metrics endpoint -# - Used internally (private network) between Nginx proxy and tracker service -# - Exposed publicly through Nginx proxy at https://tracker.torrust-demo.com/api/... 
-# - Main endpoints: /api/v1/stats, /api/v1/metrics, /api/v1/torrents -# - Used by Prometheus for monitoring and Grafana dashboards -# - See application/docs/firewall-requirements.md for complete port documentation -[http_api] -bind_address = "0.0.0.0:1212" diff --git a/docs/README.md b/docs/README.md index 70cfbc3..4b53bac 100644 --- a/docs/README.md +++ b/docs/README.md @@ -37,6 +37,16 @@ This directory currently contains cross-cutting documentation: - [Phase 1: MySQL Migration](issues/12-use-mysql-instead-of-sqlite-by-default.md) - Detailed implementation plan for database migration from SQLite to MySQL +### 🔧 [`refactoring/`](refactoring/) (Refactoring Documentation) + +**Major refactoring initiatives and changes** - Documentation of significant +codebase changes, architectural improvements, and migration summaries. + +**Current Refactoring Documentation:** + +- [Integration Test Refactor Summary](refactoring/integration-test-refactor-summary.md) - + Summary of changes made to align integration testing with 12-factor configuration principles + ### Future Categories The following directories can be created as needed: diff --git a/docs/adr/004-configuration-approach-files-vs-environment-variables.md b/docs/adr/004-configuration-approach-files-vs-environment-variables.md new file mode 100644 index 0000000..74fab53 --- /dev/null +++ b/docs/adr/004-configuration-approach-files-vs-environment-variables.md @@ -0,0 +1,300 @@ +# ADR-004: Configuration Approach - Files vs Environment Variables + +## Status + +Accepted + +## Context + +As part of the 12-Factor App refactoring (Phase 1), we need to decide how to handle +application configuration for the Torrust Tracker Demo. There are two primary approaches: + +1. **File-based configuration**: Store configuration in template-generated files + (e.g., `tracker.toml`) +2. 
**Environment variable configuration**: Use environment variables for all + configuration values + +Both approaches have trade-offs in terms of maintainability, deployment complexity, +and operational flexibility. + +## Decision + +We will use a **hybrid approach** that prioritizes file-based configuration with +selective use of environment variables: + +### File-based Configuration (Primary) + +- Application behavior settings +- Port configurations +- Policy settings (timeouts, intervals, etc.) +- Feature flags (listed, private, stats enabled) +- Non-sensitive defaults + +### Environment Variables (Secondary - Secrets & Environment-Specific Only) + +- Database credentials and connection strings +- API tokens and authentication secrets +- SSL certificates and keys +- External IP addresses +- Domain names +- Infrastructure-specific settings + +## Rationale + +### Why File-based Configuration is Better for This Project + +#### 1. Project Scope and Purpose + +This repository is designed as an **automated installer/deployment tool** rather +than a cloud-native, horizontally scalable application. The primary goal is to: + +- Deploy a single Torrust Tracker instance +- Provide infrastructure automation +- Enable easy manual maintenance post-deployment + +#### 2. Operational Advantages + +- **Easier maintenance**: Administrators can modify `tracker.toml` and restart the + service without recreating containers +- **Direct access**: System administrators can edit configuration files directly + on the server +- **Faster iteration**: Configuration changes don't require container recreation, + only service restart +- **Simpler troubleshooting**: All non-secret configuration is visible in + human-readable files + +#### 3. 
Deployment Simplicity + +- **Fewer environment variables**: Reduces complexity in Docker Compose and + deployment scripts +- **Cleaner compose.yaml**: Environment sections remain minimal and focused on secrets +- **Reduced coupling**: Application configuration is decoupled from container + orchestration + +#### 4. Administrative Experience + +- **Familiar patterns**: System administrators expect to find configuration in files + like `/etc/torrust/tracker/tracker.toml` +- **Documentation alignment**: Configuration files can be documented and versioned + alongside code +- **Backup friendly**: Configuration files are easier to backup and restore as part + of standard system administration + +### When Environment Variables Are Appropriate + +#### 1. Secrets Management + +```bash +# Database credentials +MYSQL_ROOT_PASSWORD=secret_password +MYSQL_PASSWORD=user_password + +# API authentication +TRACKER_ADMIN_TOKEN=admin_token_123 + +# Grafana admin credentials +GF_SECURITY_ADMIN_PASSWORD=secure_password +``` + +#### 2. Environment-Specific Values + +```bash +# Network configuration that varies by deployment +EXTERNAL_IP=192.168.1.100 +DOMAIN_NAME=tracker.example.com + +# Infrastructure differences +ON_REVERSE_PROXY=true +LOG_LEVEL=info +``` + +#### 3. 
Container Runtime Configuration + +```bash +# Docker-specific settings +USER_ID=1000 +MYSQL_DATABASE=torrust_tracker +``` + +## Implementation Examples + +### **File-based Configuration** (`tracker.toml`) + +```toml +[metadata] +app = "torrust-tracker" +purpose = "configuration" +schema_version = "2.0.0" + +[logging] +threshold = "debug" # Environment-specific value + +[core] +inactive_peer_cleanup_interval = 600 +listed = false +private = false +tracker_usage_statistics = true + +[core.announce_policy] +interval = 120 +interval_min = 120 + +[core.database] +driver = "mysql" +# Path (connection string) set via environment variable at runtime +path = "" + +[core.net] +external_ip = "0.0.0.0" +on_reverse_proxy = false # Environment-specific value + +[core.tracker_policy] +max_peer_timeout = 900 +persistent_torrent_completed_stat = false +remove_peerless_torrents = true + +# Admin token set via environment variable at runtime +[http_api.access_tokens] +# admin = "" + +[[udp_trackers]] +bind_address = "0.0.0.0:6868" + +[[udp_trackers]] +bind_address = "0.0.0.0:6969" + +[[http_trackers]] +bind_address = "0.0.0.0:7070" +``` + +### **Environment Variables** (`.env`) + +```bash +# Secrets only +MYSQL_ROOT_PASSWORD=secret_root_password +MYSQL_PASSWORD=secret_user_password +TRACKER_ADMIN_TOKEN=admin_secret_token + +# Docker runtime +USER_ID=1000 +MYSQL_DATABASE=torrust_tracker +MYSQL_USER=torrust + +# Grafana admin +GF_SECURITY_ADMIN_USER=admin +GF_SECURITY_ADMIN_PASSWORD=admin_password +``` + +## Benefits + +### **For System Administrators** + +- Configuration changes are made in familiar file locations +- No need to understand Docker environment variable injection +- Standard Unix administration patterns apply +- Easy to back up and restore configurations + +### **For Developers** + +- Cleaner separation of concerns +- Fewer template variables to manage +- Simpler Docker Compose files +- Easier testing and validation + +### **For Operations** + +- Faster configuration updates (restart vs 
recreate) +- Better debugging capabilities +- Standard logging and monitoring patterns +- Familiar deployment patterns + +## Trade-offs + +### **What We Give Up** + +- **Cloud-native patterns**: Less suitable for Kubernetes or other orchestrators +- **Dynamic reconfiguration**: Cannot change configuration without file access +- **Secret injection**: Some secrets still appear in generated config files (for example, the + API token embedded in `prometheus.yml`; see Exceptions below) + +### **What We Gain** + +- **Operational simplicity**: Standard system administration patterns +- **Deployment reliability**: Fewer moving parts in the deployment process +- **Administrative control**: Direct access to configuration without container knowledge +- **Performance**: No environment variable processing overhead + +## Exceptions + +### **Prometheus Configuration** + +Prometheus does not support runtime environment variable substitution in its configuration +files. Therefore, API tokens for scraping Torrust Tracker metrics must be embedded in +the `prometheus.yml` file during template generation: + +```yaml +scrape_configs: + - job_name: "torrust-tracker-stats" + static_configs: + - targets: ["tracker:1212"] + metrics_path: "/api/v1/stats" + params: + token: ["admin_token_123"] # Token embedded at generation time + format: ["prometheus"] +``` + +This is an acceptable exception because: + +- Prometheus config files are not typically edited by administrators +- The token is only for internal monitoring within the Docker network +- The configuration is regenerated when the environment changes + +## Consequences + +### **Configuration Management Process** + +1. **Environment-specific values**: Set in `infrastructure/config/environments/{environment}.env` +2. **Template processing**: Generate config files using `configure-env.sh` +3. **Validation**: Validate generated configurations using `validate-config.sh` +4. **Deployment**: Deploy with file-based configurations + +### **Maintenance Workflow** + +1. 
**For secrets**: Update `.env` file and restart containers +2. **For behavior**: Edit `tracker.toml` and restart tracker service +3. **For infrastructure**: Update templates and regenerate configurations + +### **Future Considerations** + +- If the project evolves toward cloud-native deployment, this decision can be revisited +- Environment variable overrides can be added later without breaking existing deployments +- The hybrid approach provides flexibility for future architectural changes + +## Alternatives Considered + +### **Full Environment Variable Approach** + +- **Pros**: Cloud-native, 12-factor compliant, dynamic configuration +- **Cons**: Complex Docker Compose, harder maintenance, container recreation required + +### **Full File-based Approach** + +- **Pros**: Maximum simplicity, traditional Unix patterns +- **Cons**: Secrets in files, harder automation, less secure + +### **External Configuration Service** + +- **Pros**: Centralized management, audit trails, dynamic updates +- **Cons**: Additional infrastructure, complexity overkill for single-instance deployment + +## Related Decisions + +- [ADR-002: Docker for All Services](002-docker-for-all-services.md) - Establishes container-based deployment +- [ADR-003: Use MySQL Over MariaDB](003-use-mysql-over-mariadb.md) - Database choice + affects connection configuration + +## References + +- [The Twelve-Factor App](https://12factor.net/config) +- [Torrust Tracker Configuration Documentation](https://docs.rs/torrust-tracker) +- [Docker Compose Environment Variables](https://docs.docker.com/compose/environment-variables/) diff --git a/docs/guides/integration-testing-guide.md b/docs/guides/integration-testing-guide.md index 1d1696b..4d90ded 100644 --- a/docs/guides/integration-testing-guide.md +++ b/docs/guides/integration-testing-guide.md @@ -41,24 +41,34 @@ For example: cd /home/yourname/Documents/git/committer/me/github/torrust/torrust-tracker-demo ``` -**⚠️ Important**: All commands in this guide assume 
you are running from the -project root directory. If you see "command not found" errors, verify you are +**⚠️ CRITICAL**: All commands in this guide assume you are running from the +**project root directory**. If you see "command not found" errors, verify you are in the correct directory. +**Working Directory Indicator**: Commands will be shown with this format: + +```bash +# [PROJECT_ROOT] - Run from project root directory +make command + +# [TERRAFORM_DIR] - Run from infrastructure/terraform directory +cd infrastructure/terraform && tofu command +``` + ### 1.2 Check for Existing Resources ⚠️ **WARNING**: The following commands will destroy existing VMs and remove data. Only proceed if you want to start with a completely clean environment. ```bash -# Check for existing VMs that might conflict +# [PROJECT_ROOT] Check for existing VMs that might conflict virsh list --all | grep torrust-tracker-demo || echo "✅ No conflicting VM found" -# Check for existing libvirt volumes +# [PROJECT_ROOT] Check for existing libvirt volumes virsh vol-list user-default 2>/dev/null | grep torrust-tracker-demo || \ echo "✅ No conflicting volumes found" -# Check for existing OpenTofu state +# [PROJECT_ROOT] Check for existing OpenTofu state ls -la infrastructure/terraform/terraform.tfstate* 2>/dev/null || \ echo "✅ No existing state files" ``` @@ -71,7 +81,7 @@ ls -la infrastructure/terraform/terraform.tfstate* 2>/dev/null || \ and state files. ```bash -# Complete cleanup - removes VMs, state files, and fixes permissions +# [PROJECT_ROOT] Complete cleanup - removes VMs, state files, and fixes permissions time make clean-and-fix ``` @@ -88,18 +98,18 @@ time make clean-and-fix ### 1.4 Verify Clean State ```bash -# Verify no conflicting resources remain +# [PROJECT_ROOT] Verify no conflicting resources remain echo "=== Verifying Clean State ===" -# Check VMs +# [PROJECT_ROOT] Check VMs virsh list --all | grep torrust-tracker-demo && \ echo '❌ VM still exists!' 
|| echo '✅ No VM conflicts' -# Check volumes in user-default pool +# [PROJECT_ROOT] Check volumes in user-default pool virsh vol-list user-default 2>/dev/null | grep torrust-tracker-demo && \ echo '❌ Volumes still exist!' || echo '✅ No volume conflicts' -# Check OpenTofu state +# [PROJECT_ROOT] Check OpenTofu state ls infrastructure/terraform/terraform.tfstate* 2>/dev/null && \ echo '❌ State files still exist!' || echo '✅ No state file conflicts' ``` @@ -108,25 +118,51 @@ ls infrastructure/terraform/terraform.tfstate* 2>/dev/null && \ ### 1.4.1 Manual Cleanup (if needed) -If the verification step shows "❌ Volumes still exist!" then manually clean them: +⚠️ **CRITICAL**: This step is often **required** because `make clean-and-fix` +sometimes misses libvirt volumes, causing deployment failures with errors like: -```bash -# List conflicting volumes -virsh vol-list user-default 2>/dev/null | grep torrust-tracker-demo +- `storage volume 'torrust-tracker-demo-cloudinit.iso' exists already` +- `storage volume 'torrust-tracker-demo.qcow2' exists already` -# Delete each volume manually -virsh vol-delete torrust-tracker-demo-cloudinit.iso user-default -virsh vol-delete torrust-tracker-demo.qcow2 user-default +If the verification step shows "❌ Volumes still exist!" 
**OR** if you encounter +volume conflicts during deployment, perform this manual cleanup: -# Verify cleanup -virsh vol-list user-default 2>/dev/null | grep torrust-tracker-demo && \ +```bash +# [PROJECT_ROOT] List all volumes to see conflicts +echo "=== Current volumes in user-default pool ===" +virsh vol-list user-default + +# [PROJECT_ROOT] List only conflicting volumes +virsh vol-list user-default | grep torrust-tracker-demo || echo "No torrust volumes found" + +# [PROJECT_ROOT] Delete ALL torrust-tracker-demo volumes +# Common volumes that need cleanup: +virsh vol-delete torrust-tracker-demo-cloudinit.iso user-default 2>/dev/null || \ + echo "cloudinit.iso not found" +virsh vol-delete torrust-tracker-demo.qcow2 user-default 2>/dev/null || \ + echo "VM disk not found" + +# [PROJECT_ROOT] Verify complete cleanup +echo "=== Verifying volume cleanup ===" +virsh vol-list user-default | grep torrust-tracker-demo && \ echo '❌ Volumes still exist!' || echo '✅ No volume conflicts' ``` **Expected Output**: Should show "✅ No volume conflicts" after manual cleanup. -**What This Fixes**: Removes leftover volumes that `make clean-and-fix` -sometimes misses. +**What This Fixes**: + +- Removes leftover volumes that `make clean-and-fix` sometimes misses +- Prevents "volume already exists" errors during deployment +- Ensures a truly clean state for fresh deployments + +**Why This Happens**: The `make clean-and-fix` command primarily handles +OpenTofu state and VM definitions, but libvirt volumes can persist independently. +This is especially common when: + +- Previous deployments were interrupted +- Manual VM deletion was performed +- OpenTofu state was corrupted or manually removed ### 1.5 Set Up SSH Key Configuration @@ -136,7 +172,7 @@ caused SSH connection failures! 
#### For Default SSH Keys (id_rsa) ```bash -# Set up SSH key configuration for VM access +# [PROJECT_ROOT] Set up SSH key configuration for VM access time make setup-ssh-key ``` @@ -148,10 +184,10 @@ time make setup-ssh-key 1. **Configure the public key in terraform**: ```bash -# Get your non-default public key +# [PROJECT_ROOT] Get your non-default public key cat ~/.ssh/torrust_rsa.pub -# Manually edit the terraform configuration +# [PROJECT_ROOT] Manually edit the terraform configuration vim infrastructure/terraform/local.tfvars # Add your public key content: @@ -161,12 +197,12 @@ ssh_public_key = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQC... your-key-here" 1. **Configure SSH client to use the correct private key**: ```bash -# Option 1: Create/edit SSH config +# [PROJECT_ROOT] Option 1: Create/edit SSH config echo "Host 192.168.122.* IdentityFile ~/.ssh/torrust_rsa IdentitiesOnly yes" >> ~/.ssh/config -# Option 2: Always specify key explicitly when connecting +# [PROJECT_ROOT] Option 2: Always specify key explicitly when connecting # ssh -i ~/.ssh/torrust_rsa torrust@VM_IP ``` @@ -195,7 +231,7 @@ configuration. **Verify Configuration**: ```bash -# Ensure the file contains your actual public key (not placeholder) +# [PROJECT_ROOT] Ensure the file contains your actual public key (not placeholder) cat infrastructure/terraform/local.tfvars | grep ssh_public_key # Should show your full public key, not "REPLACE_WITH_YOUR_SSH_PUBLIC_KEY" @@ -204,7 +240,7 @@ cat infrastructure/terraform/local.tfvars | grep ssh_public_key ### 1.6 Initialize OpenTofu ```bash -# Initialize OpenTofu providers +# [PROJECT_ROOT] Initialize OpenTofu providers time make init ``` @@ -219,12 +255,129 @@ time make init --- +## Step 1.7: Generate Configuration Files (New Workflow) + +⚠️ **IMPORTANT**: Recent changes introduced a new configuration management system +that generates final configuration files from templates and environment values. 
+ +### 1.7.1 Generate Local Environment Configuration + +```bash +# [PROJECT_ROOT] Generate local environment configuration +time make configure-local +``` + +**Expected Output**: + +- Configuration files generated from templates +- Environment values applied to templates +- **Time**: ~2 seconds + +**What This Creates**: Final configuration files including: + +- `application/.env` - Docker Compose environment file +- `application/storage/tracker/etc/tracker.toml` - Tracker configuration +- `application/storage/prometheus/etc/prometheus.yml` - Prometheus configuration +- `infrastructure/cloud-init/` - VM provisioning files + +These files are generated from templates in `infrastructure/config/templates/` using +values from `infrastructure/config/environments/local.env`. + +### 1.7.2 Validate Generated Configuration + +```bash +# [PROJECT_ROOT] Validate generated configuration files +time make validate-config +``` + +**Expected Output**: + +- All configuration files pass validation +- YAML syntax checks pass +- Template rendering successful +- **Time**: ~3 seconds + +**What This Verifies**: Generated configuration files are syntactically correct +and ready for deployment. + +## Step 1.8: Clean Application Storage (Optional but Recommended) + +⚠️ **DESTRUCTIVE OPERATION WARNING**: This step permanently deletes all +application data including: + +- **Database data** (MySQL databases, user accounts, torrents) +- **SSL certificates** (Let's Encrypt certificates, private keys) +- **Configuration files** (tracker.toml, prometheus.yml, etc.) +- **Application logs** and persistent data + +**When to use this step**: + +- ✅ Starting completely fresh integration test +- ✅ Previous test left corrupted data +- ✅ Database schema changes require clean slate +- ✅ SSL certificate issues need reset +- ❌ **NEVER** on production systems + +### 1.8.1 Remove Application Storage + +```bash +# [PROJECT_ROOT] Remove all application storage (DESTRUCTIVE!) 
+echo "=== WARNING: About to delete all application data ===" +echo "This will permanently remove:" +echo " - Database data (MySQL)" +echo " - SSL certificates" +echo " - Configuration files" +echo " - Application logs" +echo "" +read -p "Are you sure you want to continue? (type 'yes' to confirm): " confirm + +if [ "$confirm" = "yes" ]; then + echo "Removing application storage..." + rm -rf application/storage/ + echo "✅ Application storage deleted" +else + echo "❌ Operation cancelled" +fi +``` + +**Alternative non-interactive approach**: + +```bash +# [PROJECT_ROOT] Force remove without confirmation (use carefully!) +rm -rf application/storage/ +echo "✅ Application storage deleted" +``` + +### 1.8.2 Verify Storage Cleanup + +```bash +# [PROJECT_ROOT] Verify storage folder is gone +ls -la application/storage/ 2>/dev/null && \ + echo '❌ Storage folder still exists!' || echo '✅ Storage folder removed' + +# [PROJECT_ROOT] Verify Docker volumes are clean (if Docker is running) +docker volume ls | grep torrust-tracker-demo && \ + echo '❌ Docker volumes still exist!' || echo '✅ No Docker volumes remain' +``` + +**Expected Output**: Both checks should show "✅" (clean state). + +**What This Achieves**: Ensures a completely clean application state for testing, +preventing issues caused by: + +- Corrupted database data from previous tests +- Expired or invalid SSL certificates +- Configuration conflicts from previous deployments +- Stale application logs affecting debugging + +**Note**: Removing `application/storage/` also deletes the configuration files +generated in Step 1.7 (for example `application/storage/tracker/etc/tracker.toml` +and `application/storage/prometheus/etc/prometheus.yml`). Re-run +`make configure-local` (Step 1.7.1) afterwards to regenerate them. + +--- + ## Step 2: Deploy Fresh Virtual Machine ### 2.1 Plan the Deployment ```bash -# Review what will be created +# [PROJECT_ROOT] Review what will be created time make plan ``` @@ -242,7 +395,7 @@ time make plan ### 2.2 Deploy the VM ```bash -# Deploy VM with full configuration (this takes time!) +# [PROJECT_ROOT] Deploy VM with full configuration (this takes time!) 
time make apply ``` @@ -275,7 +428,7 @@ time make apply ### 2.3 Verify VM is Running ```bash -# Check VM status +# [PROJECT_ROOT] Check VM status virsh list --all ``` @@ -287,6 +440,26 @@ virsh list --all 1 torrust-tracker-demo running ``` +### 2.4 Refresh OpenTofu State (Important!) + +⚠️ **CRITICAL STEP**: After VM deployment, OpenTofu's state may not immediately +reflect the VM's IP address assigned by DHCP. This is a known issue where the +libvirt provider state becomes stale after cloud-init completes. + +```bash +# [PROJECT_ROOT] Refresh OpenTofu state to detect IP assignment +time make refresh-state +``` + +**Expected Output**: + +- OpenTofu state refreshed successfully +- VM IP address properly detected +- **Time**: ~3 seconds + +**What This Fixes**: Ensures OpenTofu knows the VM's actual IP address, preventing +"No IP assigned yet" issues in subsequent commands. + --- ## Step 3: Wait for Cloud-Init Completion (Critical!) @@ -300,7 +473,7 @@ has been improved to allow SSH access throughout the process. ### 3.1 Get VM IP Address ```bash -# Get IP from libvirt (more reliable during cloud-init) +# [PROJECT_ROOT] Get IP from libvirt (more reliable during cloud-init) VM_IP=$(virsh domifaddr torrust-tracker-demo | grep ipv4 | \ awk '{print $4}' | cut -d'/' -f1) echo "VM IP: $VM_IP" @@ -464,7 +637,7 @@ for better compatibility with modern compose.yaml files. ### 4.1 Test VM Access ```bash -# Test basic VM connectivity +# [PROJECT_ROOT] Test basic VM connectivity time ./infrastructure/tests/test-integration.sh access ``` @@ -477,7 +650,7 @@ time ./infrastructure/tests/test-integration.sh access ### 4.2 Test Docker Installation ```bash -# Test Docker functionality +# [PROJECT_ROOT] Test Docker functionality time ./infrastructure/tests/test-integration.sh docker ``` @@ -495,7 +668,7 @@ available and uses the appropriate command. 
### 4.3 Setup Torrust Tracker Demo ```bash -# Clone and setup the Torrust Tracker repository +# [PROJECT_ROOT] Clone and setup the Torrust Tracker repository time ./infrastructure/tests/test-integration.sh setup ``` @@ -511,7 +684,7 @@ configuration. ### 4.4 Start Torrust Tracker Services ```bash -# Pull images and start all services +# [PROJECT_ROOT] Pull images and start all services time ./infrastructure/tests/test-integration.sh start ``` @@ -532,21 +705,25 @@ time ./infrastructure/tests/test-integration.sh start ### 4.5 Test Service Endpoints ```bash -# Test all API endpoints +# [PROJECT_ROOT] Test all API endpoints time ./infrastructure/tests/test-integration.sh endpoints ``` **Expected Output**: -- HTTP API responding on port 7070 -- Metrics endpoint responding on port 1212 +- HTTP API responding through nginx proxy on port 80 +- Health check API accessible without authentication +- Stats API requires authentication token - UDP ports listening (6868, 6969) - **Time**: ~15 seconds +**Note**: The integration test script may fail on endpoint testing due to authentication +requirements. For manual testing, see Step 5.2 for the correct endpoint testing procedures. + ### 4.6 Test Monitoring Services ```bash -# Test Prometheus and Grafana +# [PROJECT_ROOT] Test Prometheus and Grafana time ./infrastructure/tests/test-integration.sh monitoring ``` @@ -559,7 +736,7 @@ time ./infrastructure/tests/test-integration.sh monitoring ### 4.7 Run Complete Integration Test Suite ```bash -# Run all tests in sequence +# [PROJECT_ROOT] Run all tests in sequence time ./infrastructure/tests/test-integration.sh full-test ``` @@ -580,51 +757,166 @@ Tracker deployment. 
### 5.1 SSH Into VM and Explore ```bash -# Connect to VM for manual inspection +# [PROJECT_ROOT] Connect to VM for manual inspection make ssh ``` **Inside the VM, you can run**: ```bash -# Check cloud-init logs +# [VM_REMOTE] Check cloud-init logs sudo cat /var/log/cloud-init-output.log | tail -20 -# Check running services +# [VM_REMOTE] Check running services docker compose ps -# Check service logs +# [VM_REMOTE] Check service logs docker compose logs --tail=20 -# Check system status +# [VM_REMOTE] Check system status sudo systemctl status docker sudo ufw status verbose -# Check Torrust Tracker logs +# [VM_REMOTE] Check Torrust Tracker logs docker compose logs torrust-tracker --tail=20 -# Exit the VM +# [VM_REMOTE] Exit the VM exit ``` ### 5.2 Test External Access (from Host) +**⚠️ CRITICAL NETWORK ARCHITECTURE UNDERSTANDING:** + +The deployment uses **double virtualization**: + +1. **VM Level**: VM has IP (e.g., `192.168.122.253`) with specific ports exposed +2. **Docker Network Level**: Inside VM, Docker Compose creates internal network +3. **Nginx Proxy**: Routes external traffic from port 80 to internal services + +**Port Access Rules**: + +- ✅ **Port 80**: Nginx proxy (accessible from host) → routes to internal services +- ✅ **UDP ports 6868, 6969**: Direct tracker access (accessible from host) +- ❌ **Internal ports** (1212, 7070, 3000, 9090): Only accessible within Docker network + +#### 5.2.1 Get VM IP and Test API Endpoints + ```bash -# Get VM IP for external testing +# [PROJECT_ROOT] Get VM IP for external testing VM_IP=$(cd infrastructure/terraform && tofu output -raw vm_ip) echo "VM IP: $VM_IP" -# Test HTTP API from host -curl -s http://$VM_IP:7070/api/v1/stats | jq . || echo "API test failed" +# [PROJECT_ROOT] Test health check API (no authentication required) +curl -s http://$VM_IP/api/health_check | jq . 
-# Test metrics endpoint from host -curl -s http://$VM_IP:1212/metrics | head -10 +# [PROJECT_ROOT] Test stats API (requires authentication token) +# Note: Get the token from the .env file in the VM +TOKEN="local-dev-admin-token-12345" +curl -s "http://$VM_IP/api/v1/stats?token=$TOKEN" | jq . ``` **Expected Output**: -- JSON response from stats API -- Prometheus metrics data +- **Health check**: + + ```json + { + "status": "Ok" + } + ``` + +- **Stats API** (with pretty JSON formatting): + + ```json + { + "torrents": 0, + "seeders": 0, + "completed": 0, + "leechers": 0, + "tcp4_connections_handled": 0, + "tcp4_announces_handled": 0, + "tcp4_scrapes_handled": 0, + "tcp6_connections_handled": 0, + "tcp6_announces_handled": 0, + "tcp6_scrapes_handled": 0, + "udp_requests_aborted": 0, + "udp_requests_banned": 0, + "udp_banned_ips_total": 0, + "udp_avg_connect_processing_time_ns": 0, + "udp_avg_announce_processing_time_ns": 0, + "udp_avg_scrape_processing_time_ns": 0, + "udp4_requests": 0, + "udp4_connections_handled": 0, + "udp4_announces_handled": 0, + "udp4_scrapes_handled": 0, + "udp4_responses": 0, + "udp4_errors_handled": 0, + "udp6_requests": 0, + "udp6_connections_handled": 0, + "udp6_announces_handled": 0, + "udp6_scrapes_handled": 0, + "udp6_responses": 0, + "udp6_errors_handled": 0 + } + ``` + +#### 5.2.2 Test Monitoring Services + +```bash +# [PROJECT_ROOT] Test Prometheus (accessible through nginx proxy) +curl -s http://$VM_IP/prometheus/api/v1/targets | jq . 
+ +# [PROJECT_ROOT] Test Grafana web interface +curl -s -I http://$VM_IP:3100/ | head -5 + +# [PROJECT_ROOT] Alternative: Check if services are responding +curl -s -o /dev/null -w "%{http_code}\n" http://$VM_IP/prometheus/ +curl -s -o /dev/null -w "%{http_code}\n" http://$VM_IP:3100/ +``` + +#### 5.2.3 Common Endpoint Testing Mistakes + +❌ **Wrong - Trying to access internal ports directly**: + +```bash +# These will fail - internal ports not exposed outside Docker network +curl http://$VM_IP:1212/api/health_check # Port 1212 not accessible +curl http://$VM_IP:7070/api/v1/stats # Port 7070 not accessible +curl http://$VM_IP:9090/ # Port 9090 not accessible +``` + +✅ **Correct - Using nginx proxy on port 80**: + +```bash +# All API access goes through nginx proxy on port 80 +curl http://$VM_IP/api/health_check # Health check +curl "http://$VM_IP/api/v1/stats?token=TOKEN" # Stats with auth +curl http://$VM_IP/prometheus/ # Prometheus UI +``` + +#### 5.2.4 Getting the Authentication Token + +```bash +# [PROJECT_ROOT] Get the authentication token from the VM +ssh torrust@$VM_IP \ + "grep TRACKER_ADMIN_TOKEN /home/torrust/github/torrust/torrust-tracker-demo/application/.env" + +# Should output: TRACKER_ADMIN_TOKEN=local-dev-admin-token-12345 +``` + +#### 5.2.5 Advanced Testing with jq + +```bash +# [PROJECT_ROOT] Extract specific metrics with jq +curl -s "http://$VM_IP/api/v1/stats?token=$TOKEN" | jq '.torrents, .seeders, .leechers' + +# [PROJECT_ROOT] Check if tracker is healthy +curl -s http://$VM_IP/api/health_check | jq -r '.status' + +# [PROJECT_ROOT] Pretty print with color (if jq supports it) +curl -s "http://$VM_IP/api/v1/stats?token=$TOKEN" | jq --color-output . 
+``` --- @@ -677,19 +969,281 @@ ssh torrust@$VM_IP "docker stats --no-stream" --- -## Step 7: Cleanup +## Step 7: External Smoke Testing with Official Client Tools + +This step validates the Torrust Tracker deployment using the official Torrust +Tracker Client tools from an external perspective, simulating real BitTorrent +client interactions. + +### 7.1 Setup Torrust Tracker Client Tools + +The smoke tests require the official `torrust-tracker-client` tools. These are +**not published on crates.io** and must be compiled from the tracker repository source. + +#### 7.1.1 Check for Existing Torrust Tracker Repository + +**Priority**: Use existing local installation to avoid long compilation times. + +```bash +# [PROJECT_ROOT] Check for torrust-tracker in parent directory (preferred) +if [ -d "../torrust-tracker" ]; then + echo "✅ Found torrust-tracker in parent directory" + TRACKER_DIR="../torrust-tracker" +elif [ -d "/home/$(whoami)/Documents/git/committer/me/github/torrust/torrust-tracker" ]; then + echo "✅ Found torrust-tracker in standard location" + TRACKER_DIR="/home/$(whoami)/Documents/git/committer/me/github/torrust/torrust-tracker" +else + echo "❌ torrust-tracker repository not found" + echo "Please clone it first or specify the path manually" + TRACKER_DIR="" +fi + +echo "Using tracker directory: $TRACKER_DIR" +``` + +#### 7.1.2 Verify Client Tools Availability + +```bash +# [PROJECT_ROOT] Check if client tools are available +if [ -n "$TRACKER_DIR" ] && [ -d "$TRACKER_DIR" ]; then + cd "$TRACKER_DIR" + + # Verify we're in the right directory + ls Cargo.toml >/dev/null 2>&1 || (echo "❌ Not a valid torrust-tracker directory" && exit 1) + + # Check available client binaries + echo "=== Available client tools ===" + ls -la src/bin/ | grep -E "(client|checker)" || echo "No client tools found" + + # Test that client tools can be run (shows help/usage) + echo "=== Testing client tool availability ===" + cargo run -p torrust-tracker-client --bin udp_tracker_client 
-- --help >/dev/null 2>&1 && \ + echo "✅ udp_tracker_client available" || echo "❌ udp_tracker_client not available" + + cargo run -p torrust-tracker-client --bin http_tracker_client -- --help >/dev/null 2>&1 && \ + echo "✅ http_tracker_client available" || echo "❌ http_tracker_client not available" + + cargo run -p torrust-tracker-client --bin tracker_checker -- --help >/dev/null 2>&1 && \ + echo "✅ tracker_checker available" || echo "❌ tracker_checker not available" + + # Return to original directory + cd - >/dev/null +else + echo "❌ Cannot verify client tools - tracker directory not found" + echo "Please clone torrust-tracker repository:" + echo "git clone https://github.com/torrust/torrust-tracker" +fi +``` + +#### 7.1.3 Alternative: Clone if Not Available -### 7.1 Stop Services (if needed) +```bash +# [PROJECT_ROOT] Clone torrust-tracker if not found locally +if [ -z "$TRACKER_DIR" ]; then + echo "=== Cloning torrust-tracker repository ===" + git clone https://github.com/torrust/torrust-tracker + TRACKER_DIR="./torrust-tracker" + echo "✅ Repository cloned to $TRACKER_DIR" + echo "⚠️ Note: First compilation will take significant time" +fi +``` + +### 7.2 Run UDP Tracker Smoke Tests ```bash -# Stop all services cleanly +# [PROJECT_ROOT] Get VM IP for testing +VM_IP=$(cd infrastructure/terraform && tofu output -raw vm_ip) +echo "Testing against VM: $VM_IP" + +# [PROJECT_ROOT] Test UDP tracker on port 6868 +echo "=== Testing UDP Tracker (6868) ===" +cd "$TRACKER_DIR" +cargo run -p torrust-tracker-client --bin udp_tracker_client announce \ + udp://$VM_IP:6868/announce \ + 9c38422213e30bff212b30c360d26f9a02136422 | jq + +# [PROJECT_ROOT] Test UDP tracker on port 6969 +echo "=== Testing UDP Tracker (6969) ===" +cargo run -p torrust-tracker-client --bin udp_tracker_client announce \ + udp://$VM_IP:6969/announce \ + 9c38422213e30bff212b30c360d26f9a02136422 | jq + +cd - >/dev/null +``` + +**Expected Output** (for both UDP trackers): + +```json +{ + "transaction_id": 
2425393296, + "announce_response": { + "interval": 120, + "leechers": 0, + "seeders": 0, + "peers": [] + } +} +``` + +### 7.3 Run HTTP Tracker Smoke Tests + +#### 7.3.1 Test Through Nginx Proxy (Expected to Work) + +```bash +# [PROJECT_ROOT] Test HTTP tracker through nginx proxy on port 80 +echo "=== Testing HTTP Tracker through Nginx Proxy (80) ===" +cd "$TRACKER_DIR" +cargo run -p torrust-tracker-client --bin http_tracker_client announce \ + http://$VM_IP:80 \ + 9c38422213e30bff212b30c360d26f9a02136422 | jq + +cd - >/dev/null +``` + +**Expected Output**: + +```json +{ + "complete": 1, + "incomplete": 0, + "interval": 300, + "min interval": 300, + "peers": [ + { + "ip": "192.168.122.1", + "peer id": [ + 45, 113, 66, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 49 + ], + "port": 47401 + } + ] +} +``` + +#### 7.3.2 Test Direct Access (Expected to Fail) + +```bash +# [PROJECT_ROOT] Test HTTP tracker directly on port 7070 (expected to fail) +echo "=== Testing HTTP Tracker Direct (7070) - Expected to fail ===" +cd "$TRACKER_DIR" +cargo run -p torrust-tracker-client --bin http_tracker_client announce \ + http://$VM_IP:7070 \ + 9c38422213e30bff212b30c360d26f9a02136422 | jq || \ + echo "✅ Expected failure - tracker correctly configured for reverse proxy mode" + +cd - >/dev/null +``` + +**Expected Behavior**: Should fail with an error about missing `X-Forwarded-For` +header, confirming the tracker is correctly configured for reverse proxy mode. 
+ +### 7.4 Run Comprehensive Tracker Checker + +```bash +# [PROJECT_ROOT] Run comprehensive checker +echo "=== Running Comprehensive Tracker Checker ===" +cd "$TRACKER_DIR" + +# Configure tracker checker for the test environment +export TORRUST_CHECKER_CONFIG='{ + "udp_trackers": ["udp://'$VM_IP':6969/announce"], + "http_trackers": ["http://'$VM_IP':80"], + "health_checks": ["http://'$VM_IP'/api/health_check"] +}' + +cargo run -p torrust-tracker-client --bin tracker_checker + +cd - >/dev/null +``` + +**Expected Output**: Status report for all configured endpoints showing +successful connections and responses. + +### 7.5 Smoke Test Results Interpretation + +#### ✅ Success Indicators + +All smoke tests should show: + +- **UDP Trackers**: JSON responses with interval/peer data and transaction IDs +- **HTTP Tracker** (via proxy): JSON response with tracker statistics and peer information +- **Health Check**: Successful connection through comprehensive checker +- **Response Times**: Sub-second response times for all endpoints + +#### ❌ Common Issues and Solutions + +**Compilation Errors**: + +```bash +# If Rust compilation fails, ensure Rust is installed +cargo --version || curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh + +# Update Rust if compilation issues persist +rustup update +``` + +**Connection Refused**: + +```bash +# Verify VM is running and services are up +ssh torrust@$VM_IP \ + 'cd /home/torrust/github/torrust/torrust-tracker-demo/application && docker compose ps' + +# Check if tracker ports are accessible +nc -zv $VM_IP 6868 # UDP tracker port 1 +nc -zv $VM_IP 6969 # UDP tracker port 2 +nc -zv $VM_IP 80 # HTTP proxy port +``` + +**UDP Connection Issues**: + +```bash +# Check firewall rules on VM +ssh torrust@$VM_IP "sudo ufw status | grep -E '(6868|6969)'" + +# Verify UDP ports are bound +ssh torrust@$VM_IP "sudo netstat -ulnp | grep -E '(6868|6969)'" +``` + +### 7.6 Performance Validation + +```bash +# [PROJECT_ROOT] Measure response times 
for performance validation +echo "=== Performance Testing ===" + +# Time UDP responses +time (cd "$TRACKER_DIR" && cargo run -p torrust-tracker-client --bin udp_tracker_client announce \ + udp://$VM_IP:6969/announce \ + 9c38422213e30bff212b30c360d26f9a02136422 >/dev/null) + +# Time HTTP responses +time (cd "$TRACKER_DIR" && cargo run -p torrust-tracker-client --bin http_tracker_client announce \ + http://$VM_IP:80 \ + 9c38422213e30bff212b30c360d26f9a02136422 >/dev/null) +``` + +**Expected Performance**: + +- **UDP requests**: < 1 second response time +- **HTTP requests**: < 2 seconds response time +- **No errors**: All requests should complete successfully + +--- + +## Step 8: Cleanup + +### 8.1 Stop Services (if needed) + +```bash +# [PROJECT_ROOT] Stop all services cleanly ./infrastructure/tests/test-integration.sh stop ``` -### 7.2 Destroy VM and Clean Up +### 8.2 Destroy VM and Clean Up ```bash -# Destroy the VM and clean up resources +# [PROJECT_ROOT] Destroy the VM and clean up resources time make destroy ``` @@ -699,10 +1253,10 @@ time make destroy - State files cleaned - **Time**: ~30 seconds -### 7.3 Final Cleanup +### 8.3 Final Cleanup ```bash -# Complete cleanup +# [PROJECT_ROOT] Complete cleanup make clean ``` @@ -713,12 +1267,163 @@ make clean --- +--- + +## Step 9: Key Testing Insights and Best Practices + +### 9.1 Critical Architecture Understanding + +During testing, several important architectural details were discovered: + +#### Network Architecture (Double Virtualization) + +The deployment uses **two layers of virtualization**: + +1. **Host → VM**: KVM/libvirt provides VM with IP `192.168.122.X` +2. 
**VM → Docker Compose**: Creates internal Docker network for services + +**Port Mapping Flow**: + +```text +Host (192.168.122.1) + ↓ SSH/HTTP requests +VM (192.168.122.253:80) + ↓ nginx proxy +Docker Network (tracker:1212, prometheus:9090, grafana:3000) +``` + +#### Authentication Requirements + +- **Health Check API**: `/api/health_check` - No authentication required +- **Stats API**: `/api/v1/stats` - Requires `?token=ADMIN_TOKEN` parameter +- **Admin Token**: Located in `application/.env` (inside the project checkout on the VM) as `TRACKER_ADMIN_TOKEN` + +### 9.2 Correct Testing Procedures + +#### ✅ Proper API Testing + +```bash +# Get VM IP +VM_IP=$(cd infrastructure/terraform && tofu output -raw vm_ip) + +# Test health (no auth needed) +curl -s http://$VM_IP/api/health_check | jq . + +# Test stats (auth required) +curl -s "http://$VM_IP/api/v1/stats?token=local-dev-admin-token-12345" | jq . + +# Test specific metrics with jq filtering +curl -s "http://$VM_IP/api/v1/stats?token=local-dev-admin-token-12345" | jq '.torrents, .seeders, .leechers' +``` + +#### ✅ Monitoring Service Testing + +```bash +# Prometheus (through nginx proxy) +curl -s http://$VM_IP/prometheus/api/v1/targets | jq . 
+ +# Grafana (direct port access allowed) +curl -I http://$VM_IP:3100/ + +# Check HTTP response codes +curl -s -o /dev/null -w "%{http_code}\n" http://$VM_IP/prometheus/ +``` + +### 9.3 Common Testing Mistakes + +#### ❌ Port Confusion + +**Wrong**: Trying to access internal Docker ports directly from host: + +```bash +curl http://$VM_IP:1212/api/health_check # 1212 not exposed +curl http://$VM_IP:7070/api/v1/stats # 7070 not exposed +curl http://$VM_IP:9090/ # 9090 not exposed +``` + +**Correct**: Using nginx proxy on port 80: + +```bash +curl http://$VM_IP/api/health_check # Proxied to tracker:1212 +curl http://$VM_IP/api/v1/stats?token=X # Proxied to tracker:1212 +curl http://$VM_IP/prometheus/ # Proxied to prometheus:9090 +``` + +#### ❌ Missing Authentication + +**Wrong**: Testing stats API without token: + +```bash +curl http://$VM_IP/api/v1/stats +# Returns: Unhandled rejection: Err { reason: "unauthorized" } +``` + +**Correct**: Including authentication token: + +```bash +curl "http://$VM_IP/api/v1/stats?token=local-dev-admin-token-12345" +``` + +### 9.4 Integration Test Script Limitations + +The automated integration test script (`./infrastructure/tests/test-integration.sh endpoints`) +may fail because: + +1. **Authentication**: Script doesn't include token for stats API +2. **Port Assumptions**: May test internal ports instead of nginx proxy +3. **JSON Parsing**: Doesn't use `jq` for response validation + +**Manual testing** (as shown in this guide) provides more reliable results and +better insight into the actual API functionality. + +### 9.5 Useful Testing Commands + +#### JSON Processing with jq + +```bash +# Pretty print with colors +curl -s "http://$VM_IP/api/v1/stats?token=$TOKEN" | jq --color-output . 
+ +# Extract specific fields +curl -s "http://$VM_IP/api/v1/stats?token=$TOKEN" | jq '.torrents, .seeders, .leechers' + +# Check if service is healthy +curl -s http://$VM_IP/api/health_check | jq -r '.status' + +# Count total UDP requests +curl -s "http://$VM_IP/api/v1/stats?token=$TOKEN" | jq '.udp4_requests + .udp6_requests' +``` + +#### Service Status Verification + +```bash +# Check all Docker services +ssh torrust@$VM_IP \ + 'cd /home/torrust/github/torrust/torrust-tracker-demo/application && docker compose ps' + +# Check specific service logs +ssh torrust@$VM_IP \ + 'cd /home/torrust/github/torrust/torrust-tracker-demo/application && \ + docker compose logs tracker --tail=20' + +# Check service health status +ssh torrust@$VM_IP 'docker ps --format "table {{.Names}}\t{{.Status}}"' +``` + +--- + ## Troubleshooting ### Resource Conflicts During Deployment #### Cloud-init ISO Already Exists +**Error**: `storage volume 'torrust-tracker-demo-cloudinit.iso' exists already` + +**Root Cause**: Previous deployment cleanup was incomplete, leaving libvirt volumes. + +**Solution**: + ```bash # Check if cloud-init ISO exists virsh vol-list user-default | grep cloudinit @@ -726,9 +1431,21 @@ virsh vol-list user-default | grep cloudinit # Remove the conflicting cloud-init ISO virsh vol-delete torrust-tracker-demo-cloudinit.iso user-default +# Check for VM disk volume too +virsh vol-list user-default | grep torrust-tracker-demo + +# Remove VM disk if it exists +virsh vol-delete torrust-tracker-demo.qcow2 user-default 2>/dev/null || echo "VM disk not found" + +# Verify cleanup +virsh vol-list user-default | grep torrust-tracker-demo || echo "✅ All volumes cleaned" + # Then retry: make apply ``` +**Prevention**: Always run the complete cleanup verification (Step 1.4.1) before +starting fresh deployments. 
+ #### OpenTofu State Conflicts ```bash @@ -755,6 +1472,31 @@ virsh vol-delete torrust-tracker-demo.qcow2 user-default ### Common Issues and Solutions +#### Working Directory Confusion + +**MOST COMMON ISSUE**: Commands failing due to being in the wrong directory. + +```bash +# [PROJECT_ROOT] Check current directory +pwd +# Should output: /path/to/torrust-tracker-demo + +# [PROJECT_ROOT] If you're in the wrong directory, navigate to project root +cd /home/yourname/Documents/git/committer/me/github/torrust/torrust-tracker-demo + +# [PROJECT_ROOT] Verify you're in the right place +ls -la | grep -E "(Makefile|infrastructure|application)" +# Should show all three: Makefile, infrastructure/, application/ +``` + +**Symptoms**: + +- `make: *** No rule to make target 'configure-local'. Stop.` +- `make: *** No such file or directory. Stop.` +- `./infrastructure/tests/test-integration.sh: No such file or directory` + +**Solution**: Always ensure you're in the project root directory before running commands. + #### SSH Connection Fails **MOST COMMON CAUSES**: @@ -762,12 +1504,12 @@ virsh vol-delete torrust-tracker-demo.qcow2 user-default 1. **Missing SSH key configuration**: ```bash -# Check if SSH key was configured +# [PROJECT_ROOT] Check if SSH key was configured cat infrastructure/terraform/local.tfvars -# If file doesn't exist or contains "REPLACE_WITH_YOUR_SSH_PUBLIC_KEY": +# [PROJECT_ROOT] If file doesn't exist or contains "REPLACE_WITH_YOUR_SSH_PUBLIC_KEY": make setup-ssh-key -# Then redeploy: make destroy && make apply +# [PROJECT_ROOT] Then redeploy: make destroy && make apply ``` 1. **Using non-default SSH key** (e.g., `torrust_rsa` instead of `id_rsa`): @@ -859,32 +1601,120 @@ ssh torrust@$VM_IP "sudo cloud-init analyze show" This guide provides a complete integration testing workflow that: 1. **Creates fresh infrastructure** in ~3-5 minutes -2. **Waits for cloud-init** to complete (~2-3 minutes) -3. 
**Runs comprehensive tests** covering all services (~3-5 minutes) -4. **Verifies end-to-end functionality** of the Torrust Tracker -5. **Cleans up resources** when complete +2. **Generates configuration files** from templates (~2 seconds) +3. **Refreshes OpenTofu state** to detect VM IP (~3 seconds) +4. **Waits for cloud-init** to complete (~2-3 minutes) +5. **Runs comprehensive tests** covering all services (~3-5 minutes) +6. **Verifies end-to-end functionality** of the Torrust Tracker +7. **Cleans up resources** when complete + +**Total Time**: ~8-12 minutes for complete cycle (including external smoke testing) + +### Integration Testing Results Summary + +✅ **INTEGRATION TESTS NOW PASS COMPLETELY!** + +This guide provides a complete integration testing workflow that: + +1. **Creates fresh infrastructure** in ~3-5 minutes +2. **Generates configuration files** from templates (~2 seconds) +3. **Refreshes OpenTofu state** to detect VM IP (~3 seconds) +4. **Waits for cloud-init** to complete (~2-3 minutes) +5. **Runs comprehensive tests** covering all services (~3-5 minutes) +6. **Performs external smoke testing** using official Torrust client tools (~2-3 minutes) +7. **Verifies end-to-end functionality** of the Torrust Tracker +8. 
**Cleans up resources** when complete (~1 minute) **Total Time**: ~8-12 minutes for complete cycle +### ✅ Successful Test Results (Latest Run) + +During the most recent testing cycle, the following components were validated successfully: + +#### Infrastructure Tests + +- ✅ **VM Access**: SSH connectivity working at `192.168.122.54` +- ✅ **Docker Installation**: Docker 28.3.1 and Docker Compose V2.38.1 working +- ✅ **Service Health**: All containers running with healthy status + +#### Service Deployment + +- ✅ **MySQL**: Database running healthy with proper credentials +- ✅ **Tracker**: Torrust Tracker running with all endpoints active +- ✅ **Prometheus**: Metrics collection working +- ✅ **Grafana**: Dashboard service healthy (version 11.4.0) +- ✅ **Nginx Proxy**: Reverse proxy routing working correctly + +#### API and Endpoint Tests + +- ✅ **Health Check API**: `{"status":"Ok"}` via nginx proxy on port 80 +- ✅ **Statistics API**: Full stats JSON with admin token authentication +- ✅ **UDP Tracker Ports**: 6868 and 6969 listening on both IPv4 and IPv6 +- ✅ **Monitoring Services**: Grafana and Prometheus both healthy + +#### Final Test Output + +```console +[SUCCESS] All integration tests passed! +``` + +### Critical Configuration Details + +#### Authentication Requirements + +- **Health Check API**: `/api/health_check` - No authentication required +- **Stats API**: `/api/v1/stats` - **Requires authentication token** +- **Admin Token**: `local-dev-admin-token-12345` (from `.env` file) + +#### Correct API Testing Examples + +```bash +# Health check (no auth needed) +curl -s http://$VM_IP/api/health_check | jq . + +# Stats API (auth required) +curl -s "http://$VM_IP/api/v1/stats?token=local-dev-admin-token-12345" | jq . 
+``` + +#### Network Architecture + +The deployment uses **nginx proxy** on port 80 that routes to internal services: + +- `/api/*` → routes to tracker service (internal port 1212) +- Internal Docker ports (1212, 7070, 9090) are NOT accessible from outside the VM +- UDP ports (6868, 6969) are directly exposed for tracker protocol + ### Key Lessons Learned During the development of this guide, we identified several critical issues: -1. **SSH Key Configuration**: The most common failure is missing or incorrect SSH - key setup. The `make setup-ssh-key` step is **mandatory**. +1. **Working Directory Requirements**: The most common failure is running commands + from the wrong directory. All `make` commands and test scripts must be run from + the **project root directory**, not from subdirectories like `infrastructure/terraform/`. + +2. **New Configuration Workflow**: Recent changes introduced a template-based + configuration system. You must run `make configure-local` to generate final + configuration files before deployment. + +3. **SSH Key Configuration**: SSH key setup is **mandatory**. The `make setup-ssh-key` + step must be completed before deployment. + +4. **OpenTofu State Refresh**: After VM deployment, the OpenTofu state may not + immediately reflect the VM's IP address. The `make refresh-state` step (Section 2.4) + prevents "No IP assigned yet" issues in subsequent commands. -2. **Non-Default SSH Keys**: If using custom SSH keys (like `torrust_rsa` +5. **Non-Default SSH Keys**: If using custom SSH keys (like `torrust_rsa` instead of `id_rsa`), you must: - Configure the public key in `infrastructure/terraform/local.tfvars` - Set up SSH client configuration or use `-i` flag explicitly -3. **Docker Compose Compatibility**: Cloud-init now installs Docker Compose V2 +6. **Docker Compose Compatibility**: Cloud-init now installs Docker Compose V2 plugin for better compatibility with modern compose.yaml files. 
Integration tests automatically detect and use the appropriate command (`docker compose` or `docker-compose`). -4. **Cloud-Init Timing**: Cloud-init performs many operations including: +7. **Cloud-Init Timing**: Cloud-init performs many operations including: - Package downloads and installations - System configuration @@ -895,7 +1725,7 @@ During the development of this guide, we identified several critical issues: during cloud-init, preventing connectivity blocks that caused completion delays. Actual completion time is typically 2-3 minutes. -5. **Debugging Techniques**: Use `virsh console` and cloud-init logs to debug +8. **Debugging Techniques**: Use `virsh console` and cloud-init logs to debug issues when SSH fails. ### Success Factors diff --git a/docs/refactoring/integration-test-refactor-summary.md b/docs/refactoring/integration-test-refactor-summary.md new file mode 100644 index 0000000..6c78229 --- /dev/null +++ b/docs/refactoring/integration-test-refactor-summary.md @@ -0,0 +1,160 @@ +# Integration Test Refactor Summary + +> **Note**: This document describes interim changes made during the 12-factor refactoring process. +> It should be removed once the refactoring described in `infrastructure/docs/refactoring/twelve-factor-refactor/` +> is completed. + +## Overview + +This document summarizes the changes made to align the integration testing workflow with the 12-factor +configuration principles currently being implemented in the Torrust Tracker Demo project. + +## Changes Made + +### 1. 
Updated `setup_torrust_tracker` Function (infrastructure/tests/test-integration.sh) + +**Previous Approach:** + +- Used `rsync` to copy the entire local repository with exclusions +- Fallback to `.env.production` if `.env` was missing +- Ad-hoc configuration handling + +**New Approach:** + +- Uses `git archive` to export only git-tracked files (equivalent to cloning but with current working + version) +- Runs `make configure-local` to generate configuration files using the new infrastructure system +- Executes the official `application/share/bin/install.sh` script locally +- Copies the properly configured `storage/` folder to the VM +- Verifies critical configuration files exist on the VM + +**Benefits:** + +- Tests the exact version being developed (without untracked files) +- Uses the official installation process instead of custom logic +- Leverages the new 12-factor configuration system +- More robust and maintainable + +### 2. Updated Installation Script (application/share/bin/install.sh) + +**Previous Approach:** + +- Created `.env` from `.env.production` if missing +- Copied configuration files from default templates + +**New Approach:** + +- Fails if `.env` is not present (12-factor principle) +- Expects configuration files to be pre-generated by infrastructure system +- Verifies that required configuration files exist +- Provides helpful error messages pointing to the infrastructure commands + +**Benefits:** + +- Follows 12-factor principles strictly +- No more arbitrary copying of default configurations +- Clear error messages guide users to the correct configuration process +- Separation of concerns between infrastructure and application layers + +### 3. 
Updated Documentation + +**Changes:** + +- Added deprecation notice to `.env.production` file +- Updated integration testing guide to reflect new workflow +- Improved documentation of what files are generated by `make configure-local` + +## Workflow Changes + +### Before (Old Workflow) + +```bash +# Deploy VM +make apply + +# Copy repo with rsync and fallback configs +setup_torrust_tracker() # Custom logic in test script + +# Start services +docker compose up -d +``` + +### After (New Workflow) + +```bash +# Deploy VM +make apply + +# Generate configuration files locally +make configure-local + +# Use git archive to copy only tracked files +git archive HEAD | extract to VM + +# Run official installation script locally +./application/share/bin/install.sh + +# Copy configured storage folder to VM +rsync storage/ to VM + +# Start services +docker compose up -d +``` + +## Files Modified + +1. **infrastructure/tests/test-integration.sh** + - Refactored `setup_torrust_tracker` function + - Improved error handling and logging + - Added proper configuration verification + +2. **application/share/bin/install.sh** + - Now requires `.env` to be present (12-factor principle) + - Verifies configuration files exist + - Provides helpful error messages + +3. **application/.env.production** + - Added deprecation notice + - Updated documentation to point to new configuration system + +4. 
**docs/guides/integration-testing-guide.md** + - Updated to reflect new configuration file generation + +## Migration Notes + +### For Developers + +- The integration test now uses the official installation process +- Configuration files are generated by the infrastructure system +- The test workflow is more aligned with production deployment + +### For Operations + +- The installation script now fails fast if configuration is missing +- Clear error messages guide to the correct configuration commands +- No more implicit fallbacks to default configurations + +## Testing + +All changes have been validated with: + +- ✅ Shell script linting (ShellCheck) +- ✅ Markdown linting (markdownlint) +- ✅ YAML linting (yamllint) +- ✅ Full linting pipeline passes + +## Next Steps + +1. Test the updated integration workflow with `make test` +2. Update any remaining documentation references to `.env.production` +3. Consider removing `.env.production` once migration is complete +4. Update production deployment guides to use the new configuration system + +## Benefits Achieved + +- **Consistency**: Integration tests now use the same configuration approach as production +- **Maintainability**: Removed custom configuration logic from test scripts +- **Robustness**: Proper error handling and validation +- **12-Factor Compliance**: Configuration is externalized and validated +- **Developer Experience**: Clear error messages and guidance +- **Testing Fidelity**: Tests exactly what's being developed (git-tracked files only) diff --git a/infrastructure/.gitignore b/infrastructure/.gitignore index b093859..f5919f2 100644 --- a/infrastructure/.gitignore +++ b/infrastructure/.gitignore @@ -10,6 +10,12 @@ terraform.tfplan terraform.tfplan.* +# Environment files with secrets (keep templates) +config/environments/production.env +config/environments/*.env +!config/environments/*.env.tpl +!config/environments/local.env + # Cloud-init generated files user-data.yaml user-data-minimal.yaml diff --git 
a/infrastructure/config/environments/local.env b/infrastructure/config/environments/local.env new file mode 100644 index 0000000..62ae263 --- /dev/null +++ b/infrastructure/config/environments/local.env @@ -0,0 +1,26 @@ +# Local Development Environment Configuration +ENVIRONMENT=local +GENERATION_DATE=$(date '+%Y-%m-%d %H:%M:%S') + +# Template processing variables +DOLLAR=$ + +# === SECRETS (Only these variables will be in Docker environment) === + +# Database Secrets +MYSQL_ROOT_PASSWORD=root_secret_local +MYSQL_DATABASE=torrust_tracker +MYSQL_USER=torrust +MYSQL_PASSWORD=tracker_secret_local + +# Tracker API Token +TRACKER_ADMIN_TOKEN=local-dev-admin-token-12345 + +# Grafana Admin Credentials +GF_SECURITY_ADMIN_USER=admin +GF_SECURITY_ADMIN_PASSWORD=admin_secret_local + +# === DOCKER CONFIGURATION === + +# User ID for file permissions +USER_ID=1000 diff --git a/infrastructure/config/environments/production.env.tpl b/infrastructure/config/environments/production.env.tpl new file mode 100644 index 0000000..66f8c50 --- /dev/null +++ b/infrastructure/config/environments/production.env.tpl @@ -0,0 +1,26 @@ +# Production Environment Configuration Template +# Copy this file to production.env and replace placeholder values with secure secrets + +ENVIRONMENT=production +GENERATION_DATE=$(date '+%Y-%m-%d %H:%M:%S') + +# === SECRETS (Only these variables will be in Docker environment) === +# IMPORTANT: Replace ALL placeholder values with actual secure secrets before deployment! 
+ +# Database Secrets +MYSQL_ROOT_PASSWORD=REPLACE_WITH_SECURE_ROOT_PASSWORD +MYSQL_DATABASE=torrust_tracker +MYSQL_USER=torrust +MYSQL_PASSWORD=REPLACE_WITH_SECURE_PASSWORD + +# Tracker API Token (Used for administrative API access) +TRACKER_ADMIN_TOKEN=REPLACE_WITH_SECURE_ADMIN_TOKEN + +# Grafana Admin Credentials +GF_SECURITY_ADMIN_USER=admin +GF_SECURITY_ADMIN_PASSWORD=REPLACE_WITH_SECURE_GRAFANA_PASSWORD + +# === DOCKER CONFIGURATION === + +# User ID for file permissions (match host user) +USER_ID=1000 diff --git a/infrastructure/config/templates/docker-compose.env.tpl b/infrastructure/config/templates/docker-compose.env.tpl new file mode 100644 index 0000000..7f184b7 --- /dev/null +++ b/infrastructure/config/templates/docker-compose.env.tpl @@ -0,0 +1,25 @@ +# Generated Docker Compose environment file for ${ENVIRONMENT} +# Generated on: ${GENERATION_DATE} +# +# This file contains only secrets and Docker-specific configuration. +# Application behavior is configured in config files (tracker.toml, prometheus.yml). 
+ +# Database Secrets +MYSQL_ROOT_PASSWORD=${MYSQL_ROOT_PASSWORD} +MYSQL_DATABASE=${MYSQL_DATABASE} +MYSQL_USER=${MYSQL_USER} +MYSQL_PASSWORD=${MYSQL_PASSWORD} + +# Tracker Database Configuration +TORRUST_TRACKER_CONFIG_OVERRIDE_CORE__DATABASE__DRIVER=${TRACKER_DATABASE_DRIVER} +TORRUST_TRACKER_CONFIG_OVERRIDE_CORE__DATABASE__PATH=${TRACKER_DATABASE_URL} + +# Tracker API Token +TRACKER_ADMIN_TOKEN=${TRACKER_ADMIN_TOKEN} + +# Docker Runtime Configuration +USER_ID=${USER_ID} + +# Grafana Admin Credentials +GF_SECURITY_ADMIN_USER=${GF_SECURITY_ADMIN_USER} +GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD} diff --git a/application/share/container/default/config/nginx.conf b/infrastructure/config/templates/nginx.conf.tpl similarity index 80% rename from application/share/container/default/config/nginx.conf rename to infrastructure/config/templates/nginx.conf.tpl index 96d9cd2..27e20f7 100644 --- a/application/share/container/default/config/nginx.conf +++ b/infrastructure/config/templates/nginx.conf.tpl @@ -1,3 +1,18 @@ +# Nginx Configuration Template for Torrust Tracker Demo +# +# Variable Escaping Notes: +# - This template is processed by envsubst which substitutes all $VARIABLE patterns +# - Nginx variables (like $proxy_add_x_forwarded_for, $host, $http_upgrade) must be escaped +# - Use ${DOLLAR} environment variable to represent literal $ in nginx config +# - Example: ${DOLLAR}proxy_add_x_forwarded_for becomes $proxy_add_x_forwarded_for +# +# TODO: Fix the commented HTTPS configuration section below +# - The HTTPS configuration has inconsistent variable escaping +# - Some nginx variables use literal $ (incorrect) while others should use ${DOLLAR} +# - Line 117: proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; (needs ${DOLLAR}) +# - Lines with $host, $http_upgrade, $connection_upgrade also need escaping +# - SSL certificate paths and other static values are correct as-is + server { listen 80; @@ -11,13 +26,13 @@ server location /api/ { proxy_pass 
http://tracker:1212/api/; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-For ${DOLLAR}proxy_add_x_forwarded_for; } location / { proxy_pass http://tracker:7070; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-For ${DOLLAR}proxy_add_x_forwarded_for; } location ~ /.well-known/acme-challenge @@ -109,7 +124,7 @@ server # #add_header Strict-Transport-Security "max-age=31536000; includeSubDomains; preload" always; # # enable strict transport security only if you understand the implications # -# proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; +# proxy_set_header X-Forwarded-For ${DOLLAR}proxy_add_x_forwarded_for; # } # # root /var/www/html; diff --git a/infrastructure/config/templates/prometheus.yml.tpl b/infrastructure/config/templates/prometheus.yml.tpl new file mode 100644 index 0000000..991264a --- /dev/null +++ b/infrastructure/config/templates/prometheus.yml.tpl @@ -0,0 +1,34 @@ +--- +# Prometheus Configuration Template +# Generated from environment variables for ${ENVIRONMENT} +# +# NOTE: Admin token is stored in plain text in this config file after template processing. +# This is a limitation of Prometheus configuration - it does not support runtime environment +# variable substitution like other services. +# +# TODO: Research safer secret injection methods for Prometheus: +# - Prometheus file_sd_configs with dynamic token refresh +# - External authentication proxy (oauth2-proxy, etc.) 
+# - Vault integration or secret management solutions +# - Init containers to generate configs with short-lived tokens + +global: + scrape_interval: 15s + +scrape_configs: + - job_name: 'tracker_stats' + static_configs: + - targets: ['tracker:1212'] + metrics_path: '/api/v1/stats' + params: + token: ['${TRACKER_ADMIN_TOKEN}'] + format: ['prometheus'] + + - job_name: 'tracker_metrics' + static_configs: + - targets: ['tracker:1212'] + metrics_path: '/api/v1/metrics' + params: + token: ['${TRACKER_ADMIN_TOKEN}'] + format: ['prometheus'] + diff --git a/infrastructure/config/templates/tracker.toml.tpl b/infrastructure/config/templates/tracker.toml.tpl new file mode 100644 index 0000000..1c152a8 --- /dev/null +++ b/infrastructure/config/templates/tracker.toml.tpl @@ -0,0 +1,149 @@ +# Generated Torrust Tracker configuration for ${ENVIRONMENT} +# Generated on: ${GENERATION_DATE} +# +# Configuration Override with Environment Variables: +# The Torrust Tracker uses the Figment crate for configuration management. +# Any configuration value can be overridden using environment variables with the pattern: +# TORRUST_TRACKER_CONFIG_OVERRIDE_
<SECTION>__<SUBSECTION>__<KEY> +# +# Examples: +# [core.database] +# path = "..." -> TORRUST_TRACKER_CONFIG_OVERRIDE_CORE__DATABASE__PATH +# +# [http_api.access_tokens] +# admin = "..." -> TORRUST_TRACKER_CONFIG_OVERRIDE_HTTP_API__ACCESS_TOKENS__ADMIN +# +# [logging] +# threshold = "..." -> TORRUST_TRACKER_CONFIG_OVERRIDE_LOGGING__THRESHOLD +# +# Rules: +# - Use double underscores "__" to separate nested sections/keys +# - Convert section names to UPPERCASE +# - Dots in TOML become double underscores in env vars +# - This follows Figment's environment variable override conventions +# +# Example TOML Configuration (output from tracker after merging all sources): +# [metadata] +# app = "torrust-tracker" +# purpose = "configuration" +# schema_version = "2.0.0" +# +# [logging] +# threshold = "info" +# +# [core] +# inactive_peer_cleanup_interval = 120 +# listed = false +# private = false +# tracker_usage_statistics = true +# +# [core.announce_policy] +# interval = 120 +# interval_min = 120 +# +# [core.database] +# driver = "mysql" +# path = "mysql://torrust:password@mysql:3306/torrust_tracker" +# +# [core.net] +# external_ip = "0.0.0.0" +# on_reverse_proxy = false +# +# [core.tracker_policy] +# max_peer_timeout = 60 +# persistent_torrent_completed_stat = true +# remove_peerless_torrents = true +# +# [[udp_trackers]] +# bind_address = "0.0.0.0:6868" +# tracker_usage_statistics = true +# +# [udp_trackers.cookie_lifetime] +# secs = 120 +# nanos = 0 +# +# [[udp_trackers]] +# bind_address = "0.0.0.0:6969" +# tracker_usage_statistics = true +# +# [udp_trackers.cookie_lifetime] +# secs = 120 +# nanos = 0 +# +# [[http_trackers]] +# bind_address = "0.0.0.0:7070" +# tracker_usage_statistics = true +# +# [[http_trackers]] +# bind_address = "0.0.0.0:7171" +# tracker_usage_statistics = true +# +# [http_api] +# bind_address = "0.0.0.0:1212" +# +# [http_api.access_tokens] +# admin = "***" +# +# [health_check_api] +# bind_address = "127.0.0.1:1313" +# +# Documentation: 
https://docs.rs/torrust-tracker-configuration/latest/torrust_tracker_configuration/ + +[metadata] +app = "torrust-tracker" +purpose = "configuration" +schema_version = "2.0.0" + +[logging] +threshold = "info" + +[core] +inactive_peer_cleanup_interval = 600 +listed = false +private = false +tracker_usage_statistics = true + +[core.announce_policy] +interval = 120 +interval_min = 120 + +[core.database] +# Driver and path will be overridden via environment variables: +# TORRUST_TRACKER_CONFIG_OVERRIDE_CORE__DATABASE__DRIVER +# TORRUST_TRACKER_CONFIG_OVERRIDE_CORE__DATABASE__PATH +driver = "mysql" +path = "" + +[core.net] +external_ip = "0.0.0.0" +on_reverse_proxy = true + +[core.tracker_policy] +max_peer_timeout = 900 +persistent_torrent_completed_stat = false +remove_peerless_torrents = true + +# Health check API (internal only) +[health_check_api] +bind_address = "127.0.0.1:1313" + +# Main HTTP API +[http_api] +bind_address = "0.0.0.0:1212" + +# Admin token will be overridden via environment variable: +# TORRUST_TRACKER_CONFIG_OVERRIDE_HTTP_API__ACCESS_TOKENS__ADMIN +[http_api.access_tokens] +# admin = "" + +# UDP Trackers - Port 6868 +[[udp_trackers]] +bind_address = "0.0.0.0:6868" + +# UDP Trackers - Port 6969 +[[udp_trackers]] +bind_address = "0.0.0.0:6969" + +# HTTP Trackers - Port 7070 +[[http_trackers]] +bind_address = "0.0.0.0:7070" diff --git a/infrastructure/docs/refactoring/twelve-factor-refactor/migration-guide.md b/infrastructure/docs/refactoring/twelve-factor-refactor/migration-guide.md index 6370106..58556c4 100644 --- a/infrastructure/docs/refactoring/twelve-factor-refactor/migration-guide.md +++ b/infrastructure/docs/refactoring/twelve-factor-refactor/migration-guide.md @@ -318,7 +318,6 @@ make apply The new system uses environment-specific configuration: - `infrastructure/config/environments/local.env` - Local development -- `infrastructure/config/environments/staging.env` - Staging environment - `infrastructure/config/environments/production.env` - 
Production environment Process configuration before deployment: @@ -348,30 +347,6 @@ make destroy #### 4.1 Create Environment Variations -**Staging configuration** (`infrastructure/config/environments/staging.env`): - -```bash -# Infrastructure Configuration -INFRASTRUCTURE_PROVIDER=hetzner -INFRASTRUCTURE_REGION=fsn1 -INFRASTRUCTURE_INSTANCE_TYPE=cx11 - -# Application Configuration -TORRUST_TRACKER_MODE=private -TORRUST_TRACKER_LOG_LEVEL=info -TORRUST_TRACKER_DATABASE_DRIVER=sqlite3 -TORRUST_TRACKER_API_TOKEN=${TORRUST_STAGING_API_TOKEN} - -# Service Configuration -GRAFANA_ADMIN_PASSWORD=${GRAFANA_STAGING_PASSWORD} -PROMETHEUS_RETENTION_TIME=15d - -# Security Configuration -SSH_PUBLIC_KEY=${SSH_PUBLIC_KEY} -DOMAIN_NAME=staging.torrust-demo.com -SSL_EMAIL=${SSL_EMAIL} -``` - **Production configuration** (`infrastructure/config/environments/production.env`): ```bash @@ -393,7 +368,7 @@ PROMETHEUS_RETENTION_TIME=30d # Security Configuration SSH_PUBLIC_KEY=${SSH_PUBLIC_KEY} -DOMAIN_NAME=torrust-demo.com +DOMAIN_NAME=tracker.torrust-demo.com SSL_EMAIL=${SSL_EMAIL} ``` @@ -498,7 +473,7 @@ direnv allow ### Week 3: Environment Support -- [ ] Create staging and production configurations +- [ ] Create production configurations - [ ] Implement environment-specific logic - [ ] Test multi-environment deployment - [ ] Validate configuration for all environments diff --git a/infrastructure/docs/refactoring/twelve-factor-refactor/phase-1-implementation.md b/infrastructure/docs/refactoring/twelve-factor-refactor/phase-1-implementation.md index edc0d62..0ba421a 100644 --- a/infrastructure/docs/refactoring/twelve-factor-refactor/phase-1-implementation.md +++ b/infrastructure/docs/refactoring/twelve-factor-refactor/phase-1-implementation.md @@ -21,7 +21,6 @@ mkdir -p application/config/templates **Files to create:** - [ ] `infrastructure/config/environments/local.env` -- [ ] `infrastructure/config/environments/staging.env` - [ ] `infrastructure/config/environments/production.env` - [ 
] `infrastructure/config/templates/tracker.toml.tpl` - [ ] `infrastructure/config/templates/prometheus.yml.tpl` @@ -81,30 +80,6 @@ PROMETHEUS_RETENTION_TIME=7d USER_ID=1000 ``` -**Staging Environment (`staging.env`):** - -```bash -# Infrastructure -INFRASTRUCTURE_PROVIDER=hetzner -INFRASTRUCTURE_REGION=fsn1 -INFRASTRUCTURE_INSTANCE_TYPE=cx11 - -# Application -TORRUST_TRACKER_MODE=private -TORRUST_TRACKER_LOG_LEVEL=info -TORRUST_TRACKER_DATABASE_DRIVER=sqlite3 -TORRUST_TRACKER_API_TOKEN=${TORRUST_STAGING_API_TOKEN} - -# Services -GRAFANA_ADMIN_PASSWORD=${GRAFANA_STAGING_PASSWORD} -PROMETHEUS_RETENTION_TIME=15d - -# Security -SSH_PUBLIC_KEY=${SSH_PUBLIC_KEY} -DOMAIN_NAME=staging.torrust-demo.com -SSL_EMAIL=${SSL_EMAIL} -``` - **Production Environment (`production.env`):** ```bash @@ -155,7 +130,7 @@ PROMETHEUS_RETENTION_TIME=30d # Security Configuration SSH_PUBLIC_KEY=${SSH_PUBLIC_KEY} -DOMAIN_NAME=torrust-demo.com +DOMAIN_NAME=tracker.torrust-demo.com SSL_EMAIL=${SSL_EMAIL} # Docker Configuration @@ -498,7 +473,7 @@ main() { local failed=0 # Validate environment files - for env in local staging production; do + for env in local production; do env_file="${CONFIG_DIR}/environments/${env}.env" if ! validate_env_file "${env_file}" "${env}"; then failed=1 diff --git a/infrastructure/scripts/configure-env.sh b/infrastructure/scripts/configure-env.sh new file mode 100755 index 0000000..9edc91b --- /dev/null +++ b/infrastructure/scripts/configure-env.sh @@ -0,0 +1,220 @@ +#!/bin/bash +# Configuration processing script for Torrust Tracker Demo +# Processes environment variables and generates configuration files + +set -euo pipefail + +# Configuration +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." 
&& pwd)" +CONFIG_DIR="${PROJECT_ROOT}/infrastructure/config" + +# Default values +ENVIRONMENT="${1:-local}" +VERBOSE="${VERBOSE:-false}" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Logging functions +log() { + echo -e "$1" +} + +log_info() { + log "${BLUE}[INFO]${NC} $1" +} + +log_success() { + log "${GREEN}[SUCCESS]${NC} $1" +} + +log_warning() { + log "${YELLOW}[WARNING]${NC} $1" +} + +log_error() { + log "${RED}[ERROR]${NC} $1" >&2 +} + +# Setup production environment from template +setup_production_environment() { + local env_file="${CONFIG_DIR}/environments/production.env" + local template_file="${CONFIG_DIR}/environments/production.env.tpl" + + if [[ ! -f "${env_file}" ]]; then + if [[ ! -f "${template_file}" ]]; then + log_error "Production template not found: ${template_file}" + exit 1 + fi + + log_info "Creating production.env from template..." + cp "${template_file}" "${env_file}" + log_warning "Production environment file created from template: ${env_file}" + log_warning "IMPORTANT: You must edit this file and replace placeholder values with secure secrets!" + log_warning "File location: ${env_file}" + log_error "Aborting: Please configure production secrets first, then run this script again." + exit 1 + fi + + # Validate that placeholder values have been replaced + if grep -q "REPLACE_WITH_SECURE" "${env_file}"; then + log_error "Production environment file contains placeholder values!" + log_error "Please edit ${env_file} and replace all 'REPLACE_WITH_SECURE_*' values with actual secrets." 
+ log_error "Found placeholder values:" + grep "REPLACE_WITH_SECURE" "${env_file}" | while read -r line; do + log_error " ${line}" + done + exit 1 + fi + + log_success "Production environment file validated" +} + +# Load environment configuration +load_environment() { + local env_file="${CONFIG_DIR}/environments/${ENVIRONMENT}.env" + + # Special handling for production environment + if [[ "${ENVIRONMENT}" == "production" ]]; then + setup_production_environment + fi + + if [[ ! -f "${env_file}" ]]; then + log_error "Environment file not found: ${env_file}" + exit 1 + fi + + log_info "Loading environment: ${ENVIRONMENT}" + # Export variables so they're available to envsubst + set -a # automatically export all variables + # shellcheck source=/dev/null + source "${env_file}" + set +a # stop automatically exporting +} + +# Validate required environment variables +validate_environment() { + local required_vars=( + "ENVIRONMENT" + "MYSQL_ROOT_PASSWORD" + "MYSQL_PASSWORD" + "TRACKER_ADMIN_TOKEN" + "GF_SECURITY_ADMIN_PASSWORD" + ) + + for var in "${required_vars[@]}"; do + if [[ -z "${!var:-}" ]]; then + log_error "Required environment variable not set: ${var}" + exit 1 + fi + done + + log_success "Environment validation passed" +} + +# Process configuration templates +process_templates() { + local templates_dir="${CONFIG_DIR}/templates" + local output_dir="${PROJECT_ROOT}/application/storage/tracker/etc" + + # Ensure output directory exists + mkdir -p "${output_dir}" + + # Process tracker configuration template + if [[ -f "${templates_dir}/tracker.toml.tpl" ]]; then + log_info "Processing tracker configuration template" + envsubst <"${templates_dir}/tracker.toml.tpl" >"${output_dir}/tracker.toml" + log_info "Generated: ${output_dir}/tracker.toml" + fi + + # Process prometheus configuration template + if [[ -f "${templates_dir}/prometheus.yml.tpl" ]]; then + log_info "Processing prometheus configuration template" + local 
prometheus_output_dir="${PROJECT_ROOT}/application/storage/prometheus/etc" + mkdir -p "${prometheus_output_dir}" + envsubst <"${templates_dir}/prometheus.yml.tpl" >"${prometheus_output_dir}/prometheus.yml" + log_info "Generated: ${prometheus_output_dir}/prometheus.yml" + fi + + # Process nginx configuration template + if [[ -f "${templates_dir}/nginx.conf.tpl" ]]; then + log_info "Processing nginx configuration template" + local nginx_output_dir="${PROJECT_ROOT}/application/storage/proxy/etc/nginx-conf" + mkdir -p "${nginx_output_dir}" + envsubst <"${templates_dir}/nginx.conf.tpl" >"${nginx_output_dir}/nginx.conf" + log_info "Generated: ${nginx_output_dir}/nginx.conf" + fi + + log_success "Configuration templates processed" +} + +# Generate .env file for Docker Compose +generate_docker_env() { + local templates_dir="${CONFIG_DIR}/templates" + local env_output="${PROJECT_ROOT}/application/.env" + + log_info "Generating Docker Compose environment file" + + # Set generation date for template + GENERATION_DATE="$(date)" + export GENERATION_DATE + + # Ensure ENVIRONMENT is exported for template substitution + export ENVIRONMENT + + # Process Docker Compose environment template + if [[ -f "${templates_dir}/docker-compose.env.tpl" ]]; then + envsubst <"${templates_dir}/docker-compose.env.tpl" >"${env_output}" + log_info "Generated: ${env_output}" + else + log_error "Docker Compose environment template not found: ${templates_dir}/docker-compose.env.tpl" + exit 1 + fi +} + +# Main execution +main() { + log_info "Starting configuration processing for environment: ${ENVIRONMENT}" + + load_environment + validate_environment + process_templates + generate_docker_env + + log_success "Configuration processing completed successfully" +} + +# Show help +show_help() { + cat <&2 +} + +# Check if required tools are available +check_dependencies() { + local missing_tools=() + + # Check for TOML validation tool (optional but recommended) + if ! command -v toml-test >/dev/null 2>&1 && ! 
# Validate the generated tracker TOML configuration.
#
# Checks, in order:
#   1. the generated tracker.toml exists;
#   2. its syntax: `taplo check` when taplo is installed, otherwise a coarse
#      grep heuristic (at least one [section] header and no line ending
#      in "= ");
#   3. every required section header is present, as [name] or [[name]].
#
# Section headers are matched as fixed whole-line strings, so the dot in
# "core.database" is a literal dot rather than a regex wildcard.
#
# Globals:
#   PROJECT_ROOT (read)
#   ENVIRONMENT  (read, only for the hint in the missing-file message)
#   VERBOSE      (read, optional; treated as "false" when unset)
# Returns:
#   0 when all checks pass, 1 on the first failed check.
validate_toml_files() {
    local tracker_config="${PROJECT_ROOT}/application/storage/tracker/etc/tracker.toml"
    local verbose="${VERBOSE:-false}"
    local section

    if [[ ! -f "${tracker_config}" ]]; then
        log_error "Tracker configuration file not found: ${tracker_config}"
        log_error "Run './infrastructure/scripts/configure-env.sh ${ENVIRONMENT}' first"
        return 1
    fi

    log_info "Validating TOML configuration files..."

    if command -v taplo >/dev/null 2>&1; then
        # Real TOML parser available: trust its verdict.
        if taplo check "${tracker_config}"; then
            log_success "TOML syntax validation passed (using taplo)"
        else
            log_error "TOML syntax validation failed"
            return 1
        fi
    else
        # Heuristic only: needs a [section] header and rejects "key = " lines
        # that have nothing but a trailing space after the equals sign.
        if grep -q "^\[.*\]$" "${tracker_config}" && ! grep -q "= $" "${tracker_config}"; then
            log_success "Basic TOML structure validation passed"
        else
            log_error "Basic TOML structure validation failed"
            return 1
        fi
    fi

    # Validate required sections exist
    local -a required_sections=(
        "logging"
        "core"
        "core.database"
        "http_api"
        "udp_trackers"
        "http_trackers"
    )

    for section in "${required_sections[@]}"; do
        # -F: literal match, -x: whole line; accepts table and array-of-tables.
        if grep -qFx -e "[${section}]" -e "[[${section}]]" -- "${tracker_config}"; then
            if [[ "${verbose}" == "true" ]]; then
                log_info "Section found: [${section}]"
            fi
        else
            log_error "Required section missing: [${section}]"
            return 1
        fi
    done

    log_success "Tracker configuration validation passed"
}
# Validate environment-specific settings in the generated tracker.toml.
#
# Both supported environments require:
#   threshold = "info", on_reverse_proxy = true, driver = "mysql"
# and differ only in tracker privacy:
#   local       -> private = false (public mode, for integration testing)
#   production  -> private = true  (private mode, for security)
# An external_ip other than "0.0.0.0" only produces a warning.
#
# Settings are matched as fixed strings anywhere in the file, so the dots in
# the IP address are literal rather than regex wildcards. The match is not
# anchored to a line start or to a particular TOML section.
#
# Globals:
#   PROJECT_ROOT (read), ENVIRONMENT (read)
#   VERBOSE      (read, optional; treated as "false" when unset)
# Returns:
#   0 when every required setting is present; 1 when the environment is
#   unknown, the file is missing, or a required setting is absent.
validate_environment_config() {
    local tracker_config="${PROJECT_ROOT}/application/storage/tracker/etc/tracker.toml"
    local verbose="${VERBOSE:-false}"
    local private_setting private_ok private_fail
    local i

    log_info "Validating environment-specific configuration..."

    case "${ENVIRONMENT}" in
        "local")
            # Local environment allows public mode for integration testing
            private_setting='private = false'
            private_ok="Public tracker mode enabled (for integration testing)"
            private_fail="Public tracker mode should be enabled for integration testing"
            ;;
        "production")
            # Production environment requires private mode for security
            private_setting='private = true'
            private_ok="Private tracker mode enabled"
            private_fail="Private tracker mode should be enabled"
            ;;
        *)
            log_error "Unknown environment: ${ENVIRONMENT}"
            return 1
            ;;
    esac

    # Without this guard a missing file would surface as the misleading
    # "Info logging not enabled" error from the first grep below.
    if [[ ! -f "${tracker_config}" ]]; then
        log_error "Tracker configuration file not found: ${tracker_config}"
        return 1
    fi

    # Parallel arrays: setting to look for, verbose success text, failure text.
    local -a settings=(
        'threshold = "info"'
        'on_reverse_proxy = true'
        "${private_setting}"
        'driver = "mysql"'
    )
    local -a ok_messages=(
        "Info logging enabled"
        "Reverse proxy enabled"
        "${private_ok}"
        "MySQL database configured"
    )
    local -a fail_messages=(
        "Info logging not enabled"
        "Reverse proxy should be enabled"
        "${private_fail}"
        "MySQL database not configured"
    )

    for i in "${!settings[@]}"; do
        if grep -qF -- "${settings[i]}" "${tracker_config}"; then
            if [[ "${verbose}" == "true" ]]; then
                log_info "${ENVIRONMENT}: ${ok_messages[i]}"
            fi
        else
            log_error "${ENVIRONMENT}: ${fail_messages[i]}"
            return 1
        fi
    done

    # Advisory only: a different external IP may be intentional.
    if grep -qF -- 'external_ip = "0.0.0.0"' "${tracker_config}"; then
        if [[ "${verbose}" == "true" ]]; then
            log_info "${ENVIRONMENT}: External IP set to 0.0.0.0"
        fi
    else
        log_warning "${ENVIRONMENT}: External IP not set to 0.0.0.0 (this may be intentional)"
    fi

    log_success "Environment-specific configuration validation passed"
}
+} + +# Show help +show_help() { + cat <