-
Notifications
You must be signed in to change notification settings - Fork 0
feat: implement file extension reporting, auditor functionality, and continuous integration. #3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| name: Backup Utility CI | ||
|
|
||
| on: | ||
| push: | ||
| branches: [ "master" ] | ||
| pull_request: | ||
| branches: [ "master" ] | ||
|
|
||
| jobs: | ||
| test: | ||
| runs-on: ubuntu-latest | ||
|
|
||
| steps: | ||
| - uses: actions/checkout@v3 | ||
|
|
||
| - name: Set up Python 3.10 | ||
| uses: actions/setup-python@v4 | ||
| with: | ||
| python-version: "3.10" | ||
|
|
||
| - name: Install dependencies | ||
| run: | | ||
| python -m pip install --upgrade pip | ||
| pip install pytest | ||
|
|
||
| - name: Run Pytest Validation Suite | ||
| run: | | ||
| pytest -v tests/ | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,7 +1,6 @@ | ||
| logs/** | ||
| *.db | ||
| *.db-journal | ||
| *.json | ||
| __pycache__ | ||
| *.py[cod] | ||
| *$py.class | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,58 +18,26 @@ | |
| import json | ||
| from datetime import datetime | ||
|
|
||
| # Load configuration from config.env (simple parsing, no external deps) | ||
| CONFIG_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "config.env") | ||
| # Load configuration safely from JSON | ||
| CONFIG_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "auditor_config.json") | ||
|
|
||
| def load_config(): | ||
| config = {} | ||
| if not os.path.exists(CONFIG_FILE): | ||
| print(f"Error: {CONFIG_FILE} not found. Please ensure it exists.") | ||
| sys.exit(1) | ||
|
|
||
| with open(CONFIG_FILE, 'r') as f: | ||
| for line in f: | ||
| line = line.strip() | ||
| if line and not line.startswith('#'): | ||
| # Handle bash array format for exclusions roughly | ||
| if line.startswith('EXCLUSIONS=('): | ||
| config['EXCLUSIONS'] = [] | ||
| continue | ||
| if line.startswith('AUDITOR_EXT_FILTER=('): | ||
| config['AUDITOR_EXT_FILTER'] = [] | ||
| continue | ||
|
|
||
| # If we are inside an array parsing (hacky but works for the format we defined) | ||
| if 'EXCLUSIONS' in config and isinstance(config['EXCLUSIONS'], list) and line == ')': | ||
| # Array ended, convert to tuple | ||
| continue | ||
| elif 'EXCLUSIONS' in config and isinstance(config['EXCLUSIONS'], list): | ||
| val = line.strip(' "()') | ||
| if val: config['EXCLUSIONS'].append(val) | ||
| continue | ||
|
|
||
| if 'AUDITOR_EXT_FILTER' in config and isinstance(config['AUDITOR_EXT_FILTER'], list) and line == ')': | ||
| continue | ||
| elif 'AUDITOR_EXT_FILTER' in config and isinstance(config['AUDITOR_EXT_FILTER'], list): | ||
| val = line.strip(' "()') | ||
| if val: config['AUDITOR_EXT_FILTER'].append(val.lower()) | ||
| continue | ||
|
|
||
| if '=' in line: | ||
| key, val = line.split('=', 1) | ||
| config[key] = val.strip(' "') | ||
|
|
||
| # Default fallbacks if parsing didn't catch arrays well | ||
| if 'EXCLUSIONS' not in config: config['EXCLUSIONS'] = [] | ||
| if 'AUDITOR_EXT_FILTER' not in config: config['AUDITOR_EXT_FILTER'] = [] | ||
| if 'AUDITOR_DB_NAME' not in config: config['AUDITOR_DB_NAME'] = 'auditor.db' | ||
|
|
||
| return config | ||
| try: | ||
| config = json.load(f) | ||
| return config | ||
| except json.JSONDecodeError as e: | ||
| print(f"Error parsing JSON configuration in {CONFIG_FILE}: {e}") | ||
| sys.exit(1) | ||
|
|
||
| CONFIG = load_config() | ||
|
|
||
| # Database setup | ||
| DB_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), CONFIG.get('AUDITOR_DB_NAME', 'auditor.db')) | ||
| DB_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), CONFIG.get('DB_NAME', 'auditor.db')) | ||
| LOG_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "logs") | ||
|
|
||
| # Ensure logs dir exists | ||
|
|
@@ -127,7 +95,7 @@ def should_exclude(file_path): | |
| return True | ||
|
|
||
| # 2. Check extension filter (if defined) | ||
| ext_filter = CONFIG.get('AUDITOR_EXT_FILTER', []) | ||
| ext_filter = CONFIG.get('EXT_FILTER', []) | ||
| if ext_filter: | ||
| _, ext = os.path.splitext(file_path) | ||
| if ext.lower() not in ext_filter: | ||
|
Comment on lines +98 to +101
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Useful? React with 👍 / 👎. |
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| { | ||
| "DB_NAME": "auditor.db", | ||
| "EXCLUSIONS": [ | ||
| ".git/", | ||
| "node_modules/", | ||
| "venv/", | ||
| ".cache/", | ||
| "tmp/", | ||
| "System Volume Information/", | ||
| "$RECYCLE.BIN/" | ||
| ], | ||
| "EXT_FILTER": [ | ||
| ".jpg", ".jpeg", ".png", ".heic", | ||
| ".cr2", ".arw", ".dng", ".tif", | ||
| ".mp4", ".mov", ".m4v", ".3gp", | ||
| ".pdf" | ||
| ] | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,55 @@ | ||
| #!/usr/bin/env python3 | ||
| import os | ||
| import sys | ||
| from collections import Counter | ||
|
|
||
| def main(): | ||
| if len(sys.argv) < 2: | ||
| print("Usage: python3 find_extensions.py <target_directory> [output_file]") | ||
| sys.exit(1) | ||
|
|
||
| target_dir = sys.argv[1] | ||
| output_file = sys.argv[2] if len(sys.argv) > 2 else "extension_report.log" | ||
|
|
||
| if not os.path.isdir(target_dir): | ||
| print(f"Error: {target_dir} is not a valid directory.") | ||
| sys.exit(1) | ||
|
|
||
| print(f"Scanning {target_dir} for file extensions...") | ||
| ext_counts = Counter() | ||
|
|
||
| scanned = 0 | ||
| # Walk the directory tree | ||
| for root, dirs, files in os.walk(target_dir): | ||
| for file in files: | ||
| scanned += 1 | ||
| if scanned % 10000 == 0: | ||
| print(f"Scanned {scanned} files...") | ||
|
|
||
| # Extract extension and convert to lowercase | ||
| _, ext = os.path.splitext(file) | ||
| if ext: | ||
| ext_counts[ext.lower()] += 1 | ||
| else: | ||
| ext_counts["<no_extension>"] += 1 | ||
|
|
||
| print(f"\nScan complete. Total files processed: {scanned}") | ||
|
|
||
| # Save formatted report | ||
| with open(output_file, 'w') as f: | ||
| f.write(f"Extension Report for: {target_dir}\n") | ||
| f.write(f"Total Files: {scanned}\n") | ||
| f.write("-" * 40 + "\n") | ||
| f.write(f"{'Extension':<20} | {'Count':<10}\n") | ||
| f.write("-" * 40 + "\n") | ||
|
|
||
| for ext, count in ext_counts.most_common(): | ||
| f.write(f"{ext:<20} | {count:<10}\n") | ||
|
|
||
| print(f"Report saved to: {output_file}") | ||
| print("\nTop 15 most common extensions:") | ||
| for ext, count in ext_counts.most_common(15): | ||
| print(f" {ext:<15} : {count}") | ||
|
|
||
| if __name__ == "__main__": | ||
| main() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
| import sys | ||
| import os | ||
| import tempfile | ||
| import pytest | ||
|
|
||
| # Ensure parent directory is in python path to import auditor | ||
| sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) | ||
|
|
||
| def test_hash_calculation(): | ||
| from auditor import calculate_sha256 | ||
|
|
||
| # Create temporary file to hash | ||
| with tempfile.NamedTemporaryFile(delete=False) as f: | ||
| f.write(b"backup-utility-test") | ||
| temp_path = f.name | ||
|
|
||
| try: | ||
| # Pre-calculated SHA256 of "backup-utility-test" | ||
| expected = "48f23852dc21c9a38e8ffd9f743f847b4d7945b0f4b9006f1635cab462b7fa2b" | ||
| assert calculate_sha256(temp_path) == expected | ||
| finally: | ||
| os.remove(temp_path) | ||
|
|
||
| def test_should_exclude(monkeypatch): | ||
| import auditor | ||
|
|
||
| # Mock the configuration payload testing specific scenarios | ||
| mock_config = { | ||
| "EXCLUSIONS": [".git/", "node_modules/"], | ||
| "EXT_FILTER": [".jpg", ".mp4"] | ||
| } | ||
| monkeypatch.setattr(auditor, 'CONFIG', mock_config) | ||
|
|
||
| # Check directory exclusion rules | ||
| assert auditor.should_exclude("/my/path/.git/config") == True | ||
| assert auditor.should_exclude("/my/path/node_modules/index.js") == True | ||
|
|
||
| # Files missing from explicit Extension Filter SHOULD be excluded | ||
| assert auditor.should_exclude("/my/path/src/main.py") == True | ||
| assert auditor.should_exclude("/my/path/document.pdf") == True | ||
|
|
||
| # Acceptable files based on the mock filter | ||
| assert auditor.should_exclude("/my/path/photo.jpg") == False | ||
| assert auditor.should_exclude("/my/path/video.mp4") == False |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| import os | ||
| import json | ||
| import subprocess | ||
|
|
||
| def test_auditor_config_syntax(): | ||
| """Validates that auditor_config.json is valid JSON and contains required keys.""" | ||
| config_path = os.path.join(os.path.dirname(__file__), '..', 'auditor_config.json') | ||
| assert os.path.exists(config_path), "auditor_config.json does not exist" | ||
|
|
||
| with open(config_path, 'r') as f: | ||
| # Note: If this file has broken JSON syntax, json.load will throw a | ||
| # JSONDecodeError and correctly fail the Pytest suite! | ||
| config = json.load(f) | ||
|
|
||
| assert 'DB_NAME' in config | ||
| assert isinstance(config.get('EXCLUSIONS'), list) | ||
| assert isinstance(config.get('EXT_FILTER'), list) | ||
|
|
||
|
|
||
| def test_bash_config_syntax(): | ||
| """Validates that config.env can be safely sourced by bash without syntax errors.""" | ||
| config_path = os.path.join(os.path.dirname(__file__), '..', 'config.env') | ||
| assert os.path.exists(config_path), "config.env does not exist" | ||
|
|
||
| # Run bash -n (syntax check) on the config file | ||
| result = subprocess.run(['bash', '-n', config_path], capture_output=True, text=True) | ||
| assert result.returncode == 0, f"config.env bash syntax error: {result.stderr}" | ||
|
|
||
| # Evaluate if we can source it safely | ||
| result_source = subprocess.run(['bash', '-c', f'set -e; source {config_path}'], capture_output=True, text=True) | ||
| assert result_source.returncode == 0, f"Error sourcing config.env at runtime: {result_source.stderr}" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This workflow is restricted to
`master` for both `push` and `pull_request`, so in environments where development happens on `main` (as in this repository’s refs), the test job will not run at all and regressions can merge without validation. Please align these branch filters with the actual default/integration branch so CI executes on normal PR and push flows. Useful? React with 👍 / 👎.