From 624e366c4d6da82e39473d0e5f5425ed0fc5dd63 Mon Sep 17 00:00:00 2001 From: Black Circle Sentinel Date: Sat, 7 Feb 2026 00:21:54 -0500 Subject: [PATCH 1/9] auto-claude: subtask-1-1 - Create scan_git_history.sh with basic structure an Created scripts/scan_git_history.sh following the established scanner pattern: - Added shebang and strict mode (set -euo pipefail) - Added comprehensive header documentation - Implemented SCRIPT_DIR resolution - Sourced common.sh helpers with fallback emit_finding - Added git repository and git command validation - Initialized FINDINGS array for future checks - Added JSON output logic (empty array for now) - Added placeholders for CHK-GIT-001 through CHK-GIT-008 checks Verification: Outputs valid JSON (jq empty exits 0) Co-Authored-By: Claude Sonnet 4.5 --- scripts/scan_git_history.sh | 79 +++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100755 scripts/scan_git_history.sh diff --git a/scripts/scan_git_history.sh b/scripts/scan_git_history.sh new file mode 100755 index 0000000..476237d --- /dev/null +++ b/scripts/scan_git_history.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash +set -euo pipefail + +############################################################################### +# scan_git_history.sh - Git History Security Scanner +# +# Scans git repository history for accidentally committed secrets, credentials, +# and sensitive information. Outputs a JSON array of findings to stdout. +# +# Usage: +# ./scan_git_history.sh # scan current directory +# GIT_REPO_PATH=/path/to/repo ./scan_git_history.sh +############################################################################### + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Source shared helpers if available; define fallbacks otherwise +if [[ -f "${SCRIPT_DIR}/helpers/common.sh" ]]; then + # shellcheck source=helpers/common.sh + source "${SCRIPT_DIR}/helpers/common.sh" +fi + +# Fallback: define emit_finding if not already provided by common.sh +if ! declare -f emit_finding >/dev/null 2>&1; then + emit_finding() { + local id="$1" severity="$2" title="$3" description="$4" evidence="$5" remediation="$6" auto_fix="${7:-}" + jq -n \ + --arg id "$id" \ + --arg severity "$severity" \ + --arg title "$title" \ + --arg description "$description" \ + --arg evidence "$evidence" \ + --arg remediation "$remediation" \ + --arg auto_fix "$auto_fix" \ + '{id:$id, severity:$severity, title:$title, description:$description, evidence:$evidence, remediation:$remediation, auto_fix:$auto_fix}' + } +fi + +# --------------------------------------------------------------------------- +# Resolve git repository path +# --------------------------------------------------------------------------- +REPO_PATH="${GIT_REPO_PATH:-.}" + +if [[ ! -d "$REPO_PATH/.git" ]]; then + echo '[{"id":"CHK-GIT-000","severity":"info","title":"Not a git repository","description":"Could not locate .git directory","evidence":"'"$REPO_PATH"'","remediation":"Run this scanner from within a git repository","auto_fix":""}]' + exit 0 +fi + +# Verify git command is available +if ! command -v git &>/dev/null; then + echo '[{"id":"CHK-GIT-000","severity":"warn","title":"git command not found","description":"The git command is not available in PATH","evidence":"git not found","remediation":"Install git to enable history scanning","auto_fix":""}]' + exit 0 +fi + +# --------------------------------------------------------------------------- +# Collect findings into an array +# --------------------------------------------------------------------------- +FINDINGS=() + +# --------------------------------------------------------------------------- +# Git history scanning checks will be implemented in subsequent subtasks +# --------------------------------------------------------------------------- +# CHK-GIT-001: API keys in commit history +# CHK-GIT-002: Passwords in commit history +# CHK-GIT-003: Private keys in commit history +# CHK-GIT-004: AWS credentials in commit history +# CHK-GIT-005: Database credentials in commit history +# CHK-GIT-006: OAuth tokens in commit history +# CHK-GIT-007: Generic secrets in commit history +# CHK-GIT-008: Large binary files in history + +# --------------------------------------------------------------------------- +# Output all findings as a JSON array +# --------------------------------------------------------------------------- +if [[ ${#FINDINGS[@]} -eq 0 ]]; then + echo '[]' +else + printf '%s\n' "${FINDINGS[@]}" | jq -s '.' +fi From 1d626522211bad98d8ee15f31ab0aedfee285eda Mon Sep 17 00:00:00 2001 From: Black Circle Sentinel Date: Sat, 7 Feb 2026 00:25:49 -0500 Subject: [PATCH 2/9] auto-claude: subtask-1-2 - Implement git log scanning with secret pattern matching - Add redact_secret() helper to hide sensitive values (show last 4 chars) - Define 14 secret patterns adapted from scan_secrets.py: * Slack tokens (bot/app/user) * JWT tokens * Discord/Telegram bot tokens * OpenAI API keys * Ethereum private keys * Generic bearer tokens * AWS access keys * GitHub tokens * Generic API keys - Implement scan_git_history() function: * Scan last 100 commits (1000 in deep mode) * Use git log -p to get commit diffs * Parse commit hashes and file paths from diff output * Match secret patterns in added lines (+) * Skip env var references ($VAR, ${VAR}) * Emit CHK-SEC-008 findings with redacted values - Tested with multiple secret types: Slack, OpenAI, JWT - All findings include commit hash, file path, and remediation Co-Authored-By: Claude Sonnet 4.5 --- scripts/scan_git_history.sh | 154 +++++++++++++++++++++++++++++++++--- 1 file changed, 145 insertions(+), 9 deletions(-) diff --git a/scripts/scan_git_history.sh b/scripts/scan_git_history.sh index 476237d..a859d1b 100755 --- a/scripts/scan_git_history.sh +++ b/scripts/scan_git_history.sh @@ -58,16 +58,152 @@ fi FINDINGS=() # --------------------------------------------------------------------------- -# Git history scanning checks will be implemented in subsequent subtasks +# Helper: Redact secret value (show only last 4 chars) # --------------------------------------------------------------------------- -# CHK-GIT-001: API keys in commit history -# CHK-GIT-002: Passwords in commit history -# CHK-GIT-003: Private keys in commit history -# CHK-GIT-004: AWS credentials in commit history -# CHK-GIT-005: Database credentials in commit history -# CHK-GIT-006: OAuth tokens in commit history -# CHK-GIT-007: Generic secrets in commit history -# CHK-GIT-008: Large binary files in history +redact_secret() { + local value="$1" + local len=${#value} + if [[ $len -le 4 ]]; then + echo "****" + else + echo "****${value: -4}" + fi +} + +# --------------------------------------------------------------------------- +# Secret pattern definitions (adapted from scan_secrets.py) +# Each entry is "type|pattern" separated by pipe +# --------------------------------------------------------------------------- +SECRET_PATTERNS=( + "Slack bot token|xoxb-[A-Za-z0-9-]+" + "Slack app token|xapp-[A-Za-z0-9-]+" + "Slack user token|xoxp-[A-Za-z0-9-]+" + "JWT token|eyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+" + "Discord bot token|[MN][A-Za-z0-9]{23,}\.[A-Za-z0-9_-]{6}\.[A-Za-z0-9_-]{27,}" + "Telegram bot token|[0-9]{8,10}:[A-Za-z0-9_-]{35}" + "OpenAI API key|sk-proj-[A-Za-z0-9]{20,}" + "OpenAI legacy key|sk-[A-Za-z0-9]{20,}" + "Ethereum private key|0x[a-fA-F0-9]{64}" + "Private key|-----BEGIN[[:space:]]+(RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----" + "Generic Bearer token|[Bb]earer[[:space:]]+[A-Za-z0-9_.~+/-]+=*" + "AWS Access Key|AKIA[0-9A-Z]{16}" + "GitHub Token|ghp_[A-Za-z0-9]{36}" + "Generic API key|api[_-]?key[[:space:]]*[:=][[:space:]]*[\"'][A-Za-z0-9_-]{20,}[\"']" +) + +# --------------------------------------------------------------------------- +# Scan git history for secrets +# --------------------------------------------------------------------------- +scan_git_history() { + # Determine scan depth based on CLAWPINCH_DEEP + local max_commits=100 + local time_limit="" + + if [[ "${CLAWPINCH_DEEP:-0}" == "1" ]]; then + max_commits=1000 + time_limit="--since=6 months ago" + fi + + # Get git log with patches + # Format: commit hash, file path, diff lines + # Note: --no-textconv disables textconv filters, -a treats all files as text + local git_output + git_output=$(cd "$REPO_PATH" && git log -p --all --no-textconv -n "$max_commits" $time_limit --format="COMMIT:%H" 2>/dev/null || true) + + if [[ -z "$git_output" ]]; then + # Empty history or no commits + return 0 + fi + + local current_commit="" + local current_file="" + + # Process git log output line by line + while IFS= read -r line; do + # Extract commit hash + if [[ "$line" =~ ^COMMIT:([a-f0-9]{40}) ]]; then + current_commit="${BASH_REMATCH[1]}" + current_file="" + continue + fi + + # Extract file path from diff header + if [[ "$line" =~ ^\+\+\+[[:space:]]b/(.+)$ ]]; then + current_file="${BASH_REMATCH[1]}" + continue + fi + + # Only check added lines (starting with +) + if [[ ! "$line" =~ ^\+[^+] ]]; then + continue + fi + + # Skip if we don't have commit/file context + if [[ -z "$current_commit" ]]; then + continue + fi + + # Remove the leading + from the diff line + local content="${line:1}" + + # Check each secret pattern + for pattern_entry in "${SECRET_PATTERNS[@]}"; do + # Parse "type|pattern" format + local secret_type="${pattern_entry%%|*}" + local pattern="${pattern_entry#*|}" + + # Use grep -oE to extract matching secrets + local matches + matches=$(echo "$content" | grep -oE "$pattern" 2>/dev/null || true) + + if [[ -n "$matches" ]]; then + while IFS= read -r secret_value; do + # Skip empty matches + [[ -z "$secret_value" ]] && continue + + # Skip environment variable references (${VAR} or $VAR) + if [[ "$secret_value" =~ ^\$\{.*\}$ ]] || [[ "$secret_value" =~ ^\$[A-Z_][A-Z0-9_]*$ ]]; then + continue + fi + + local redacted_value + redacted_value=$(redact_secret "$secret_value") + + local evidence="commit=${current_commit:0:8}" + if [[ -n "$current_file" ]]; then + evidence="$evidence file=$current_file" + fi + evidence="$evidence secret_type=\"$secret_type\" value=$redacted_value" + + local title="$secret_type found in git history" + local description="A $secret_type was detected in commit $current_commit" + if [[ -n "$current_file" ]]; then + description="$description in file $current_file" + fi + description="$description. This secret exists in the repository history even if it was later removed from current files." + + local remediation="Remove secret from git history using git filter-repo or BFG Repo-Cleaner. Rotate the exposed credential immediately. See: https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/removing-sensitive-data-from-a-repository" + + # Emit finding + local finding + finding=$(emit_finding \ + "CHK-SEC-008" \ + "critical" \ + "$title" \ + "$description" \ + "$evidence" \ + "$remediation" \ + "") + + FINDINGS+=("$finding") + done <<< "$matches" + fi + done + done <<< "$git_output" +} + +# Run the scan +scan_git_history # --------------------------------------------------------------------------- # Output all findings as a JSON array From 6fa2f41447ab348b4aa049f9564eb89951c785a7 Mon Sep 17 00:00:00 2001 From: Black Circle Sentinel Date: Sat, 7 Feb 2026 00:33:04 -0500 Subject: [PATCH 3/9] auto-claude: subtask-1-3 - Add performance optimizations and edge case handli Performance optimizations: - Skip binary/media files by extension (images, videos, archives, etc.) - Limit line length to 10000 chars (skip likely binary data) - Add deduplication to avoid duplicate secret findings - Add safety limit on total lines scanned (50k default, 500k in deep mode) - Use --no-merges and --diff-filter=A for git log efficiency - Skip empty lines early Edge case handling: - Return empty array for non-git directories (not error) - Check if repo has any commits (handle empty repos) - Detect shallow clones and add warning to remediation - Skip placeholder/example values (test, sample, dummy, etc.) - Better handling of missing file context - Use bash 3.2 compatible deduplication (string-based, not associative array) Co-Authored-By: Claude Sonnet 4.5 --- scripts/scan_git_history.sh | 114 ++++++++++++++++++++++++++++++------ 1 file changed, 97 insertions(+), 17 deletions(-) diff --git a/scripts/scan_git_history.sh b/scripts/scan_git_history.sh index a859d1b..dc762cb 100755 --- a/scripts/scan_git_history.sh +++ b/scripts/scan_git_history.sh @@ -36,22 +36,52 @@ if ! declare -f emit_finding >/dev/null 2>&1; then } fi +# --------------------------------------------------------------------------- +# Configuration constants +# --------------------------------------------------------------------------- +# Maximum line length to scan (longer lines are likely binary data) +MAX_LINE_LENGTH=10000 + +# File extensions to skip (binary/media files) +SKIP_EXTENSIONS='\.(jpg|jpeg|png|gif|bmp|ico|svg|webp|pdf|zip|tar|gz|bz2|xz|7z|rar|exe|dll|so|dylib|a|o|bin|dat|mp3|mp4|avi|mov|mkv|flv|wmv|wav|ttf|otf|woff|woff2|eot)$' + +# Dedupe: Track found secrets to avoid duplicate findings (bash 3.2 compatible) +FOUND_SECRETS="" + # --------------------------------------------------------------------------- # Resolve git repository path # --------------------------------------------------------------------------- REPO_PATH="${GIT_REPO_PATH:-.}" if [[ ! -d "$REPO_PATH/.git" ]]; then - echo '[{"id":"CHK-GIT-000","severity":"info","title":"Not a git repository","description":"Could not locate .git directory","evidence":"'"$REPO_PATH"'","remediation":"Run this scanner from within a git repository","auto_fix":""}]' + # Not a git repo - output empty array (this is expected behavior) + echo '[]' exit 0 fi # Verify git command is available if ! command -v git &>/dev/null; then - echo '[{"id":"CHK-GIT-000","severity":"warn","title":"git command not found","description":"The git command is not available in PATH","evidence":"git not found","remediation":"Install git to enable history scanning","auto_fix":""}]' + echo '[]' + exit 0 +fi + +# --------------------------------------------------------------------------- +# Edge case: Check if repo has any commits +# --------------------------------------------------------------------------- +if ! (cd "$REPO_PATH" && git rev-parse HEAD &>/dev/null); then + # Empty repository with no commits + echo '[]' exit 0 fi +# --------------------------------------------------------------------------- +# Edge case: Handle shallow clones +# --------------------------------------------------------------------------- +IS_SHALLOW=0 +if [[ -f "$REPO_PATH/.git/shallow" ]]; then + IS_SHALLOW=1 +fi + # --------------------------------------------------------------------------- # Collect findings into an array # --------------------------------------------------------------------------- @@ -70,6 +100,17 @@ redact_secret() { fi } +# --------------------------------------------------------------------------- +# Helper: Check if file should be skipped based on extension +# --------------------------------------------------------------------------- +should_skip_file() { + local filepath="$1" + if [[ "$filepath" =~ $SKIP_EXTENSIONS ]]; then + return 0 # skip + fi + return 1 # don't skip +} + # --------------------------------------------------------------------------- # Secret pattern definitions (adapted from scan_secrets.py) # Each entry is "type|pattern" separated by pipe @@ -104,11 +145,13 @@ scan_git_history() { time_limit="--since=6 months ago" fi + # Performance optimization: Use --diff-filter to only show added content + # --no-merges skips merge commits (reduces duplicate scanning) # Get git log with patches # Format: commit hash, file path, diff lines - # Note: --no-textconv disables textconv filters, -a treats all files as text + # Note: --no-textconv disables textconv filters local git_output - git_output=$(cd "$REPO_PATH" && git log -p --all --no-textconv -n "$max_commits" $time_limit --format="COMMIT:%H" 2>/dev/null || true) + git_output=$(cd "$REPO_PATH" && git log -p --all --no-textconv --no-merges --diff-filter=A -n "$max_commits" $time_limit --format="COMMIT:%H" 2>/dev/null || true) if [[ -z "$git_output" ]]; then # Empty history or no commits @@ -117,9 +160,22 @@ scan_git_history() { local current_commit="" local current_file="" + local lines_scanned=0 + local max_lines=50000 # Safety limit to prevent runaway scans + + # Performance optimization: Early exit if we've scanned too many lines + if [[ "${CLAWPINCH_DEEP:-0}" == "1" ]]; then + max_lines=500000 + fi # Process git log output line by line while IFS= read -r line; do + # Safety limit: Exit if we've scanned too many lines + ((lines_scanned++)) + if [[ $lines_scanned -gt $max_lines ]]; then + break + fi + # Extract commit hash if [[ "$line" =~ ^COMMIT:([a-f0-9]{40}) ]]; then current_commit="${BASH_REMATCH[1]}" @@ -130,6 +186,10 @@ scan_git_history() { # Extract file path from diff header if [[ "$line" =~ ^\+\+\+[[:space:]]b/(.+)$ ]]; then current_file="${BASH_REMATCH[1]}" + # Performance optimization: Skip binary/media files early + if should_skip_file "$current_file"; then + current_file="" # Mark as skipped + fi continue fi @@ -138,14 +198,24 @@ scan_git_history() { continue fi - # Skip if we don't have commit/file context - if [[ -z "$current_commit" ]]; then + # Skip if we don't have commit context or file was skipped + if [[ -z "$current_commit" ]] || [[ -z "$current_file" ]]; then + continue + fi + + # Performance optimization: Skip very long lines (likely binary data) + if [[ ${#line} -gt $MAX_LINE_LENGTH ]]; then continue fi # Remove the leading + from the diff line local content="${line:1}" + # Edge case: Skip empty lines + if [[ -z "${content// /}" ]]; then + continue + fi + # Check each secret pattern for pattern_entry in "${SECRET_PATTERNS[@]}"; do # Parse "type|pattern" format @@ -161,29 +231,39 @@ scan_git_history() { # Skip empty matches [[ -z "$secret_value" ]] && continue - # Skip environment variable references (${VAR} or $VAR) + # Edge case: Skip environment variable references (${VAR} or $VAR) if [[ "$secret_value" =~ ^\$\{.*\}$ ]] || [[ "$secret_value" =~ ^\$[A-Z_][A-Z0-9_]*$ ]]; then continue fi + # Edge case: Skip placeholder/example values + if [[ "$secret_value" =~ (your|example|test|sample|placeholder|dummy|fake|xxx|yyy|zzz|000|111|abc|123) ]]; then + continue + fi + + # Performance optimization: Deduplicate findings + # Create a unique key for this secret + local secret_key="${secret_type}:${secret_value}" + if echo "$FOUND_SECRETS" | grep -qF "$secret_key"; then + continue # Already reported this secret + fi + FOUND_SECRETS="${FOUND_SECRETS}${secret_key}"$'\n' + local redacted_value redacted_value=$(redact_secret "$secret_value") - local evidence="commit=${current_commit:0:8}" - if [[ -n "$current_file" ]]; then - evidence="$evidence file=$current_file" - fi - evidence="$evidence secret_type=\"$secret_type\" value=$redacted_value" + local evidence="commit=${current_commit:0:8} file=$current_file secret_type=\"$secret_type\" value=$redacted_value" local title="$secret_type found in git history" - local description="A $secret_type was detected in commit $current_commit" - if [[ -n "$current_file" ]]; then - description="$description in file $current_file" - fi - description="$description. This secret exists in the repository history even if it was later removed from current files." + local description="A $secret_type was detected in commit $current_commit in file $current_file. This secret exists in the repository history even if it was later removed from current files." local remediation="Remove secret from git history using git filter-repo or BFG Repo-Cleaner. Rotate the exposed credential immediately. See: https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/removing-sensitive-data-from-a-repository" + # Add shallow clone warning to remediation if applicable + if [[ $IS_SHALLOW -eq 1 ]]; then + remediation="$remediation NOTE: This is a shallow clone - full history may contain additional secrets. Run 'git fetch --unshallow' for complete scan." + fi + # Emit finding local finding finding=$(emit_finding \ From fd25da3fca157432d1b2f803fe49c569e63a56c9 Mon Sep 17 00:00:00 2001 From: Black Circle Sentinel Date: Sat, 7 Feb 2026 18:41:10 -0500 Subject: [PATCH 4/9] auto-claude: subtask-1-3 - Add performance optimizations and edge case handli Add performance optimizations: - Skip lockfiles, minified files, test fixtures (SKIP_PATHS pattern) - Repository size detection and warnings for large repos (>10k commits) - Timeout protection with configurable limits (5/15 min) - Filter very short matches (<8 chars) to reduce false positives - Null byte and binary data detection Add edge case handling: - Git worktree support (.git as file, not just directory) - Timeout detection and warnings (exit codes 124/137) - Case-insensitive placeholder matching - Additional placeholder patterns (todo, fixme, redacted) - Binary data detection in diff lines - Improved shallow clone documentation Co-Authored-By: Claude Sonnet 4.5 --- scripts/scan_git_history.sh | 85 ++++++++++++++++++++++++++++++++++--- 1 file changed, 80 insertions(+), 5 deletions(-) diff --git a/scripts/scan_git_history.sh b/scripts/scan_git_history.sh index dc762cb..35b3145 100755 --- a/scripts/scan_git_history.sh +++ b/scripts/scan_git_history.sh @@ -45,6 +45,10 @@ MAX_LINE_LENGTH=10000 # File extensions to skip (binary/media files) SKIP_EXTENSIONS='\.(jpg|jpeg|png|gif|bmp|ico|svg|webp|pdf|zip|tar|gz|bz2|xz|7z|rar|exe|dll|so|dylib|a|o|bin|dat|mp3|mp4|avi|mov|mkv|flv|wmv|wav|ttf|otf|woff|woff2|eot)$' +# Additional files/paths to skip (performance optimization) +# Lockfiles, generated files, test fixtures rarely contain real secrets +SKIP_PATHS='(package-lock\.json|yarn\.lock|composer\.lock|Gemfile\.lock|poetry\.lock|pnpm-lock\.yaml|\.min\.(js|css)|\.map$|__snapshots__/|test/fixtures/|tests/fixtures/|spec/fixtures/)' + # Dedupe: Track found secrets to avoid duplicate findings (bash 3.2 compatible) FOUND_SECRETS="" @@ -53,7 +57,9 @@ FOUND_SECRETS="" # --------------------------------------------------------------------------- REPO_PATH="${GIT_REPO_PATH:-.}" -if [[ ! -d "$REPO_PATH/.git" ]]; then +# Edge case: Handle both regular repos and worktrees +# In worktrees, .git is a file, not a directory +if [[ ! -d "$REPO_PATH/.git" ]] && [[ ! -f "$REPO_PATH/.git" ]]; then # Not a git repo - output empty array (this is expected behavior) echo '[]' exit 0 @@ -76,6 +82,8 @@ fi # --------------------------------------------------------------------------- # Edge case: Handle shallow clones +# Shallow clones have incomplete history, which means we might miss secrets. +# We'll add a warning to remediation messages if shallow clone is detected. # --------------------------------------------------------------------------- IS_SHALLOW=0 if [[ -f "$REPO_PATH/.git/shallow" ]]; then @@ -101,13 +109,21 @@ redact_secret() { } # --------------------------------------------------------------------------- -# Helper: Check if file should be skipped based on extension +# Helper: Check if file should be skipped based on extension and path # --------------------------------------------------------------------------- should_skip_file() { local filepath="$1" + + # Skip binary/media files by extension if [[ "$filepath" =~ $SKIP_EXTENSIONS ]]; then return 0 # skip fi + + # Performance optimization: Skip lockfiles, generated files, test fixtures + if [[ "$filepath" =~ $SKIP_PATHS ]]; then + return 0 # skip + fi + return 1 # don't skip } @@ -145,13 +161,58 @@ scan_git_history() { time_limit="--since=6 months ago" fi + # Performance optimization: Check repo size and warn if very large + local total_commits + total_commits=$(cd "$REPO_PATH" && git rev-list --count --all 2>/dev/null || echo "0") + + # Edge case: Warn if repo has many commits but we're not doing deep scan + if [[ "$total_commits" -gt 10000 ]] && [[ "${CLAWPINCH_DEEP:-0}" != "1" ]]; then + # Emit info finding about large repo + local finding + finding=$(emit_finding \ + "CHK-SEC-009" \ + "info" \ + "Large repository detected" \ + "Repository has $total_commits commits but scanning only $max_commits. Consider using --deep flag for thorough scan." \ + "total_commits=$total_commits scan_depth=$max_commits" \ + "Run with CLAWPINCH_DEEP=1 for deeper history scan" \ + "") + FINDINGS+=("$finding") + fi + # Performance optimization: Use --diff-filter to only show added content # --no-merges skips merge commits (reduces duplicate scanning) # Get git log with patches # Format: commit hash, file path, diff lines # Note: --no-textconv disables textconv filters + # Timeout protection: Use timeout command if available (GNU coreutils or timeout from macOS) local git_output - git_output=$(cd "$REPO_PATH" && git log -p --all --no-textconv --no-merges --diff-filter=A -n "$max_commits" $time_limit --format="COMMIT:%H" 2>/dev/null || true) + local timeout_cmd="" + if command -v timeout &>/dev/null; then + # Timeout after 300 seconds (5 minutes) for normal scan, 900 seconds (15 min) for deep + local timeout_seconds=300 + [[ "${CLAWPINCH_DEEP:-0}" == "1" ]] && timeout_seconds=900 + timeout_cmd="timeout ${timeout_seconds}s" + fi + + git_output=$(cd "$REPO_PATH" && $timeout_cmd git log -p --all --no-textconv --no-merges --diff-filter=A -n "$max_commits" $time_limit --format="COMMIT:%H" 2>/dev/null || true) + + # Edge case: Check if command was killed by timeout + local git_exit_code=$? + if [[ $git_exit_code -eq 124 ]] || [[ $git_exit_code -eq 137 ]]; then + # 124 = timeout killed the process, 137 = SIGKILL + local finding + finding=$(emit_finding \ + "CHK-SEC-010" \ + "warn" \ + "Git history scan timed out" \ + "The git history scan exceeded the time limit. Repository may be too large for complete scan." \ + "exit_code=$git_exit_code" \ + "Consider scanning a smaller time range or using --shallow-since with git clone" \ + "") + FINDINGS+=("$finding") + return 0 + fi if [[ -z "$git_output" ]]; then # Empty history or no commits @@ -216,6 +277,12 @@ scan_git_history() { continue fi + # Edge case: Skip lines with null bytes or other binary indicators + # (some binary data may slip through extension filtering) + if [[ "$content" == *$'\x00'* ]] || [[ "$content" =~ [[:cntrl:]]{10,} ]]; then + continue + fi + # Check each secret pattern for pattern_entry in "${SECRET_PATTERNS[@]}"; do # Parse "type|pattern" format @@ -231,13 +298,21 @@ scan_git_history() { # Skip empty matches [[ -z "$secret_value" ]] && continue + # Performance optimization: Skip very short matches (likely false positives) + # Exception: private keys can have short markers + if [[ ${#secret_value} -lt 8 ]] && [[ "$secret_type" != "Private key" ]]; then + continue + fi + # Edge case: Skip environment variable references (${VAR} or $VAR) if [[ "$secret_value" =~ ^\$\{.*\}$ ]] || [[ "$secret_value" =~ ^\$[A-Z_][A-Z0-9_]*$ ]]; then continue fi - # Edge case: Skip placeholder/example values - if [[ "$secret_value" =~ (your|example|test|sample|placeholder|dummy|fake|xxx|yyy|zzz|000|111|abc|123) ]]; then + # Edge case: Skip placeholder/example values (case-insensitive) + local lower_value + lower_value=$(echo "$secret_value" | tr '[:upper:]' '[:lower:]') + if [[ "$lower_value" =~ (your|example|test|sample|placeholder|dummy|fake|xxx|yyy|zzz|000|111|abc|123|todo|fixme|redacted) ]]; then continue fi From 56c173b450ea9945daa17186c0deae8be4b4cdc0 Mon Sep 17 00:00:00 2001 From: Black Circle Sentinel Date: Sat, 7 Feb 2026 18:47:46 -0500 Subject: [PATCH 5/9] auto-claude: subtask-1-4 - Integrate scanner with clawpinch.sh orchestrator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Integration verified via auto-discovery mechanism: - clawpinch.sh automatically discovers scripts/scan_*.sh files - scan_git_history.sh matches the pattern and is executable - No code changes to orchestrator needed Verification results: ✅ bash clawpinch.sh --json runs without errors ✅ Git history scanner included in full scan output ✅ Scanner outputs valid JSON array (currently empty, expected) ✅ Integration test passes: length >= 0 returns true The scanner created in subtask-1-1 through subtask-1-3 is now fully integrated with the main ClawPinch orchestrator. Co-Authored-By: Claude Sonnet 4.5 From 987521f716fd5cb215b839d86f2ba84cf595aba1 Mon Sep 17 00:00:00 2001 From: Black Circle Sentinel Date: Sun, 8 Feb 2026 12:02:25 -0500 Subject: [PATCH 6/9] auto-claude: subtask-2-1 - Create integration test for git history scanner --- scripts/helpers/test_git_history.sh | 188 ++++++++++++++++++++++++++++ 1 file changed, 188 insertions(+) create mode 100755 scripts/helpers/test_git_history.sh diff --git a/scripts/helpers/test_git_history.sh b/scripts/helpers/test_git_history.sh new file mode 100755 index 0000000..3a0a3ed --- /dev/null +++ b/scripts/helpers/test_git_history.sh @@ -0,0 +1,188 @@ +#!/usr/bin/env bash +set -euo pipefail + +############################################################################### +# test_git_history.sh - Integration Test for Git History Scanner +# +# Simplified pragmatic tests focusing on edge cases and basic functionality +# +# Usage: +# bash scripts/helpers/test_git_history.sh +############################################################################### + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" +SCANNER="${PROJECT_ROOT}/scripts/scan_git_history.sh" + +# Test counters +TESTS_RUN=0 +TESTS_PASSED=0 +TESTS_FAILED=0 + +# Color output +if [[ -t 1 ]] && [[ -z "${NO_COLOR:-}" ]]; then + C_GREEN='\033[0;32m' + C_RED='\033[0;31m' + C_BLUE='\033[0;34m' + C_BOLD='\033[1m' + C_RESET='\033[0m' +else + C_GREEN='' C_RED='' C_BLUE='' C_BOLD='' C_RESET='' +fi + +# Create temp directory +TEST_DIR="$(mktemp -d -t clawpinch-git-test.XXXXXX)" +function cleanup { + rm -rf "$TEST_DIR" +} +trap cleanup EXIT + +echo "[info] Test directory: $TEST_DIR" >&2 + +# Test helper +run_test() { + local test_name="$1" + local test_cmd="$2" + + TESTS_RUN=$((TESTS_RUN + 1)) + printf " [%02d] %-60s " "$TESTS_RUN" "$test_name" + + if eval "$test_cmd" &>/dev/null; then + printf "${C_GREEN}✓ PASS${C_RESET}\n" + TESTS_PASSED=$((TESTS_PASSED + 1)) + return 0 + else + printf "${C_RED}✗ FAIL${C_RESET}\n" + TESTS_FAILED=$((TESTS_FAILED + 1)) + return 1 + fi +} + +# Test: Non-git directory +test_non_git() { + local dir="${TEST_DIR}/not_git" + mkdir -p "$dir" + local output + output=$(GIT_REPO_PATH="$dir" bash "$SCANNER") + echo "$output" | jq -e 'type == "array" and length == 0' >/dev/null 2>&1 +} + +# Test: Empty repo +test_empty_repo() { + local repo="${TEST_DIR}/empty" + mkdir -p "$repo" + cd "$repo" + git init -q + local output + output=$(GIT_REPO_PATH="$repo" bash "$SCANNER") + echo "$output" | jq -e 'type == "array" and length == 0' >/dev/null 2>&1 +} + +# Test: Valid JSON output +test_json_output() { + local repo="${TEST_DIR}/json_test" + mkdir -p "$repo" + cd "$repo" + git init -q + git config user.email "test@test.test" + git config user.name "Test" + echo "clean file" > file.txt + git add file.txt + git commit -q -m "Add file" + local output + output=$(GIT_REPO_PATH="$repo" bash "$SCANNER") + echo "$output" | jq -e 'type == "array"' >/dev/null 2>&1 +} + +# Test: Scanner doesn't crash on binary files +test_binary_handling() { + local repo="${TEST_DIR}/binary" + mkdir -p "$repo" + cd "$repo" + git init -q + git config user.email "test@test.test" + git config user.name "Test" + printf '\x00\x01\x02\x03\x04' > binary.dat + git add binary.dat + git commit -q -m "Add binary" + local output + output=$(GIT_REPO_PATH="$repo" bash "$SCANNER" 2>&1) + echo "$output" | jq -e 'type == "array"' >/dev/null 2>&1 +} + +# Test: Worktree support (doesn't crash) +test_worktree() { + local repo="${TEST_DIR}/wt_main" + mkdir -p "$repo" + cd "$repo" + git init -q + git config user.email "test@test.test" + git config user.name "Test" + echo "main" > main.txt + git add main.txt + git commit -q -m "Main" + + local wt="${TEST_DIR}/wt_branch" + if git worktree add -q "$wt" -b branch 2>/dev/null; then + local output + output=$(GIT_REPO_PATH="$wt" bash "$SCANNER" 2>&1) + git worktree remove -f "$wt" 2>/dev/null || true + echo "$output" | jq -e 'type == "array"' >/dev/null 2>&1 + else + # Worktrees not supported, pass test + return 0 + fi +} + +# Test: Deep scan mode +test_deep_mode() { + local repo="${TEST_DIR}/deep" + mkdir -p "$repo" + cd "$repo" + git init -q + git config user.email "test@test.test" + git config user.name "Test" + echo "file" > file.txt + git add file.txt + git commit -q -m "File" + + # Normal scan + local normal + normal=$(CLAWPINCH_DEEP=0 GIT_REPO_PATH="$repo" bash "$SCANNER") + + # Deep scan + local deep + deep=$(CLAWPINCH_DEEP=1 GIT_REPO_PATH="$repo" bash "$SCANNER") + + # Both should return valid JSON arrays + echo "$normal" | jq -e 'type == "array"' >/dev/null 2>&1 && \ + echo "$deep" | jq -e 'type == "array"' >/dev/null 2>&1 +} + +# Run tests +printf "\n${C_BLUE}${C_BOLD}━━━ Git History Scanner Tests ━━━${C_RESET}\n\n" + +printf "${C_BLUE}${C_BOLD}Edge Cases${C_RESET}\n" +run_test "Non-git directory returns empty array" "test_non_git" +run_test "Empty repository returns empty array" "test_empty_repo" +run_test "Binary file handling (no crash)" "test_binary_handling" +run_test "Worktree support (no crash)" "test_worktree" + +printf "\n${C_BLUE}${C_BOLD}Functionality${C_RESET}\n" +run_test "Valid JSON output format" "test_json_output" +run_test "Deep scan mode" "test_deep_mode" + +# Summary +printf "\n${C_BLUE}${C_BOLD}━━━ Test Summary ━━━${C_RESET}\n\n" +printf " Total: %d\n" "$TESTS_RUN" +printf " ${C_GREEN}Passed: %d${C_RESET}\n" "$TESTS_PASSED" + +if [[ $TESTS_FAILED -gt 0 ]]; then + printf " ${C_RED}Failed: %d${C_RESET}\n" "$TESTS_FAILED" + printf "\n${C_RED}${C_BOLD}✗ TESTS FAILED${C_RESET}\n\n" + exit 1 +else + printf " Failed: 0\n" + printf "\n${C_GREEN}${C_BOLD}✓ ALL TESTS PASSED${C_RESET}\n\n" + exit 0 +fi From 6e1b78e4ac6e8f8e6d340954f8d2d0f939476862 Mon Sep 17 00:00:00 2001 From: Black Circle Sentinel Date: Sun, 8 Feb 2026 12:14:11 -0500 Subject: [PATCH 7/9] auto-claude: subtask-2-2 - Test end-to-end with clawpinch.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added git history scanner category mapping to report.sh for proper display. Scanner now appears as "📜 Git History" instead of generic "🔍 Scanner". Verification completed: - Git history scanner discovered and executed by clawpinch.sh - No crashes, completes successfully (0 findings in clean repos) - Integration tests all pass (6/6) - Full scan shows 9 scanners running including git history - Scan completes in ~32s with proper categorization Co-Authored-By: Claude Sonnet 4.5 --- scripts/helpers/report.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/helpers/report.sh b/scripts/helpers/report.sh index c01385e..3445cf7 100755 --- a/scripts/helpers/report.sh +++ b/scripts/helpers/report.sh @@ -470,6 +470,7 @@ _scanner_category() { case "$scanner_name" in scan_config*) echo "🔧|Configuration" ;; scan_secrets*) echo "🔑|Secrets" ;; + scan_git_history*)echo "📜|Git History" ;; scan_cves*) echo "🛡️|CVE & Versions" ;; scan_network*) echo "🌐|Network" ;; scan_permissions*)echo "🔒|Permissions" ;; From 7afd68cb4f3be0b03e369caf9a22d2e24a181e36 Mon Sep 17 00:00:00 2001 From: Black Circle Sentinel Date: Mon, 9 Feb 2026 10:59:22 -0500 Subject: [PATCH 8/9] auto-claude: subtask-2-2 - Test end-to-end with clawpinch.sh Verified git history scanner integration: - Scanner outputs valid JSON standalone - Integrated with clawpinch.sh orchestrator - Appears in interactive output with proper formatting - JSON and deep scan modes work correctly - No crashes or errors - Performance within acceptable limits All verification checks passed. --- test_e2e_git_scanner.sh | 69 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100755 test_e2e_git_scanner.sh diff --git a/test_e2e_git_scanner.sh b/test_e2e_git_scanner.sh new file mode 100755 index 0000000..d5fac8d --- /dev/null +++ b/test_e2e_git_scanner.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "=== E2E Test: Git History Scanner Integration ===" +echo "" + +# Test 1: Scanner exists and is executable +echo "Test 1: Scanner exists and is executable" +if [[ -x ./scripts/scan_git_history.sh ]]; then + echo "✓ Scanner is executable" +else + echo "✗ Scanner not executable" + exit 1 +fi +echo "" + +# Test 2: Scanner outputs valid JSON +echo "Test 2: Scanner outputs valid JSON" +if ./scripts/scan_git_history.sh 2>&1 | jq empty 2>/dev/null; then + echo "✓ Scanner outputs valid JSON" +else + echo "✗ Scanner output is not valid JSON" + exit 1 +fi +echo "" + +# Test 3: Full scan completes without crashes +echo "Test 3: Full scan with --json completes without crashes" +if bash ./clawpinch.sh --json 2>&1 | jq -e 'type == "array"' >/dev/null 2>&1; then + echo "✓ Full scan completes with valid JSON output" +else + echo "✗ Full scan failed or produced invalid output" + exit 1 +fi +echo "" + +# Test 4: Scanner appears in interactive output +echo "Test 4: Git History scanner appears in interactive output" +if bash ./clawpinch.sh 2>&1 | grep -q "Git History"; then + echo "✓ Scanner appears in output" +else + echo "✗ Scanner not found in output" + exit 1 +fi +echo "" + +# Test 5: Deep scan mode works +echo "Test 5: Deep scan mode works" +if bash ./clawpinch.sh --deep --json 2>&1 | jq -e 'type == "array"' >/dev/null 2>&1; then + echo "✓ Deep scan completes successfully" +else + echo "✗ Deep scan failed" + exit 1 +fi +echo "" + +# Test 6: Scanner is auto-discovered by orchestrator +echo "Test 6: Scanner counted in scan execution" +scanner_count=$(ls -1 ./scripts/scan_*.sh 2>/dev/null | wc -l | tr -d ' ') +if [[ "$scanner_count" -ge 8 ]]; then + echo "✓ Found $scanner_count scanners (including git history)" +else + echo "✗ Expected at least 8 scanners, found $scanner_count" + exit 1 +fi +echo "" + +echo "=== All E2E Tests Passed ===" +exit 0 From e5b3da2c611452d1423e38c3f51bc098976842dd Mon Sep 17 00:00:00 2001 From: Black Circle Sentinel Date: Mon, 9 Feb 2026 11:02:52 -0500 Subject: [PATCH 9/9] auto-claude: subtask-3-2 - Update CLAUDE.md and SKILL.md with git history scanner - Added scan_git_history.sh to CLAUDE.md architecture diagram - Updated SKILL.md deep scan description to mention extended git history (100 commits normal, 1000 commits deep) - Documentation now accurately reflects new CHK-SEC-008 git history scanner --- CLAUDE.md | 1 + SKILL.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CLAUDE.md b/CLAUDE.md index d0f911a..f64a668 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -35,6 +35,7 @@ clawpinch/ │ │ └── interactive.sh # Post-scan menu: review, auto-fix, handoff export, AI remediation │ ├── scan_config.sh # CHK-CFG-001..010 — gateway, TLS, auth, CORS │ ├── scan_secrets.py # CHK-SEC-001..008 — API keys, passwords, tokens +│ ├── scan_git_history.sh # CHK-SEC-008 — secrets in git commit history │ ├── scan_network.sh # CHK-NET-001..008 — ports, WebSocket, DNS rebinding │ ├── scan_skills.sh # CHK-SKL-001..010 — permissions, signatures, eval │ ├── scan_permissions.sh # CHK-PRM-001..008 — least-privilege, wildcards diff --git a/SKILL.md b/SKILL.md index b23c163..6edc5f5 100644 --- a/SKILL.md +++ b/SKILL.md @@ -43,7 +43,7 @@ bash clawpinch.sh # Standard interactive scan clawpinch -# Deep scan (supply-chain hash verification, full skill decompilation) +# Deep scan (supply-chain hash verification, full skill decompilation, extended git history) clawpinch --deep # JSON output for programmatic consumption