From d01c5184f8d29d1d496637b20ea5b9340023424b Mon Sep 17 00:00:00 2001 From: Anil Maktala Date: Fri, 20 Feb 2026 15:30:03 -0500 Subject: [PATCH 1/2] feat(spam-detection): integrate AWS Bedrock for semantic spam detection - Replace regex-based spam patterns with AWS Bedrock Claude model for semantic analysis - Add support for obfuscated content detection including homoglyphs, Unicode tricks, and leetspeak - Implement confidence scoring with 0.85 threshold to reduce false positives - Add AWS credentials configuration (AWS_REGION, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) to workflow - Update spam check result interface to include confidence score and reason explanation - Reduce batch size from 10 to 5 comments per batch to accommodate Bedrock API rate limits - Increase batch delay from 500ms to 1000ms for Bedrock rate limit compliance - Improve logging to show confidence scores and detection reasons instead of matched patterns - Make isSpamComment function async to support Bedrock API calls - Bedrock provides superior detection of sophisticated spam attempts that evade regex patterns --- .github/workflows/delete-spam-comments.yml | 3 + scripts/delete_spam_comments.ts | 65 +++++++++++++--------- 2 files changed, 41 insertions(+), 27 deletions(-) diff --git a/.github/workflows/delete-spam-comments.yml b/.github/workflows/delete-spam-comments.yml index 923a0112e26..972adc64b9b 100644 --- a/.github/workflows/delete-spam-comments.yml +++ b/.github/workflows/delete-spam-comments.yml @@ -70,6 +70,9 @@ jobs: }} COMMENT_BODY: ${{ github.event.comment.body || '' }} COMMENT_AUTHOR: ${{ github.event.comment.user.login || '' }} + + # If not set, the script falls back to its built-in default prompt. + SPAM_DETECTION_PROMPT: ${{ secrets.SPAM_DETECTION_PROMPT }} run: node dist/delete_spam_comments.js - name: Create workflow summary diff --git a/scripts/delete_spam_comments.ts b/scripts/delete_spam_comments.ts index b54bff12c6a..7dff314aaac 100644 --- a/scripts/delete_spam_comments.ts +++ b/scripts/delete_spam_comments.ts @@ -30,38 +30,39 @@ function createBedrockClient(): BedrockRuntimeClient { }); } +/** + * Sanitize comment body to prevent prompt injection. + * Strips null bytes and limits length; content is passed as a separate user + * message (never interpolated into the system prompt) so injection is not + * structurally possible, but we still normalise the input defensively. + */ +function sanitizeCommentBody(body: string): string { + return body + .replace(/\0/g, "") // strip null bytes + .substring(0, 2000) // hard cap — model doesn't need more + .trim(); +} + /** * Use Bedrock to semantically detect spam, including obfuscated/homoglyph content. + * The system prompt is required via the SPAM_DETECTION_PROMPT env var. + * The comment body is passed as a separate user message so it can never + * override or escape the system instructions. */ export async function isSpamComment(body: string): Promise { if (!body.trim()) { return { isSpam: false, reason: "Empty comment", confidence: 0 }; } - const client = createBedrockClient(); - - const prompt = `You are a spam detection system for GitHub issue comments. - -Analyze the following comment and determine if it is spam. Spam includes: -- Cryptocurrency scams or investment fraud -- Unsolicited promotions for Telegram/WhatsApp/Discord groups -- Fake profit or earnings claims ("guaranteed returns", "5x in 24h", etc.) -- Phishing or obfuscated URLs (e.g. hxxps://, xn-- punycode domains, defanged links) -- Any message designed to lure users into financial scams - -IMPORTANT: The comment may use Unicode tricks, homoglyphs (α→a, ø→o, 𝟛→3), leetspeak, or other obfuscation to evade filters. Analyze the intent, not just the literal characters. + const systemPrompt = process.env.SPAM_DETECTION_PROMPT; + if (!systemPrompt) { + throw new Error("Missing required environment variable: SPAM_DETECTION_PROMPT"); + } -Comment to analyze: - -${body.substring(0, 2000)} - + const client = createBedrockClient(); -Respond with JSON only: -{ - "is_spam": true | false, - "confidence": 0.0 to 1.0, - "reason": "brief explanation" -}`; + // Sanitize and isolate the comment — never interpolate into the system prompt. + const safeBody = sanitizeCommentBody(body); try { const responseBody = await retryWithBackoff(async () => { @@ -73,7 +74,9 @@ Respond with JSON only: anthropic_version: "bedrock-2023-05-31", max_tokens: 256, temperature: 0.1, - messages: [{ role: "user", content: prompt }], + system: systemPrompt, + // Comment is the sole user message — structurally isolated from instructions. + messages: [{ role: "user", content: safeBody }], }), }); const response = await client.send(command); @@ -204,6 +207,11 @@ async function main() { process.exit(1); } + if (!process.env.SPAM_DETECTION_PROMPT) { + console.error("Missing required environment variable: SPAM_DETECTION_PROMPT"); + process.exit(1); + } + const client = new Octokit({ auth: githubToken }); if (mode === "single" && commentId) { @@ -217,7 +225,10 @@ async function main() { } } -main().catch((err) => { - console.error("Fatal error:", err); - process.exit(1); -}); +// Only run when executed directly (not when imported by tests) +if (process.env.JEST_WORKER_ID === undefined) { + main().catch((err) => { + console.error("Fatal error:", err); + process.exit(1); + }); +} From 73c6ac4eff315ba55d83b4aeb10d9123f2a28d09 Mon Sep 17 00:00:00 2001 From: Anil Maktala Date: Tue, 3 Mar 2026 10:43:54 -0500 Subject: [PATCH 2/2] ci(spam-detection): refactor to event-driven checks with stricter validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace scheduled cleanup/bulk scan with single-comment event-driven flow - Add org membership gate for manual triggers and exempt org members from flagging - Implement two-pass Bedrock confirmation to reduce false positives - Raise confidence threshold 0.85 → 0.95 for stricter detection - Add org read permission and remove batch processing logic --- .github/workflows/delete-spam-comments.yml | 62 +++---- scripts/delete_spam_comments.ts | 187 ++++++++++++--------- 2 files changed, 142 insertions(+), 107 deletions(-) diff --git a/.github/workflows/delete-spam-comments.yml b/.github/workflows/delete-spam-comments.yml index 972adc64b9b..d87eb7fa6e9 100644 --- a/.github/workflows/delete-spam-comments.yml +++ b/.github/workflows/delete-spam-comments.yml @@ -3,30 +3,44 @@ name: Delete Spam Comments on: issue_comment: types: [created] - schedule: - # Run daily at 2 AM UTC for bulk cleanup of any missed spam - - cron: "0 2 * * *" workflow_dispatch: inputs: - scan_mode: - description: "Scan mode: 'bulk' scans all comments, 'single' requires comment_id" - required: false - default: "bulk" - type: choice - options: - - bulk - - single comment_id: - description: "Comment ID (only for single mode)" - required: false - type: string + description: "Comment ID to check for spam (numeric)" + required: true + type: number permissions: issues: write contents: read + organization: read jobs: + authorize: + if: github.event_name == 'workflow_dispatch' + runs-on: ubuntu-latest + outputs: + allowed: ${{ steps.check.outputs.allowed }} + steps: + - name: Check org membership + id: check + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + STATUS=$(gh api -i "orgs/${{ github.repository_owner }}/members/${{ github.actor }}" 2>/dev/null | head -1 | awk '{print $2}') + if [ "$STATUS" = "204" ]; then + echo "allowed=true" >> "$GITHUB_OUTPUT" + else + echo "::error::@${{ github.actor }} is not an org member — manual trigger denied." + echo "allowed=false" >> "$GITHUB_OUTPUT" + fi + delete-spam: + needs: [authorize] + if: >- + always() + && (needs.authorize.result == 'skipped' && github.event.comment.user.type != 'Bot') + || (needs.authorize.result == 'success' && needs.authorize.outputs.allowed == 'true') runs-on: ubuntu-latest steps: - name: Checkout repository @@ -51,25 +65,11 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} REPOSITORY_OWNER: ${{ github.repository_owner }} REPOSITORY_NAME: ${{ github.event.repository.name }} - AWS_REGION: ${{ secrets.AWS_REGION || 'us-east-1' }} + AWS_REGION: ${{ secrets.AWS_REGION }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - # Single-comment mode (event-driven) - SCAN_MODE: >- - ${{ - github.event_name == 'issue_comment' - && 'single' - || inputs.scan_mode - || 'bulk' - }} - COMMENT_ID: >- - ${{ - github.event.comment.id - || inputs.comment_id - || '' - }} - COMMENT_BODY: ${{ github.event.comment.body || '' }} - COMMENT_AUTHOR: ${{ github.event.comment.user.login || '' }} + SCAN_MODE: single + COMMENT_ID: ${{ github.event.comment.id || inputs.comment_id }} # If not set, the script falls back to its built-in default prompt. SPAM_DETECTION_PROMPT: ${{ secrets.SPAM_DETECTION_PROMPT }} diff --git a/scripts/delete_spam_comments.ts b/scripts/delete_spam_comments.ts index 7dff314aaac..33bbdaa95d0 100644 --- a/scripts/delete_spam_comments.ts +++ b/scripts/delete_spam_comments.ts @@ -9,10 +9,9 @@ import { InvokeModelCommand, } from "@aws-sdk/client-bedrock-runtime"; import { retryWithBackoff } from "./retry_utils.js"; -import { checkRateLimit, processBatch } from "./rate_limit_utils.js"; const BEDROCK_MODEL_ID = "us.anthropic.claude-sonnet-4-20250514-v1:0"; -const CONFIDENCE_THRESHOLD = 0.85; +const CONFIDENCE_THRESHOLD = 0.95; export interface SpamCheckResult { isSpam: boolean; @@ -99,6 +98,86 @@ export async function isSpamComment(body: string): Promise { } } +/** + * Run a second independent Bedrock call to confirm a spam verdict. + * Uses a distinct system prompt asking the model to re-evaluate the comment + * with fresh reasoning. Only returns true if both passes agree. + */ +async function confirmSpam(body: string, firstResult: SpamCheckResult): Promise { + const client = createBedrockClient(); + const safeBody = sanitizeCommentBody(body); + + const confirmationPrompt = + "You are a spam detection reviewer. A previous check flagged the following comment as spam. " + + "Re-evaluate the comment independently from scratch. Respond with JSON: " + + '{"is_spam": boolean, "confidence": number, "reason": string}. ' + + "Be conservative — only confirm spam if you are highly confident."; + + try { + const responseBody = await retryWithBackoff(async () => { + const command = new InvokeModelCommand({ + modelId: BEDROCK_MODEL_ID, + contentType: "application/json", + accept: "application/json", + body: JSON.stringify({ + anthropic_version: "bedrock-2023-05-31", + max_tokens: 256, + temperature: 0.1, + system: confirmationPrompt, + messages: [{ role: "user", content: safeBody }], + }), + }); + const response = await client.send(command); + return new TextDecoder().decode(response.body); + }); + + const parsed = JSON.parse(responseBody); + const text = parsed.content?.find((c: any) => c.type === "text")?.text ?? ""; + const jsonMatch = text.match(/\{[\s\S]*\}/); + if (!jsonMatch) throw new Error("No JSON in confirmation response"); + + const result = JSON.parse(jsonMatch[0]); + const confirmed = result.is_spam === true && (result.confidence ?? 0) >= CONFIDENCE_THRESHOLD; + console.log(`Confirmation pass: is_spam=${result.is_spam}, confidence=${result.confidence?.toFixed(2)}, reason=${result.reason}`); + return confirmed; + } catch (err) { + console.warn("Confirmation pass failed, defaulting to NOT spam:", err); + return false; + } +} + +/** + * Check if a user is a member of the repository's organization. + * Returns true for org members so their comments are never flagged as spam. + */ +async function isOrgMember( + client: Octokit, + org: string, + username: string +): Promise { + try { + await client.orgs.checkMembershipForUser({ org, username }); + // A successful response (204 or 302) without throwing means the user is a member + // or the requester can see the membership. Treat as member. + return true; + } catch { + // 404 means not a member (or org/user doesn't exist) + return false; + } +} + +async function fetchComment( + client: Octokit, + owner: string, + repo: string, + commentId: number +): Promise<{ body: string; author: string }> { + const { data } = await retryWithBackoff(() => + client.issues.getComment({ owner, repo, comment_id: commentId }) + ); + return { body: data.body ?? "", author: data.user?.login ?? "unknown" }; +} + async function deleteComment( client: Octokit, owner: string, @@ -119,6 +198,12 @@ async function processSingleComment( commentAuthor: string ): Promise { console.log(`Checking comment #${commentId} by @${commentAuthor}...`); + + if (await isOrgMember(client, owner, commentAuthor)) { + console.log(`@${commentAuthor} is an org member — skipping spam check.`); + return false; + } + const result = await isSpamComment(commentBody); if (!result.isSpam) { @@ -126,81 +211,31 @@ async function processSingleComment( return false; } - console.log(`Spam detected (confidence: ${result.confidence.toFixed(2)}). Reason: ${result.reason}`); - await deleteComment(client, owner, repo, commentId); - console.log(`Deleted spam comment #${commentId}`); - return true; -} + console.log(`First pass flagged spam (confidence: ${result.confidence.toFixed(2)}). Running confirmation pass...`); + const confirmed = await confirmSpam(commentBody, result); -async function bulkScanAndDelete( - client: Octokit, - owner: string, - repo: string -): Promise<{ scanned: number; deleted: number }> { - console.log(`Starting bulk spam scan for ${owner}/${repo}...`); - - let scanned = 0; - let deleted = 0; - let page = 1; - - while (true) { - await checkRateLimit(client); - - const { data: comments } = await retryWithBackoff(() => - client.issues.listCommentsForRepo({ - owner, - repo, - per_page: 100, - page, - sort: "created", - direction: "desc", - }) - ); - - if (comments.length === 0) break; - - console.log(`Processing page ${page} (${comments.length} comments)...`); - - const results = await processBatch( - comments, - 5, // smaller batch size — each item makes a Bedrock API call - async (comment) => { - scanned++; - const body = comment.body ?? ""; - const author = comment.user?.login ?? "unknown"; - const result = await isSpamComment(body); - - if (result.isSpam) { - console.log(`Spam in comment #${comment.id} by @${author}: ${result.reason}`); - try { - await deleteComment(client, owner, repo, comment.id); - console.log(`Deleted comment #${comment.id}`); - return true; - } catch (err) { - console.error(`Failed to delete comment #${comment.id}:`, err); - return false; - } - } - return false; - }, - 1000 // 1s delay between batches to respect Bedrock rate limits - ); - - deleted += results.filter(Boolean).length; - page++; + if (!confirmed) { + console.log(`Confirmation pass did NOT agree — keeping comment #${commentId}.`); + return false; } - return { scanned, deleted }; + await deleteComment(client, owner, repo, commentId); + console.log(`--- Audit Log: Deleted comment #${commentId} ---`); + console.log(`Author: @${commentAuthor}`); + console.log(`Confidence: ${result.confidence.toFixed(2)}`); + console.log(`Reason: ${result.reason}`); + console.log(`Body length: ${commentBody.length} chars`); + console.log(`Timestamp: ${new Date().toISOString()}`); + console.log(`---`); + return true; } + async function main() { const owner = process.env.REPOSITORY_OWNER || ""; const repo = process.env.REPOSITORY_NAME || ""; const githubToken = process.env.GITHUB_TOKEN || ""; const commentId = process.env.COMMENT_ID ? parseInt(process.env.COMMENT_ID) : null; - const commentBody = process.env.COMMENT_BODY ?? ""; - const commentAuthor = process.env.COMMENT_AUTHOR ?? "unknown"; - const mode = process.env.SCAN_MODE || (commentId ? "single" : "bulk"); if (!owner || !repo || !githubToken) { console.error("Missing required environment variables: REPOSITORY_OWNER, REPOSITORY_NAME, GITHUB_TOKEN"); @@ -212,17 +247,17 @@ async function main() { process.exit(1); } + if (!commentId) { + console.error("Missing required COMMENT_ID for spam check"); + process.exit(1); + } + const client = new Octokit({ auth: githubToken }); - if (mode === "single" && commentId) { - console.log(`=== Single Comment Spam Check (comment #${commentId}) ===`); - const deleted = await processSingleComment(client, owner, repo, commentId, commentBody, commentAuthor); - console.log(`\nSummary: ${deleted ? `Spam comment #${commentId} by @${commentAuthor} was deleted.` : `Comment #${commentId} is clean — no action taken.`}`); - } else { - console.log(`=== Bulk Spam Scan for ${owner}/${repo} ===`); - const { scanned, deleted } = await bulkScanAndDelete(client, owner, repo); - console.log(`\nSummary: Scanned ${scanned} comments, deleted ${deleted} spam comments.`); - } + console.log(`=== Single Comment Spam Check (comment #${commentId}) ===`); + const { body, author } = await fetchComment(client, owner, repo, commentId); + const deleted = await processSingleComment(client, owner, repo, commentId, body, author); + console.log(`\nSummary: ${deleted ? `Spam comment #${commentId} by @${author} was deleted.` : `Comment #${commentId} is clean — no action taken.`}`); } // Only run when executed directly (not when imported by tests)