From d01c5184f8d29d1d496637b20ea5b9340023424b Mon Sep 17 00:00:00 2001
From: Anil Maktala <amaktala@amazon.com>
Date: Fri, 20 Feb 2026 15:30:03 -0500
Subject: [PATCH 1/2] feat(spam-detection): integrate AWS Bedrock for semantic
 spam detection

- Replace regex-based spam patterns with AWS Bedrock Claude model for semantic analysis
- Add support for obfuscated content detection including homoglyphs, Unicode tricks, and leetspeak
- Implement confidence scoring with 0.85 threshold to reduce false positives
- Add AWS credentials configuration (AWS_REGION, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) to workflow
- Update spam check result interface to include confidence score and reason explanation
- Reduce batch size from 10 to 5 comments per batch to accommodate Bedrock API rate limits
- Increase batch delay from 500ms to 1000ms for Bedrock rate limit compliance
- Improve logging to show confidence scores and detection reasons instead of matched patterns
- Make isSpamComment function async to support Bedrock API calls
- Bedrock provides superior detection of sophisticated spam attempts that evade regex patterns
---
 .github/workflows/delete-spam-comments.yml |  3 +
 scripts/delete_spam_comments.ts            | 65 +++++++++++++---------
 2 files changed, 41 insertions(+), 27 deletions(-)
diff --git a/.github/workflows/delete-spam-comments.yml b/.github/workflows/delete-spam-comments.yml
index 923a0112e26..972adc64b9b 100644
--- a/.github/workflows/delete-spam-comments.yml
+++ b/.github/workflows/delete-spam-comments.yml
@@ -70,6 +70,9 @@ jobs:
             }}
           COMMENT_BODY: ${{ github.event.comment.body || '' }}
           COMMENT_AUTHOR: ${{ github.event.comment.user.login || '' }}
+
+          # If not set, the script falls back to its built-in default prompt.
+          SPAM_DETECTION_PROMPT: ${{ secrets.SPAM_DETECTION_PROMPT }}
         run: node dist/delete_spam_comments.js
 
       - name: Create workflow summary
diff --git a/scripts/delete_spam_comments.ts b/scripts/delete_spam_comments.ts
index b54bff12c6a..7dff314aaac 100644
--- a/scripts/delete_spam_comments.ts
+++ b/scripts/delete_spam_comments.ts
@@ -30,38 +30,39 @@ function createBedrockClient(): BedrockRuntimeClient {
   });
 }
 
+/**
+ * Sanitize a comment body before it is sent to the model.
+ * Strips null bytes, caps the length at 2000 characters, and trims whitespace.
+ * The content is passed as a separate user message (never interpolated into the
+ * system prompt), which reduces — but does not eliminate — prompt-injection risk.
+ */
+function sanitizeCommentBody(body: string): string {
+  return body
+    .replace(/\0/g, "") // strip null bytes
+    .substring(0, 2000)  // hard cap — model doesn't need more
+    .trim();
+}
+
 /**
  * Use Bedrock to semantically detect spam, including obfuscated/homoglyph content.
+ * The system prompt is required via the SPAM_DETECTION_PROMPT env var.
+ * The comment body is passed as a separate user message, which keeps untrusted
+ * text out of the system instructions but does not by itself rule out injection.
  */
 export async function isSpamComment(body: string): Promise<SpamCheckResult> {
   if (!body.trim()) {
     return { isSpam: false, reason: "Empty comment", confidence: 0 };
   }
 
-  const client = createBedrockClient();
-
-  const prompt = `You are a spam detection system for GitHub issue comments.
-
-Analyze the following comment and determine if it is spam. Spam includes:
-- Cryptocurrency scams or investment fraud
-- Unsolicited promotions for Telegram/WhatsApp/Discord groups
-- Fake profit or earnings claims ("guaranteed returns", "5x in 24h", etc.)
-- Phishing or obfuscated URLs (e.g. hxxps://, xn-- punycode domains, defanged links)
-- Any message designed to lure users into financial scams
-
-IMPORTANT: The comment may use Unicode tricks, homoglyphs (α→a, ø→o, 𝟛→3), leetspeak, or other obfuscation to evade filters. Analyze the intent, not just the literal characters.
+  const systemPrompt = process.env.SPAM_DETECTION_PROMPT;
+  if (!systemPrompt) {
+    throw new Error("Missing required environment variable: SPAM_DETECTION_PROMPT");
+  }
 
-Comment to analyze:
-<comment>
-${body.substring(0, 2000)}
-</comment>
+  const client = createBedrockClient();
 
-Respond with JSON only:
-{
-  "is_spam": true | false,
-  "confidence": 0.0 to 1.0,
-  "reason": "brief explanation"
-}`;
+  // Sanitize and isolate the comment — never interpolate into the system prompt.
+  const safeBody = sanitizeCommentBody(body);
 
   try {
     const responseBody = await retryWithBackoff(async () => {
@@ -73,7 +74,9 @@ Respond with JSON only:
           anthropic_version: "bedrock-2023-05-31",
           max_tokens: 256,
           temperature: 0.1,
-          messages: [{ role: "user", content: prompt }],
+          system: systemPrompt,
+          // Comment is the sole user message — structurally isolated from instructions.
+          messages: [{ role: "user", content: safeBody }],
         }),
       });
       const response = await client.send(command);
@@ -204,6 +207,11 @@ async function main() {
     process.exit(1);
   }
 
+  if (!process.env.SPAM_DETECTION_PROMPT) {
+    console.error("Missing required environment variable: SPAM_DETECTION_PROMPT");
+    process.exit(1);
+  }
+
   const client = new Octokit({ auth: githubToken });
 
   if (mode === "single" && commentId) {
@@ -217,7 +225,10 @@ async function main() {
   }
 }
 
-main().catch((err) => {
-  console.error("Fatal error:", err);
-  process.exit(1);
-});
+// Only run when executed directly (not when imported by tests)
+if (process.env.JEST_WORKER_ID === undefined) {
+  main().catch((err) => {
+    console.error("Fatal error:", err);
+    process.exit(1);
+  });
+}

From 73c6ac4eff315ba55d83b4aeb10d9123f2a28d09 Mon Sep 17 00:00:00 2001
From: Anil Maktala <amaktala@amazon.com>
Date: Tue, 3 Mar 2026 10:43:54 -0500
Subject: [PATCH 2/2] ci(spam-detection): refactor to event-driven checks with
 stricter validation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Replace scheduled cleanup/bulk scan with single-comment event-driven flow
- Add org membership gate for manual triggers and exempt org members from flagging
- Implement two-pass Bedrock confirmation to reduce false positives
- Raise confidence threshold 0.85 → 0.95 for stricter detection

- Add org read permission and remove batch processing logic
---
 .github/workflows/delete-spam-comments.yml |  62 +++----
 scripts/delete_spam_comments.ts            | 187 ++++++++++++---------
 2 files changed, 142 insertions(+), 107 deletions(-)

diff --git a/.github/workflows/delete-spam-comments.yml b/.github/workflows/delete-spam-comments.yml
index 972adc64b9b..d87eb7fa6e9 100644
--- a/.github/workflows/delete-spam-comments.yml
+++ b/.github/workflows/delete-spam-comments.yml
@@ -3,30 +3,44 @@ name: Delete Spam Comments
 on:
   issue_comment:
     types: [created]
-  schedule:
-    # Run daily at 2 AM UTC for bulk cleanup of any missed spam
-    - cron: "0 2 * * *"
   workflow_dispatch:
     inputs:
-      scan_mode:
-        description: "Scan mode: 'bulk' scans all comments, 'single' requires comment_id"
-        required: false
-        default: "bulk"
-        type: choice
-        options:
-          - bulk
-          - single
       comment_id:
-        description: "Comment ID (only for single mode)"
-        required: false
-        type: string
+        description: "Comment ID to check for spam (numeric)"
+        required: true
+        type: number
 
 permissions:
   issues: write
   contents: read
+  organization: read
 
 jobs:
+  authorize:
+    if: github.event_name == 'workflow_dispatch'
+    runs-on: ubuntu-latest
+    outputs:
+      allowed: ${{ steps.check.outputs.allowed }}
+    steps:
+      - name: Check org membership
+        id: check
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          STATUS=$(gh api -i "orgs/${{ github.repository_owner }}/members/${{ github.actor }}" 2>/dev/null | head -1 | awk '{print $2}')
+          if [ "$STATUS" = "204" ]; then
+            echo "allowed=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "::error::@${{ github.actor }} is not an org member — manual trigger denied."
+            echo "allowed=false" >> "$GITHUB_OUTPUT"
+          fi
+
   delete-spam:
+    needs: [authorize]
+    if: >-
+      always()
+      && (needs.authorize.result == 'skipped' && github.event.comment.user.type != 'Bot')
+      || (needs.authorize.result == 'success' && needs.authorize.outputs.allowed == 'true')
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
@@ -51,25 +65,11 @@ jobs:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           REPOSITORY_OWNER: ${{ github.repository_owner }}
           REPOSITORY_NAME: ${{ github.event.repository.name }}
-          AWS_REGION: ${{ secrets.AWS_REGION || 'us-east-1' }}
+          AWS_REGION: ${{ secrets.AWS_REGION }}
           AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          # Single-comment mode (event-driven)
-          SCAN_MODE: >-
-            ${{
-              github.event_name == 'issue_comment'
-              && 'single'
-              || inputs.scan_mode
-              || 'bulk'
-            }}
-          COMMENT_ID: >-
-            ${{
-              github.event.comment.id
-              || inputs.comment_id
-              || ''
-            }}
-          COMMENT_BODY: ${{ github.event.comment.body || '' }}
-          COMMENT_AUTHOR: ${{ github.event.comment.user.login || '' }}
+          SCAN_MODE: single
+          COMMENT_ID: ${{ github.event.comment.id || inputs.comment_id }}
 
-          # If not set, the script falls back to its built-in default prompt.
+          # Required: the script exits with an error if this secret is not set.
           SPAM_DETECTION_PROMPT: ${{ secrets.SPAM_DETECTION_PROMPT }}
diff --git a/scripts/delete_spam_comments.ts b/scripts/delete_spam_comments.ts
index 7dff314aaac..33bbdaa95d0 100644
--- a/scripts/delete_spam_comments.ts
+++ b/scripts/delete_spam_comments.ts
@@ -9,10 +9,9 @@ import {
   InvokeModelCommand,
 } from "@aws-sdk/client-bedrock-runtime";
 import { retryWithBackoff } from "./retry_utils.js";
-import { checkRateLimit, processBatch } from "./rate_limit_utils.js";
 
 const BEDROCK_MODEL_ID = "us.anthropic.claude-sonnet-4-20250514-v1:0";
-const CONFIDENCE_THRESHOLD = 0.85;
+const CONFIDENCE_THRESHOLD = 0.95;
 
 export interface SpamCheckResult {
   isSpam: boolean;
@@ -99,6 +98,86 @@ export async function isSpamComment(body: string): Promise<SpamCheckResult> {
   }
 }
 
+/**
+ * Run a second independent Bedrock call to confirm a spam verdict.
+ * Uses a distinct system prompt asking the model to re-evaluate from scratch.
+ * Returns true only if this pass reports spam with confidence >= CONFIDENCE_THRESHOLD; errors return false.
+ */
+async function confirmSpam(body: string, firstResult: SpamCheckResult): Promise<boolean> {
+  const client = createBedrockClient();
+  const safeBody = sanitizeCommentBody(body);
+
+  const confirmationPrompt =
+    "You are a spam detection reviewer. A previous check flagged the following comment as spam. " +
+    "Re-evaluate the comment independently from scratch. Respond with JSON: " +
+    '{"is_spam": boolean, "confidence": number, "reason": string}. ' +
+    "Be conservative — only confirm spam if you are highly confident.";
+
+  try {
+    const responseBody = await retryWithBackoff(async () => {
+      const command = new InvokeModelCommand({
+        modelId: BEDROCK_MODEL_ID,
+        contentType: "application/json",
+        accept: "application/json",
+        body: JSON.stringify({
+          anthropic_version: "bedrock-2023-05-31",
+          max_tokens: 256,
+          temperature: 0.1,
+          system: confirmationPrompt,
+          messages: [{ role: "user", content: safeBody }],
+        }),
+      });
+      const response = await client.send(command);
+      return new TextDecoder().decode(response.body);
+    });
+
+    const parsed = JSON.parse(responseBody);
+    const text = parsed.content?.find((c: any) => c.type === "text")?.text ?? "";
+    const jsonMatch = text.match(/\{[\s\S]*\}/);
+    if (!jsonMatch) throw new Error("No JSON in confirmation response");
+
+    const result = JSON.parse(jsonMatch[0]);
+    const confirmed = result.is_spam === true && (result.confidence ?? 0) >= CONFIDENCE_THRESHOLD;
+    console.log(`Confirmation pass: is_spam=${result.is_spam}, confidence=${result.confidence?.toFixed(2)}, reason=${result.reason}`);
+    return confirmed;
+  } catch (err) {
+    console.warn("Confirmation pass failed, defaulting to NOT spam:", err);
+    return false;
+  }
+}
+
+/**
+ * Check if a user is a member of the repository's organization.
+ * Returns true for org members so their comments are never flagged as spam.
+ */
+async function isOrgMember(
+  client: Octokit,
+  org: string,
+  username: string
+): Promise<boolean> {
+  try {
+    await client.orgs.checkMembershipForUser({ org, username });
+    // A successful response (204 or 302) without throwing means the user is a member
+    // or the requester can see the membership. Treat as member.
+    return true;
+  } catch {
+    // Any failure lands here: 404 (not a member, or org/user doesn't exist) but also auth, rate-limit, or network errors.
+    return false;
+  }
+}
+
+async function fetchComment(
+  client: Octokit,
+  owner: string,
+  repo: string,
+  commentId: number
+): Promise<{ body: string; author: string }> {
+  const { data } = await retryWithBackoff(() =>
+    client.issues.getComment({ owner, repo, comment_id: commentId })
+  );
+  return { body: data.body ?? "", author: data.user?.login ?? "unknown" };
+}
+
 async function deleteComment(
   client: Octokit,
   owner: string,
@@ -119,6 +198,12 @@ async function processSingleComment(
   commentAuthor: string
 ): Promise<boolean> {
   console.log(`Checking comment #${commentId} by @${commentAuthor}...`);
+
+  if (await isOrgMember(client, owner, commentAuthor)) {
+    console.log(`@${commentAuthor} is an org member — skipping spam check.`);
+    return false;
+  }
+
   const result = await isSpamComment(commentBody);
 
   if (!result.isSpam) {
@@ -126,81 +211,31 @@ async function processSingleComment(
     return false;
   }
 
-  console.log(`Spam detected (confidence: ${result.confidence.toFixed(2)}). Reason: ${result.reason}`);
-  await deleteComment(client, owner, repo, commentId);
-  console.log(`Deleted spam comment #${commentId}`);
-  return true;
-}
+  console.log(`First pass flagged spam (confidence: ${result.confidence.toFixed(2)}). Running confirmation pass...`);
+  const confirmed = await confirmSpam(commentBody, result);
 
-async function bulkScanAndDelete(
-  client: Octokit,
-  owner: string,
-  repo: string
-): Promise<{ scanned: number; deleted: number }> {
-  console.log(`Starting bulk spam scan for ${owner}/${repo}...`);
-
-  let scanned = 0;
-  let deleted = 0;
-  let page = 1;
-
-  while (true) {
-    await checkRateLimit(client);
-
-    const { data: comments } = await retryWithBackoff(() =>
-      client.issues.listCommentsForRepo({
-        owner,
-        repo,
-        per_page: 100,
-        page,
-        sort: "created",
-        direction: "desc",
-      })
-    );
-
-    if (comments.length === 0) break;
-
-    console.log(`Processing page ${page} (${comments.length} comments)...`);
-
-    const results = await processBatch(
-      comments,
-      5, // smaller batch size — each item makes a Bedrock API call
-      async (comment) => {
-        scanned++;
-        const body = comment.body ?? "";
-        const author = comment.user?.login ?? "unknown";
-        const result = await isSpamComment(body);
-
-        if (result.isSpam) {
-          console.log(`Spam in comment #${comment.id} by @${author}: ${result.reason}`);
-          try {
-            await deleteComment(client, owner, repo, comment.id);
-            console.log(`Deleted comment #${comment.id}`);
-            return true;
-          } catch (err) {
-            console.error(`Failed to delete comment #${comment.id}:`, err);
-            return false;
-          }
-        }
-        return false;
-      },
-      1000 // 1s delay between batches to respect Bedrock rate limits
-    );
-
-    deleted += results.filter(Boolean).length;
-    page++;
+  if (!confirmed) {
+    console.log(`Confirmation pass did NOT agree — keeping comment #${commentId}.`);
+    return false;
   }
 
-  return { scanned, deleted };
+  await deleteComment(client, owner, repo, commentId);
+  console.log(`--- Audit Log: Deleted comment #${commentId} ---`);
+  console.log(`Author: @${commentAuthor}`);
+  console.log(`Confidence: ${result.confidence.toFixed(2)}`);
+  console.log(`Reason: ${result.reason}`);
+  console.log(`Body length: ${commentBody.length} chars`);
+  console.log(`Timestamp: ${new Date().toISOString()}`);
+  console.log(`---`);
+  return true;
 }
 
+
 async function main() {
   const owner = process.env.REPOSITORY_OWNER || "";
   const repo = process.env.REPOSITORY_NAME || "";
   const githubToken = process.env.GITHUB_TOKEN || "";
   const commentId = process.env.COMMENT_ID ? parseInt(process.env.COMMENT_ID) : null;
-  const commentBody = process.env.COMMENT_BODY ?? "";
-  const commentAuthor = process.env.COMMENT_AUTHOR ?? "unknown";
-  const mode = process.env.SCAN_MODE || (commentId ? "single" : "bulk");
 
   if (!owner || !repo || !githubToken) {
     console.error("Missing required environment variables: REPOSITORY_OWNER, REPOSITORY_NAME, GITHUB_TOKEN");
@@ -212,17 +247,17 @@ async function main() {
     process.exit(1);
   }
 
+  if (!commentId) {
+    console.error("Missing required COMMENT_ID for spam check");
+    process.exit(1);
+  }
+
   const client = new Octokit({ auth: githubToken });
 
-  if (mode === "single" && commentId) {
-    console.log(`=== Single Comment Spam Check (comment #${commentId}) ===`);
-    const deleted = await processSingleComment(client, owner, repo, commentId, commentBody, commentAuthor);
-    console.log(`\nSummary: ${deleted ? `Spam comment #${commentId} by @${commentAuthor} was deleted.` : `Comment #${commentId} is clean — no action taken.`}`);
-  } else {
-    console.log(`=== Bulk Spam Scan for ${owner}/${repo} ===`);
-    const { scanned, deleted } = await bulkScanAndDelete(client, owner, repo);
-    console.log(`\nSummary: Scanned ${scanned} comments, deleted ${deleted} spam comments.`);
-  }
+  console.log(`=== Single Comment Spam Check (comment #${commentId}) ===`);
+  const { body, author } = await fetchComment(client, owner, repo, commentId);
+  const deleted = await processSingleComment(client, owner, repo, commentId, body, author);
+  console.log(`\nSummary: ${deleted ? `Spam comment #${commentId} by @${author} was deleted.` : `Comment #${commentId} is clean — no action taken.`}`);
 }
 
 // Only run when executed directly (not when imported by tests)