From c23b20b58b1ee95678a755c3b1dc4714f403cdce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20M=C3=BCller?= Date: Thu, 22 Jan 2026 13:20:56 +0000 Subject: [PATCH 01/11] fix: refine feedback focus prompting --- src/scribae/feedback.py | 22 ++++++++-- src/scribae/feedback_cli.py | 4 +- src/scribae/prompts/feedback.py | 71 ++++++++++++++++++++++----------- tests/unit/feedback_cli_test.py | 25 ++++++++++++ 4 files changed, 94 insertions(+), 28 deletions(-) diff --git a/src/scribae/feedback.py b/src/scribae/feedback.py index b9de917..0b84875 100644 --- a/src/scribae/feedback.py +++ b/src/scribae/feedback.py @@ -231,6 +231,7 @@ class FeedbackFocus(str): CLARITY = "clarity" STYLE = "style" EVIDENCE = "evidence" + ALL = "all" @classmethod def from_raw(cls, value: str) -> FeedbackFocus: @@ -240,6 +241,21 @@ def from_raw(cls, value: str) -> FeedbackFocus: raise FeedbackValidationError("--focus must be seo, structure, clarity, style, or evidence.") return cls(lowered) + @classmethod + def parse_list(cls, value: str) -> list[str]: + parts = [item.strip() for item in value.split(",") if item.strip()] + if not parts: + raise FeedbackValidationError("--focus must include at least one category.") + allowed = {cls.SEO, cls.STRUCTURE, cls.CLARITY, cls.STYLE, cls.EVIDENCE} + normalized: list[str] = [] + for part in parts: + lowered = part.lower() + if lowered not in allowed: + raise FeedbackValidationError("--focus must be seo, structure, clarity, style, or evidence.") + if lowered not in normalized: + normalized.append(lowered) + return normalized + @dataclass(frozen=True) class BodySection: @@ -263,7 +279,7 @@ class FeedbackContext: brief: SeoBrief project: ProjectConfig note: NoteDetails | None - focus: str | None + focus: list[str] | None language: str selected_outline: list[str] selected_sections: list[BodySection] @@ -281,7 +297,7 @@ def prepare_context( project: ProjectConfig, note_path: Path | None = None, language: str | None = None, - focus: str | None = None, + focus: list[str] | None = None, section_range: tuple[int, int] | None = None, max_body_chars: int = 12000, max_note_chars: int = 6000, @@ -509,7 +525,7 @@ class _FeedbackPromptContext: note_excerpt: str | None project: ProjectConfig language: str - focus: str | None + focus: list[str] | None selected_outline: list[str] selected_sections: list[dict[str, str]] diff --git a/src/scribae/feedback_cli.py b/src/scribae/feedback_cli.py index c5d07b1..c0921c7 100644 --- a/src/scribae/feedback_cli.py +++ b/src/scribae/feedback_cli.py @@ -53,7 +53,7 @@ def feedback_command( focus: str | None = typer.Option( # noqa: B008 None, "--focus", - help="Narrow the review scope: seo|structure|clarity|style|evidence.", + help="Narrow the review scope (comma-separated): seo|structure|clarity|style|evidence.", ), output_format: str = typer.Option( # noqa: B008 FeedbackFormat.MARKDOWN, @@ -187,7 +187,7 @@ def feedback_command( focus_value = None if focus: try: - focus_value = str(FeedbackFocus.from_raw(focus)) + focus_value = FeedbackFocus.parse_list(focus) except FeedbackValidationError as exc: typer.secho(str(exc), err=True, fg=typer.colors.RED) raise typer.Exit(exc.exit_code) from exc diff --git a/src/scribae/prompts/feedback.py b/src/scribae/prompts/feedback.py index 644b29b..0939d16 100644 --- a/src/scribae/prompts/feedback.py +++ b/src/scribae/prompts/feedback.py @@ -31,7 +31,7 @@ def project(self) -> ProjectConfig: ... def language(self) -> str: ... @property - def focus(self) -> str | None: ... + def focus(self) -> list[str] | None: ... @property def selected_outline(self) -> list[str]: ... @@ -61,30 +61,13 @@ class FeedbackPromptBundle: - Be conservative about facts. If a claim is not supported by the provided note, flag it as needing evidence. - If a field is empty, output an empty array ([]) or empty string, not null. - Use consistent severity labels: low | medium | high. - - Use consistent categories (definitions below). + - Use consistent categories (seo | structure | clarity | style | evidence | other). - Do not use emojis or special symbols in the output. - Categories: - - seo: keyword usage and density throughout content; placement in headings and early paragraphs; - primary/secondary keyword balance; search intent alignment; internal linking opportunities; - content depth for keyword competitiveness - - structure: heading hierarchy; section organization and alignment with brief outline; logical flow - and transitions between sections; paragraph length; intro/conclusion quality; scannability - (appropriate use of lists, subheadings for long sections) - - clarity: confusing sentences; ambiguous references; unexplained jargon or acronyms; readability; - sentence length variation; passive voice overuse; complex nested clauses; clear topic sentences - - style: tone consistency; voice; wordiness and filler phrases; audience appropriateness; - repetitive phrasing or word choices; clichés; formality level matching project tone; sentence - variety - - evidence: unsupported claims; missing citations; statements needing fact-checking; statistics - without sources; vague attributions ("studies show", "experts say"); claims contradicting the - source note; outdated information - - other: issues not fitting the above categories - Focus behavior: - - When Focus is "all", review the draft across all categories with balanced attention. - - When Focus is a specific category, prioritize findings in that category but still report - critical (high severity) issues from other categories. + - When Focus is "all" (or missing), review the draft across all categories with balanced attention. + - When Focus specifies one or more categories, only report findings in those categories, but still + report critical (high severity) issues from other categories. """ ).strip() @@ -108,7 +91,12 @@ class FeedbackPromptBundle: [REVIEW SCOPE] Focus: {focus} + In-scope categories: {focus_categories} SelectedOutlineRange: {selected_outline} + Critical override: Always include high severity issues from any category. + + [FOCUS CATEGORY DEFINITIONS] + {category_definitions} [DRAFT SECTIONS] The following sections are extracted from the draft for review: @@ -126,8 +114,43 @@ class FeedbackPromptBundle: ).strip() +_CATEGORY_DEFINITIONS: dict[str, str] = { + "seo": ( + "keyword usage and density throughout content; placement in headings and early paragraphs; " + "primary/secondary keyword balance; search intent alignment; internal linking opportunities; " + "content depth for keyword competitiveness" + ), + "structure": ( + "heading hierarchy; section organization and alignment with brief outline; logical flow " + "and transitions between sections; paragraph length; intro/conclusion quality; scannability " + "(appropriate use of lists, subheadings for long sections)" + ), + "clarity": ( + "confusing sentences; ambiguous references; unexplained jargon or acronyms; readability; " + "sentence length variation; passive voice overuse; complex nested clauses; clear topic sentences" + ), + "style": ( + "tone consistency; voice; wordiness and filler phrases; audience appropriateness; " + "repetitive phrasing or word choices; clichés; formality level matching project tone; sentence " + "variety" + ), + "evidence": ( + "unsupported claims; missing citations; statements needing fact-checking; statistics " + "without sources; vague attributions (\"studies show\", \"experts say\"); claims contradicting the " + "source note; outdated information" + ), +} + + +def _format_category_definitions(categories: list[str]) -> str: + lines = [f"- {category}: {_CATEGORY_DEFINITIONS[category]}" for category in categories] + return "\n".join(lines) if lines else "- none" + + def build_feedback_prompt_bundle(context: FeedbackPromptContext) -> FeedbackPromptBundle: """Render the system and user prompts for the feedback agent.""" + focus_categories = context.focus or list(_CATEGORY_DEFINITIONS.keys()) + focus_label = ", ".join(focus_categories) project_keywords = ", ".join(context.project.get("keywords") or []) or "none" faq_entries = [f"{item.question} — {item.answer}" for item in context.brief.faq] schema_json = json.dumps( @@ -176,11 +199,13 @@ def build_feedback_prompt_bundle(context: FeedbackPromptContext) -> FeedbackProm search_intent=context.brief.search_intent, outline=" | ".join(context.brief.outline), faq=" | ".join(faq_entries), - focus=context.focus or "all (seo, structure, clarity, style, evidence)", + focus=focus_label or "all (seo, structure, clarity, style, evidence)", + focus_categories=focus_label or "seo, structure, clarity, style, evidence", selected_outline=", ".join(context.selected_outline) or "(all)", draft_sections_json=draft_sections_json, note_excerpt=context.note_excerpt or "No source note provided.", schema_json=schema_json, + category_definitions=_format_category_definitions(focus_categories), ) return FeedbackPromptBundle(system_prompt=FEEDBACK_SYSTEM_PROMPT, user_prompt=prompt) diff --git a/tests/unit/feedback_cli_test.py b/tests/unit/feedback_cli_test.py index ece11a9..60ae6ce 100644 --- a/tests/unit/feedback_cli_test.py +++ b/tests/unit/feedback_cli_test.py @@ -176,6 +176,31 @@ def test_feedback_section_range_selects_outline( assert "SelectedOutlineRange: Introduction to Observability, Logging Foundations" in result.stdout +def test_feedback_focus_multiple_categories_limits_prompt( + body_multi_section_path: Path, + brief_path: Path, +) -> None: + result = runner.invoke( + app, + [ + "feedback", + "--body", + str(body_multi_section_path), + "--brief", + str(brief_path), + "--focus", + "seo, clarity", + "--dry-run", + ], + ) + + assert result.exit_code == 0 + assert "Focus: seo, clarity" in result.stdout + assert "In-scope categories: seo, clarity" in result.stdout + assert "Critical override: Always include high severity issues from any category." in result.stdout + assert "- structure:" not in result.stdout + + def test_feedback_passes_seed_and_top_p( monkeypatch: pytest.MonkeyPatch, body_path: Path, From a8b62d05b50ec86f8538667377290154d40458e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20M=C3=BCller?= Date: Fri, 23 Jan 2026 14:06:06 +0100 Subject: [PATCH 02/11] refactor: extract feedback category definitions to shared module - Move CATEGORY_DEFINITIONS to prompts/feedback_categories.py as single source of truth - Remove unused FeedbackFocus.ALL constant and dead from_raw() method - Add FeedbackFocus.ALLOWED derived from CATEGORY_DEFINITIONS keys - Update imports in feedback.py and prompts/feedback.py Co-Authored-By: Claude Opus 4.5 --- src/scribae/feedback.py | 13 ++------- src/scribae/prompts/feedback.py | 34 +++------------------- src/scribae/prompts/feedback_categories.py | 30 +++++++++++++++++++ 3 files changed, 37 insertions(+), 40 deletions(-) create mode 100644 src/scribae/prompts/feedback_categories.py diff --git a/src/scribae/feedback.py b/src/scribae/feedback.py index 0b84875..ca9ff07 100644 --- a/src/scribae/feedback.py +++ b/src/scribae/feedback.py @@ -19,6 +19,7 @@ from .llm import LLM_OUTPUT_RETRIES, LLM_TIMEOUT_SECONDS, OpenAISettings, apply_optional_settings, make_model from .project import ProjectConfig from .prompts.feedback import FEEDBACK_SYSTEM_PROMPT, FeedbackPromptBundle, build_feedback_prompt_bundle +from .prompts.feedback_categories import CATEGORY_DEFINITIONS # Pattern to match emoji characters across common Unicode ranges _EMOJI_PATTERN = re.compile( @@ -231,26 +232,18 @@ class FeedbackFocus(str): CLARITY = "clarity" STYLE = "style" EVIDENCE = "evidence" - ALL = "all" - @classmethod - def from_raw(cls, value: str) -> FeedbackFocus: - lowered = value.lower().strip() - allowed = {cls.SEO, cls.STRUCTURE, cls.CLARITY, cls.STYLE, cls.EVIDENCE} - if lowered not in allowed: - raise FeedbackValidationError("--focus must be seo, structure, clarity, style, or evidence.") - return cls(lowered) + ALLOWED: frozenset[str] = frozenset(CATEGORY_DEFINITIONS.keys()) @classmethod def parse_list(cls, value: str) -> list[str]: parts = [item.strip() for item in value.split(",") if item.strip()] if not parts: raise FeedbackValidationError("--focus must include at least one category.") - allowed = {cls.SEO, cls.STRUCTURE, cls.CLARITY, cls.STYLE, cls.EVIDENCE} normalized: list[str] = [] for part in parts: lowered = part.lower() - if lowered not in allowed: + if lowered not in cls.ALLOWED: raise FeedbackValidationError("--focus must be seo, structure, clarity, style, or evidence.") if lowered not in normalized: normalized.append(lowered) diff --git a/src/scribae/prompts/feedback.py b/src/scribae/prompts/feedback.py index 0939d16..8eb1f30 100644 --- a/src/scribae/prompts/feedback.py +++ b/src/scribae/prompts/feedback.py @@ -8,6 +8,8 @@ from scribae.brief import SeoBrief from scribae.project import ProjectConfig +from .feedback_categories import CATEGORY_DEFINITIONS + class FeedbackPromptBody(Protocol): @property @@ -114,42 +116,14 @@ class FeedbackPromptBundle: ).strip() -_CATEGORY_DEFINITIONS: dict[str, str] = { - "seo": ( - "keyword usage and density throughout content; placement in headings and early paragraphs; " - "primary/secondary keyword balance; search intent alignment; internal linking opportunities; " - "content depth for keyword competitiveness" - ), - "structure": ( - "heading hierarchy; section organization and alignment with brief outline; logical flow " - "and transitions between sections; paragraph length; intro/conclusion quality; scannability " - "(appropriate use of lists, subheadings for long sections)" - ), - "clarity": ( - "confusing sentences; ambiguous references; unexplained jargon or acronyms; readability; " - "sentence length variation; passive voice overuse; complex nested clauses; clear topic sentences" - ), - "style": ( - "tone consistency; voice; wordiness and filler phrases; audience appropriateness; " - "repetitive phrasing or word choices; clichés; formality level matching project tone; sentence " - "variety" - ), - "evidence": ( - "unsupported claims; missing citations; statements needing fact-checking; statistics " - "without sources; vague attributions (\"studies show\", \"experts say\"); claims contradicting the " - "source note; outdated information" - ), -} - - def _format_category_definitions(categories: list[str]) -> str: - lines = [f"- {category}: {_CATEGORY_DEFINITIONS[category]}" for category in categories] + lines = [f"- {category}: {CATEGORY_DEFINITIONS[category]}" for category in categories] return "\n".join(lines) if lines else "- none" def build_feedback_prompt_bundle(context: FeedbackPromptContext) -> FeedbackPromptBundle: """Render the system and user prompts for the feedback agent.""" - focus_categories = context.focus or list(_CATEGORY_DEFINITIONS.keys()) + focus_categories = context.focus or list(CATEGORY_DEFINITIONS.keys()) focus_label = ", ".join(focus_categories) project_keywords = ", ".join(context.project.get("keywords") or []) or "none" faq_entries = [f"{item.question} — {item.answer}" for item in context.brief.faq] diff --git a/src/scribae/prompts/feedback_categories.py b/src/scribae/prompts/feedback_categories.py new file mode 100644 index 0000000..2e01a39 --- /dev/null +++ b/src/scribae/prompts/feedback_categories.py @@ -0,0 +1,30 @@ +"""Feedback category definitions shared between feedback.py and prompts/feedback.py.""" + +from __future__ import annotations + +CATEGORY_DEFINITIONS: dict[str, str] = { + "seo": ( + "keyword usage and density throughout content; placement in headings and early paragraphs; " + "primary/secondary keyword balance; search intent alignment; internal linking opportunities; " + "content depth for keyword competitiveness" + ), + "structure": ( + "heading hierarchy; section organization and alignment with brief outline; logical flow " + "and transitions between sections; paragraph length; intro/conclusion quality; scannability " + "(appropriate use of lists, subheadings for long sections)" + ), + "clarity": ( + "confusing sentences; ambiguous references; unexplained jargon or acronyms; readability; " + "sentence length variation; passive voice overuse; complex nested clauses; clear topic sentences" + ), + "style": ( + "tone consistency; voice; wordiness and filler phrases; audience appropriateness; " + "repetitive phrasing or word choices; clichés; formality level matching project tone; sentence " + "variety" + ), + "evidence": ( + "unsupported claims; missing citations; statements needing fact-checking; statistics " + "without sources; vague attributions (\"studies show\", \"experts say\"); claims contradicting the " + "source note; outdated information" + ), +} From d27a017a690023d0610314dcd4c439b49b79e680 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20M=C3=BCller?= Date: Fri, 23 Jan 2026 14:41:38 +0100 Subject: [PATCH 03/11] fix: limit schema category enum to focused categories When --focus is provided, the JSON schema's findings.category field now only lists the selected categories plus "other" (for critical overrides) instead of all categories. This reduces LLM confusion. Co-Authored-By: Claude Opus 4.5 --- src/scribae/prompts/feedback.py | 4 +++- tests/unit/feedback_cli_test.py | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/scribae/prompts/feedback.py b/src/scribae/prompts/feedback.py index 8eb1f30..b21dfa6 100644 --- a/src/scribae/prompts/feedback.py +++ b/src/scribae/prompts/feedback.py @@ -127,6 +127,8 @@ def build_feedback_prompt_bundle(context: FeedbackPromptContext) -> FeedbackProm focus_label = ", ".join(focus_categories) project_keywords = ", ".join(context.project.get("keywords") or []) or "none" faq_entries = [f"{item.question} — {item.answer}" for item in context.brief.faq] + # Build category enum for schema: include selected categories + "other" for critical overrides + category_enum = "|".join(focus_categories + ["other"]) schema_json = json.dumps( { "summary": {"issues": ["string"], "strengths": ["string"]}, @@ -149,7 +151,7 @@ def build_feedback_prompt_bundle(context: FeedbackPromptContext) -> FeedbackProm "findings": [ { "severity": "low|medium|high", - "category": "seo|structure|clarity|style|evidence|other", + "category": category_enum, "message": "string", "location": {"heading": "string", "paragraph_index": 1}, } diff --git a/tests/unit/feedback_cli_test.py b/tests/unit/feedback_cli_test.py index 60ae6ce..467d14a 100644 --- a/tests/unit/feedback_cli_test.py +++ b/tests/unit/feedback_cli_test.py @@ -198,7 +198,10 @@ def test_feedback_focus_multiple_categories_limits_prompt( assert "Focus: seo, clarity" in result.stdout assert "In-scope categories: seo, clarity" in result.stdout assert "Critical override: Always include high severity issues from any category." in result.stdout + # Verify category definitions are limited to selected categories assert "- structure:" not in result.stdout + # Verify JSON schema category enum only lists selected categories + "other" for critical overrides + assert '"category": "seo|clarity|other"' in result.stdout def test_feedback_passes_seed_and_top_p( From aff73a93faad5acbd31962ed55856f934fb33cdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20M=C3=BCller?= Date: Fri, 23 Jan 2026 14:53:28 +0100 Subject: [PATCH 04/11] refactor: derive focus validation error message from ALLOWED set Co-Authored-By: Claude Opus 4.5 --- src/scribae/feedback.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/scribae/feedback.py b/src/scribae/feedback.py index ca9ff07..6af9666 100644 --- a/src/scribae/feedback.py +++ b/src/scribae/feedback.py @@ -244,7 +244,8 @@ def parse_list(cls, value: str) -> list[str]: for part in parts: lowered = part.lower() if lowered not in cls.ALLOWED: - raise FeedbackValidationError("--focus must be seo, structure, clarity, style, or evidence.") + allowed_list = ", ".join(sorted(cls.ALLOWED)) + raise FeedbackValidationError(f"--focus must be one of: {allowed_list}.") if lowered not in normalized: normalized.append(lowered) return normalized From 9270727dcf855c20434f7b51478a44261d82eaa7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20M=C3=BCller?= Date: Fri, 23 Jan 2026 15:44:34 +0100 Subject: [PATCH 05/11] fix: enforce focus categories with prompt + post-processing - Add explicit negative instruction: "ONLY use category values listed in the schema. DO NOT use any other category values." - Add _normalize_finding_categories() to remap any out-of-scope categories to "other" after LLM returns - Add tests for the normalization function This belt-and-suspenders approach ensures LLMs cannot output categories outside the focus scope, even if they hallucinate from training data. Co-Authored-By: Claude Opus 4.5 --- src/scribae/feedback.py | 34 ++++++++++++++++++++ src/scribae/prompts/feedback.py | 12 +++---- tests/unit/feedback_cli_test.py | 56 +++++++++++++++++++++++++++++++-- 3 files changed, 92 insertions(+), 10 deletions(-) diff --git a/src/scribae/feedback.py b/src/scribae/feedback.py index 6af9666..c57bfd0 100644 --- a/src/scribae/feedback.py +++ b/src/scribae/feedback.py @@ -402,6 +402,8 @@ def generate_feedback_report( except Exception as exc: # pragma: no cover - surfaced to CLI raise FeedbackLLMError(f"LLM request failed: {exc}") from exc + # Remap any out-of-scope categories to "other" + report = _normalize_finding_categories(report, context.focus) return report @@ -566,6 +568,38 @@ def _feedback_language_text(report: FeedbackReport) -> str: return "\n".join([issue_text, strength_text, findings, checklist, section_notes]).strip() +def _normalize_finding_categories(report: FeedbackReport, focus: list[str] | None) -> FeedbackReport: + """Remap any finding categories outside the focus scope to 'other'. + + If focus is None (all categories), no remapping is performed. + """ + if focus is None: + return report + + allowed = set(focus) | {"other"} + needs_remap = any(f.category not in allowed for f in report.findings) + if not needs_remap: + return report + + remapped_findings = [ + FeedbackFinding( + severity=f.severity, + category=f.category if f.category in allowed else "other", + message=f.message, + location=f.location, + ) + for f in report.findings + ] + return FeedbackReport( + summary=report.summary, + brief_alignment=report.brief_alignment, + section_notes=report.section_notes, + evidence_gaps=report.evidence_gaps, + findings=remapped_findings, + checklist=report.checklist, + ) + + def _load_body(body_path: Path, *, max_chars: int) -> BodyDocument: try: post = frontmatter.load(body_path) diff --git a/src/scribae/prompts/feedback.py b/src/scribae/prompts/feedback.py index b21dfa6..11ea92a 100644 --- a/src/scribae/prompts/feedback.py +++ b/src/scribae/prompts/feedback.py @@ -63,13 +63,9 @@ class FeedbackPromptBundle: - Be conservative about facts. If a claim is not supported by the provided note, flag it as needing evidence. - If a field is empty, output an empty array ([]) or empty string, not null. - Use consistent severity labels: low | medium | high. - - Use consistent categories (seo | structure | clarity | style | evidence | other). + - ONLY use category values listed in the schema. DO NOT use any other category values. + - Use "other" for critical issues outside the focus scope. - Do not use emojis or special symbols in the output. - - Focus behavior: - - When Focus is "all" (or missing), review the draft across all categories with balanced attention. - - When Focus specifies one or more categories, only report findings in those categories, but still - report critical (high severity) issues from other categories. """ ).strip() @@ -93,9 +89,9 @@ class FeedbackPromptBundle: [REVIEW SCOPE] Focus: {focus} - In-scope categories: {focus_categories} + Allowed categories: {focus_categories}, other SelectedOutlineRange: {selected_outline} - Critical override: Always include high severity issues from any category. + Note: Use "other" only for high severity issues that fall outside the focused categories. [FOCUS CATEGORY DEFINITIONS] {category_definitions} diff --git a/tests/unit/feedback_cli_test.py b/tests/unit/feedback_cli_test.py index 467d14a..48beb87 100644 --- a/tests/unit/feedback_cli_test.py +++ b/tests/unit/feedback_cli_test.py @@ -196,8 +196,8 @@ def test_feedback_focus_multiple_categories_limits_prompt( assert result.exit_code == 0 assert "Focus: seo, clarity" in result.stdout - assert "In-scope categories: seo, clarity" in result.stdout - assert "Critical override: Always include high severity issues from any category." in result.stdout + assert "Allowed categories: seo, clarity, other" in result.stdout + assert 'Use "other" only for high severity issues' in result.stdout # Verify category definitions are limited to selected categories assert "- structure:" not in result.stdout # Verify JSON schema category enum only lists selected categories + "other" for critical overrides @@ -372,3 +372,55 @@ def test_findings_message_emojis_stripped(self) -> None: checklist=[], ) assert report.findings[0].message == "Claim needs citation" + + +class TestNormalizeFindingCategories: + def _make_report(self, categories: list[str]) -> FeedbackReport: + return FeedbackReport( + summary=FeedbackSummary(issues=[], strengths=[]), + brief_alignment=BriefAlignment( + intent="Matches intent", + outline_covered=[], + outline_missing=[], + keywords_covered=[], + keywords_missing=[], + faq_covered=[], + faq_missing=[], + ), + section_notes=[], + evidence_gaps=[], + findings=[ + FeedbackFinding(severity="medium", category=cat, message=f"Issue in {cat}") + for cat in categories + ], + checklist=[], + ) + + def test_no_focus_returns_unchanged(self) -> None: + from scribae.feedback import _normalize_finding_categories + + report = self._make_report(["seo", "structure", "clarity"]) + result = _normalize_finding_categories(report, focus=None) + assert result is report + + def test_all_in_scope_returns_unchanged(self) -> None: + from scribae.feedback import _normalize_finding_categories + + report = self._make_report(["seo", "clarity", "other"]) + result = _normalize_finding_categories(report, focus=["seo", "clarity"]) + assert [f.category for f in result.findings] == ["seo", "clarity", "other"] + + def test_out_of_scope_remapped_to_other(self) -> None: + from scribae.feedback import _normalize_finding_categories + + report = self._make_report(["seo", "structure", "evidence"]) + result = _normalize_finding_categories(report, focus=["seo", "clarity"]) + assert [f.category for f in result.findings] == ["seo", "other", "other"] + + def test_preserves_other_fields(self) -> None: + from scribae.feedback import _normalize_finding_categories + + report = self._make_report(["structure"]) + result = _normalize_finding_categories(report, focus=["seo"]) + assert result.findings[0].severity == "medium" + assert result.findings[0].message == "Issue in structure" From 29dd76d00c4fc64e675e05ce2fe553fca6ceaeb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20M=C3=BCller?= Date: Fri, 23 Jan 2026 15:56:45 +0100 Subject: [PATCH 06/11] fix: add type cast for mypy in normalize categories test Cast category string to Any to satisfy mypy's Literal type requirement for FeedbackFinding.category. Co-Authored-By: Claude Opus 4.5 --- tests/unit/feedback_cli_test.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/unit/feedback_cli_test.py b/tests/unit/feedback_cli_test.py index 48beb87..93028c1 100644 --- a/tests/unit/feedback_cli_test.py +++ b/tests/unit/feedback_cli_test.py @@ -2,6 +2,7 @@ import json from pathlib import Path +from typing import Any, cast import pytest from typer.testing import CliRunner @@ -390,7 +391,11 @@ def _make_report(self, categories: list[str]) -> FeedbackReport: section_notes=[], evidence_gaps=[], findings=[ - FeedbackFinding(severity="medium", category=cat, message=f"Issue in {cat}") + FeedbackFinding( + severity="medium", + category=cast(Any, cat), + message=f"Issue in {cat}", + ) for cat in categories ], checklist=[], From 04a66918c20b10d0937e5746268661f0be6af712 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20M=C3=BCller?= Date: Fri, 23 Jan 2026 15:59:33 +0100 Subject: [PATCH 07/11] chore: add mypy to pre-commit hooks Add local mypy hook that runs via uv to use project dependencies. Fix pre-existing torch import type errors in mt.py with ignore comments. Co-Authored-By: Claude Opus 4.5 --- .pre-commit-config.yaml | 9 +++++++++ src/scribae/translate/mt.py | 4 ++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 78c67b0..87cff18 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,6 +6,15 @@ repos: args: [ --fix ] - id: ruff-format + - repo: local + hooks: + - id: mypy + name: mypy + entry: uv run mypy + language: system + types: [ python ] + pass_filenames: false + - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: diff --git a/src/scribae/translate/mt.py b/src/scribae/translate/mt.py index 97bbc0c..4175a51 100644 --- a/src/scribae/translate/mt.py +++ b/src/scribae/translate/mt.py @@ -62,14 +62,14 @@ def _pipeline_for(self, model_id: str) -> Pipeline: def _require_torch(self) -> ModuleType: try: - import torch + import torch # type: ignore[import-not-found] except ImportError as exc: raise RuntimeError( "Translation requires PyTorch. Install it with " "`uv sync --extra translation` or " "`uv sync --extra translation --index pytorch-cpu` (CPU-only)." ) from exc - return torch + return torch # type: ignore[no-any-return] def prefetch(self, steps: Iterable[RouteStep]) -> None: """Warm translation pipelines for the provided route steps.""" From 1167279b1bf206b33575cd645ee2c6f0586513ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20M=C3=BCller?= Date: Fri, 23 Jan 2026 16:01:10 +0100 Subject: [PATCH 08/11] style: apply ruff format to codebase Co-Authored-By: Claude Opus 4.5 --- src/scribae/brief.py | 5 ++- src/scribae/feedback.py | 38 ++++++++++------------ src/scribae/idea.py | 12 ++----- src/scribae/language.py | 3 +- src/scribae/llm.py | 5 +-- src/scribae/main.py | 1 + src/scribae/meta.py | 3 +- src/scribae/project.py | 4 +-- src/scribae/prompts/feedback_categories.py | 2 +- src/scribae/prompts/meta.py | 2 +- src/scribae/prompts/write.py | 1 + src/scribae/translate/model_registry.py | 8 ++--- src/scribae/translate/postedit.py | 14 +++----- src/scribae/translate_cli.py | 9 ++--- tests/unit/feedback_cli_test.py | 6 ++-- tests/unit/language_test.py | 12 ++----- tests/unit/translate_cli_test.py | 8 ++--- 17 files changed, 49 insertions(+), 84 deletions(-) diff --git a/src/scribae/brief.py b/src/scribae/brief.py index 24fa47e..9e07aaa 100644 --- a/src/scribae/brief.py +++ b/src/scribae/brief.py @@ -36,6 +36,7 @@ "save_prompt_artifacts", ] + class BriefingError(Exception): """Raised when a brief cannot be generated.""" @@ -409,9 +410,7 @@ def _select_idea( if len(ideas.ideas) == 1: return ideas.ideas[0] - raise BriefValidationError( - "Select an idea with --idea (id or 1-based index), or set idea_id in note frontmatter." - ) + raise BriefValidationError("Select an idea with --idea (id or 1-based index), or set idea_id in note frontmatter.") def _metadata_idea_id(metadata: dict[str, Any]) -> str | None: diff --git a/src/scribae/feedback.py b/src/scribae/feedback.py index c57bfd0..cba81a4 100644 --- a/src/scribae/feedback.py +++ b/src/scribae/feedback.py @@ -24,21 +24,21 @@ # Pattern to match emoji characters across common Unicode ranges _EMOJI_PATTERN = re.compile( "[" - "\U0001F600-\U0001F64F" # emoticons - "\U0001F300-\U0001F5FF" # symbols & pictographs - "\U0001F680-\U0001F6FF" # transport & map symbols - "\U0001F1E0-\U0001F1FF" # flags - "\U00002700-\U000027BF" # dingbats - "\U0001F900-\U0001F9FF" # supplemental symbols & pictographs - "\U0001FA00-\U0001FA6F" # chess symbols, extended-A - "\U0001FA70-\U0001FAFF" # symbols & pictographs extended-A - "\U00002600-\U000026FF" # misc symbols - "\U0001F700-\U0001F77F" # alchemical symbols - "\U0001F780-\U0001F7FF" # geometric shapes extended - "\U0001F800-\U0001F8FF" # supplemental arrows-C - "\U0001F3FB-\U0001F3FF" # skin tone modifiers - "\uFE0F" # variation selector-16 (emoji presentation) - "\u200D" # zero-width joiner (used in combined emojis) + "\U0001f600-\U0001f64f" # emoticons + "\U0001f300-\U0001f5ff" # symbols & pictographs + "\U0001f680-\U0001f6ff" # transport & map symbols + "\U0001f1e0-\U0001f1ff" # flags + "\U00002700-\U000027bf" # dingbats + "\U0001f900-\U0001f9ff" # supplemental symbols & pictographs + "\U0001fa00-\U0001fa6f" # chess symbols, extended-A + "\U0001fa70-\U0001faff" # symbols & pictographs extended-A + "\U00002600-\U000026ff" # misc symbols + "\U0001f700-\U0001f77f" # alchemical symbols + "\U0001f780-\U0001f7ff" # geometric shapes extended + "\U0001f800-\U0001f8ff" # supplemental arrows-C + "\U0001f3fb-\U0001f3ff" # skin tone modifiers + "\ufe0f" # variation selector-16 (emoji presentation) + "\u200d" # zero-width joiner (used in combined emojis) "]+", flags=re.UNICODE, ) @@ -370,9 +370,7 @@ def generate_feedback_report( resolved_settings = OpenAISettings.from_env() llm_agent: Agent[None, FeedbackReport] = ( - agent - if agent is not None - else _create_agent(model_name, temperature=temperature, top_p=top_p, seed=seed) + agent if agent is not None else _create_agent(model_name, temperature=temperature, top_p=top_p, seed=seed) ) _report(reporter, f"Calling model '{model_name}' via {resolved_settings.base_url}") @@ -449,9 +447,7 @@ def render_markdown(report: FeedbackReport) -> str: if report.findings: for finding in report.findings: location = _format_location(finding.location) - sections.append( - f"- **{finding.severity.upper()}** [{finding.category}] {finding.message}{location}" - ) + sections.append(f"- **{finding.severity.upper()}** [{finding.category}] {finding.message}{location}") else: sections.extend(_render_list([])) sections.append("") diff --git a/src/scribae/idea.py b/src/scribae/idea.py index acad883..c3e59d2 100644 --- a/src/scribae/idea.py +++ b/src/scribae/idea.py @@ -134,9 +134,7 @@ def prepare_context( ) _report(reporter, "Prepared idea-generation prompt.") - return IdeaContext( - note=note, project=project, prompts=prompts, language=language_resolution.language - ) + return IdeaContext(note=note, project=project, prompts=prompts, language=language_resolution.language) def generate_ideas( @@ -216,9 +214,7 @@ def save_prompt_artifacts( prompt_path = destination / f"{stamp}-{slug}-ideas.prompt.txt" note_path = destination / f"{stamp}-note.txt" - prompt_payload = ( - f"SYSTEM PROMPT:\n{context.prompts.system_prompt}\n\nUSER PROMPT:\n{context.prompts.user_prompt}\n" - ) + prompt_payload = f"SYSTEM PROMPT:\n{context.prompts.system_prompt}\n\nUSER PROMPT:\n{context.prompts.user_prompt}\n" prompt_path.write_text(prompt_payload, encoding="utf-8") note_path.write_text(context.note.body, encoding="utf-8") @@ -248,9 +244,7 @@ def _create_agent( def _idea_language_text(ideas: IdeaList) -> str: - return "\n".join( - f"{item.title} {item.description} {item.why}" for item in ideas.ideas - ) + return "\n".join(f"{item.title} {item.description} {item.why}" for item in ideas.ideas) def _invoke_agent(agent: Agent[None, IdeaList], prompt: str, *, timeout_seconds: float) -> IdeaList: diff --git a/src/scribae/language.py b/src/scribae/language.py index fb5f9e5..9466d94 100644 --- a/src/scribae/language.py +++ b/src/scribae/language.py @@ -106,8 +106,7 @@ def ensure_language_output( def _append_language_correction(prompt: str, expected_language: str) -> str: correction = ( - "\n\n[LANGUAGE CORRECTION]\n" - f"Regenerate the full response strictly in language code '{expected_language}'." + f"\n\n[LANGUAGE CORRECTION]\nRegenerate the full response strictly in language code '{expected_language}'." ) return f"{prompt}{correction}" diff --git a/src/scribae/llm.py b/src/scribae/llm.py index 6fb8fb9..2f771fb 100644 --- a/src/scribae/llm.py +++ b/src/scribae/llm.py @@ -35,8 +35,9 @@ def configure_environment(self) -> None: os.environ["OPENAI_API_KEY"] = self.api_key -def make_model(model_name: str, *, model_settings: ModelSettings, - settings: OpenAISettings | None = None) -> OpenAIChatModel: +def make_model( + model_name: str, *, model_settings: ModelSettings, settings: OpenAISettings | None = None +) -> OpenAIChatModel: """Return an OpenAI-compatible model configured for local/remote endpoints.""" resolved_settings = settings or OpenAISettings.from_env() provider = OpenAIProvider(base_url=resolved_settings.base_url, api_key=resolved_settings.api_key) diff --git a/src/scribae/main.py b/src/scribae/main.py index 7b42e03..f1657d2 100644 --- a/src/scribae/main.py +++ b/src/scribae/main.py @@ -25,6 +25,7 @@ def app_callback() -> None: """Root Scribae CLI callback.""" + app.command("idea", help="Brainstorm article ideas from a note with project-aware guidance.")(idea_command) app.command( "brief", diff --git a/src/scribae/meta.py b/src/scribae/meta.py index bd23031..27cc116 100644 --- a/src/scribae/meta.py +++ b/src/scribae/meta.py @@ -253,8 +253,7 @@ def generate_metadata( _report( reporter, - f"Calling model '{model_name}' via {resolved_settings.base_url}" - + (f" (reason: {reason})" if reason else ""), + f"Calling model '{model_name}' via {resolved_settings.base_url}" + (f" (reason: {reason})" if reason else ""), ) try: diff --git a/src/scribae/project.py b/src/scribae/project.py index fd19bbe..b3fddde 100644 --- a/src/scribae/project.py +++ b/src/scribae/project.py @@ -86,9 +86,7 @@ def _resolve_project_path(name: str, *, base_dir: Path | None = None) -> Path: if resolved.exists(): return resolved - raise FileNotFoundError( - f"Project config {search_dir / f'{name}.yaml'} or {search_dir / f'{name}.yml'} not found" - ) + raise FileNotFoundError(f"Project config {search_dir / f'{name}.yaml'} or {search_dir / f'{name}.yml'} not found") def _merge_with_defaults(data: Mapping[str, Any]) -> ProjectConfig: diff --git a/src/scribae/prompts/feedback_categories.py b/src/scribae/prompts/feedback_categories.py index 2e01a39..0984255 100644 --- a/src/scribae/prompts/feedback_categories.py +++ b/src/scribae/prompts/feedback_categories.py @@ -24,7 +24,7 @@ ), "evidence": ( "unsupported claims; missing citations; statements needing fact-checking; statistics " - "without sources; vague attributions (\"studies show\", \"experts say\"); claims contradicting the " + 'without sources; vague attributions ("studies show", "experts say"); claims contradicting the ' "source note; outdated information" ), } diff --git a/src/scribae/prompts/meta.py b/src/scribae/prompts/meta.py index 42ef414..76be9ab 100644 --- a/src/scribae/prompts/meta.py +++ b/src/scribae/prompts/meta.py @@ -128,7 +128,7 @@ def render_brief_context(brief: SeoBrief | None) -> str: f"""\ BriefTitle: {brief.title} PrimaryKeyword: {brief.primary_keyword} - SecondaryKeywords: {', '.join(brief.secondary_keywords)} + SecondaryKeywords: {", ".join(brief.secondary_keywords)} PlannedSearchIntent: {brief.search_intent} PlannedMetaDescription: {brief.meta_description} """ diff --git a/src/scribae/prompts/write.py b/src/scribae/prompts/write.py index 66b8095..9f244de 100644 --- a/src/scribae/prompts/write.py +++ b/src/scribae/prompts/write.py @@ -84,6 +84,7 @@ def build_user_prompt( style_rules=style_rules_text, ) + def build_faq_prompt( *, project: ProjectConfig, diff --git a/src/scribae/translate/model_registry.py b/src/scribae/translate/model_registry.py index 37aae41..9dcc772 100644 --- a/src/scribae/translate/model_registry.py +++ b/src/scribae/translate/model_registry.py @@ -13,9 +13,7 @@ "pt": "por_Latn", } -_NLLB_CODE_ALIASES = { - value.lower(): value for value in NLLB_LANGUAGE_MAP.values() -} | { +_NLLB_CODE_ALIASES = {value.lower(): value for value in NLLB_LANGUAGE_MAP.values()} | { value.lower().replace("_", "-"): value for value in NLLB_LANGUAGE_MAP.values() } @@ -85,9 +83,7 @@ def nllb_lang_code(self, lang: str) -> str: if mapped: return mapped supported = ", ".join(sorted(NLLB_LANGUAGE_MAP)) - raise ValueError( - f"Unsupported language code '{lang}' for NLLB fallback. Supported ISO codes: {supported}." - ) + raise ValueError(f"Unsupported language code '{lang}' for NLLB fallback. Supported ISO codes: {supported}.") def nllb_spec(self) -> ModelSpec: return ModelSpec( diff --git a/src/scribae/translate/postedit.py b/src/scribae/translate/postedit.py index f905ef6..fb5141e 100644 --- a/src/scribae/translate/postedit.py +++ b/src/scribae/translate/postedit.py @@ -79,9 +79,7 @@ def post_edit( trimmed_source, trimmed_mt = self._trim_inputs(source_text, mt_draft) prompt = self._build_prompt(trimmed_source, trimmed_mt, cfg, protected.placeholders.keys(), strict=strict) if self.max_chars is not None and len(prompt) > self.max_chars: - raise PostEditAborted( - f"post-edit prompt length {len(prompt)} exceeds limit of {self.max_chars} characters" - ) + raise PostEditAborted(f"post-edit prompt length {len(prompt)} exceeds limit of {self.max_chars} characters") try: result = self._invoke(prompt, protected.placeholders.keys(), mt_draft, expected_lang=cfg.target_lang) except PostEditAborted: @@ -337,9 +335,7 @@ def _build_prompt( "You are a post-editor improving a machine translation with minimal edits.\n" f"Source language: {cfg.source_lang}; Target language: {cfg.target_lang}.\n" f"Tone: register={tone.register}, audience={tone.audience}.\n" - "[CONSTRAINTS]\n" - + "\n".join(f"- {line}" for line in constraints) - + "\n[GLOSSARY]\n" + "[CONSTRAINTS]\n" + "\n".join(f"- {line}" for line in constraints) + "\n[GLOSSARY]\n" f"{glossary_section}\n" "[INPUT] SOURCE TEXT:\n" f"{source_text}\n\n" @@ -377,10 +373,10 @@ def _restore_markdown_structure(self, mt_draft: str, edited: str) -> str: # Patterns for Markdown prefixes we want to restore # Blockquote pattern: one or more '>' possibly with spaces - blockquote_pattern = re.compile(r'^((?:>\s*)+)') + blockquote_pattern = re.compile(r"^((?:>\s*)+)") # List marker pattern: optional whitespace + marker + space # Supports: -, *, +, and numbered lists like 1., 2., 10. - list_marker_pattern = re.compile(r'^(\s*(?:[-*+]|\d+\.)\s+)') + list_marker_pattern = re.compile(r"^(\s*(?:[-*+]|\d+\.)\s+)") restored_lines = [] for i, edited_line in enumerate(edited_lines): @@ -413,7 +409,7 @@ def _restore_markdown_structure(self, mt_draft: str, edited: str) -> str: restored_lines.append(restored_line) - return '\n'.join(restored_lines) + return "\n".join(restored_lines) def _apply_glossary(self, text: str, glossary: dict[str, str]) -> str: translation = text diff --git a/src/scribae/translate_cli.py b/src/scribae/translate_cli.py index d1c5c8f..f17a58c 100644 --- a/src/scribae/translate_cli.py +++ b/src/scribae/translate_cli.py @@ -92,9 +92,7 @@ def _debug_path(base: Path) -> Path: def _validate_language_code(value: str, *, label: str) -> None: cleaned = value.strip() if not cleaned or not _LANGUAGE_CODE_RE.fullmatch(cleaned): - raise typer.BadParameter( - f"{label} must be a language code like en or eng_Latn; received '{value}'." - ) + raise typer.BadParameter(f"{label} must be a language code like en or eng_Latn; received '{value}'.") @translate_app.command() @@ -102,10 +100,7 @@ def translate( src: str | None = typer.Option( # noqa: B008 None, "--src", - help=( - "Source language code, e.g. en or eng_Latn (NLLB). " - "Required unless provided via --project." - ), + help=("Source language code, e.g. en or eng_Latn (NLLB). Required unless provided via --project."), ), tgt: str = typer.Option( # noqa: B008 ..., diff --git a/tests/unit/feedback_cli_test.py b/tests/unit/feedback_cli_test.py index 93028c1..b8c40ee 100644 --- a/tests/unit/feedback_cli_test.py +++ b/tests/unit/feedback_cli_test.py @@ -55,9 +55,7 @@ def __init__(self) -> None: faq_covered=[], faq_missing=[], ), - section_notes=[ - SectionNote(heading="Introduction to Observability", notes=["Add a concrete example."]) - ], + section_notes=[SectionNote(heading="Introduction to Observability", notes=["Add a concrete example."])], evidence_gaps=["Add a source for claims about monitoring cadence."], findings=[ FeedbackFinding( @@ -296,7 +294,7 @@ def test_strips_emoji_only_string(self) -> None: assert strip_emojis("🚀✅📚") == "" def test_preserves_special_characters(self) -> None: - assert strip_emojis("Use \"quotes\" and (parens)") == "Use \"quotes\" and (parens)" + assert strip_emojis('Use "quotes" and (parens)') == 'Use "quotes" and (parens)' assert strip_emojis("Items: a, b, c") == "Items: a, b, c" def test_strips_flag_emojis(self) -> None: diff --git a/tests/unit/language_test.py b/tests/unit/language_test.py index 41292a3..2f0f759 100644 --- a/tests/unit/language_test.py +++ b/tests/unit/language_test.py @@ -14,16 +14,12 @@ def test_resolve_language_prefers_flag_and_project_over_note() -> None: metadata = {"lang": "fr"} - resolved = resolve_output_language( - flag_language="de", project_language="es", metadata=metadata, text="bonjour" - ) + resolved = resolve_output_language(flag_language="de", project_language="es", metadata=metadata, text="bonjour") assert resolved.language == "de" assert resolved.source == "flag" - project_first = resolve_output_language( - flag_language=None, project_language="es", metadata=metadata, text="hola" - ) + project_first = resolve_output_language(flag_language=None, project_language="es", metadata=metadata, text="hola") assert project_first.language == "es" assert project_first.source == "project" @@ -32,9 +28,7 @@ def test_resolve_language_prefers_flag_and_project_over_note() -> None: def test_resolve_language_uses_frontmatter_before_detection() -> None: metadata = {"language": "pt-BR"} - resolved = resolve_output_language( - flag_language=None, project_language="", metadata=metadata, text="conteudo" - ) + resolved = resolve_output_language(flag_language=None, project_language="", metadata=metadata, text="conteudo") assert resolved.language == "pt-BR" assert resolved.source == "frontmatter" diff --git a/tests/unit/translate_cli_test.py b/tests/unit/translate_cli_test.py index 890a63e..737b1f4 100644 --- a/tests/unit/translate_cli_test.py +++ b/tests/unit/translate_cli_test.py @@ -95,9 +95,7 @@ def translate(self, text: str, cfg: Any) -> str: def _patch_loader(monkeypatch: pytest.MonkeyPatch, projects_dir: Path) -> None: loader = project_module.load_project - monkeypatch.setattr( - "scribae.translate_cli.load_project", lambda name: loader(name, base_dir=projects_dir) - ) + monkeypatch.setattr("scribae.translate_cli.load_project", lambda name: loader(name, base_dir=projects_dir)) def test_translate_requires_src_without_project( @@ -129,7 +127,7 @@ def test_translate_uses_project_defaults( projects_dir = tmp_path / "projects" projects_dir.mkdir() (projects_dir / "demo.yaml").write_text( - "language: fr\n" "tone: academic\n" "audience: researchers\n", + "language: fr\ntone: academic\naudience: researchers\n", encoding="utf-8", ) _patch_loader(monkeypatch, projects_dir) @@ -164,7 +162,7 @@ def test_translate_flags_override_project_defaults( projects_dir = tmp_path / "projects" projects_dir.mkdir() (projects_dir / "demo.yaml").write_text( - "language: fr\n" "tone: academic\n" "audience: researchers\n", + "language: fr\ntone: academic\naudience: researchers\n", encoding="utf-8", ) _patch_loader(monkeypatch, projects_dir) From ea458eacfa451d78f1386ed487790bc8219bd979 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20M=C3=BCller?= Date: Fri, 23 Jan 2026 16:08:43 +0100 Subject: [PATCH 09/11] fix: handle torch optional import for mypy Add torch to mypy ignore_missing_imports override and use cast instead of type: ignore to avoid unused-ignore errors in CI. Co-Authored-By: Claude Opus 4.5 --- pyproject.toml | 2 +- src/scribae/translate/mt.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8ea0b3e..0e6ce52 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,7 +105,7 @@ disallow_untyped_defs = true warn_unused_ignores = true [[tool.mypy.overrides]] -module = ["frontmatter", "yaml", "tomli"] +module = ["frontmatter", "yaml", "tomli", "torch"] ignore_missing_imports = true [tool.pytest.ini_options] diff --git a/src/scribae/translate/mt.py b/src/scribae/translate/mt.py index 4175a51..36d7632 100644 --- a/src/scribae/translate/mt.py +++ b/src/scribae/translate/mt.py @@ -2,7 +2,7 @@ from collections.abc import Iterable from types import ModuleType -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, cast from .model_registry import ModelRegistry, RouteStep @@ -62,14 +62,14 @@ def _pipeline_for(self, model_id: str) -> Pipeline: def _require_torch(self) -> ModuleType: try: - import torch # type: ignore[import-not-found] + import torch except ImportError as exc: raise RuntimeError( "Translation requires PyTorch. Install it with " "`uv sync --extra translation` or " "`uv sync --extra translation --index pytorch-cpu` (CPU-only)." ) from exc - return torch # type: ignore[no-any-return] + return cast(ModuleType, torch) def prefetch(self, steps: Iterable[RouteStep]) -> None: """Warm translation pipelines for the provided route steps.""" From 1a1863653f26396b09310643f8dba826c6492c21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20M=C3=BCller?= Date: Fri, 23 Jan 2026 16:16:37 +0100 Subject: [PATCH 10/11] fix: require --all-extras for development (torch needed for mypy) Remove torch-specific mypy workarounds since torch is now required for development. Update documentation to clarify that --all-extras is mandatory for mypy to pass. Co-Authored-By: Claude Opus 4.5 --- AGENTS.md | 4 ++-- CLAUDE.md | 4 ++-- README.md | 2 ++ pyproject.toml | 2 +- src/scribae/translate/mt.py | 4 ++-- 5 files changed, 9 insertions(+), 7 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 5a94df3..64bcc99 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -11,14 +11,14 @@ Scribae is a CLI tool that transforms local Markdown notes into structured SEO c ## Quick Reference ```bash -uv sync --locked --all-extras --dev # Install dependencies +uv sync --locked --all-extras --dev # Required: install all dependencies including PyTorch uv run scribae --help # Run CLI uv run ruff check # Lint (auto-fix: --fix) uv run mypy # Type check uv run pytest # Run tests ``` -**Important:** Always run tests, mypy, and ruff at the end of your task and fix any issues. +**Important:** The `--all-extras` flag is required for development (PyTorch needed for mypy). Always run tests, mypy, and ruff at the end of your task and fix any issues. ## Project Structure diff --git a/CLAUDE.md b/CLAUDE.md index 83f4227..3c93750 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -11,7 +11,7 @@ Scribae is a CLI tool that transforms local Markdown notes into structured SEO c ## Build & Development Commands ```bash -uv sync --locked --all-extras --dev # Install dependencies (includes PyTorch with CUDA) +uv sync --locked --all-extras --dev # Required: install all dependencies including PyTorch uv run scribae --help # Run CLI uv run ruff check # Lint (auto-fix: --fix) uv run mypy # Type check @@ -25,7 +25,7 @@ For a lighter install (~200MB vs ~2GB), use the CPU-only PyTorch index: uv sync --locked --all-extras --dev --index pytorch-cpu ``` -**Important:** Always run tests, mypy, and ruff at the end of your task and fix any issues. +**Important:** The `--all-extras` flag is required for development. It installs PyTorch which is needed for mypy to pass. Always run tests, mypy, and ruff at the end of your task and fix any issues. ## Architecture diff --git a/README.md b/README.md index 5b73197..e0f7cc6 100644 --- a/README.md +++ b/README.md @@ -226,6 +226,8 @@ Options: ### Setup +The `--all-extras` flag is required for development as it installs PyTorch, which is needed for mypy type checking to pass. + ```bash git clone https://github.com/fmueller/scribae.git cd scribae diff --git a/pyproject.toml b/pyproject.toml index 0e6ce52..8ea0b3e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,7 +105,7 @@ disallow_untyped_defs = true warn_unused_ignores = true [[tool.mypy.overrides]] -module = ["frontmatter", "yaml", "tomli", "torch"] +module = ["frontmatter", "yaml", "tomli"] ignore_missing_imports = true [tool.pytest.ini_options] diff --git a/src/scribae/translate/mt.py b/src/scribae/translate/mt.py index 36d7632..97bbc0c 100644 --- a/src/scribae/translate/mt.py +++ b/src/scribae/translate/mt.py @@ -2,7 +2,7 @@ from collections.abc import Iterable from types import ModuleType -from typing import TYPE_CHECKING, Any, cast +from typing import TYPE_CHECKING, Any from .model_registry import ModelRegistry, RouteStep @@ -69,7 +69,7 @@ def _require_torch(self) -> ModuleType: "`uv sync --extra translation` or " "`uv sync --extra translation --index pytorch-cpu` (CPU-only)." ) from exc - return cast(ModuleType, torch) + return torch def prefetch(self, steps: Iterable[RouteStep]) -> None: """Warm translation pipelines for the provided route steps.""" From cbc6071cc06091a30269658d53fdcc866c889df0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20M=C3=BCller?= Date: Fri, 23 Jan 2026 16:57:04 +0100 Subject: [PATCH 11/11] feat: improve feedback section scope prompting - Rename SelectedOutlineRange to SectionsUnderReview for clarity - Omit line entirely when all sections are selected (redundant) - Add explicit instruction for brief alignment checking Co-Authored-By: Claude Opus 4.5 --- src/scribae/prompts/feedback.py | 19 +++++++++++++++++-- tests/unit/feedback_cli_test.py | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/src/scribae/prompts/feedback.py b/src/scribae/prompts/feedback.py index 11ea92a..53abf22 100644 --- a/src/scribae/prompts/feedback.py +++ b/src/scribae/prompts/feedback.py @@ -90,7 +90,7 @@ class FeedbackPromptBundle: [REVIEW SCOPE] Focus: {focus} Allowed categories: {focus_categories}, other - SelectedOutlineRange: {selected_outline} + {sections_under_review_line} Note: Use "other" only for high severity issues that fall outside the focused categories. [FOCUS CATEGORY DEFINITIONS] @@ -117,6 +117,18 @@ def _format_category_definitions(categories: list[str]) -> str: return "\n".join(lines) if lines else "- none" +def _format_sections_under_review(selected_outline: list[str], total_outline: int) -> str: + """Format SectionsUnderReview line, or empty string if all sections are selected.""" + if len(selected_outline) >= total_outline: + # All sections selected - omit the line entirely (redundant with Outline above) + return "" + headings = ", ".join(selected_outline) + return ( + f"SectionsUnderReview: {headings}\n" + "Only evaluate outline_covered and outline_missing for these sections.\n" + ) + + def build_feedback_prompt_bundle(context: FeedbackPromptContext) -> FeedbackPromptBundle: """Render the system and user prompts for the feedback agent.""" focus_categories = context.focus or list(CATEGORY_DEFINITIONS.keys()) @@ -158,6 +170,9 @@ def build_feedback_prompt_bundle(context: FeedbackPromptContext) -> FeedbackProm ensure_ascii=False, ) draft_sections_json = json.dumps(context.selected_sections, indent=2, ensure_ascii=False) + sections_under_review_line = _format_sections_under_review( + context.selected_outline, len(context.brief.outline) + ) prompt = FEEDBACK_USER_PROMPT_TEMPLATE.format( site_name=context.project["site_name"], domain=context.project["domain"], @@ -173,7 +188,7 @@ def build_feedback_prompt_bundle(context: FeedbackPromptContext) -> FeedbackProm faq=" | ".join(faq_entries), focus=focus_label or "all (seo, structure, clarity, style, evidence)", focus_categories=focus_label or "seo, structure, clarity, style, evidence", - selected_outline=", ".join(context.selected_outline) or "(all)", + sections_under_review_line=sections_under_review_line, draft_sections_json=draft_sections_json, note_excerpt=context.note_excerpt or "No source note provided.", schema_json=schema_json, diff --git a/tests/unit/feedback_cli_test.py b/tests/unit/feedback_cli_test.py index b8c40ee..8b9c9dc 100644 --- a/tests/unit/feedback_cli_test.py +++ b/tests/unit/feedback_cli_test.py @@ -153,10 +153,11 @@ def test_feedback_dry_run_prints_prompt(body_path: Path, brief_path: Path) -> No assert "[REQUIRED JSON SCHEMA]" in result.stdout -def test_feedback_section_range_selects_outline( +def test_feedback_section_range_shows_sections_under_review( body_multi_section_path: Path, brief_path: Path, ) -> None: + """When --section is specified, prompt shows SectionsUnderReview with selected headings.""" result = runner.invoke( app, [ @@ -172,7 +173,34 @@ def test_feedback_section_range_selects_outline( ) assert result.exit_code == 0 - assert "SelectedOutlineRange: Introduction to Observability, Logging Foundations" in result.stdout + # Renamed from SelectedOutlineRange to SectionsUnderReview + assert "SectionsUnderReview: Introduction to Observability, Logging Foundations" in result.stdout + # Explicit instruction for brief alignment checking + assert "Only evaluate outline_covered and outline_missing for these sections" in result.stdout + + +def test_feedback_all_sections_omits_sections_under_review( + body_path: Path, + brief_path: Path, +) -> None: + """When no --section is specified (all sections), SectionsUnderReview is omitted.""" + result = runner.invoke( + app, + [ + "feedback", + "--body", + str(body_path), + "--brief", + str(brief_path), + "--dry-run", + ], + ) + + assert result.exit_code == 0 + # SectionsUnderReview should NOT appear when reviewing all sections + assert "SectionsUnderReview:" not in result.stdout + # The old name should also not appear + assert "SelectedOutlineRange:" not in result.stdout def test_feedback_focus_multiple_categories_limits_prompt(