diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 78c67b0..87cff18 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,6 +6,15 @@ repos: args: [ --fix ] - id: ruff-format + - repo: local + hooks: + - id: mypy + name: mypy + entry: uv run mypy + language: system + types: [ python ] + pass_filenames: false + - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: diff --git a/AGENTS.md b/AGENTS.md index 5a94df3..64bcc99 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -11,14 +11,14 @@ Scribae is a CLI tool that transforms local Markdown notes into structured SEO c ## Quick Reference ```bash -uv sync --locked --all-extras --dev # Install dependencies +uv sync --locked --all-extras --dev # Required: install all dependencies including PyTorch uv run scribae --help # Run CLI uv run ruff check # Lint (auto-fix: --fix) uv run mypy # Type check uv run pytest # Run tests ``` -**Important:** Always run tests, mypy, and ruff at the end of your task and fix any issues. +**Important:** The `--all-extras` flag is required for development (PyTorch needed for mypy). Always run tests, mypy, and ruff at the end of your task and fix any issues. ## Project Structure diff --git a/CLAUDE.md b/CLAUDE.md index 83f4227..3c93750 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -11,7 +11,7 @@ Scribae is a CLI tool that transforms local Markdown notes into structured SEO c ## Build & Development Commands ```bash -uv sync --locked --all-extras --dev # Install dependencies (includes PyTorch with CUDA) +uv sync --locked --all-extras --dev # Required: install all dependencies including PyTorch uv run scribae --help # Run CLI uv run ruff check # Lint (auto-fix: --fix) uv run mypy # Type check @@ -25,7 +25,7 @@ For a lighter install (~200MB vs ~2GB), use the CPU-only PyTorch index: uv sync --locked --all-extras --dev --index pytorch-cpu ``` -**Important:** Always run tests, mypy, and ruff at the end of your task and fix any issues. +**Important:** The `--all-extras` flag is required for development. It installs PyTorch which is needed for mypy to pass. Always run tests, mypy, and ruff at the end of your task and fix any issues. ## Architecture diff --git a/README.md b/README.md index 5b73197..e0f7cc6 100644 --- a/README.md +++ b/README.md @@ -226,6 +226,8 @@ Options: ### Setup +The `--all-extras` flag is required for development as it installs PyTorch, which is needed for mypy type checking to pass. + ```bash git clone https://github.com/fmueller/scribae.git cd scribae diff --git a/src/scribae/brief.py b/src/scribae/brief.py index 24fa47e..9e07aaa 100644 --- a/src/scribae/brief.py +++ b/src/scribae/brief.py @@ -36,6 +36,7 @@ "save_prompt_artifacts", ] + class BriefingError(Exception): """Raised when a brief cannot be generated.""" @@ -409,9 +410,7 @@ def _select_idea( if len(ideas.ideas) == 1: return ideas.ideas[0] - raise BriefValidationError( - "Select an idea with --idea (id or 1-based index), or set idea_id in note frontmatter." - ) + raise BriefValidationError("Select an idea with --idea (id or 1-based index), or set idea_id in note frontmatter.") def _metadata_idea_id(metadata: dict[str, Any]) -> str | None: diff --git a/src/scribae/feedback.py b/src/scribae/feedback.py index b9de917..cba81a4 100644 --- a/src/scribae/feedback.py +++ b/src/scribae/feedback.py @@ -19,25 +19,26 @@ from .llm import LLM_OUTPUT_RETRIES, LLM_TIMEOUT_SECONDS, OpenAISettings, apply_optional_settings, make_model from .project import ProjectConfig from .prompts.feedback import FEEDBACK_SYSTEM_PROMPT, FeedbackPromptBundle, build_feedback_prompt_bundle +from .prompts.feedback_categories import CATEGORY_DEFINITIONS # Pattern to match emoji characters across common Unicode ranges _EMOJI_PATTERN = re.compile( "[" - "\U0001F600-\U0001F64F" # emoticons - "\U0001F300-\U0001F5FF" # symbols & pictographs - "\U0001F680-\U0001F6FF" # transport & map symbols - "\U0001F1E0-\U0001F1FF" # flags - "\U00002700-\U000027BF" # dingbats - "\U0001F900-\U0001F9FF" # supplemental symbols & pictographs - "\U0001FA00-\U0001FA6F" # chess symbols, extended-A - "\U0001FA70-\U0001FAFF" # symbols & pictographs extended-A - "\U00002600-\U000026FF" # misc symbols - "\U0001F700-\U0001F77F" # alchemical symbols - "\U0001F780-\U0001F7FF" # geometric shapes extended - "\U0001F800-\U0001F8FF" # supplemental arrows-C - "\U0001F3FB-\U0001F3FF" # skin tone modifiers - "\uFE0F" # variation selector-16 (emoji presentation) - "\u200D" # zero-width joiner (used in combined emojis) + "\U0001f600-\U0001f64f" # emoticons + "\U0001f300-\U0001f5ff" # symbols & pictographs + "\U0001f680-\U0001f6ff" # transport & map symbols + "\U0001f1e0-\U0001f1ff" # flags + "\U00002700-\U000027bf" # dingbats + "\U0001f900-\U0001f9ff" # supplemental symbols & pictographs + "\U0001fa00-\U0001fa6f" # chess symbols, extended-A + "\U0001fa70-\U0001faff" # symbols & pictographs extended-A + "\U00002600-\U000026ff" # misc symbols + "\U0001f700-\U0001f77f" # alchemical symbols + "\U0001f780-\U0001f7ff" # geometric shapes extended + "\U0001f800-\U0001f8ff" # supplemental arrows-C + "\U0001f3fb-\U0001f3ff" # skin tone modifiers + "\ufe0f" # variation selector-16 (emoji presentation) + "\u200d" # zero-width joiner (used in combined emojis) "]+", flags=re.UNICODE, ) @@ -232,13 +233,22 @@ class FeedbackFocus(str): STYLE = "style" EVIDENCE = "evidence" + ALLOWED: frozenset[str] = frozenset(CATEGORY_DEFINITIONS.keys()) + @classmethod - def from_raw(cls, value: str) -> FeedbackFocus: - lowered = value.lower().strip() - allowed = {cls.SEO, cls.STRUCTURE, cls.CLARITY, cls.STYLE, cls.EVIDENCE} - if lowered not in allowed: - raise FeedbackValidationError("--focus must be seo, structure, clarity, style, or evidence.") - return cls(lowered) + def parse_list(cls, value: str) -> list[str]: + parts = [item.strip() for item in value.split(",") if item.strip()] + if not parts: + raise FeedbackValidationError("--focus must include at least one category.") + normalized: list[str] = [] + for part in parts: + lowered = part.lower() + if lowered not in cls.ALLOWED: + allowed_list = ", ".join(sorted(cls.ALLOWED)) + raise FeedbackValidationError(f"--focus must be one of: {allowed_list}.") + if lowered not in normalized: + normalized.append(lowered) + return normalized @dataclass(frozen=True) @@ -263,7 +273,7 @@ class FeedbackContext: brief: SeoBrief project: ProjectConfig note: NoteDetails | None - focus: str | None + focus: list[str] | None language: str selected_outline: list[str] selected_sections: list[BodySection] @@ -281,7 +291,7 @@ def prepare_context( project: ProjectConfig, note_path: Path | None = None, language: str | None = None, - focus: str | None = None, + focus: list[str] | None = None, section_range: tuple[int, int] | None = None, max_body_chars: int = 12000, max_note_chars: int = 6000, @@ -360,9 +370,7 @@ def generate_feedback_report( resolved_settings = OpenAISettings.from_env() llm_agent: Agent[None, FeedbackReport] = ( - agent - if agent is not None - else _create_agent(model_name, temperature=temperature, top_p=top_p, seed=seed) + agent if agent is not None else _create_agent(model_name, temperature=temperature, top_p=top_p, seed=seed) ) _report(reporter, f"Calling model '{model_name}' via {resolved_settings.base_url}") @@ -392,6 +400,8 @@ def generate_feedback_report( except Exception as exc: # pragma: no cover - surfaced to CLI raise FeedbackLLMError(f"LLM request failed: {exc}") from exc + # Remap any out-of-scope categories to "other" + report = _normalize_finding_categories(report, context.focus) return report @@ -437,9 +447,7 @@ def render_markdown(report: FeedbackReport) -> str: if report.findings: for finding in report.findings: location = _format_location(finding.location) - sections.append( - f"- **{finding.severity.upper()}** [{finding.category}] {finding.message}{location}" - ) + sections.append(f"- **{finding.severity.upper()}** [{finding.category}] {finding.message}{location}") else: sections.extend(_render_list([])) sections.append("") @@ -509,7 +517,7 @@ class _FeedbackPromptContext: note_excerpt: str | None project: ProjectConfig language: str - focus: str | None + focus: list[str] | None selected_outline: list[str] selected_sections: list[dict[str, str]] @@ -556,6 +564,38 @@ def _feedback_language_text(report: FeedbackReport) -> str: return "\n".join([issue_text, strength_text, findings, checklist, section_notes]).strip() +def _normalize_finding_categories(report: FeedbackReport, focus: list[str] | None) -> FeedbackReport: + """Remap any finding categories outside the focus scope to 'other'. + + If focus is None (all categories), no remapping is performed. + """ + if focus is None: + return report + + allowed = set(focus) | {"other"} + needs_remap = any(f.category not in allowed for f in report.findings) + if not needs_remap: + return report + + remapped_findings = [ + FeedbackFinding( + severity=f.severity, + category=f.category if f.category in allowed else "other", + message=f.message, + location=f.location, + ) + for f in report.findings + ] + return FeedbackReport( + summary=report.summary, + brief_alignment=report.brief_alignment, + section_notes=report.section_notes, + evidence_gaps=report.evidence_gaps, + findings=remapped_findings, + checklist=report.checklist, + ) + + def _load_body(body_path: Path, *, max_chars: int) -> BodyDocument: try: post = frontmatter.load(body_path) diff --git a/src/scribae/feedback_cli.py b/src/scribae/feedback_cli.py index c5d07b1..c0921c7 100644 --- a/src/scribae/feedback_cli.py +++ b/src/scribae/feedback_cli.py @@ -53,7 +53,7 @@ def feedback_command( focus: str | None = typer.Option( # noqa: B008 None, "--focus", - help="Narrow the review scope: seo|structure|clarity|style|evidence.", + help="Narrow the review scope (comma-separated): seo|structure|clarity|style|evidence.", ), output_format: str = typer.Option( # noqa: B008 FeedbackFormat.MARKDOWN, @@ -187,7 +187,7 @@ def feedback_command( focus_value = None if focus: try: - focus_value = str(FeedbackFocus.from_raw(focus)) + focus_value = FeedbackFocus.parse_list(focus) except FeedbackValidationError as exc: typer.secho(str(exc), err=True, fg=typer.colors.RED) raise typer.Exit(exc.exit_code) from exc diff --git a/src/scribae/idea.py b/src/scribae/idea.py index acad883..c3e59d2 100644 --- a/src/scribae/idea.py +++ b/src/scribae/idea.py @@ -134,9 +134,7 @@ def prepare_context( ) _report(reporter, "Prepared idea-generation prompt.") - return IdeaContext( - note=note, project=project, prompts=prompts, language=language_resolution.language - ) + return IdeaContext(note=note, project=project, prompts=prompts, language=language_resolution.language) def generate_ideas( @@ -216,9 +214,7 @@ def save_prompt_artifacts( prompt_path = destination / f"{stamp}-{slug}-ideas.prompt.txt" note_path = destination / f"{stamp}-note.txt" - prompt_payload = ( - f"SYSTEM PROMPT:\n{context.prompts.system_prompt}\n\nUSER PROMPT:\n{context.prompts.user_prompt}\n" - ) + prompt_payload = f"SYSTEM PROMPT:\n{context.prompts.system_prompt}\n\nUSER PROMPT:\n{context.prompts.user_prompt}\n" prompt_path.write_text(prompt_payload, encoding="utf-8") note_path.write_text(context.note.body, encoding="utf-8") @@ -248,9 +244,7 @@ def _create_agent( def _idea_language_text(ideas: IdeaList) -> str: - return "\n".join( - f"{item.title} {item.description} {item.why}" for item in ideas.ideas - ) + return "\n".join(f"{item.title} {item.description} {item.why}" for item in ideas.ideas) def _invoke_agent(agent: Agent[None, IdeaList], prompt: str, *, timeout_seconds: float) -> IdeaList: diff --git a/src/scribae/language.py b/src/scribae/language.py index fb5f9e5..9466d94 100644 --- a/src/scribae/language.py +++ b/src/scribae/language.py @@ -106,8 +106,7 @@ def ensure_language_output( def _append_language_correction(prompt: str, expected_language: str) -> str: correction = ( - "\n\n[LANGUAGE CORRECTION]\n" - f"Regenerate the full response strictly in language code '{expected_language}'." + f"\n\n[LANGUAGE CORRECTION]\nRegenerate the full response strictly in language code '{expected_language}'." ) return f"{prompt}{correction}" diff --git a/src/scribae/llm.py b/src/scribae/llm.py index 6fb8fb9..2f771fb 100644 --- a/src/scribae/llm.py +++ b/src/scribae/llm.py @@ -35,8 +35,9 @@ def configure_environment(self) -> None: os.environ["OPENAI_API_KEY"] = self.api_key -def make_model(model_name: str, *, model_settings: ModelSettings, - settings: OpenAISettings | None = None) -> OpenAIChatModel: +def make_model( + model_name: str, *, model_settings: ModelSettings, settings: OpenAISettings | None = None +) -> OpenAIChatModel: """Return an OpenAI-compatible model configured for local/remote endpoints.""" resolved_settings = settings or OpenAISettings.from_env() provider = OpenAIProvider(base_url=resolved_settings.base_url, api_key=resolved_settings.api_key) diff --git a/src/scribae/main.py b/src/scribae/main.py index 7b42e03..f1657d2 100644 --- a/src/scribae/main.py +++ b/src/scribae/main.py @@ -25,6 +25,7 @@ def app_callback() -> None: """Root Scribae CLI callback.""" + app.command("idea", help="Brainstorm article ideas from a note with project-aware guidance.")(idea_command) app.command( "brief", diff --git a/src/scribae/meta.py b/src/scribae/meta.py index bd23031..27cc116 100644 --- a/src/scribae/meta.py +++ b/src/scribae/meta.py @@ -253,8 +253,7 @@ def generate_metadata( _report( reporter, - f"Calling model '{model_name}' via {resolved_settings.base_url}" - + (f" (reason: {reason})" if reason else ""), + f"Calling model '{model_name}' via {resolved_settings.base_url}" + (f" (reason: {reason})" if reason else ""), ) try: diff --git a/src/scribae/project.py b/src/scribae/project.py index fd19bbe..b3fddde 100644 --- a/src/scribae/project.py +++ b/src/scribae/project.py @@ -86,9 +86,7 @@ def _resolve_project_path(name: str, *, base_dir: Path | None = None) -> Path: if resolved.exists(): return resolved - raise FileNotFoundError( - f"Project config {search_dir / f'{name}.yaml'} or {search_dir / f'{name}.yml'} not found" - ) + raise FileNotFoundError(f"Project config {search_dir / f'{name}.yaml'} or {search_dir / f'{name}.yml'} not found") def _merge_with_defaults(data: Mapping[str, Any]) -> ProjectConfig: diff --git a/src/scribae/prompts/feedback.py b/src/scribae/prompts/feedback.py index 644b29b..53abf22 100644 --- a/src/scribae/prompts/feedback.py +++ b/src/scribae/prompts/feedback.py @@ -8,6 +8,8 @@ from scribae.brief import SeoBrief from scribae.project import ProjectConfig +from .feedback_categories import CATEGORY_DEFINITIONS + class FeedbackPromptBody(Protocol): @property @@ -31,7 +33,7 @@ def project(self) -> ProjectConfig: ... def language(self) -> str: ... @property - def focus(self) -> str | None: ... + def focus(self) -> list[str] | None: ... @property def selected_outline(self) -> list[str]: ... @@ -61,30 +63,9 @@ class FeedbackPromptBundle: - Be conservative about facts. If a claim is not supported by the provided note, flag it as needing evidence. - If a field is empty, output an empty array ([]) or empty string, not null. - Use consistent severity labels: low | medium | high. - - Use consistent categories (definitions below). + - ONLY use category values listed in the schema. DO NOT use any other category values. + - Use "other" for critical issues outside the focus scope. - Do not use emojis or special symbols in the output. - - Categories: - - seo: keyword usage and density throughout content; placement in headings and early paragraphs; - primary/secondary keyword balance; search intent alignment; internal linking opportunities; - content depth for keyword competitiveness - - structure: heading hierarchy; section organization and alignment with brief outline; logical flow - and transitions between sections; paragraph length; intro/conclusion quality; scannability - (appropriate use of lists, subheadings for long sections) - - clarity: confusing sentences; ambiguous references; unexplained jargon or acronyms; readability; - sentence length variation; passive voice overuse; complex nested clauses; clear topic sentences - - style: tone consistency; voice; wordiness and filler phrases; audience appropriateness; - repetitive phrasing or word choices; clichés; formality level matching project tone; sentence - variety - - evidence: unsupported claims; missing citations; statements needing fact-checking; statistics - without sources; vague attributions ("studies show", "experts say"); claims contradicting the - source note; outdated information - - other: issues not fitting the above categories - - Focus behavior: - - When Focus is "all", review the draft across all categories with balanced attention. - - When Focus is a specific category, prioritize findings in that category but still report - critical (high severity) issues from other categories. """ ).strip() @@ -108,7 +89,12 @@ class FeedbackPromptBundle: [REVIEW SCOPE] Focus: {focus} - SelectedOutlineRange: {selected_outline} + Allowed categories: {focus_categories}, other + {sections_under_review_line} + Note: Use "other" only for high severity issues that fall outside the focused categories. + + [FOCUS CATEGORY DEFINITIONS] + {category_definitions} [DRAFT SECTIONS] The following sections are extracted from the draft for review: @@ -126,10 +112,31 @@ class FeedbackPromptBundle: ).strip() +def _format_category_definitions(categories: list[str]) -> str: + lines = [f"- {category}: {CATEGORY_DEFINITIONS[category]}" for category in categories] + return "\n".join(lines) if lines else "- none" + + +def _format_sections_under_review(selected_outline: list[str], total_outline: int) -> str: + """Format SectionsUnderReview line, or empty string if all sections are selected.""" + if len(selected_outline) >= total_outline: + # All sections selected - omit the line entirely (redundant with Outline above) + return "" + headings = ", ".join(selected_outline) + return ( + f"SectionsUnderReview: {headings}\n" + "Only evaluate outline_covered and outline_missing for these sections.\n" + ) + + def build_feedback_prompt_bundle(context: FeedbackPromptContext) -> FeedbackPromptBundle: """Render the system and user prompts for the feedback agent.""" + focus_categories = context.focus or list(CATEGORY_DEFINITIONS.keys()) + focus_label = ", ".join(focus_categories) project_keywords = ", ".join(context.project.get("keywords") or []) or "none" faq_entries = [f"{item.question} — {item.answer}" for item in context.brief.faq] + # Build category enum for schema: include selected categories + "other" for critical overrides + category_enum = "|".join(focus_categories + ["other"]) schema_json = json.dumps( { "summary": {"issues": ["string"], "strengths": ["string"]}, @@ -152,7 +159,7 @@ def build_feedback_prompt_bundle(context: FeedbackPromptContext) -> FeedbackProm "findings": [ { "severity": "low|medium|high", - "category": "seo|structure|clarity|style|evidence|other", + "category": category_enum, "message": "string", "location": {"heading": "string", "paragraph_index": 1}, } @@ -163,6 +170,9 @@ def build_feedback_prompt_bundle(context: FeedbackPromptContext) -> FeedbackProm ensure_ascii=False, ) draft_sections_json = json.dumps(context.selected_sections, indent=2, ensure_ascii=False) + sections_under_review_line = _format_sections_under_review( + context.selected_outline, len(context.brief.outline) + ) prompt = FEEDBACK_USER_PROMPT_TEMPLATE.format( site_name=context.project["site_name"], domain=context.project["domain"], @@ -176,11 +186,13 @@ def build_feedback_prompt_bundle(context: FeedbackPromptContext) -> FeedbackProm search_intent=context.brief.search_intent, outline=" | ".join(context.brief.outline), faq=" | ".join(faq_entries), - focus=context.focus or "all (seo, structure, clarity, style, evidence)", - selected_outline=", ".join(context.selected_outline) or "(all)", + focus=focus_label or "all (seo, structure, clarity, style, evidence)", + focus_categories=focus_label or "seo, structure, clarity, style, evidence", + sections_under_review_line=sections_under_review_line, draft_sections_json=draft_sections_json, note_excerpt=context.note_excerpt or "No source note provided.", schema_json=schema_json, + category_definitions=_format_category_definitions(focus_categories), ) return FeedbackPromptBundle(system_prompt=FEEDBACK_SYSTEM_PROMPT, user_prompt=prompt) diff --git a/src/scribae/prompts/feedback_categories.py b/src/scribae/prompts/feedback_categories.py new file mode 100644 index 0000000..0984255 --- /dev/null +++ b/src/scribae/prompts/feedback_categories.py @@ -0,0 +1,30 @@ +"""Feedback category definitions shared between feedback.py and prompts/feedback.py.""" + +from __future__ import annotations + +CATEGORY_DEFINITIONS: dict[str, str] = { + "seo": ( + "keyword usage and density throughout content; placement in headings and early paragraphs; " + "primary/secondary keyword balance; search intent alignment; internal linking opportunities; " + "content depth for keyword competitiveness" + ), + "structure": ( + "heading hierarchy; section organization and alignment with brief outline; logical flow " + "and transitions between sections; paragraph length; intro/conclusion quality; scannability " + "(appropriate use of lists, subheadings for long sections)" + ), + "clarity": ( + "confusing sentences; ambiguous references; unexplained jargon or acronyms; readability; " + "sentence length variation; passive voice overuse; complex nested clauses; clear topic sentences" + ), + "style": ( + "tone consistency; voice; wordiness and filler phrases; audience appropriateness; " + "repetitive phrasing or word choices; clichés; formality level matching project tone; sentence " + "variety" + ), + "evidence": ( + "unsupported claims; missing citations; statements needing fact-checking; statistics " + 'without sources; vague attributions ("studies show", "experts say"); claims contradicting the ' + "source note; outdated information" + ), +} diff --git a/src/scribae/prompts/meta.py b/src/scribae/prompts/meta.py index 42ef414..76be9ab 100644 --- a/src/scribae/prompts/meta.py +++ b/src/scribae/prompts/meta.py @@ -128,7 +128,7 @@ def render_brief_context(brief: SeoBrief | None) -> str: f"""\ BriefTitle: {brief.title} PrimaryKeyword: {brief.primary_keyword} - SecondaryKeywords: {', '.join(brief.secondary_keywords)} + SecondaryKeywords: {", ".join(brief.secondary_keywords)} PlannedSearchIntent: {brief.search_intent} PlannedMetaDescription: {brief.meta_description} """ diff --git a/src/scribae/prompts/write.py b/src/scribae/prompts/write.py index 66b8095..9f244de 100644 --- a/src/scribae/prompts/write.py +++ b/src/scribae/prompts/write.py @@ -84,6 +84,7 @@ def build_user_prompt( style_rules=style_rules_text, ) + def build_faq_prompt( *, project: ProjectConfig, diff --git a/src/scribae/translate/model_registry.py b/src/scribae/translate/model_registry.py index 37aae41..9dcc772 100644 --- a/src/scribae/translate/model_registry.py +++ b/src/scribae/translate/model_registry.py @@ -13,9 +13,7 @@ "pt": "por_Latn", } -_NLLB_CODE_ALIASES = { - value.lower(): value for value in NLLB_LANGUAGE_MAP.values() -} | { +_NLLB_CODE_ALIASES = {value.lower(): value for value in NLLB_LANGUAGE_MAP.values()} | { value.lower().replace("_", "-"): value for value in NLLB_LANGUAGE_MAP.values() } @@ -85,9 +83,7 @@ def nllb_lang_code(self, lang: str) -> str: if mapped: return mapped supported = ", ".join(sorted(NLLB_LANGUAGE_MAP)) - raise ValueError( - f"Unsupported language code '{lang}' for NLLB fallback. Supported ISO codes: {supported}." - ) + raise ValueError(f"Unsupported language code '{lang}' for NLLB fallback. Supported ISO codes: {supported}.") def nllb_spec(self) -> ModelSpec: return ModelSpec( diff --git a/src/scribae/translate/postedit.py b/src/scribae/translate/postedit.py index f905ef6..fb5141e 100644 --- a/src/scribae/translate/postedit.py +++ b/src/scribae/translate/postedit.py @@ -79,9 +79,7 @@ def post_edit( trimmed_source, trimmed_mt = self._trim_inputs(source_text, mt_draft) prompt = self._build_prompt(trimmed_source, trimmed_mt, cfg, protected.placeholders.keys(), strict=strict) if self.max_chars is not None and len(prompt) > self.max_chars: - raise PostEditAborted( - f"post-edit prompt length {len(prompt)} exceeds limit of {self.max_chars} characters" - ) + raise PostEditAborted(f"post-edit prompt length {len(prompt)} exceeds limit of {self.max_chars} characters") try: result = self._invoke(prompt, protected.placeholders.keys(), mt_draft, expected_lang=cfg.target_lang) except PostEditAborted: @@ -337,9 +335,7 @@ def _build_prompt( "You are a post-editor improving a machine translation with minimal edits.\n" f"Source language: {cfg.source_lang}; Target language: {cfg.target_lang}.\n" f"Tone: register={tone.register}, audience={tone.audience}.\n" - "[CONSTRAINTS]\n" - + "\n".join(f"- {line}" for line in constraints) - + "\n[GLOSSARY]\n" + "[CONSTRAINTS]\n" + "\n".join(f"- {line}" for line in constraints) + "\n[GLOSSARY]\n" f"{glossary_section}\n" "[INPUT] SOURCE TEXT:\n" f"{source_text}\n\n" @@ -377,10 +373,10 @@ def _restore_markdown_structure(self, mt_draft: str, edited: str) -> str: # Patterns for Markdown prefixes we want to restore # Blockquote pattern: one or more '>' possibly with spaces - blockquote_pattern = re.compile(r'^((?:>\s*)+)') + blockquote_pattern = re.compile(r"^((?:>\s*)+)") # List marker pattern: optional whitespace + marker + space # Supports: -, *, +, and numbered lists like 1., 2., 10. - list_marker_pattern = re.compile(r'^(\s*(?:[-*+]|\d+\.)\s+)') + list_marker_pattern = re.compile(r"^(\s*(?:[-*+]|\d+\.)\s+)") restored_lines = [] for i, edited_line in enumerate(edited_lines): @@ -413,7 +409,7 @@ def _restore_markdown_structure(self, mt_draft: str, edited: str) -> str: restored_lines.append(restored_line) - return '\n'.join(restored_lines) + return "\n".join(restored_lines) def _apply_glossary(self, text: str, glossary: dict[str, str]) -> str: translation = text diff --git a/src/scribae/translate_cli.py b/src/scribae/translate_cli.py index d1c5c8f..f17a58c 100644 --- a/src/scribae/translate_cli.py +++ b/src/scribae/translate_cli.py @@ -92,9 +92,7 @@ def _debug_path(base: Path) -> Path: def _validate_language_code(value: str, *, label: str) -> None: cleaned = value.strip() if not cleaned or not _LANGUAGE_CODE_RE.fullmatch(cleaned): - raise typer.BadParameter( - f"{label} must be a language code like en or eng_Latn; received '{value}'." - ) + raise typer.BadParameter(f"{label} must be a language code like en or eng_Latn; received '{value}'.") @translate_app.command() @@ -102,10 +100,7 @@ def translate( src: str | None = typer.Option( # noqa: B008 None, "--src", - help=( - "Source language code, e.g. en or eng_Latn (NLLB). " - "Required unless provided via --project." - ), + help=("Source language code, e.g. en or eng_Latn (NLLB). Required unless provided via --project."), ), tgt: str = typer.Option( # noqa: B008 ..., diff --git a/tests/unit/feedback_cli_test.py b/tests/unit/feedback_cli_test.py index ece11a9..8b9c9dc 100644 --- a/tests/unit/feedback_cli_test.py +++ b/tests/unit/feedback_cli_test.py @@ -2,6 +2,7 @@ import json from pathlib import Path +from typing import Any, cast import pytest from typer.testing import CliRunner @@ -54,9 +55,7 @@ def __init__(self) -> None: faq_covered=[], faq_missing=[], ), - section_notes=[ - SectionNote(heading="Introduction to Observability", notes=["Add a concrete example."]) - ], + section_notes=[SectionNote(heading="Introduction to Observability", notes=["Add a concrete example."])], evidence_gaps=["Add a source for claims about monitoring cadence."], findings=[ FeedbackFinding( @@ -154,10 +153,11 @@ def test_feedback_dry_run_prints_prompt(body_path: Path, brief_path: Path) -> No assert "[REQUIRED JSON SCHEMA]" in result.stdout -def test_feedback_section_range_selects_outline( +def test_feedback_section_range_shows_sections_under_review( body_multi_section_path: Path, brief_path: Path, ) -> None: + """When --section is specified, prompt shows SectionsUnderReview with selected headings.""" result = runner.invoke( app, [ @@ -173,7 +173,62 @@ def test_feedback_section_range_selects_outline( ) assert result.exit_code == 0 - assert "SelectedOutlineRange: Introduction to Observability, Logging Foundations" in result.stdout + # Renamed from SelectedOutlineRange to SectionsUnderReview + assert "SectionsUnderReview: Introduction to Observability, Logging Foundations" in result.stdout + # Explicit instruction for brief alignment checking + assert "Only evaluate outline_covered and outline_missing for these sections" in result.stdout + + +def test_feedback_all_sections_omits_sections_under_review( + body_path: Path, + brief_path: Path, +) -> None: + """When no --section is specified (all sections), SectionsUnderReview is omitted.""" + result = runner.invoke( + app, + [ + "feedback", + "--body", + str(body_path), + "--brief", + str(brief_path), + "--dry-run", + ], + ) + + assert result.exit_code == 0 + # SectionsUnderReview should NOT appear when reviewing all sections + assert "SectionsUnderReview:" not in result.stdout + # The old name should also not appear + assert "SelectedOutlineRange:" not in result.stdout + + +def test_feedback_focus_multiple_categories_limits_prompt( + body_multi_section_path: Path, + brief_path: Path, +) -> None: + result = runner.invoke( + app, + [ + "feedback", + "--body", + str(body_multi_section_path), + "--brief", + str(brief_path), + "--focus", + "seo, clarity", + "--dry-run", + ], + ) + + assert result.exit_code == 0 + assert "Focus: seo, clarity" in result.stdout + assert "Allowed categories: seo, clarity, other" in result.stdout + assert 'Use "other" only for high severity issues' in result.stdout + # Verify category definitions are limited to selected categories + assert "- structure:" not in result.stdout + # Verify JSON schema category enum only lists selected categories + "other" for critical overrides + assert '"category": "seo|clarity|other"' in result.stdout def test_feedback_passes_seed_and_top_p( @@ -267,7 +322,7 @@ def test_strips_emoji_only_string(self) -> None: assert strip_emojis("🚀✅📚") == "" def test_preserves_special_characters(self) -> None: - assert strip_emojis("Use \"quotes\" and (parens)") == "Use \"quotes\" and (parens)" + assert strip_emojis('Use "quotes" and (parens)') == 'Use "quotes" and (parens)' assert strip_emojis("Items: a, b, c") == "Items: a, b, c" def test_strips_flag_emojis(self) -> None: @@ -344,3 +399,59 @@ def test_findings_message_emojis_stripped(self) -> None: checklist=[], ) assert report.findings[0].message == "Claim needs citation" + + +class TestNormalizeFindingCategories: + def _make_report(self, categories: list[str]) -> FeedbackReport: + return FeedbackReport( + summary=FeedbackSummary(issues=[], strengths=[]), + brief_alignment=BriefAlignment( + intent="Matches intent", + outline_covered=[], + outline_missing=[], + keywords_covered=[], + keywords_missing=[], + faq_covered=[], + faq_missing=[], + ), + section_notes=[], + evidence_gaps=[], + findings=[ + FeedbackFinding( + severity="medium", + category=cast(Any, cat), + message=f"Issue in {cat}", + ) + for cat in categories + ], + checklist=[], + ) + + def test_no_focus_returns_unchanged(self) -> None: + from scribae.feedback import _normalize_finding_categories + + report = self._make_report(["seo", "structure", "clarity"]) + result = _normalize_finding_categories(report, focus=None) + assert result is report + + def test_all_in_scope_returns_unchanged(self) -> None: + from scribae.feedback import _normalize_finding_categories + + report = self._make_report(["seo", "clarity", "other"]) + result = _normalize_finding_categories(report, focus=["seo", "clarity"]) + assert [f.category for f in result.findings] == ["seo", "clarity", "other"] + + def test_out_of_scope_remapped_to_other(self) -> None: + from scribae.feedback import _normalize_finding_categories + + report = self._make_report(["seo", "structure", "evidence"]) + result = _normalize_finding_categories(report, focus=["seo", "clarity"]) + assert [f.category for f in result.findings] == ["seo", "other", "other"] + + def test_preserves_other_fields(self) -> None: + from scribae.feedback import _normalize_finding_categories + + report = self._make_report(["structure"]) + result = _normalize_finding_categories(report, focus=["seo"]) + assert result.findings[0].severity == "medium" + assert result.findings[0].message == "Issue in structure" diff --git a/tests/unit/language_test.py b/tests/unit/language_test.py index 41292a3..2f0f759 100644 --- a/tests/unit/language_test.py +++ b/tests/unit/language_test.py @@ -14,16 +14,12 @@ def test_resolve_language_prefers_flag_and_project_over_note() -> None: metadata = {"lang": "fr"} - resolved = resolve_output_language( - flag_language="de", project_language="es", metadata=metadata, text="bonjour" - ) + resolved = resolve_output_language(flag_language="de", project_language="es", metadata=metadata, text="bonjour") assert resolved.language == "de" assert resolved.source == "flag" - project_first = resolve_output_language( - flag_language=None, project_language="es", metadata=metadata, text="hola" - ) + project_first = resolve_output_language(flag_language=None, project_language="es", metadata=metadata, text="hola") assert project_first.language == "es" assert project_first.source == "project" @@ -32,9 +28,7 @@ def test_resolve_language_prefers_flag_and_project_over_note() -> None: def test_resolve_language_uses_frontmatter_before_detection() -> None: metadata = {"language": "pt-BR"} - resolved = resolve_output_language( - flag_language=None, project_language="", metadata=metadata, text="conteudo" - ) + resolved = resolve_output_language(flag_language=None, project_language="", metadata=metadata, text="conteudo") assert resolved.language == "pt-BR" assert resolved.source == "frontmatter" diff --git a/tests/unit/translate_cli_test.py b/tests/unit/translate_cli_test.py index 890a63e..737b1f4 100644 --- a/tests/unit/translate_cli_test.py +++ b/tests/unit/translate_cli_test.py @@ -95,9 +95,7 @@ def translate(self, text: str, cfg: Any) -> str: def _patch_loader(monkeypatch: pytest.MonkeyPatch, projects_dir: Path) -> None: loader = project_module.load_project - monkeypatch.setattr( - "scribae.translate_cli.load_project", lambda name: loader(name, base_dir=projects_dir) - ) + monkeypatch.setattr("scribae.translate_cli.load_project", lambda name: loader(name, base_dir=projects_dir)) def test_translate_requires_src_without_project( @@ -129,7 +127,7 @@ def test_translate_uses_project_defaults( projects_dir = tmp_path / "projects" projects_dir.mkdir() (projects_dir / "demo.yaml").write_text( - "language: fr\n" "tone: academic\n" "audience: researchers\n", + "language: fr\ntone: academic\naudience: researchers\n", encoding="utf-8", ) _patch_loader(monkeypatch, projects_dir) @@ -164,7 +162,7 @@ def test_translate_flags_override_project_defaults( projects_dir = tmp_path / "projects" projects_dir.mkdir() (projects_dir / "demo.yaml").write_text( - "language: fr\n" "tone: academic\n" "audience: researchers\n", + "language: fr\ntone: academic\naudience: researchers\n", encoding="utf-8", ) _patch_loader(monkeypatch, projects_dir)