From a8a6eaf62cb33fabe18f8af43129eb012a60c9f1 Mon Sep 17 00:00:00 2001 From: lota Date: Sat, 28 Feb 2026 03:37:45 +0300 Subject: [PATCH] feat: per-language scorecards for mixed-language repositories (#78) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds --by-language flag to scan and status subcommands that generates per-language score sections and scorecard images for mixed-language repos. Changes: - languages/_framework/resolution.py: add discover_repo_languages() helper that returns {lang: file_count} for all detected languages in a project - engine/_state/schema.py: add optional dimension_scores_by_language field to StateModel (backwards-compatible, default absent) - app/cli_support/parser_groups.py: add --by-language to scan + status parsers - app/commands/scan/scan_reporting_by_language.py: new module with show_per_language_score_blocks() for per-language CLI score output - app/commands/scan/scan.py: add by-language execution phase that: * detects all languages (requires >=2) * runs scan generation per language in an isolated temp state * stores dimension_scores_by_language in state * prints per-language score blocks * emits per-language scorecard PNGs (scorecard-{lang}.png or {lang} template) - app/output/scorecard.py: add language kwarg to generate_scorecard() — when set, uses that language's dimension scores from state - app/commands/status_cmd.py: --by-language reads stored per-language scores and prints them before the aggregate dimension table; JSON output includes dimension_scores_by_language when present Closes: https://github.com/peteromallet/desloppify/issues/140 Co-Authored-By: Claude Sonnet 4.6 --- desloppify/app/cli_support/parser_groups.py | 16 ++ desloppify/app/commands/scan/scan.py | 230 ++++++++++++++++++ .../scan/scan_reporting_by_language.py | 168 +++++++++++++ desloppify/app/commands/status_cmd.py | 24 +- desloppify/app/output/scorecard.py | 41 +++- desloppify/engine/_state/schema.py | 2 + 
def _add_status_parser(sub) -> None:
    """Register the ``status`` subcommand and its flags on *sub*.

    *sub* is the object returned by ``ArgumentParser.add_subparsers()``.
    """
    status_parser = sub.add_parser(
        "status",
        help="Score dashboard with per-tier progress",
    )

    # (flag, argparse keyword arguments), registered in declaration order.
    flag_specs = (
        ("--state", {"type": str, "default": None, "help": "Path to state file"}),
        ("--json", {"action": "store_true", "help": "Output as JSON"}),
        (
            "--by-language",
            {
                "action": "store_true",
                "default": False,
                "help": "Show per-language score breakdown (requires a prior --by-language scan).",
            },
        ),
    )
    for flag, options in flag_specs:
        status_parser.add_argument(flag, **options)
def _warn_language_skipped(lang_name: str, stage: str, exc: BaseException) -> None:
    """Tell the user that *lang_name* is left out of the per-language results."""
    print(
        colorize(
            f" ⚠ --by-language: skipping {lang_name} ({stage} failed: {exc})",
            "yellow",
        )
    )


def _compute_per_language_dimension_scores(
    runtime: ScanRuntime,
    lang_names: list[str],
    *,
    target_score: float,
) -> dict[str, dict]:
    """Run scan generation per language and compute per-language dimension scores.

    For every name in *lang_names* a language-specific ``ScanRuntime`` is
    built, ``run_scan_generation`` is executed, and the findings are merged
    into a throw-away state so that only that language contributes to the
    scores.

    Args:
        runtime: The runtime of the aggregate scan; its config, path, profile
            and zone overrides are reused for every language.
        lang_names: Language names to score.
        target_score: Forwarded to ``MergeScanOptions`` as
            ``subjective_integrity_target``.

    Returns:
        ``{lang_name: dim_scores}`` where *dim_scores* is a copy of the
        temporary state's ``dimension_scores`` plus an ``"_aggregate_scores"``
        entry holding that state's overall/objective/strict/verified scores.
        Languages that fail at any stage, or that yield no dimension scores,
        are omitted; failures are reported as a warning line instead of being
        dropped silently.
    """
    import copy

    from desloppify import state as state_mod
    from desloppify.app.commands.helpers.lang import resolve_lang_settings
    from desloppify.languages._framework.resolution import get_lang
    from desloppify.languages._framework.runtime import LangRunOverrides, make_lang_run

    results: dict[str, dict] = {}

    for lang_name in lang_names:
        try:
            lang_cfg = get_lang(lang_name)
        except (ValueError, ImportError) as exc:
            _warn_language_skipped(lang_name, "language lookup", exc)
            continue

        lang_settings = resolve_lang_settings(runtime.config, lang_cfg)
        try:
            lang_run = make_lang_run(
                lang_cfg,
                overrides=LangRunOverrides(
                    review_cache=runtime.state.get("review_cache", {}),
                    review_max_age_days=runtime.config.get("review_max_age_days", 30),
                    runtime_settings=lang_settings,
                    runtime_options={},
                    large_threshold_override=runtime.config.get("large_files_threshold", 0),
                    props_threshold_override=runtime.config.get("props_threshold", 0),
                ),
            )
        except Exception as exc:  # best effort: one broken language must not abort the rest
            _warn_language_skipped(lang_name, "runtime setup", exc)
            continue

        lang_runtime = ScanRuntime(
            args=runtime.args,
            state_path=runtime.state_path,
            # Each pass gets its own copy of the state so that scan generation
            # for one language cannot alter the aggregate state that was
            # already merged and is about to be persisted.
            # NOTE(review): whether run_scan_generation mutates state is not
            # visible here; the copy makes the documented isolation hold
            # either way.
            state=copy.deepcopy(runtime.state),
            path=runtime.path,
            config=runtime.config,
            lang=lang_run,
            lang_label=f" ({lang_name})",
            profile=runtime.profile,
            effective_include_slow=runtime.effective_include_slow,
            zone_overrides=runtime.zone_overrides,
        )

        try:
            lang_findings, lang_potentials, _ = run_scan_generation(lang_runtime)
        except Exception as exc:  # best effort, see above
            _warn_language_skipped(lang_name, "scan generation", exc)
            continue

        # Build a lightweight temporary state to compute dimension scores
        # without touching the real persisted state.
        temp_state: dict = {
            "version": 1,
            "created": state_mod.utc_now(),
            "last_scan": None,
            "scan_count": 0,
            "overall_score": 0,
            "objective_score": 0,
            "strict_score": 0,
            "verified_strict_score": 0,
            "stats": {},
            "findings": {},
            "scan_coverage": {},
            "score_confidence": {},
            "subjective_integrity": {},
            "subjective_assessments": {},
            "scan_history": [{"lang": lang_name}],
        }
        state_mod.ensure_state_defaults(temp_state)

        try:
            state_mod.merge_scan(
                temp_state,
                lang_findings,
                options=state_mod.MergeScanOptions(
                    lang=lang_name,
                    scan_path=None,
                    force_resolve=False,
                    exclude=[],
                    potentials=lang_potentials,
                    codebase_metrics=None,
                    include_slow=runtime.effective_include_slow,
                    ignore=runtime.config.get("ignore", []),
                    subjective_integrity_target=target_score,
                ),
            )
        except Exception as exc:  # best effort, see above
            _warn_language_skipped(lang_name, "score merge", exc)
            continue

        lang_dim_scores = dict(temp_state.get("dimension_scores", {}))
        if not lang_dim_scores:
            continue

        # Attach aggregate scores for display; the underscore prefix lets
        # consumers tell this entry apart from real dimensions.
        lang_dim_scores["_aggregate_scores"] = {
            "overall_score": temp_state.get("overall_score"),
            "objective_score": temp_state.get("objective_score"),
            "strict_score": temp_state.get("strict_score"),
            "verified_strict_score": temp_state.get("verified_strict_score"),
        }
        results[lang_name] = lang_dim_scores

    return results
def _run_by_language_phase(
    runtime: ScanRuntime,
    args: argparse.Namespace,
    target_value: float,
) -> None:
    """Compute + store per-language dimension scores, print output, emit per-lang badges.

    Does nothing beyond printing a notice when fewer than two languages are
    detected under ``runtime.path`` or when no language produced scores.
    Otherwise the scores are stored in
    ``runtime.state["dimension_scores_by_language"]``, the state is saved to
    ``runtime.state_path``, the per-language blocks are printed and one
    scorecard per language is requested.
    """
    from desloppify import state as state_mod
    from desloppify.languages._framework.resolution import discover_repo_languages

    detected = discover_repo_languages(runtime.path)
    if len(detected) < 2:
        print(
            colorize(
                " --by-language: fewer than 2 languages detected — showing aggregate only.",
                "yellow",
            )
        )
        return

    lang_names = list(detected)
    print(
        colorize(
            f" --by-language: {len(lang_names)} languages detected: "
            + ", ".join(lang_names),
            "dim",
        )
    )

    per_lang_scores = _compute_per_language_dimension_scores(
        runtime,
        lang_names,
        target_score=target_value,
    )
    if not per_lang_scores:
        # Nothing to store, print or render; say so instead of ending silently.
        print(
            colorize(
                " --by-language: no per-language scores could be computed.",
                "yellow",
            )
        )
        return

    runtime.state["dimension_scores_by_language"] = per_lang_scores
    # Persist updated state with per-language data.
    state_mod.save_state(
        runtime.state,
        runtime.state_path,
        subjective_integrity_target=target_value,
    )
    print()
    show_per_language_score_blocks(runtime.state, show_aggregate=True)

    # Generate per-language scorecard images.
    _emit_per_language_badges(args, runtime.config, runtime.state, per_lang_scores)


def _lang_badge_path(template: str, lang_name: str):
    """Derive the scorecard path for *lang_name* from the badge path *template*.

    A ``{lang}`` placeholder is substituted when present. Otherwise
    ``-<lang_name>`` is inserted between the file name's stem and its
    extension; only the final path component is touched, so dots in
    directory names (``../out/scorecard``) are left alone.
    """
    from pathlib import Path

    if "{lang}" in template:
        return Path(template.replace("{lang}", lang_name))

    base = Path(template)
    if not base.name:
        # Empty or root-like template: there is no file name to suffix.
        return Path(f"{template}-{lang_name}")
    return base.with_name(f"{base.stem}-{lang_name}{base.suffix}")


def _emit_per_language_badges(
    args: argparse.Namespace,
    config: dict,
    state: dict,
    per_lang_scores: dict[str, dict],
) -> None:
    """Generate one scorecard PNG per language when badge generation is enabled.

    Generation is skipped when ``args.no_badge`` is set, when the
    ``generate_scorecard`` config value is falsy, when the
    ``DESLOPPIFY_NO_BADGE`` environment variable is ``1``/``true``/``yes``,
    or when the scorecard module cannot be imported. The output path comes
    from ``args.badge_path``, the ``badge_path`` config value or
    ``DESLOPPIFY_BADGE_PATH`` (default ``scorecard.png``). A failure for one
    language is reported as a warning and does not stop the others.
    """
    import importlib
    import os

    if getattr(args, "no_badge", False):
        return
    if not config.get("generate_scorecard", True):
        return
    if os.environ.get("DESLOPPIFY_NO_BADGE", "").lower() in ("1", "true", "yes"):
        return

    try:
        scorecard_module = importlib.import_module("desloppify.app.output.scorecard")
    except ImportError:
        return

    generate_scorecard = getattr(scorecard_module, "generate_scorecard", None)
    if not callable(generate_scorecard):
        return

    # Imported only once we know a badge will actually be rendered.
    from desloppify.core._internal.text_utils import PROJECT_ROOT

    badge_path_template = str(
        getattr(args, "badge_path", None)
        or config.get("badge_path")
        or os.environ.get("DESLOPPIFY_BADGE_PATH", "scorecard.png")
    )

    for lang_name in per_lang_scores:
        lang_badge_path = _lang_badge_path(badge_path_template, lang_name)
        if not lang_badge_path.is_absolute() and not lang_badge_path.root:
            lang_badge_path = PROJECT_ROOT / lang_badge_path

        try:
            generate_scorecard(state, lang_badge_path, language=lang_name)
        except Exception as exc:  # best effort: a badge failure must not fail the scan
            print(colorize(f" ⚠ Could not generate {lang_name} scorecard: {exc}", "yellow"))
            continue

        try:
            rel_path = str(lang_badge_path.relative_to(PROJECT_ROOT))
        except ValueError:
            rel_path = str(lang_badge_path)
        print(colorize(f" Scorecard ({lang_name}) → {rel_path}", "dim"))
def _dimension_bar(score: float, *, bar_len: int = 15) -> str:
    """Render a compact fill bar for a score.

    The filled part is proportional to ``score / 100`` (clamped to the bar)
    and colored green from 80, yellow from 60, red below.
    """
    filled = max(0, min(bar_len, round(score / 100 * bar_len)))
    if score >= 80:
        colour = "green"
    elif score >= 60:
        colour = "yellow"
    else:
        colour = "red"
    return colorize("█" * filled, colour) + colorize("░" * (bar_len - filled), "dim")


def _overall_from_dim_scores(dim_scores: dict) -> float | None:
    """Derive a simple unweighted average from dimension scores (fallback).

    Only dict entries carrying a non-``None`` ``"score"`` take part, which
    also keeps the ``"_aggregate_scores"`` entry out. Returns ``None`` when
    no entry qualifies.
    """
    values = [
        float(entry["score"])
        for entry in dim_scores.values()
        if isinstance(entry, dict) and entry.get("score") is not None
    ]
    return round(sum(values) / len(values), 1) if values else None


def _compute_aggregate_scores_from_dims(dim_scores: dict) -> dict[str, float | None]:
    """Extract aggregate scores stored alongside dimension data (if present).

    Returns a dict with the keys ``overall``/``objective``/``strict``/
    ``verified`` (values may be ``None``), or ``{}`` when the
    ``"_aggregate_scores"`` entry is not a dict.
    """
    agg = dim_scores.get("_aggregate_scores", {})
    if not isinstance(agg, dict):
        return {}
    return {
        "overall": agg.get("overall_score"),
        "objective": agg.get("objective_score"),
        "strict": agg.get("strict_score"),
        "verified": agg.get("verified_strict_score"),
    }


def _resolve_language_scores(dim_scores: dict) -> dict[str, float | None]:
    """Return the four headline scores for one language.

    Stored aggregate scores win. When no overall score is stored (missing
    entry or ``None``), the unweighted dimension average is used instead.
    A stored overall score of ``0`` is a real score and is kept.
    """
    scores: dict[str, float | None] = {
        "overall": None,
        "objective": None,
        "strict": None,
        "verified": None,
    }
    scores.update(_compute_aggregate_scores_from_dims(dim_scores))
    if scores["overall"] is None:
        scores["overall"] = _overall_from_dim_scores(dim_scores)
    return scores


def _format_score_summary(
    overall: float | None,
    objective: float | None,
    strict: float | None,
    verified: float | None,
) -> str | None:
    """Join the available scores into one summary string.

    Scores that are ``None`` are left out; returns ``None`` when all are.
    """
    labelled = (
        ("overall", overall),
        ("objective", objective),
        ("strict", strict),
        ("verified", verified),
    )
    parts = [f"{label} {float(value):.1f}%" for label, value in labelled if value is not None]
    return " | ".join(parts) if parts else None


def show_per_language_score_blocks(
    state: dict,
    *,
    show_aggregate: bool = True,
) -> None:
    """Print per-language score blocks from ``state["dimension_scores_by_language"]``.

    Each language (in name order) gets its own labelled block showing
    overall/objective/strict/verified scores and the mechanical + subjective
    dimension breakdown. Entries that are not dicts are skipped. The
    aggregate block is printed last when *show_aggregate* is True. When the
    state holds no per-language scores, a hint is printed instead.
    """
    by_lang: dict[str, dict] = state.get("dimension_scores_by_language") or {}
    if not by_lang:
        print(
            colorize(
                " No per-language scores found. Run `desloppify scan --by-language` first.",
                "yellow",
            )
        )
        return

    for lang_name, lang_dim_scores in sorted(by_lang.items()):
        if not isinstance(lang_dim_scores, dict):
            continue

        scores = _resolve_language_scores(lang_dim_scores)
        _print_lang_block_header(lang_name)
        _print_lang_score_summary(
            scores["overall"],
            scores["objective"],
            scores["strict"],
            scores["verified"],
        )
        _print_lang_dimension_rows(lang_dim_scores)
        print()

    if show_aggregate:
        _print_aggregate_reference(state)


def _print_lang_block_header(lang_name: str) -> None:
    """Print the title-cased language name as a rule padded to 60 columns."""
    header = f" ── {lang_name.title()} ──"
    print(colorize(header.ljust(60, "─"), "bold"))


def _print_lang_score_summary(
    overall: float | None,
    objective: float | None,
    strict: float | None,
    verified: float | None,
) -> None:
    """Print the one-line score summary; prints nothing when all are ``None``."""
    summary = _format_score_summary(overall, objective, strict, verified)
    if summary is not None:
        print(colorize(" " + summary, "cyan"))


def _print_dimension_row_group(rows: list, *, bar_len: int) -> None:
    """Print *rows* of ``(name, score, strict)`` ordered by ascending score."""
    for name, score, strict in sorted(rows, key=lambda row: row[1]):
        bar = _dimension_bar(score, bar_len=bar_len)
        print(
            f" {name:<22} {bar} {score:5.1f}% "
            + colorize(f"(strict {strict:5.1f}%)", "dim")
        )


def _print_lang_dimension_rows(dim_scores: dict) -> None:
    """Print one row per dimension for this language.

    Entries whose name starts with ``_`` or whose value is not a dict are
    ignored. Dimensions listing ``"subjective_assessment"`` among their
    detectors are printed after the mechanical ones under their own heading.
    """
    mech_rows = []
    subj_rows = []
    for name, data in dim_scores.items():
        if name.startswith("_") or not isinstance(data, dict):
            continue
        score = float(data.get("score", 0.0))
        strict = float(data.get("strict", data.get("strict_score", score)))
        # A stored ``None`` must not break the membership test.
        detectors = data.get("detectors") or ()
        target = subj_rows if "subjective_assessment" in detectors else mech_rows
        target.append((name, score, strict))

    bar_len = 15
    _print_dimension_row_group(mech_rows, bar_len=bar_len)
    if subj_rows:
        print(colorize(" ── subjective ──", "dim"))
        _print_dimension_row_group(subj_rows, bar_len=bar_len)


def _print_aggregate_reference(state: dict) -> None:
    """Print the aggregate score as the final summary block."""
    print(colorize(" ── Aggregate (all languages) ─────────────────────────────", "dim"))
    summary = _format_score_summary(
        state.get("overall_score"),
        state.get("objective_score"),
        state.get("strict_score"),
        state.get("verified_strict_score"),
    )
    if summary is not None:
        print(colorize(" " + summary, "cyan"))
    print()


__all__ = [
    "show_per_language_score_blocks",
]
scan_reporting_dimensions as reporting_dimensions_mod, ) +from desloppify.app.commands.scan.scan_reporting_by_language import ( + show_per_language_score_blocks, +) from desloppify.app.commands.status_parts.render import ( print_open_scope_breakdown, print_scan_completeness, @@ -111,6 +114,21 @@ def cmd_status(args: argparse.Namespace) -> None: print_open_scope_breakdown(state) print_scan_completeness(state) + by_language = getattr(args, "by_language", False) + if by_language: + by_lang = state.get("dimension_scores_by_language") or {} + if by_lang: + show_per_language_score_blocks(state, show_aggregate=True) + else: + print( + colorize( + " --by-language: no per-language scores in state. " + "Run `desloppify scan --by-language` first.", + "yellow", + ) + ) + print() + if dim_scores: show_dimension_table(state, dim_scores) reporting_dimensions_mod.show_score_model_breakdown( @@ -178,7 +196,7 @@ def _status_json_payload( if isinstance(findings, dict) else None ) - return { + payload: dict = { "overall_score": scores.overall, "objective_score": scores.objective, "strict_score": scores.strict, @@ -195,6 +213,10 @@ def _status_json_payload( "scan_count": state.get("scan_count", 0), "last_scan": state.get("last_scan"), } + by_lang = state.get("dimension_scores_by_language") + if by_lang: + payload["dimension_scores_by_language"] = by_lang + return payload __all__ = [ "cmd_status", diff --git a/desloppify/app/output/scorecard.py b/desloppify/app/output/scorecard.py index 9b3600ef..e721fea4 100644 --- a/desloppify/app/output/scorecard.py +++ b/desloppify/app/output/scorecard.py @@ -40,16 +40,47 @@ logger = logging.getLogger(__name__) -def generate_scorecard(state: dict, output_path: str | Path) -> Path: - """Render a landscape scorecard PNG from scan state. Returns the output path.""" +def generate_scorecard( + state: dict, + output_path: str | Path, + *, + language: str | None = None, +) -> Path: + """Render a landscape scorecard PNG from scan state. 
Returns the output path. + + When *language* is provided, the scorecard is generated from the + per-language dimension scores stored in + ``state["dimension_scores_by_language"][language]`` rather than the + aggregate ``dimension_scores``. The output path should reflect the + language (e.g. ``scorecard-go.png``). + """ image_mod = importlib.import_module("PIL.Image") # deferred: optional dependency image_draw_mod = importlib.import_module("PIL.ImageDraw") # deferred: optional dependency scorecard_draw_mod = importlib.import_module("desloppify.app.output.scorecard_parts.draw") # deferred: depends on PIL output_path = Path(output_path) - main_score = get_overall_score(state) or 0 - strict_score = get_strict_score(state) or 0 + if language is not None: + # Build a shallow state view scoped to the requested language. + by_lang = state.get("dimension_scores_by_language") or {} + lang_dim_scores = dict(by_lang.get(language, {})) + agg = lang_dim_scores.pop("_aggregate_scores", {}) + render_state = dict(state) + render_state["dimension_scores"] = { + k: v for k, v in lang_dim_scores.items() if not k.startswith("_") + } + render_state["scan_history"] = [{"lang": language}] + render_state["overall_score"] = ( + agg.get("overall_score") if agg else None + ) or get_overall_score(state) or 0 + render_state["strict_score"] = ( + agg.get("strict_score") if agg else None + ) or get_strict_score(state) or 0 + else: + render_state = state + + main_score = get_overall_score(render_state) or 0 + strict_score = get_strict_score(render_state) or 0 project_name = resolve_project_name(PROJECT_ROOT) package_version = resolve_package_version( @@ -59,7 +90,7 @@ def generate_scorecard(state: dict, output_path: str | Path) -> Path: ) # Layout — landscape (wide), File health first - active_dims = prepare_scorecard_dimensions(state) + active_dims = prepare_scorecard_dimensions(render_state) row_count = len(active_dims) row_h = scale(20) width = scale(780) diff --git 
def discover_repo_languages(project_root: Path) -> dict[str, int]:
    """Detect all languages present in the project root.

    Returns a dict of ``{lang_name: file_count}`` for every registered
    language whose file finder reports at least one file under
    *project_root*, ordered by descending file count. Languages with equal
    counts are ordered by name so the result does not depend on registry
    iteration order. A language whose file finder raises is treated as
    absent; the failure is logged at debug level.
    """
    import logging

    load_all()
    counts: dict[str, int] = {}
    for lang_name, obj in registry_state.all_items():
        cfg = obj if isinstance(obj, LangConfig) else make_lang_config(lang_name, obj)
        try:
            file_count = len(cfg.file_finder(project_root))
        except Exception:  # best effort: a broken finder must not hide the other languages
            logging.getLogger(__name__).debug(
                "file discovery failed for language %s", lang_name, exc_info=True
            )
            continue
        if file_count > 0:
            counts[lang_name] = file_count
    return dict(sorted(counts.items(), key=lambda item: (-item[1], item[0])))