diff --git a/README.md b/README.md index 6457f34..71c4f4c 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,9 @@ AI-powered CLI that renames files based on their content. Scanned documents, downloads, and exported files often arrive with useless names like `scan_001.pdf` or `IMG_5847.jpg`. renamr reads each file — extracting text from PDFs, rendering pages as images for vision models, or encoding photos directly — sends a preview to an LLM, and renames the file to a structured format based on the content it actually finds. ``` -scan_001.pdf -> 240115_ACME_Rechnung.pdf -IMG_5847.jpg -> 241203_DeutschePost_Zustellbenachrichtigung.jpg -invoice_download.pdf -> 250110_Amazon_Bestellbestaetigung.pdf +scan_001.pdf -> 240115_ACME_Invoice.pdf +IMG_5847.jpg -> 241203_PostOffice_DeliveryNotice.jpg +invoice_download.pdf -> 250110_Amazon_OrderConfirmation.pdf ``` Only the filename changes. Files are never modified. @@ -24,7 +24,9 @@ Only the filename changes. Files are never modified. - Content-aware renaming via any LiteLLM-supported provider (OpenAI, OpenRouter, Anthropic, local models) - PDF text extraction for text-based documents - Vision model support for scanned PDFs and image files -- iCloud evicted file handling — triggers download via `brctl` before processing (macOS) +- iCloud evicted file handling — auto-downloads stubs via `brctl` before processing (macOS only) +- Multi-inbox support — configure one or more folders in a single config +- Configurable output language — extracted metadata returned in any language - Dry-run mode to preview renames without touching files - Undo the last run with a single command - Configurable output template (`{date}_{sender}_{subject}`), file extensions, and system prompt @@ -45,19 +47,22 @@ uv tool install renamr ## Quick Start ```bash -# Create config.toml and data/ in the current directory +# One-time global install +uv tool install renamr # or: pip install renamr + +# First-run setup — creates ~/.config/renamr/config.toml renamr init # Set your API key export OPENAI_API_KEY="your-key" -# Preview renames without touching any files +# Preview renames renamr run --dry-run # Rename files renamr run -# Undo the last run +# Undo last run renamr undo ``` @@ -69,10 +74,14 @@ renamr run --inbox ~/Documents/inbox --dry-run ## Configuration -`renamr init` creates a `config.toml` in the current directory. The full set of options: +`renamr init` creates `~/.config/renamr/config.toml` by default. On Linux, `XDG_CONFIG_HOME` +is respected, so the actual path becomes `$XDG_CONFIG_HOME/renamr/config.toml` when set. + +The full set of options: ```toml -inbox_path = "." +inbox_paths = ["/path/to/your/folder"] +language = "en" file_extensions = [".pdf", ".jpg", ".jpeg", ".png", ".txt"] recursive = false filename_template = "{date}_{sender}_{subject}" @@ -95,9 +104,15 @@ level = "INFO" json_logs = false ``` -`filename_template` supports three placeholders: `{date}`, `{sender}`, `{subject}`. The date is extracted from document content when available, falling back to the file's creation timestamp. +`inbox_paths` accepts one or more folders. `renamr run` processes all of them in one pass. +Use `--inbox /some/folder` for a one-off override without editing the config. + +`filename_template` supports three placeholders: `{date}`, `{sender}`, `{subject}`. Changing +the order does not affect metadata extraction — the model still returns the same fields, and +renamr only changes how they are assembled into the final filename. -`data/undo.json` is stored relative to the config file. Always run `renamr run` and `renamr undo` with the same `--config` path, or from the same directory when using the default. +`undo.json` is stored next to the config file. With the default setup, that means +`~/.config/renamr/undo.json`. **Switching providers.** Change `model` and set `api_base`. For OpenRouter: @@ -129,7 +144,7 @@ Then set `OPENROUTER_API_KEY` instead of `OPENAI_API_KEY`. Any provider supporte Additional notes: - Always use an `https://` endpoint for `api_base`. An `http://` URL sends file content unencrypted. -- Keep `data/undo.json` private on shared systems — it contains the file paths from the last run. +- Keep `~/.config/renamr/undo.json` private on shared systems — it contains the file paths from the last run. - Avoid sharing verbose log output publicly; failed auth responses may include API key fragments. ## Maintenance diff --git a/pyproject.toml b/pyproject.toml index 64ace81..b713524 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,6 @@ dependencies = [ "litellm>=1.82.1", "pillow>=12.1.1", "pydantic>=2.12.5", - "pydantic-settings>=2.13.1", "pymupdf>=1.27.2", "pypdf>=6.8.0", "structlog>=25.5.0", diff --git a/src/renamr/cli.py b/src/renamr/cli.py index aa39c61..64a79f9 100644 --- a/src/renamr/cli.py +++ b/src/renamr/cli.py @@ -2,7 +2,7 @@ from __future__ import annotations -import importlib.resources +import os from pathlib import Path from typing import Annotated @@ -24,6 +24,14 @@ console = Console() +def _config_dir() -> Path: + """Return the default directory for renamr config and runtime files.""" + xdg_config_home = os.environ.get("XDG_CONFIG_HOME") + if xdg_config_home: + return Path(xdg_config_home) / "renamr" + return Path.home() / ".config" / "renamr" + + @app.command() def version() -> None: """Print the installed version.""" @@ -31,23 +39,40 @@ def version() -> None: @app.command() -def init() -> None: +def init( + config: Annotated[Path | None, typer.Option("--config")] = None, +) -> None: """Create a local config file and data directory.""" setup_logging("INFO", False) - config_path = Path("config.toml") + config_path = config or _config_dir() / "config.toml" if config_path.exists(): - typer.echo("config.toml already exists") - else: - example = importlib.resources.files("renamr").joinpath("config.toml.example").read_text() - config_path.write_text(example) - typer.echo("Created config.toml") - Path("data").mkdir(parents=True, exist_ok=True) - typer.echo("Ensured data/ exists") + typer.echo(f"{config_path} already exists. Delete it to reinitialize.") + return + config_path.parent.mkdir(parents=True, exist_ok=True) + + inbox_path = Path(typer.prompt("Inbox folder path")).expanduser().resolve() + language = typer.prompt("Language for extracted metadata", default="en") + model = typer.prompt("LLM model", default="gpt-4o-mini") + + config_path.write_text( + "\n".join( + [ + f'inbox_paths = ["{inbox_path}"]', + f'language = "{language}"', + "", + "[llm]", + f'model = "{model}"', + "", + ] + ) + ) + typer.echo(f"Created {config_path}") + typer.echo(f"Ensured {config_path.parent} exists") @app.command() def run( - config: Annotated[Path, typer.Option("--config")] = Path("config.toml"), + config: Annotated[Path | None, typer.Option("--config")] = None, dry_run: Annotated[bool, typer.Option("--dry-run/--no-dry-run")] = False, compress: Annotated[bool | None, typer.Option("--compress/--no-compress")] = None, inbox: Annotated[Path | None, typer.Option("--inbox")] = None, @@ -55,30 +80,32 @@ def run( verbose: Annotated[bool, typer.Option("--verbose/--no-verbose")] = False, ) -> None: """Scan files, extract metadata, and rename them.""" - if not config.exists(): + config_path = config or _config_dir() / "config.toml" + if not config_path.exists(): typer.secho("Missing config.toml. Run `renamr init` first.", fg=typer.colors.RED) raise typer.Exit(code=1) - app_config = load_config(config) + app_config = load_config(config_path) if inbox is not None: - app_config = app_config.model_copy(update={"inbox_path": str(inbox)}) + app_config = app_config.model_copy(update={"inbox_paths": [str(inbox)]}) if recursive is not None: app_config = app_config.model_copy(update={"recursive": recursive}) if compress is None: compress = app_config.compress.enabled log_level = "DEBUG" if verbose else app_config.logging.level setup_logging(log_level, app_config.logging.json_logs) - data_dir = config.parent / "data" + data_dir = config_path.parent summary = run_pipeline(app_config, dry_run=dry_run, compress=compress, data_dir=data_dir) _print_summary(summary) @app.command() def undo( - config: Annotated[Path, typer.Option("--config")] = Path("config.toml"), + config: Annotated[Path | None, typer.Option("--config")] = None, ) -> None: """Undo the last successful rename run.""" setup_logging("INFO", False) - data_dir = config.parent / "data" + config_path = config or _config_dir() / "config.toml" + data_dir = config_path.parent reversed_pairs = undo_last_run(data_dir) if not reversed_pairs: typer.secho("Nothing to undo.", fg=typer.colors.YELLOW) diff --git a/src/renamr/config.py b/src/renamr/config.py deleted file mode 100644 index 1bf54ed..0000000 --- a/src/renamr/config.py +++ /dev/null @@ -1,16 +0,0 @@ -"""Environment-backed settings for local development.""" - -from pydantic_settings import BaseSettings, SettingsConfigDict - - -class EnvironmentSettings(BaseSettings): - """Secrets loaded from the environment or a local .env file.""" - - openai_api_key: str | None = None - openrouter_api_key: str | None = None - - model_config = SettingsConfigDict( - env_file=".env", - env_file_encoding="utf-8", - extra="ignore", - ) diff --git a/src/renamr/config.toml.example b/src/renamr/config.toml.example index 6cf416d..8be7952 100644 --- a/src/renamr/config.toml.example +++ b/src/renamr/config.toml.example @@ -1,18 +1,11 @@ -inbox_path = "." +inbox_paths = ["/path/to/your/folder"] +# inbox_paths = ["/folder/one", "/folder/two"] # multiple inboxes supported +language = "en" # language for extracted metadata: "en", "de", or any locale name file_extensions = [".pdf", ".jpg", ".jpeg", ".png", ".txt"] recursive = false filename_template = "{date}_{sender}_{subject}" # Full default prompt lives in src/renamr/models.py as DEFAULT_RENAME_PROMPT. # Copy it here and customize if you want provider-specific behavior. -rename_prompt = """ ---- -language: en -output_format: json_only ---- - -# Purpose -Extract sender, subject, document date, and filename format from a document for file renaming. -""" [llm] model = "gpt-4o-mini" diff --git a/src/renamr/metadata.py b/src/renamr/metadata.py index c379dcd..19f72a2 100644 --- a/src/renamr/metadata.py +++ b/src/renamr/metadata.py @@ -48,8 +48,12 @@ def extract_metadata( filename_format="date_subject", ) prompt = _build_user_prompt(filename, created_at, preview_text) + system_content = ( + f"Language for all extracted metadata values: {config.language}\n\n" + f"{config.rename_prompt}" + ) messages = [ - {"role": "system", "content": config.rename_prompt}, + {"role": "system", "content": system_content}, {"role": "user", "content": _build_user_content(prompt, image_base64)}, ] for attempt in range(config.llm.max_retries + 1): @@ -140,7 +144,10 @@ def _parse_date_string(value: str) -> date | None: if not match: continue if fmt is not None: - return datetime.strptime(match.group(0), fmt).date() + try: + return datetime.strptime(match.group(0), fmt).date() + except ValueError: + continue parsed = _parse_ambiguous_date(match.group(1), match.group(2), match.group(3)) if parsed is not None: return parsed @@ -200,18 +207,25 @@ def _normalize_umlauts(value: str) -> str: _MONTH_MAP = { "januar": 1, + "january": 1, "jan": 1, "februar": 2, + "february": 2, "feb": 2, "maerz": 3, "marz": 3, + "march": 3, "mrz": 3, + "mar": 3, "april": 4, "apr": 4, "mai": 5, + "may": 5, "juni": 6, + "june": 6, "jun": 6, "juli": 7, + "july": 7, "jul": 7, "august": 8, "aug": 8, @@ -219,9 +233,13 @@ def _normalize_umlauts(value: str) -> str: "sep": 9, "sept": 9, "oktober": 10, + "october": 10, "okt": 10, + "oct": 10, "november": 11, "nov": 11, + "december": 12, "dezember": 12, + "dec": 12, "dez": 12, } diff --git a/src/renamr/models.py b/src/renamr/models.py index ff09c14..0a6de49 100644 --- a/src/renamr/models.py +++ b/src/renamr/models.py @@ -118,11 +118,12 @@ class CompressConfig(BaseModel): class AppConfig(BaseModel): """Top-level application configuration.""" - inbox_path: str = Field(default=".") + inbox_paths: list[str] = Field(default_factory=lambda: ["."]) file_extensions: list[str] = Field( default_factory=lambda: [".pdf", ".jpg", ".jpeg", ".png", ".txt"] ) recursive: bool = Field(default=False) + language: str = Field(default="en") filename_template: str = Field(default="{date}_{sender}_{subject}") rename_prompt: str = Field(default=DEFAULT_RENAME_PROMPT) llm: LLMConfig = Field(default_factory=LLMConfig) diff --git a/src/renamr/renamer.py b/src/renamr/renamer.py index 9c825d8..c756800 100644 --- a/src/renamr/renamer.py +++ b/src/renamr/renamer.py @@ -77,7 +77,8 @@ def scan_files(inbox: Path, extensions: list[str], recursive: bool) -> list[Path def process_file(filepath: Path, config: AppConfig, dry_run: bool) -> RenameResult: """Extract metadata, build a filename, and optionally rename the file.""" try: - created_at = datetime.fromtimestamp(filepath.stat().st_ctime, tz=UTC) + stat = filepath.stat() + created_at = datetime.fromtimestamp(getattr(stat, "st_birthtime", stat.st_mtime), tz=UTC) preview_text = extract_text_preview(filepath) image_base64 = _get_image_payload(filepath, preview_text) metadata = extract_metadata( @@ -105,12 +106,16 @@ def process_file(filepath: Path, config: AppConfig, dry_run: bool) -> RenameResu def run(config: AppConfig, dry_run: bool, compress: bool, data_dir: Path) -> RunSummary: """Run the rename pipeline over configured files.""" - inbox = Path(config.inbox_path) - if not inbox.exists(): - raise FileNotFoundError(f"Inbox path does not exist: {inbox}") - results = [_download_stub(stub) for stub in _scan_icloud_stubs(inbox, config.recursive)] - filepaths = scan_files(inbox, config.file_extensions, config.recursive) - results.extend(process_file(path, config, dry_run) for path in filepaths) + inboxes = [Path(path_str) for path_str in config.inbox_paths] + for inbox in inboxes: + if not inbox.exists(): + raise FileNotFoundError(f"Inbox path does not exist: {inbox}") + + results: list[RenameResult] = [] + for inbox in inboxes: + results.extend(_download_stub(stub) for stub in _scan_icloud_stubs(inbox, config.recursive)) + filepaths = scan_files(inbox, config.file_extensions, config.recursive) + results.extend(process_file(path, config, dry_run) for path in filepaths) if compress and not dry_run: _compress_renamed_pdfs(results, config) if not dry_run: diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 186d261..a1fd830 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -11,6 +11,7 @@ from typer.main import get_command from typer.testing import CliRunner +import renamr.cli as cli_module import renamr.metadata as metadata_module from renamr.cli import app @@ -25,13 +26,45 @@ def test_version_command_outputs_version() -> None: def test_init_creates_config_from_package_data(tmp_path: Path, monkeypatch) -> None: - monkeypatch.chdir(tmp_path) + inbox = tmp_path / "inbox" + inbox.mkdir() + config_dir = tmp_path / "config-home" + monkeypatch.setattr(cli_module, "_config_dir", lambda: config_dir) - result = runner.invoke(app, ["init"]) + result = runner.invoke(app, ["init"], input=f"{inbox}\nen\ngpt-4o-mini\n") + + assert result.exit_code == 0 + config_path = config_dir / "config.toml" + assert config_path.exists() + content = config_path.read_text() + assert f'inbox_paths = ["{inbox.resolve()}"]' in content + assert 'language = "en"' in content + assert result.stdout.count(str(config_path)) == 1 + + +def test_init_does_not_call_mkdir_when_config_exists(tmp_path: Path, monkeypatch) -> None: + config_path = tmp_path / "existing-config" / "config.toml" + config_path.parent.mkdir(parents=True) + config_path.write_text('language = "en"\n') + mkdir_calls: list[Path] = [] + original_mkdir = Path.mkdir + + def tracking_mkdir( + self: Path, + mode: int = 0o777, + parents: bool = False, + exist_ok: bool = False, + ) -> None: + mkdir_calls.append(self) + original_mkdir(self, mode=mode, parents=parents, exist_ok=exist_ok) + + monkeypatch.setattr(Path, "mkdir", tracking_mkdir) + + result = runner.invoke(app, ["init", "--config", str(config_path)]) assert result.exit_code == 0 - assert (tmp_path / "config.toml").exists() - assert (tmp_path / "data").is_dir() + assert f"{config_path} already exists. Delete it to reinitialize." in result.stdout + assert mkdir_calls == [] def test_run_dry_run_does_not_rename_files(tmp_path: Path, monkeypatch) -> None: @@ -40,7 +73,7 @@ def test_run_dry_run_does_not_rename_files(tmp_path: Path, monkeypatch) -> None: original_file = inbox / "note.txt" original_file.write_text("Invoice Date 2024-01-31") (tmp_path / "config.toml").write_text( - f'inbox_path = "{inbox}"\nfile_extensions = [".txt"]\n' + f'inbox_paths = ["{inbox}"]\nfile_extensions = [".txt"]\n' ) monkeypatch.setattr( diff --git a/tests/unit/test_metadata.py b/tests/unit/test_metadata.py index 81f7bf2..7b7ec6f 100644 --- a/tests/unit/test_metadata.py +++ b/tests/unit/test_metadata.py @@ -1,8 +1,11 @@ """Tests for metadata parsing.""" -from datetime import date +from datetime import date, datetime +from types import SimpleNamespace -from renamr.metadata import _parse_date_string, _parse_metadata +import renamr.metadata as metadata_module +from renamr.metadata import _parse_date_string, _parse_metadata, extract_metadata +from renamr.models import AppConfig def test_parse_metadata_handles_valid_json_and_fallbacks() -> None: @@ -26,4 +29,41 @@ def test_parse_date_string_supports_expected_formats() -> None: assert _parse_date_string("2024-01-31") == date(2024, 1, 31) assert _parse_date_string("31.01.2024") == date(2024, 1, 31) assert _parse_date_string("31. Maerz 2024") == date(2024, 3, 31) + assert _parse_date_string("20241332") is None assert _parse_date_string("none") is None + + +def test_extract_metadata_prepends_language_instruction(monkeypatch) -> None: + captured: dict[str, object] = {} + + def fake_completion(**kwargs): + captured["messages"] = kwargs["messages"] + return SimpleNamespace( + choices=[ + SimpleNamespace( + message=SimpleNamespace( + content=( + '{"sender":"ACME","subject":"Invoice",' + '"date":"2024-01-31","filename_format":"date_subject"}' + ) + ) + ) + ] + ) + + monkeypatch.setattr(metadata_module, "completion", fake_completion) + + extract_metadata( + filename="note.txt", + created_at=datetime(2024, 1, 31), + preview_text="invoice", + image_base64=None, + config=AppConfig(language="de"), + ) + + messages = captured["messages"] + assert isinstance(messages, list) + assert messages[0]["role"] == "system" + assert messages[0]["content"].startswith( + "Language for all extracted metadata values: de\n\n" + ) diff --git a/tests/unit/test_renamer.py b/tests/unit/test_renamer.py index 053dd7f..1e7bd3d 100644 --- a/tests/unit/test_renamer.py +++ b/tests/unit/test_renamer.py @@ -2,16 +2,60 @@ from __future__ import annotations +from datetime import date from pathlib import Path import pytest +import renamr.renamer as renamer_module +from renamr.metadata import FileMetadata from renamr.models import AppConfig from renamr.renamer import run +def test_run_renames_files_from_multiple_inboxes(tmp_path: Path, monkeypatch) -> None: + inbox_a = tmp_path / "inbox-a" + inbox_b = tmp_path / "inbox-b" + inbox_a.mkdir() + inbox_b.mkdir() + first_file = inbox_a / "first.txt" + second_file = inbox_b / "second.txt" + first_file.write_text("first") + second_file.write_text("second") + + monkeypatch.setattr( + renamer_module, + "extract_metadata", + lambda **_: FileMetadata( + sender="ACME", + subject="Invoice", + document_date=date(2024, 1, 31), + filename_format="date_sender_subject", + ), + ) + + config = AppConfig( + inbox_paths=[str(inbox_a), str(inbox_b)], + file_extensions=[".txt"], + ) + summary = run(config, dry_run=False, compress=False, data_dir=tmp_path / "data") + + assert summary.renamed == 2 + assert (inbox_a / "240131_ACME_Invoice.txt").exists() + assert (inbox_b / "240131_ACME_Invoice.txt").exists() + + def test_run_raises_for_missing_inbox(tmp_path: Path) -> None: - config = AppConfig(inbox_path=str(tmp_path / "missing")) + config = AppConfig(inbox_paths=[str(tmp_path / "missing")]) + + with pytest.raises(FileNotFoundError, match="Inbox path does not exist"): + run(config, dry_run=True, compress=False, data_dir=tmp_path / "data") + + +def test_run_raises_when_any_inbox_is_missing(tmp_path: Path) -> None: + existing = tmp_path / "existing" + existing.mkdir() + config = AppConfig(inbox_paths=[str(existing), str(tmp_path / "missing")]) with pytest.raises(FileNotFoundError, match="Inbox path does not exist"): run(config, dry_run=True, compress=False, data_dir=tmp_path / "data") diff --git a/uv.lock b/uv.lock index 05cf381..f4eca01 100644 --- a/uv.lock +++ b/uv.lock @@ -1456,20 +1456,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, ] -[[package]] -name = "pydantic-settings" -version = "2.13.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pydantic" }, - { name = "python-dotenv" }, - { name = "typing-inspection" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/52/6d/fffca34caecc4a3f97bda81b2098da5e8ab7efc9a66e819074a11955d87e/pydantic_settings-2.13.1.tar.gz", hash = "sha256:b4c11847b15237fb0171e1462bf540e294affb9b86db4d9aa5c01730bdbe4025", size = 223826, upload-time = "2026-02-19T13:45:08.055Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/00/4b/ccc026168948fec4f7555b9164c724cf4125eac006e176541483d2c959be/pydantic_settings-2.13.1-py3-none-any.whl", hash = "sha256:d56fd801823dbeae7f0975e1f8c8e25c258eb75d278ea7abb5d9cebb01b56237", size = 58929, upload-time = "2026-02-19T13:45:06.034Z" }, -] - [[package]] name = "pygments" version = "2.19.2" @@ -1734,7 +1720,6 @@ dependencies = [ { name = "litellm" }, { name = "pillow" }, { name = "pydantic" }, - { name = "pydantic-settings" }, { name = "pymupdf" }, { name = "pypdf" }, { name = "structlog" }, @@ -1756,7 +1741,6 @@ requires-dist = [ { name = "litellm", specifier = ">=1.82.1" }, { name = "pillow", specifier = ">=12.1.1" }, { name = "pydantic", specifier = ">=2.12.5" }, - { name = "pydantic-settings", specifier = ">=2.13.1" }, { name = "pymupdf", specifier = ">=1.27.2" }, { name = "pypdf", specifier = ">=6.8.0" }, { name = "structlog", specifier = ">=25.5.0" },