def _get_github_version(version_string: str) -> str:
    """Convert a version string into a GitHub-compatible URL segment.

    The result is meant to be placed after ``/tree/`` in a GitHub URL.

    Args:
        version_string: The version string, from setuptools_scm (for
            example ``1.27.3``, ``v1.27.3`` or
            ``1.27.4.dev5+g1a2b3c4.d20240101``).

    Returns:
        The commit hash if the version contains a ``+g<hash>`` local part,
        otherwise the leading dotted numeric version (tag name without a
        leading ``v``), or ``"main"`` as a fallback when neither is found.
    """
    # Remove the trailing "dirty" marker if present. setuptools_scm appends
    # ".d<date>" for a dirty working tree; the historic ".g<digits>" form is
    # still accepted so earlier inputs keep working.
    cleaned_version = re.sub(r"\.[dg][0-9]+$", "", version_string)
    # Strip a leading "v" - operate on the already cleaned string so the
    # previous substitution is not discarded.
    cleaned_version = re.sub(r"^v", "", cleaned_version)

    # A commit hash is more precise than a tag name, so it wins if present.
    commit_hash = re.search(r"\+g([0-9a-f]+)", cleaned_version)
    if commit_hash:
        return commit_hash.group(1)

    tag_name = re.search(r"^[0-9]+(\.[0-9]+)*", cleaned_version)
    if tag_name:
        return tag_name.group(0)

    return "main"  # Fallback to main branch
- -You can override the hooks that should be run for your importers by specifying a -variable `HOOKS` with the a list of hooks to apply to the list of -`(filename: str, entries: list[Directive])` tuples. On Beancount version 2 the -duplicates detector hook will be run by default and no hooks will run by default -on Beancount version 3. If you want to use beangulp-style hooks that take list -of -`(filename: str, entries: list[Directive], account: str, importer: Importer)`-tuples, -you can annotate them with the appropriate Python types which Fava will detect -and call with these 4-tuples. - -Set the `import-config` option to point to your import config and set -`import-dirs` to the directories that contain the files that you want to import. -And if you wish to save new entries elsewhere than main file - use -`insert-entry` option. +# Importing entries into Fava + +Fava integrates with Beancount's import system to automatically generate +transaction entries for your Beancount file from account statements. After +setting up your import process as explained below, the *Import* page in Fava +will show an upload button that lets you upload files into your *import folder*. +If you can access the import folder (e.g., if Fava is running on your local +machine), you can also just place your account statement files there directly. + +To import the file contents into your Beancount ledger, you must set up an +*Importer* that can process the file format. Fava lists each file in the import +folder. If any Importer matches the file, a button "Extract" will be visible. +This will import its contents into your ledger. Files without a matching +Importer appear as "Non-importable file". + +You can move any file into your +[documents folder](https://beancount.github.io/docs/beancount_language_syntax.html#documents-from-a-directory) +by clicking the button "Move". 
If you do not use the document folder feature of +Beancount, you can delete the file from the import folder after the import is +complete by clicking the button with the cross at the top right. + +## Define the import configuration in your Beancount ledger + +Add these lines to your Beancount file: + +
+ +The first line specifies the import folder location. The second line defines the +import configuration - a Python script that handles the format of your account +statements. Fava interprets relative paths relative to your main Beancount +ledger file location. You can also use absolute paths. + +## Write your import configuration + +Your import configuration must be a Python file that defines: + +- `CONFIG`: A list of Importers that process your account statement files. + Create subclasses of `beangulp.Importer` with parsing logic for your + specific account statement formats, then add instances of each class to this + list. For CSV files, subclassing `beangulp.importers.csvbase.Importer` is + recommended. + +- `HOOKS`: A list of functions to apply to all directives (e.g., transactions) + after generation by any Importer. + +For more information on how to write importers, see + +- [Importing External Data in Beancount](http://furius.ca/beancount/doc/ingest) + in the beancount manual. +- The Example input configuration file + +Hook functions are explained in more detail further down on this page. Fava currently only supports entries of type `Transaction`, `Balance`, and `Note`. Set the special metadata key `__source__` to display the corresponding text (CSV-row, XML-fragment, etc.) for the entry in the list of entries to import. Note that this metadata (and all other metadata keys starting with an underscore) will be stripped before saving the entry to file. + +## Hook functions + +Hook functions are applied to all imported transactions. A hook function +receives the parameters `hook_fn(new_entries_list, existing_entries)` and +returns the modified `new_entries_list`. + +The argument `new_entries_list` is itself a list of tuples. As Fava imports each +file individually (in contrast to the CLI of Beangulp), this list will always be +of length 1. + +The tuples can have two (old style, default) or four elements +(beangulp-style). 
The type signature of the tuples is either + +- `(filename: str, entries: list[Directive])` or +- `(filename: str, entries: list[Directive], account: str, importer: Importer)`. + +You can annotate the hook function with the appropriate Python types and Fava +will detect and call it with these 4-tuples. Types `Directive` and `Importer` +are `beancount.core.data.Directive` and `beangulp.Importer`, respectively. + +So, in summary, the type signature of a hook function is either: + +``` +hook_fn(new_entries_list: list[tuple[str, list[Directive]]], + existing_entries: Sequence[Directive]) -> list[tuple[str, list[Directive]]] +``` + +or + +``` +hook_fn(new_entries_list: list[tuple[str, list[Directive], str, Importer]], + existing_entries: Sequence[Directive]) -> list[tuple[str, list[Directive], str, Importer]] +``` diff --git a/src/fava/help/options.md b/src/fava/help/options.md index c76ed78bc..7123422ee 100644 --- a/src/fava/help/options.md +++ b/src/fava/help/options.md @@ -8,7 +8,9 @@ following to your Beancount file. 2016-04-14 custom "fava-option" "auto-reload" "true" 2016-04-14 custom "fava-option" "currency-column" "100" -Below is a list of all possible options for Fava. +Below is a list of all possible options for Fava. In the Beancount documentation +you can find the +[list of Beancount options](https://beancount.github.io/docs/beancount_options_reference.html). ______________________________________________________________________ diff --git a/stubs/beangulp/__init__.pyi b/stubs/beangulp/__init__.pyi index 639fc09a8..0b9f2ff87 100644 --- a/stubs/beangulp/__init__.pyi +++ b/stubs/beangulp/__init__.pyi @@ -1,6 +1,7 @@ import datetime from abc import ABC from abc import abstractmethod +from collections.abc import Callable from collections.abc import Sequence from fava.beans.abc import Account @@ -24,3 +25,23 @@ class Importer(ABC): def sort( self, entries: list[Directive], reverse: bool = False ) -> None: ... 
# Type stub for ``beangulp.Ingest``: an object built from a sequence of
# importers and a sequence of hook callables, invoked without arguments.
# NOTE(review): confirm parameter names and defaults against the installed
# beangulp release - presumably upstream names the first parameter
# ``importers`` and allows ``hooks`` to be omitted.
class Ingest:
    def __init__(
        self,
        # The importers to run.
        importer: Sequence[Importer],
        # Hooks take the list of per-file 4-tuples
        # (filename, entries, account, importer) plus the existing entries
        # and return a list of 4-tuples of the same shape.
        hooks: Sequence[
            Callable[
                [
                    Sequence[
                        tuple[str, Sequence[Directive], str, Importer]
                    ],  # new_entries
                    Sequence[Directive],  # existing_entries
                ],
                Sequence[
                    tuple[str, Sequence[Directive], str, Importer]
                ],  # (return value)
            ]
        ],
    ) -> None: ...
    def __call__(self) -> None: ...
# Column specification built from two source column names (credit and debit)
# whose parsed value is a beancount Amount.
# NOTE(review): presumably exactly one of the two columns is filled per row -
# confirm against beangulp.
class CreditOrDebit(Column):
    credit_name: str
    debit_name: str
    def __init__(self, credit_name: str, debit_name: str) -> None: ...
    def parse(self, value: str) -> beancount.core.amount.Amount: ...

# Metaclass declared by the csvbase module; no members are typed here.
class CSVMeta(type): ...

# Settings describing the CSV format of the input file, plus the method that
# reads the rows of a file.
class CSVReader:
    encoding: str  # File encoding.
    skiplines: int
    names: bool  # Whether the input contains a row with column names.
    dialect: str | csv.Dialect  # The CSV dialect used in the input file.
    comments: str  # Comment character.
    order: Order | None  # Order of entries in the CSV file.

    def read(self, filepath: str) -> list[Row]: ...

# CSV based importer: combines the generic beangulp Importer interface with
# the CSVReader settings above.
class Importer(beangulp.Importer, CSVReader):
    def __init__(
        self, account: str, currency: str, flag: str = "*"
    ) -> None: ...
    def account(self, filepath: str) -> str: ...
    def date(self, filepath: str) -> datetime.date: ...
    def extract(
        self, filepath: str, existing: Sequence[Directive]
    ) -> list[Directive]: ...
    # May return None instead of a transaction.
    # NOTE(review): presumably None drops the transaction - confirm.
    def finalize(
        self, txn: data.Transaction, row: Row
    ) -> data.Transaction | None: ...
    def identify(self, filepath: str) -> bool: ...
    def metadata(self, filepath: str, lineno: int, row: Row) -> data.Meta: ...
+from __future__ import annotations + +import csv +from pathlib import Path +from tempfile import NamedTemporaryFile +from typing import Any +from typing import TYPE_CHECKING +from typing import TypeAlias + +import beangulp # Importing tools +from beancount.core import data # Transaction, Posting, ... +from beangulp.importers import csvbase + +if TYPE_CHECKING: + import beancount + + Importer: TypeAlias = beangulp.Importer + Meta: TypeAlias = beancount.core.data.Meta + Transaction: TypeAlias = beancount.core.data.Transaction + Directive: TypeAlias = beancount.core.data.Directive + # dynamically created NamedTuple, see docs of using functions + Row: TypeAlias = Any + + +class MyCSVImporter(csvbase.Importer): + """Read the csv file called "import.csv", in this directory. + + This is a CSV file formatted in German style to demonstrate some formatting + options: Column separator = ";", decimals are like this: 2.032,43 (dot as + thousands separator, comma as decimal separator. + """ + + # The expected column names and formats in the input file are defined + # as member variables, that are instances of csvbase.Column or subclasses + # + # Required columns + date = csvbase.Date("Buchungsdatum", "%Y-%m-%d") + narration = csvbase.Column("Umsatztext") + # To parse amount, first remove dots, then translate commas to dots, + # to convert 2.032,43 -> 3032.43 + amount = csvbase.Amount("Betrag", subs={r"\.": "", r",": "."}) + + # Optional further columns: + # flag, payee, account, currency, tag, link, balance. + # Providing 'balance' will auto-generate a balance assertion after the + # imported entries. + balance = csvbase.Amount("Saldo", subs={r"\.": "", r",": "."}) + + # Any additional members of type "Column" can be used by your own + # `finalize()` and `metadata()` functions, access e.g. as row.sepa_iban + sepa_iban = csvbase.Column("IBAN") + + # The following variables set the CSV format (see csvbase.CSVReader): + # encoding = "utf8" # File encoding. 
+ # skiplines = 0 # NOTE: Will be renamed to "header" in beangulp 0.3.0 + # names = True # Whether the input contains a row with column names. + # dialect = None # The CSV dialect used in the input file + # # (str or csv.Delimiter object). + # comments = "#" # Comment character. + # order = None # Order of entries in the CSV file + # # (Default: Infer from file) + order = csvbase.Order.DESCENDING + + # Set CSV dialect to use semicolon as separator + dialect = csv.excel + dialect.delimiter = ";" + + def __init__(self) -> None: + super().__init__( + account="Assets:MyBank", # default if no account column is defined + currency="EUR", + flag="*", # optional + ) + + def identify(self, filepath: str) -> bool: + """Return True if this importer is suitable for the given filename. + + This allows to auto-choose the right importer for a file. + + Arguments: + filepath: File path to read. + """ + return filepath.endswith("import.csv") + + # ruff: noqa: SIM115 + def read(self, filepath: str) -> Row: + """Override the read method to preprocess the CSV file. + + Arguments: + filepath: File path to read. + + Returns: + Values from one line of the input. Named tuple with attributess + named like class members of type Column. + """ + # Add some pre-processing of the input file, then + # call the parent read method with the processed file + + # Truncate the last line + try: + tmp = NamedTemporaryFile("w", delete=False) + with tmp: + lines = Path(filepath).read_text().splitlines() + lines = lines[:-1] + tmp.write("\n".join(lines)) + + yield from super().read(tmp.name) + + finally: + Path(tmp.name).unlink() + + def metadata(self, filepath: str, lineno: int, row: Row) -> Meta: + """Set the metadata of imported transactions. + + This is called for each row of the input file. + + Arguments: + filepath: Import file name + lineno: Import file line number + row: Values from one line of the input. Named tuple with + attributes named like class members of type Column. 
+ + Returns: + Object as created by beancount.core.data.new_metadata() + """ + # Example: Add the values from the additional sepa_iban column as + # metadata + return data.new_metadata(filepath, lineno, {"iban": row.sepa_iban}) + # To set posting metadata, use the finalize() function. There you can + # access txn.postings[i].meta as a simple dictionary. + + def finalize(self, txn: Transaction, row: Row) -> Transaction: + """Called for each generated transaction to make user-defined changes. + + Arguments: + txn: beancount.data.core.Transaction. + row: Values from one line of the input. Named tuple with + attributes named like class members of type Column. + + Returns: + beancount.core.data.Transaction object. + """ + # Example: Add a default second transaction leg to Expenses:Unknown + # Documentation of Transaction object (txt): + # https://beancount.github.io/docs/api_reference/beancount.core.html#beancount.core.data.Transaction + txn.postings.append( + data.Posting( + "Expenses:Unknown", + # "and" handles case if .units is None + (txn.postings[0].units and -txn.postings[0].units), + None, + None, + None, + None, + ) + ) + + return txn + + +# All available importers, one for each file format you need to process +CONFIG = [MyCSVImporter()] + + +# Hooks: Process beancount transaction objects after they have been extracted + + +# ruff: noqa: ARG001 +def example_hook( + new_entries: list[tuple[str, list[Directive], str, Importer]], + existing_entries: list[Directive], +) -> list[tuple[str, list[Directive], str, Importer]]: + """Example hook function. + + Arguments: + new_entries: New entries. One tuple per input file. Note that for Fava, + this list will always have only one tuple as the user starts the + import from a single file via the user interface. + existing_entries: List of existing entries, for example to do + deduplication + """ + out = [] + # For each imported file: + for filename, entries, account, importer in new_entries: + # ... 
Edit entries (note that this is itself a list of Directives!), + # then ... + out.append((filename, entries, account, importer)) + + return out + + +# List all hooks here +HOOKS = [example_hook] + +# Allows to call this script as './import extract