diff --git a/docs/source/workbook/_downloads/gnucash_demo/gnucash_demo_daily_totals.csv b/docs/source/workbook/_downloads/gnucash_demo/gnucash_demo_daily_totals.csv index f747370..9087b85 100644 --- a/docs/source/workbook/_downloads/gnucash_demo/gnucash_demo_daily_totals.csv +++ b/docs/source/workbook/_downloads/gnucash_demo/gnucash_demo_daily_totals.csv @@ -1,8 +1,8 @@ -date,revenue_proxy,expenses_proxy -2026-01-03,250.0,0.0 -2026-01-05,300.0,20.0 -2026-01-07,180.0,0.0 -2026-01-09,100.0,0.0 -2026-01-02,0.0,120.0 -2026-01-04,0.0,800.0 -2026-01-06,0.0,50.0 +date,revenue_proxy,expenses_proxy,net_proxy +2026-01-02,0.00,120.00,-120.00 +2026-01-03,250.00,0.00,250.00 +2026-01-04,0.00,800.00,-800.00 +2026-01-05,300.00,20.00,280.00 +2026-01-06,0.00,50.00,-50.00 +2026-01-07,180.00,0.00,180.00 +2026-01-09,100.00,0.00,100.00 diff --git a/docs/source/workbook/track_d_byod.rst b/docs/source/workbook/track_d_byod.rst index b97e197..2b1fd85 100644 --- a/docs/source/workbook/track_d_byod.rst +++ b/docs/source/workbook/track_d_byod.rst @@ -52,6 +52,34 @@ Validate the normalized tables: pystatsv1 trackd validate --datadir byod/my_project/normalized --profile core_gl +PyPI-only setup (no Git required) +---------------------------------------------- + +If you just want to *use* Track D tools (you don’t need to clone the repo): + +1. Create a virtual environment. +2. Install PyStatsV1 from PyPI. +3. Use the CLI + the workbook downloads in this section. + +.. code-block:: console + + python -m venv .venv + + # Windows (Git Bash) + source .venv/Scripts/activate + + # macOS/Linux + # source .venv/bin/activate + + python -m pip install -U pip + pip install "pystatsv1[workbook]" + +(Optional) sanity check: + +.. 
code-block:: console + + pystatsv1 doctor + Next: choose a tutorial ----------------------- diff --git a/docs/source/workbook/track_d_byod_gnucash.rst b/docs/source/workbook/track_d_byod_gnucash.rst index 222f7eb..f7e7243 100644 --- a/docs/source/workbook/track_d_byod_gnucash.rst +++ b/docs/source/workbook/track_d_byod_gnucash.rst @@ -83,7 +83,7 @@ Export a CSV that includes your accounts and the date range that covers your dem Step 4 — Initialize a Track D BYOD project ------------------------------------------ -From your PyStatsV1 repo root (or any folder you like): +From any folder you like (your BYOD projects can live anywhere): .. code-block:: console @@ -123,6 +123,12 @@ The adapter reads this file and writes the canonical tables to ``normalized/``. If you want to test without GnuCash, copy the demo export instead: +If you installed PyStatsV1 from PyPI (no repo clone), download **"Demo export (complex/multi-line)"** above +and copy that file to ``byod/gnucash_demo/tables/gl_journal.csv``. + +If you have the repo source code, you can copy the demo export from this docs folder: + + .. 
code-block:: console # (Windows PowerShell) diff --git a/docs/source/workbook/track_d_byod_gnucash_demo_analysis.rst b/docs/source/workbook/track_d_byod_gnucash_demo_analysis.rst index 1e729d0..cd13cb7 100644 --- a/docs/source/workbook/track_d_byod_gnucash_demo_analysis.rst +++ b/docs/source/workbook/track_d_byod_gnucash_demo_analysis.rst @@ -13,8 +13,9 @@ What we’ll build A small daily table: - ``date`` -- ``revenue_proxy`` (credits to Income accounts) -- ``expenses_proxy`` (debits to Expense accounts) +- ``revenue_proxy`` (net credits to Revenue accounts) +- ``expenses_proxy`` (net debits to Expense accounts) +- ``net_proxy`` (revenue_proxy − expenses_proxy) Download (prebuilt) ------------------- @@ -31,62 +32,42 @@ Assume your BYOD project is at ``byod/gnucash_demo`` and you have: - ``byod/gnucash_demo/normalized/gl_journal.csv`` - ``byod/gnucash_demo/normalized/chart_of_accounts.csv`` -Create a tiny script (for example ``scripts/gnucash_daily_totals.py``) with: +Run the built-in helper: -.. code-block:: python - - from pathlib import Path - import pandas as pd - - root = Path("byod/gnucash_demo") - - gl = pd.read_csv(root / "normalized" / "gl_journal.csv") - coa = pd.read_csv(root / "normalized" / "chart_of_accounts.csv") - - gl["date"] = pd.to_datetime(gl["date"], errors="coerce") - - gl["debit"] = pd.to_numeric(gl["debit"], errors="coerce").fillna(0.0) - gl["credit"] = pd.to_numeric(gl["credit"], errors="coerce").fillna(0.0) +.. 
code-block:: console - gl = gl.merge(coa[["account_id", "account_type"]], on="account_id", how="left") + pystatsv1 trackd byod daily-totals --project byod/gnucash_demo - revenue = ( - gl.query("account_type == 'Income'") - .groupby(gl["date"].dt.date)["credit"] - .sum() - .rename("revenue_proxy") - ) +This writes: - expenses = ( - gl.query("account_type == 'Expenses'") - .groupby(gl["date"].dt.date)["debit"] - .sum() - .rename("expenses_proxy") - ) +- ``byod/gnucash_demo/normalized/daily_totals.csv`` - daily = ( - pd.concat([revenue, expenses], axis=1) - .fillna(0.0) - .reset_index() - .rename(columns={"index": "date"}) - ) +Option B — Quick first analysis (no repo clone required) +-------------------------------------------------------- - out = root / "normalized" / "daily_totals.csv" - daily.to_csv(out, index=False) - print("Wrote:", out) - print(daily) +Once you have ``normalized/daily_totals.csv``, you can do a quick first pass with pandas. +This snippet prints a few summary stats and writes a simple plot: -Option B — Run the existing “My Own Data” explore scaffold ----------------------------------------------------------- +.. code-block:: console -PyStatsV1 includes a beginner-friendly scaffold script: + python - <<'PY' + import pandas as pd + import matplotlib.pyplot as plt + from pathlib import Path -``scripts/my_data_01_explore.py`` + csv_path = Path("byod/gnucash_demo/normalized/daily_totals.csv") + outdir = Path("outputs/gnucash_demo") + outdir.mkdir(parents=True, exist_ok=True) -Run it against the daily totals CSV (either the prebuilt download, or the file you generated above): + df = pd.read_csv(csv_path, parse_dates=["date"]) + df = df.sort_values("date") -.. 
code-block:: console + print(df.describe(include="all")) - python scripts/my_data_01_explore.py --csv byod/gnucash_demo/normalized/daily_totals.csv --outdir outputs/gnucash_demo + ax = df.plot(x="date", y=["revenue_proxy", "expenses_proxy", "net_proxy"]) + ax.figure.tight_layout() + ax.figure.savefig(outdir / "daily_totals.png") + print(f"Wrote: {outdir / 'daily_totals.png'}") + PY -This will write a few simple outputs under ``outputs/gnucash_demo/`` (tables + quick plots). +(If you *are* working from the repo source, you can also adapt ``scripts/my_data_01_explore.py`` to your needs.) diff --git a/examples/trackd_byod_gnucash_demo/README.md b/examples/trackd_byod_gnucash_demo/README.md index f8489f6..f80197e 100644 --- a/examples/trackd_byod_gnucash_demo/README.md +++ b/examples/trackd_byod_gnucash_demo/README.md @@ -6,7 +6,11 @@ This is a small runnable example project for the Track D BYOD pipeline using the ## Run ```bash +# 1) Normalize the raw export into canonical Track D tables pystatsv1 trackd byod normalize --project . + +# 2) Produce an analysis-ready daily time series +pystatsv1 trackd byod daily-totals --project . ``` Outputs are written under `normalized/`. diff --git a/pyproject.toml b/pyproject.toml index ce754f1..41ca029 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "pystatsv1" -version = "0.22.3" +version = "0.22.4" description = "PyStatsV1: applied statistics labs in Python." 
def cmd_trackd_byod_daily_totals(args: argparse.Namespace) -> int:
    """Handle ``pystatsv1 trackd byod daily-totals``.

    Calls ``build_daily_totals`` with ``args.project`` and ``args.out`` and
    prints a short summary of the returned report.

    Returns
    -------
    int
        ``0`` on success; ``1`` when ``build_daily_totals`` raises
        ``TrackDDataError`` (the error text is printed instead of a
        traceback).
    """
    # Imported lazily, inside the handler, exactly as the original does.
    from pystatsv1.trackd import TrackDDataError
    from pystatsv1.trackd.byod import build_daily_totals

    try:
        summary = build_daily_totals(args.project, out=args.out)
    except TrackDDataError as err:
        print(str(err))
        return 1

    message = textwrap.dedent(
        f"""\
        Track D BYOD daily totals written.

        Project: {summary.get('project')}
        Output: {summary.get('out')}
        Days: {summary.get('days')}
        """
    ).rstrip()
    print(message)
    return 0
# --- Phase 3 helper: daily totals from normalized GL ---

def _parse_decimal_money(value: str) -> Decimal:
    """Parse a money-like string into a finite Decimal.

    Notes
    -----
    - Accepts blanks (and ``None``) and treats them as zero.
    - Strips commas so values like "1,234.56" work.
    - Rejects non-finite values such as "NaN" or "Infinity": ``Decimal``
      parses them without error, but they cannot be rendered as a
      two-decimal amount later on.

    Raises
    ------
    TrackDDataError
        If the text is not a finite decimal number.
    """

    raw = (value or "").strip()
    if not raw:
        return Decimal("0")

    try:
        amount = Decimal(raw.replace(",", ""))
    except InvalidOperation as exc:
        raise TrackDDataError(f"Invalid money amount: {value!r}") from exc

    if not amount.is_finite():
        raise TrackDDataError(f"Invalid money amount: {value!r}")
    return amount


def _fmt_2dp(x: Decimal) -> str:
    """Format *x* with exactly two decimal places, never as "-0.00"."""

    q = x.quantize(Decimal("0.01"))
    if q.is_zero():
        # A tiny negative amount quantizes to negative zero; drop the sign so
        # the CSV shows "0.00" rather than "-0.00".
        q = q.copy_abs()
    return f"{q:.2f}"


def build_daily_totals(project: PathLike, *, out: PathLike | None = None) -> dict[str, Any]:
    """Compute daily revenue/expense proxies from normalized Track D tables.

    This helper is designed for students using **pip-installed** PyStatsV1.
    It turns:

    - normalized/gl_journal.csv
    - normalized/chart_of_accounts.csv

    into a small analysis-ready table:

    - date
    - revenue_proxy  (net credits to Revenue accounts)
    - expenses_proxy (net debits to Expense accounts)
    - net_proxy      (revenue_proxy - expenses_proxy)

    Journal rows with a blank date are skipped, and rows whose account is not
    typed ``Revenue`` or ``Expense`` in the chart of accounts do not
    contribute to any total.

    Parameters
    ----------
    project:
        BYOD project root (created by ``pystatsv1 trackd byod init``).
    out:
        Optional output CSV path. Default: ``normalized/daily_totals.csv``
        inside the project folder.

    Returns
    -------
    dict
        Report with keys: ok, project, out, days, rows.

    Raises
    ------
    TrackDDataError
        If a normalized input table is missing, or a debit/credit value is
        not a finite decimal number.
    """

    root = Path(project).expanduser().resolve()
    normalized = root / "normalized"
    gl_path = normalized / "gl_journal.csv"
    coa_path = normalized / "chart_of_accounts.csv"

    # Same check order as before (journal first, then chart of accounts); the
    # hint now spells out the project path so it can be copy-pasted.
    for required in (gl_path, coa_path):
        if not required.exists():
            raise TrackDDataError(
                f"Missing normalized/{required.name} under: {root}\n"
                f"Run: pystatsv1 trackd byod normalize --project {root}"
            )

    # Map account_id -> account_type
    acct_type: dict[str, str] = {}
    with coa_path.open("r", newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            aid = (row.get("account_id") or "").strip()
            if aid:
                acct_type[aid] = (row.get("account_type") or "").strip()

    # Decimal() is Decimal("0"), so the class itself works as the factory.
    rev_by_date: dict[str, Decimal] = defaultdict(Decimal)
    exp_by_date: dict[str, Decimal] = defaultdict(Decimal)

    with gl_path.open("r", newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            date = (row.get("date") or "").strip()
            if not date:
                continue

            aid = (row.get("account_id") or "").strip()
            at = acct_type.get(aid, "")

            # Amounts are parsed for every dated row, so a malformed value is
            # reported even when the account is neither Revenue nor Expense.
            debit = _parse_decimal_money(row.get("debit") or "")
            credit = _parse_decimal_money(row.get("credit") or "")

            if at == "Revenue":
                # Revenue is credit-normal; net revenue is credits minus debits.
                rev_by_date[date] += credit - debit
            elif at == "Expense":
                # Expenses are debit-normal; net expense is debits minus credits.
                exp_by_date[date] += debit - credit

    out_path = Path(out).expanduser().resolve() if out else normalized / "daily_totals.csv"
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # Dates are sorted as plain strings, which is chronological for ISO
    # (YYYY-MM-DD) dates.
    dates = sorted(set(rev_by_date) | set(exp_by_date))
    zero = Decimal("0")

    with out_path.open("w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(
            f,
            fieldnames=("date", "revenue_proxy", "expenses_proxy", "net_proxy"),
        )
        writer.writeheader()
        for d in dates:
            # .get() avoids inserting new keys into the defaultdicts.
            r = rev_by_date.get(d, zero)
            e = exp_by_date.get(d, zero)
            writer.writerow(
                {
                    "date": d,
                    "revenue_proxy": _fmt_2dp(r),
                    "expenses_proxy": _fmt_2dp(e),
                    "net_proxy": _fmt_2dp(r - e),
                }
            )

    return {
        "ok": True,
        "project": str(root),
        "out": str(out_path),
        "days": len(dates),
        # Exactly one row is written per distinct date.
        "rows": len(dates),
    }