From 3bc32af8f6ddc6744cac094d23a072d14d15efb8 Mon Sep 17 00:00:00 2001
From: Yang Yang
Date: Mon, 9 Mar 2026 23:16:44 -0700
Subject: [PATCH] feat(privacy): implement GDPR-ready PII export & delete workflow (#76)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements the PII Export & Delete Workflow requested in issue #76.

## Changes

### New: packages/backend/app/routes/privacy.py
- GET /privacy/export — generates a downloadable ZIP archive containing
  all personal data for the authenticated user (profile, expenses,
  categories, bills, reminders, recurring expenses, subscriptions) as a
  JSON payload plus a human-readable README.txt.
- POST /privacy/delete — permanently and irreversibly deletes the user
  account after verifying the current password. ON DELETE CASCADE in the
  DB removes all related rows. The audit entry is flushed before deletion
  so the record survives the user row being removed.
- GET /privacy/audit-log — returns the caller's own audit-trail entries
  with pagination (limit/offset).

### Updated: app/models.py
- Added optional `details` column to AuditLog for richer event metadata.

### Updated: app/db/schema.sql
- Added `details VARCHAR(500)` column to audit_logs with a safe
  ADD COLUMN IF NOT EXISTS migration guard.

### Updated: app/__init__.py
- Schema compatibility patch now also runs the audit_logs.details ALTER.

### New: packages/backend/tests/test_privacy.py
- 16 tests covering export, delete, and audit-log endpoints including
  auth enforcement, ZIP structure, password confirmation, cascade
  deletion, and audit-trail persistence.

Closes #76 Part of Algora bounty — https://console.algora.io/challenges --- packages/backend/app/__init__.py | 9 +- packages/backend/app/db/schema.sql | 7 + packages/backend/app/models.py | 2 + packages/backend/app/routes/__init__.py | 2 + packages/backend/app/routes/privacy.py | 320 ++++++++++++++++++++++++ packages/backend/tests/test_privacy.py | 204 +++++++++++++++ 6 files changed, 543 insertions(+), 1 deletion(-) create mode 100644 packages/backend/app/routes/privacy.py create mode 100644 packages/backend/tests/test_privacy.py diff --git a/packages/backend/app/__init__.py b/packages/backend/app/__init__.py index cdf76b4..d16f87d 100644 --- a/packages/backend/app/__init__.py +++ b/packages/backend/app/__init__.py @@ -110,10 +110,17 @@ def _ensure_schema_compatibility(app: Flask) -> None: NOT NULL DEFAULT 'INR' """ ) + # Added in issue-76: audit_logs.details for richer PII audit trail + cur.execute( + """ + ALTER TABLE audit_logs + ADD COLUMN IF NOT EXISTS details VARCHAR(500) + """ + ) conn.commit() except Exception: app.logger.exception( - "Schema compatibility patch failed for users.preferred_currency" + "Schema compatibility patch failed" ) conn.rollback() finally: diff --git a/packages/backend/app/db/schema.sql b/packages/backend/app/db/schema.sql index 410189d..7b63717 100644 --- a/packages/backend/app/db/schema.sql +++ b/packages/backend/app/db/schema.sql @@ -121,5 +121,12 @@ CREATE TABLE IF NOT EXISTS audit_logs ( id SERIAL PRIMARY KEY, user_id INT REFERENCES users(id) ON DELETE SET NULL, action VARCHAR(100) NOT NULL, + -- details holds optional free-text metadata for the audit event + -- (e.g. which endpoint triggered the event, anonymised IP, etc.) 
+ details VARCHAR(500), created_at TIMESTAMP NOT NULL DEFAULT NOW() ); + +-- Migration: add details column if upgrading from an older schema +ALTER TABLE audit_logs + ADD COLUMN IF NOT EXISTS details VARCHAR(500); diff --git a/packages/backend/app/models.py b/packages/backend/app/models.py index 64d4481..cb5689f 100644 --- a/packages/backend/app/models.py +++ b/packages/backend/app/models.py @@ -132,4 +132,6 @@ class AuditLog(db.Model): id = db.Column(db.Integer, primary_key=True) user_id = db.Column(db.Integer, db.ForeignKey("users.id"), nullable=True) action = db.Column(db.String(100), nullable=False) + # details stores optional metadata (e.g. IP, request context) as free text. + details = db.Column(db.String(500), nullable=True) created_at = db.Column(db.DateTime, default=datetime.utcnow, nullable=False) diff --git a/packages/backend/app/routes/__init__.py b/packages/backend/app/routes/__init__.py index f13b0f8..87d6dea 100644 --- a/packages/backend/app/routes/__init__.py +++ b/packages/backend/app/routes/__init__.py @@ -7,6 +7,7 @@ from .categories import bp as categories_bp from .docs import bp as docs_bp from .dashboard import bp as dashboard_bp +from .privacy import bp as privacy_bp def register_routes(app: Flask): @@ -18,3 +19,4 @@ def register_routes(app: Flask): app.register_blueprint(categories_bp, url_prefix="/categories") app.register_blueprint(docs_bp, url_prefix="/docs") app.register_blueprint(dashboard_bp, url_prefix="/dashboard") + app.register_blueprint(privacy_bp, url_prefix="/privacy") diff --git a/packages/backend/app/routes/privacy.py b/packages/backend/app/routes/privacy.py new file mode 100644 index 0000000..c6502b1 --- /dev/null +++ b/packages/backend/app/routes/privacy.py @@ -0,0 +1,320 @@ +""" +Privacy routes – GDPR-ready PII export & account deletion. 
+
+Endpoints
+---------
+GET  /privacy/export     Download a ZIP of all user personal data (JSON)
+POST /privacy/delete     Permanently delete account (requires password)
+GET  /privacy/audit-log  View the caller's own audit-trail entries
+"""
+
+from __future__ import annotations
+
+import io
+import json
+import logging
+import zipfile
+from datetime import datetime
+
+from flask import Blueprint, jsonify, request, send_file
+from flask_jwt_extended import get_jwt_identity, jwt_required
+from werkzeug.security import check_password_hash
+
+from ..extensions import db
+from ..models import (
+    AuditLog,
+    Bill,
+    Category,
+    Expense,
+    RecurringExpense,
+    Reminder,
+    User,
+    UserSubscription,
+)
+
+bp = Blueprint("privacy", __name__)
+logger = logging.getLogger("finmind.privacy")
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _log_audit(
+    user_id: int, action: str, details: str | None = None, *, commit: bool = True
+) -> None:
+    """Add an entry to the audit_logs table.
+
+    Commits immediately unless ``commit`` is False, in which case the entry
+    stays pending in the session and is saved by the caller's own commit.
+    """
+    # Falsy details (None or "") are stored as NULL.
+    entry = AuditLog(user_id=user_id, action=action, details=details or None)
+    db.session.add(entry)
+    if commit:
+        db.session.commit()
+    logger.info("audit user_id=%s action=%s", user_id, action)
+
+
+# The _serialize_* helpers below turn one ORM row into a JSON-safe dict:
+# amounts are stringified, date/time fields become ISO-8601 strings (or
+# None), and a cadence is reduced to its ``.value`` when it has one.
+def _serialize_user(user: User) -> dict:
+    return {
+        "id": user.id,
+        "email": user.email,
+        "preferred_currency": user.preferred_currency,
+        "role": user.role,
+        "created_at": user.created_at.isoformat() if user.created_at else None,
+    }
+
+
+def _serialize_expense(e: Expense) -> dict:
+    return {
+        "id": e.id,
+        "amount": str(e.amount),
+        "currency": e.currency,
+        "expense_type": e.expense_type,
+        "notes": e.notes,
+        "spent_at": e.spent_at.isoformat() if e.spent_at else None,
+        "category_id": e.category_id,
+        "created_at": e.created_at.isoformat() if e.created_at else None,
+    }
+
+
+def _serialize_category(c: Category) -> dict:
+    return {
+        "id": c.id,
+        "name": c.name,
+        "created_at": c.created_at.isoformat() if c.created_at else None,
+    }
+
+
+def _serialize_bill(b: Bill) -> dict:
+    return {
+        "id": b.id,
+        "name": b.name,
+        "amount": str(b.amount),
+        "currency": b.currency,
+        "next_due_date": b.next_due_date.isoformat() if b.next_due_date else None,
+        "cadence": b.cadence.value if hasattr(b.cadence, "value") else str(b.cadence),
+        "autopay_enabled": b.autopay_enabled,
+        "active": b.active,
+        "created_at": b.created_at.isoformat() if b.created_at else None,
+    }
+
+
+def _serialize_reminder(r: Reminder) -> dict:
+    return {
+        "id": r.id,
+        "bill_id": r.bill_id,
+        "message": r.message,
+        "send_at": r.send_at.isoformat() if r.send_at else None,
+        "sent": r.sent,
+        "channel": r.channel,
+    }
+
+
+def _serialize_recurring(r: RecurringExpense) -> dict:
+    return {
+        "id": r.id,
+        "amount": str(r.amount),
+        "currency": r.currency,
+        "expense_type": r.expense_type,
+        "notes": r.notes,
+        "cadence": r.cadence.value if hasattr(r.cadence, "value") else str(r.cadence),
+        "start_date": r.start_date.isoformat() if r.start_date else None,
+        "end_date": r.end_date.isoformat() if r.end_date else None,
+        "active": r.active,
+        "created_at": r.created_at.isoformat() if r.created_at else None,
+    }
+
+
+def _serialize_subscription(s: UserSubscription) -> dict:
+    return {
+        "id": s.id,
+        "plan_id": s.plan_id,
+        "active": s.active,
+        "started_at": s.started_at.isoformat() if s.started_at else None,
+    }
+
+
+def _build_export_payload(uid: int) -> dict:
+    """Collect all personal data for a user into a serialisable dict."""
+    user = db.session.get(User, uid)
+    if not user:
+        return {}
+
+    expenses = db.session.query(Expense).filter_by(user_id=uid).all()
+    categories = db.session.query(Category).filter_by(user_id=uid).all()
+    bills = db.session.query(Bill).filter_by(user_id=uid).all()
+    reminders = db.session.query(Reminder).filter_by(user_id=uid).all()
+    recurrings = db.session.query(RecurringExpense).filter_by(user_id=uid).all()
+    subscriptions = db.session.query(UserSubscription).filter_by(user_id=uid).all()
+
+    return {
+        "exported_at": datetime.utcnow().isoformat() + "Z",
+        "profile": _serialize_user(user),
+        "expenses": [_serialize_expense(e) for e in expenses],
+        "categories": [_serialize_category(c) for c in categories],
+        "bills": [_serialize_bill(b) for b in bills],
+        "reminders": [_serialize_reminder(r) for r in reminders],
+        "recurring_expenses": [_serialize_recurring(r) for r in recurrings],
+        "subscriptions": [_serialize_subscription(s) for s in subscriptions],
+    }
+
+
+# ---------------------------------------------------------------------------
+# Routes
+# ---------------------------------------------------------------------------
+
+
+@bp.get("/export")
+@jwt_required()
+def export_data():
+    """
+    Generate and download a ZIP archive containing all personal data.
+
+    Returns a ``finmind_export_<uid>.zip`` file that contains ``data.json``
+    (every record belonging to the authenticated user) and a README.txt.
+    """
+    uid = int(get_jwt_identity())
+    user = db.session.get(User, uid)
+    if not user:
+        return jsonify(error="user not found"), 404
+
+    payload = _build_export_payload(uid)
+
+    # Build the in-memory ZIP
+    buf = io.BytesIO()
+    with zipfile.ZipFile(buf, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
+        zf.writestr(
+            "data.json",
+            json.dumps(payload, indent=2, ensure_ascii=False),
+        )
+        # Include a human-readable README inside the ZIP
+        readme = (
+            "FinMind Personal Data Export\n"
+            "============================\n\n"
+            f"Exported at : {payload['exported_at']}\n"
+            f"User email  : {user.email}\n\n"
+            "This archive contains all personal data held by FinMind for your\n"
+            "account in machine-readable JSON format (data.json).\n\n"
+            "To request erasure of this data, use the POST /privacy/delete\n"
+            "endpoint or contact privacy@finmind.app.\n"
+        )
+        zf.writestr("README.txt", readme)
+
+    buf.seek(0)
+
+    # Audit trail
+    _log_audit(uid, "PII_EXPORT", details=f"export requested by user {uid}")
+
+    logger.info("PII export served for user_id=%s", uid)
+    return send_file(
+        buf,
+        mimetype="application/zip",
+        as_attachment=True,
+        download_name=f"finmind_export_{uid}.zip",
+    )
+
+
+@bp.post("/delete")
+@jwt_required()
+def delete_account():
+    """
+    Permanently and irreversibly delete the authenticated user's account.
+
+    Requires ``{"password": "<current password>"}`` in the request body as a
+    confirmation step to prevent accidental or unauthorised deletions.
+
+    All related data (expenses, bills, reminders, etc.) is removed via
+    ON DELETE CASCADE constraints. The action is recorded in ``audit_logs``
+    *before* the user row is deleted so the record is preserved even after
+    the user is gone (ON DELETE SET NULL then clears its user_id).
+    """
+    uid = int(get_jwt_identity())
+    user = db.session.get(User, uid)
+    if not user:
+        return jsonify(error="user not found"), 404
+
+    # silent=True: a missing or malformed JSON body falls through to the 400
+    # below instead of raising inside Flask.
+    data = request.get_json(silent=True)
+    password = data.get("password") if isinstance(data, dict) else None
+
+    if not password or not isinstance(password, str):
+        return jsonify(error="password confirmation required"), 400
+
+    if not check_password_hash(user.password_hash, password):
+        logger.warning("Delete account: wrong password for user_id=%s", uid)
+        return jsonify(error="incorrect password"), 403
+
+    # Stage the audit row BEFORE deletion, in the same transaction, so the
+    # two succeed or fail together. The row outlives the account (its FK is
+    # set to NULL), so it records only the numeric id – never the email or
+    # other PII, which would defeat the erasure request.
+    audit_details = f"permanent account deletion for user_id={uid}"
+    _log_audit(uid, "ACCOUNT_DELETED", details=audit_details, commit=False)
+    db.session.flush()  # persist audit row while user still exists
+
+    # Delete the user – ON DELETE CASCADE handles related rows
+    db.session.delete(user)
+    db.session.commit()
+
+    logger.warning("Account permanently deleted user_id=%s", uid)
+    return (
+        jsonify(
+            message=(
+                "Your account and all associated data have been permanently deleted. "
+                "This action cannot be undone."
+            )
+        ),
+        200,
+    )
+
+
+@bp.get("/audit-log")
+@jwt_required()
+def get_audit_log():
+    """
+    Return the caller's own audit-trail entries (most recent first).
+
+    Optional query params:
+      - limit   (int, default 50, clamped to 1..200)
+      - offset  (int, default 0, negative values treated as 0)
+    """
+    uid = int(get_jwt_identity())
+    user = db.session.get(User, uid)
+    if not user:
+        return jsonify(error="user not found"), 404
+
+    try:
+        # Clamp both ends: a negative LIMIT is an error on PostgreSQL and
+        # means "no limit" on SQLite, which would bypass the 200-row cap.
+        limit = max(1, min(int(request.args.get("limit", 50)), 200))
+        offset = max(int(request.args.get("offset", 0)), 0)
+    except (TypeError, ValueError):
+        return jsonify(error="limit and offset must be integers"), 400
+
+    entries = (
+        db.session.query(AuditLog)
+        .filter_by(user_id=uid)
+        .order_by(AuditLog.created_at.desc(), AuditLog.id.desc())
+        .limit(limit)
+        .offset(offset)
+        .all()
+    )
+
+    result = [
+        {
+            "id": e.id,
+            "action": e.action,
+            "created_at": e.created_at.isoformat() if e.created_at else None,
+            "details": e.details,
+        }
+        for e in entries
+    ]
+
+    return jsonify(audit_log=result, limit=limit, offset=offset)
diff --git a/packages/backend/tests/test_privacy.py b/packages/backend/tests/test_privacy.py
new file mode 100644
index 0000000..f5895ea
--- /dev/null
+++ b/packages/backend/tests/test_privacy.py
@@ -0,0 +1,204 @@
+"""
+Tests for the GDPR-ready PII export & delete workflow (issue #76).
+
+Covers:
+  - GET  /privacy/export     → ZIP download containing data.json
+  - POST /privacy/delete     → irreversible account deletion
+  - GET  /privacy/audit-log  → audit trail retrieval
+  - Audit entries created for export and delete events
+"""
+
+import io
+import json
+import zipfile
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _register_and_login(client, email="privacy@test.com", password="s3cr3t!"):
+    r = client.post("/auth/register", json={"email": email, "password": password})
+    assert r.status_code in (201, 409)
+    r = client.post("/auth/login", json={"email": email, "password": password})
+    assert r.status_code == 200
+    token = r.get_json()["access_token"]
+    return {"Authorization": f"Bearer {token}"}
+
+
+# ---------------------------------------------------------------------------
+# Export tests
+# ---------------------------------------------------------------------------
+
+
+class TestPIIExport:
+    def test_export_requires_auth(self, client):
+        r = client.get("/privacy/export")
+        assert r.status_code == 401
+
+    def test_export_returns_zip(self, client):
+        auth = _register_and_login(client)
+        r = client.get("/privacy/export", headers=auth)
+        assert r.status_code == 200
+        assert "zip" in r.content_type
+
+    def test_export_zip_contains_data_json(self, client):
+        auth = _register_and_login(client, "export2@test.com")
+        r = client.get("/privacy/export", headers=auth)
+        assert r.status_code == 200
+
+        buf = io.BytesIO(r.data)
+        with zipfile.ZipFile(buf) as zf:
+            names = zf.namelist()
+            assert "data.json" in names
+            assert "README.txt" in names
+            json.loads(zf.read("data.json"))
+
+    def test_export_data_json_has_expected_keys(self, client):
+        auth = _register_and_login(client, "export3@test.com")
+        r = client.get("/privacy/export", headers=auth)
+        buf = io.BytesIO(r.data)
+        with zipfile.ZipFile(buf) as zf:
+            data = json.loads(zf.read("data.json"))
+
+        assert "profile" in data
+        assert "expenses" in data
+        assert "categories" in data
+        assert "bills" in data
+        assert "reminders" in data
+        assert "recurring_expenses" in data
+        assert "subscriptions" in data
+        assert "exported_at" in data
+
+    def test_export_profile_email_matches(self, client):
+        email = "exportprofile@test.com"
+        auth = _register_and_login(client, email)
+        r = client.get("/privacy/export", headers=auth)
+        buf = io.BytesIO(r.data)
+        with zipfile.ZipFile(buf) as zf:
+            data = json.loads(zf.read("data.json"))
+
+        assert data["profile"]["email"] == email
+
+    def test_export_creates_audit_entry(self, client):
+        auth = _register_and_login(client, "exportaudit@test.com")
+        client.get("/privacy/export", headers=auth)
+
+        # Check audit log via the audit-log endpoint
+        r = client.get("/privacy/audit-log", headers=auth)
+        assert r.status_code == 200
+        actions = [e["action"] for e in r.get_json()["audit_log"]]
+        assert "PII_EXPORT" in actions
+
+
+# ---------------------------------------------------------------------------
+# Delete tests
+# ---------------------------------------------------------------------------
+
+
+class TestAccountDeletion:
+    def test_delete_requires_auth(self, client):
+        r = client.post("/privacy/delete", json={"password": "whatever"})
+        assert r.status_code == 401
+
+    def test_delete_requires_password_field(self, client):
+        auth = _register_and_login(client, "delreq@test.com")
+        r = client.post("/privacy/delete", json={}, headers=auth)
+        assert r.status_code == 400
+
+    def test_delete_rejects_wrong_password(self, client):
+        auth = _register_and_login(client, "delwrong@test.com")
+        r = client.post(
+            "/privacy/delete", json={"password": "wrong-password"}, headers=auth
+        )
+        assert r.status_code == 403
+
+    def test_delete_succeeds_with_correct_password(self, client):
+        email = "delsuccess@test.com"
+        password = "correct-pass-123"
+        auth = _register_and_login(client, email, password)
+
+        r = client.post("/privacy/delete", json={"password": password}, headers=auth)
+        assert r.status_code == 200
+        body = r.get_json()
+        assert "permanently deleted" in body["message"].lower()
+
+    def test_delete_is_irreversible_user_gone(self, client):
+        email = "delirrev@test.com"
+        password = "irrev-pass-456"
+        auth = _register_and_login(client, email, password)
+
+        # Delete the account
+        r = client.post("/privacy/delete", json={"password": password}, headers=auth)
+        assert r.status_code == 200
+
+        # The same JWT should now fail to reach /auth/me
+        r = client.get("/auth/me", headers=auth)
+        assert r.status_code == 404
+
+    def test_delete_creates_audit_entry(self, client, app_fixture):
+        """Audit log entry with ACCOUNT_DELETED must survive account removal."""
+        from app.models import AuditLog
+        from app.extensions import db
+
+        email = "delaudit@test.com"
+        password = "audit-pass-789"
+        auth = _register_and_login(client, email, password)
+
+        # Delete the account
+        r = client.post("/privacy/delete", json={"password": password}, headers=auth)
+        assert r.status_code == 200
+
+        # The audit row should exist even though the user is gone
+        with app_fixture.app_context():
+            entry = (
+                db.session.query(AuditLog).filter_by(action="ACCOUNT_DELETED").first()
+            )
+            assert entry is not None
+
+    def test_cannot_delete_twice_with_old_token(self, client):
+        email = "deltwice@test.com"
+        password = "twice-pass-000"
+        auth = _register_and_login(client, email, password)
+
+        # First deletion succeeds
+        r = client.post("/privacy/delete", json={"password": password}, headers=auth)
+        assert r.status_code == 200
+
+        # Second attempt with the same (now-invalid) token should 404
+        r = client.post("/privacy/delete", json={"password": password}, headers=auth)
+        assert r.status_code == 404
+
+
+# ---------------------------------------------------------------------------
+# Audit-log endpoint tests
+# ---------------------------------------------------------------------------
+
+
+class TestAuditLog:
+    def test_audit_log_requires_auth(self, client):
+        r = client.get("/privacy/audit-log")
+        assert r.status_code == 401
+
+    def test_audit_log_empty_initially(self, client):
+        auth = _register_and_login(client, "auditclean@test.com")
+        r = client.get("/privacy/audit-log", headers=auth)
+        assert r.status_code == 200
+        data = r.get_json()
+        assert "audit_log" in data
+        assert isinstance(data["audit_log"], list)
+
+    def test_audit_log_pagination(self, client):
+        auth = _register_and_login(client, "auditpag@test.com")
+
+        # Three exports write three audit rows, so limit=2 must truncate
+        for _ in range(3):
+            client.get("/privacy/export", headers=auth)
+
+        r = client.get("/privacy/audit-log?limit=2&offset=0", headers=auth)
+        assert r.status_code == 200
+        data = r.get_json()
+        assert len(data["audit_log"]) == 2
+        assert data["limit"] == 2
+        assert data["offset"] == 0