diff --git a/README.md b/README.md index 9206ed3..8789128 100644 --- a/README.md +++ b/README.md @@ -307,10 +307,10 @@ Open-source engines for temporal AI. Render the past. Simulate the future. Score | **SNAG Bench** | Open Source | timepoint-snag-bench | Quality Certifier — measures Causal Resolution across renderings | | **Proteus** | Open Source | proteus | Settlement Layer — prediction markets that validate Rendered Futures | | **TDF** | Open Source | timepoint-tdf | Data Format — JSON-LD interchange across all services | -| **Web App** | Private | timepoint-web-app | Browser client at app.timepointai.com | -| **iPhone App** | Private | timepoint-iphone-app | iOS client — Synthetic Time Travel on mobile | -| **Billing** | Private | timepoint-billing | Payment processing — Apple IAP + Stripe | -| **Landing** | Private | timepoint-landing | Marketing site at timepointai.com | +| **Web App** | — | — | Browser client at app.timepointai.com | +| **iPhone App** | — | — | iOS client — Synthetic Time Travel on mobile | +| **Billing** | — | — | Payment processing — Apple IAP + Stripe | +| **Landing** | — | — | Marketing site at timepointai.com | --- diff --git a/app/api/v1/models.py b/app/api/v1/models.py index 4ae5ec7..14480fa 100644 --- a/app/api/v1/models.py +++ b/app/api/v1/models.py @@ -141,6 +141,36 @@ def get_configured_models() -> list[ModelInfo]: capabilities=["image_generation"], pricing={"prompt": 0.00012, "completion": 0.0}, ), + # NVIDIA Nemotron + ModelInfo( + id="nvidia/llama-3.3-nemotron-super-49b-v1.5", + name="Nemotron Super 49B v1.5", + provider="openrouter", + capabilities=["text"], + context_length=131072, + ), + ModelInfo( + id="nvidia/llama-3.1-nemotron-70b-instruct", + name="Nemotron 70B Instruct", + provider="openrouter", + capabilities=["text"], + context_length=131072, + ), + # NousResearch Hermes + ModelInfo( + id="nousresearch/hermes-4-70b", + name="Hermes 4 70B", + provider="openrouter", + capabilities=["text"], + context_length=131072, + 
), + ModelInfo( + id="nousresearch/hermes-3-llama-3.1-405b", + name="Hermes 3 405B", + provider="openrouter", + capabilities=["text"], + context_length=131072, + ), ]) return models diff --git a/app/api/v1/timepoints.py b/app/api/v1/timepoints.py index fd40669..db9d2fb 100644 --- a/app/api/v1/timepoints.py +++ b/app/api/v1/timepoints.py @@ -234,10 +234,14 @@ def _get_permissive_text_model() -> str: preference = [ "meta-llama/llama-4-scout-17b-16e-instruct", "meta-llama/llama-4-maverick-17b-128e-instruct", - "deepseek/deepseek-chat-v3-0324", # Fast chat model - "qwen/qwen3-30b-a3b", # Fast MoE model + "nvidia/llama-3.3-nemotron-super-49b-v1.5", # Nemotron Super, fast MoE + "nousresearch/hermes-4-70b", # Hermes 4 70B, strong reasoning + "deepseek/deepseek-chat-v3-0324", # Fast chat model + "qwen/qwen3-30b-a3b", # Fast MoE model + "nvidia/nemotron-3-nano-30b-a3b", # Nemotron Nano, very fast + "nousresearch/hermes-3-llama-3.1-70b", # Hermes 3 70B fallback "mistralai/mistral-small-3.2-24b-instruct", - "qwen/qwen3-235b-a22b", # Large but non-thinking + "qwen/qwen3-235b-a22b", # Large but non-thinking "deepseek/deepseek-r1-0528", # Thinking model — slow, last resort ] for model_id in preference: diff --git a/app/config.py b/app/config.py index c36255c..27c289a 100644 --- a/app/config.py +++ b/app/config.py @@ -54,12 +54,14 @@ class QualityPreset(str, Enum): - HYPER: Fastest speed with Gemini 2.0 Flash via OpenRouter - BALANCED: Default balance of quality and speed - GEMINI3: Latest Gemini 3 Flash Preview via OpenRouter (thinking model) + - FREE_DISTILLABLE: Free distillable models — $0 cost, outputs usable for training/distillation """ HD = "hd" HYPER = "hyper" BALANCED = "balanced" GEMINI3 = "gemini3" + FREE_DISTILLABLE = "free_distillable" class Environment(str, Enum): @@ -104,6 +106,26 @@ class VerifiedModels: "google/gemini-2.0-flash-001", # Fast, handles JSON well "google/gemini-2.0-flash-001:free", # Free tier (rate limited) "google/gemini-3-flash-preview", # Latest 
thinking model, agentic workflows + # NVIDIA Nemotron family + "nvidia/llama-3.1-nemotron-70b-instruct", + "nvidia/llama-3.3-nemotron-super-49b-v1.5", + "nvidia/nemotron-3-nano-30b-a3b", + "nvidia/nemotron-3-nano-30b-a3b:free", + "nvidia/nemotron-3-super-120b-a12b:free", + "nvidia/nemotron-nano-12b-v2-vl", + "nvidia/nemotron-nano-12b-v2-vl:free", + "nvidia/nemotron-nano-9b-v2", + "nvidia/nemotron-nano-9b-v2:free", + # NousResearch Hermes family + "nousresearch/hermes-2-pro-llama-3-8b", + "nousresearch/hermes-3-llama-3.1-405b", + "nousresearch/hermes-3-llama-3.1-405b:free", + "nousresearch/hermes-3-llama-3.1-70b", + "nousresearch/hermes-4-405b", + "nousresearch/hermes-4-70b", + # OpenRouter free distillable models + "openrouter/hunter-alpha", + "openrouter/healer-alpha", ] # Fallback chains - ordered by preference @@ -228,6 +250,18 @@ def is_verified_or_available(cls, model: str, provider: "ProviderType") -> bool: "thinking_level": "medium", # Gemini 3 supports configurable thinking "image_supported": True, }, + QualityPreset.FREE_DISTILLABLE: { + "name": "Free Distillable", + "description": "Free models with distillation rights — $0 cost, text-only (no image gen)", + "text_model": "openrouter/hunter-alpha", + "judge_model": "openrouter/healer-alpha", + "image_model": None, # No free distillable image models available yet + "image_provider": None, + "text_provider": ProviderType.OPENROUTER, + "max_tokens": 4096, + "thinking_level": None, + "image_supported": False, # Text-only mode + }, } @@ -238,6 +272,7 @@ def is_verified_or_available(cls, model: str, provider: "ProviderType") -> bool: QualityPreset.BALANCED: ParallelismMode.NORMAL, # Default behavior QualityPreset.HYPER: ParallelismMode.MAX, # Speed focus, maximum parallelism QualityPreset.GEMINI3: ParallelismMode.AGGRESSIVE, # Thinking model, moderate parallelism + QualityPreset.FREE_DISTILLABLE: ParallelismMode.SEQUENTIAL, # Free models need sequential } # Provider rate limits (requests per minute and safe 
concurrent calls) @@ -444,7 +479,7 @@ class Settings(BaseSettings): ) CORS_ORIGINS: str = Field( default="", - description="Comma-separated additional CORS origins (e.g. https://your-app.up.railway.app)", + description="Comma-separated additional CORS origins (e.g. https://your-domain.example.com)", ) # Share URL diff --git a/app/core/llm_router.py b/app/core/llm_router.py index 9ad4f2c..5c52d8e 100644 --- a/app/core/llm_router.py +++ b/app/core/llm_router.py @@ -134,6 +134,13 @@ class ModelTier(str, Enum): } +# Known free distillable models (no :free suffix but still free) +FREE_DISTILLABLE_MODELS = { + "openrouter/hunter-alpha", + "openrouter/healer-alpha", +} + + def is_free_model(model_id: str) -> bool: """Check if a model is a free tier model on OpenRouter. @@ -146,7 +153,9 @@ def is_free_model(model_id: str) -> bool: if not model_id: return False model_lower = model_id.lower() - return ":free" in model_lower or "/free" in model_lower + if ":free" in model_lower or "/free" in model_lower: + return True + return model_lower in FREE_DISTILLABLE_MODELS class LLMRouter: diff --git a/app/core/model_capabilities.py b/app/core/model_capabilities.py index 6719121..3fb1cc2 100644 --- a/app/core/model_capabilities.py +++ b/app/core/model_capabilities.py @@ -404,6 +404,196 @@ class TextModelConfig: max_output_tokens=16384, notes="GPT-4o Mini via OpenRouter", ), + # NVIDIA Nemotron family (via OpenRouter) + "nvidia/llama-3.1-nemotron-70b-instruct": TextModelConfig( + model_id="nvidia/llama-3.1-nemotron-70b-instruct", + provider="openrouter", + supports_json_schema=False, + supports_json_mode=True, + supports_function_calling=True, + supports_streaming=True, + supports_extended_thinking=False, + max_output_tokens=8192, + notes="Nemotron 70B instruct, strong reasoning", + ), + "nvidia/llama-3.3-nemotron-super-49b-v1.5": TextModelConfig( + model_id="nvidia/llama-3.3-nemotron-super-49b-v1.5", + provider="openrouter", + supports_json_schema=False, + supports_json_mode=True, + 
supports_function_calling=True, + supports_streaming=True, + supports_extended_thinking=False, + max_output_tokens=8192, + notes="Nemotron Super 49B v1.5, efficient MoE", + ), + "nvidia/nemotron-3-nano-30b-a3b": TextModelConfig( + model_id="nvidia/nemotron-3-nano-30b-a3b", + provider="openrouter", + supports_json_schema=False, + supports_json_mode=True, + supports_function_calling=True, + supports_streaming=True, + supports_extended_thinking=False, + max_output_tokens=8192, + notes="Nemotron 3 Nano 30B, fast MoE (3B active)", + ), + "nvidia/nemotron-3-nano-30b-a3b:free": TextModelConfig( + model_id="nvidia/nemotron-3-nano-30b-a3b:free", + provider="openrouter", + supports_json_schema=False, + supports_json_mode=True, + supports_function_calling=True, + supports_streaming=True, + supports_extended_thinking=False, + max_output_tokens=8192, + notes="Nemotron 3 Nano 30B free tier", + ), + "nvidia/nemotron-3-super-120b-a12b:free": TextModelConfig( + model_id="nvidia/nemotron-3-super-120b-a12b:free", + provider="openrouter", + supports_json_schema=False, + supports_json_mode=True, + supports_function_calling=True, + supports_streaming=True, + supports_extended_thinking=False, + max_output_tokens=8192, + notes="Nemotron 3 Super 120B free tier, large MoE (12B active)", + ), + "nvidia/nemotron-nano-12b-v2-vl": TextModelConfig( + model_id="nvidia/nemotron-nano-12b-v2-vl", + provider="openrouter", + supports_json_schema=False, + supports_json_mode=True, + supports_function_calling=True, + supports_streaming=True, + supports_extended_thinking=False, + max_output_tokens=8192, + notes="Nemotron Nano 12B v2 with vision", + ), + "nvidia/nemotron-nano-12b-v2-vl:free": TextModelConfig( + model_id="nvidia/nemotron-nano-12b-v2-vl:free", + provider="openrouter", + supports_json_schema=False, + supports_json_mode=True, + supports_function_calling=True, + supports_streaming=True, + supports_extended_thinking=False, + max_output_tokens=8192, + notes="Nemotron Nano 12B v2 with vision, free 
tier", + ), + "nvidia/nemotron-nano-9b-v2": TextModelConfig( + model_id="nvidia/nemotron-nano-9b-v2", + provider="openrouter", + supports_json_schema=False, + supports_json_mode=True, + supports_function_calling=True, + supports_streaming=True, + supports_extended_thinking=False, + max_output_tokens=8192, + notes="Nemotron Nano 9B v2, compact and fast", + ), + "nvidia/nemotron-nano-9b-v2:free": TextModelConfig( + model_id="nvidia/nemotron-nano-9b-v2:free", + provider="openrouter", + supports_json_schema=False, + supports_json_mode=True, + supports_function_calling=True, + supports_streaming=True, + supports_extended_thinking=False, + max_output_tokens=8192, + notes="Nemotron Nano 9B v2 free tier", + ), + # NousResearch Hermes family (via OpenRouter) + "nousresearch/hermes-2-pro-llama-3-8b": TextModelConfig( + model_id="nousresearch/hermes-2-pro-llama-3-8b", + provider="openrouter", + supports_json_schema=False, + supports_json_mode=True, + supports_function_calling=True, + supports_streaming=True, + supports_extended_thinking=False, + max_output_tokens=8192, + notes="Hermes 2 Pro 8B, compact function-calling model", + ), + "nousresearch/hermes-3-llama-3.1-405b": TextModelConfig( + model_id="nousresearch/hermes-3-llama-3.1-405b", + provider="openrouter", + supports_json_schema=False, + supports_json_mode=True, + supports_function_calling=True, + supports_streaming=True, + supports_extended_thinking=False, + max_output_tokens=8192, + notes="Hermes 3 405B, flagship open-weight model", + ), + "nousresearch/hermes-3-llama-3.1-405b:free": TextModelConfig( + model_id="nousresearch/hermes-3-llama-3.1-405b:free", + provider="openrouter", + supports_json_schema=False, + supports_json_mode=True, + supports_function_calling=True, + supports_streaming=True, + supports_extended_thinking=False, + max_output_tokens=8192, + notes="Hermes 3 405B free tier", + ), + "nousresearch/hermes-3-llama-3.1-70b": TextModelConfig( + model_id="nousresearch/hermes-3-llama-3.1-70b", + 
provider="openrouter", + supports_json_schema=False, + supports_json_mode=True, + supports_function_calling=True, + supports_streaming=True, + supports_extended_thinking=False, + max_output_tokens=8192, + notes="Hermes 3 70B, strong general-purpose model", + ), + "nousresearch/hermes-4-405b": TextModelConfig( + model_id="nousresearch/hermes-4-405b", + provider="openrouter", + supports_json_schema=False, + supports_json_mode=True, + supports_function_calling=True, + supports_streaming=True, + supports_extended_thinking=False, + max_output_tokens=8192, + notes="Hermes 4 405B, latest flagship", + ), + "nousresearch/hermes-4-70b": TextModelConfig( + model_id="nousresearch/hermes-4-70b", + provider="openrouter", + supports_json_schema=False, + supports_json_mode=True, + supports_function_calling=True, + supports_streaming=True, + supports_extended_thinking=False, + max_output_tokens=8192, + notes="Hermes 4 70B, strong reasoning", + ), + # OpenRouter free distillable models + "openrouter/hunter-alpha": TextModelConfig( + model_id="openrouter/hunter-alpha", + provider="openrouter", + supports_json_schema=False, + supports_json_mode=True, + supports_function_calling=True, + supports_streaming=True, + supports_extended_thinking=False, + max_output_tokens=32000, + notes="Free distillable, 1M context, text+image input", + ), + "openrouter/healer-alpha": TextModelConfig( + model_id="openrouter/healer-alpha", + provider="openrouter", + supports_json_schema=False, + supports_json_mode=True, + supports_function_calling=True, + supports_streaming=True, + supports_extended_thinking=False, + max_output_tokens=32000, + notes="Free distillable, 262K context, multimodal input", + ), } # Default config for unknown models (conservative - assume JSON mode works) diff --git a/app/core/model_policy.py b/app/core/model_policy.py index ff8145c..4135983 100644 --- a/app/core/model_policy.py +++ b/app/core/model_policy.py @@ -13,15 +13,17 @@ "microsoft/", # Phi family "google/gemma", # Gemma 
open-weight "allenai/", - "nvidia/", + "nvidia/", # Nemotron family + "nousresearch/", # Hermes family (open-weight) "black-forest-labs/", # FLUX open-weight image models + "openrouter/", # OpenRouter free distillable models (Hunter, Healer) ) # Google-native model prefixes (always restricted) GOOGLE_MODEL_PREFIXES = ("gemini", "imagen", "flux-schnell") # Prefixes routed through OpenRouter (may be restricted or permissive) -OPENROUTER_PREFIXES = ("meta-llama/", "anthropic/", "mistralai/", "openai/", "deepseek/", "qwen/", "microsoft/", "black-forest-labs/") +OPENROUTER_PREFIXES = ("meta-llama/", "anthropic/", "mistralai/", "openai/", "deepseek/", "qwen/", "microsoft/", "nvidia/", "nousresearch/", "black-forest-labs/", "openrouter/") def derive_model_provider(model_id: str | None) -> str: @@ -47,8 +49,9 @@ def is_model_permissive(model_id: str | None) -> bool: def derive_model_permissiveness(model_id: str | None) -> str: """Derive distillation licensing permissiveness from a model ID. - Open-weight models (Llama, DeepSeek, Qwen, Mistral, Phi, Gemma) are - 'permissive' — safe for distillation and derivative works. + Open-weight models (Llama, DeepSeek, Qwen, Mistral, Phi, Gemma, + Nemotron, Hermes) and OpenRouter's free distillable models are + 'permissive' — safe for distillation and derivative works. Frontier models (Google Gemini, Anthropic, OpenAI) are 'restricted'. 
""" if not model_id: diff --git a/docs/DEPLOY.md b/docs/DEPLOY.md index aa042ab..81591bf 100644 --- a/docs/DEPLOY.md +++ b/docs/DEPLOY.md @@ -63,14 +63,14 @@ Railway auto-detects the `Dockerfile` and deploys with PostgreSQL, health checks ### Verify ```bash -curl https://your-app.up.railway.app/health +curl https://your-domain.example.com/health # → {"status":"healthy","version":"2.4.0","database":true,"providers":{"google":true,"openrouter":true}} ``` ### Generate a Scene ```bash -curl -X POST https://your-app.up.railway.app/api/v1/timepoints/generate/sync \ +curl -X POST https://your-domain.example.com/api/v1/timepoints/generate/sync \ -H "Content-Type: application/json" \ -d '{"query": "Alan Turing breaks Enigma at Bletchley Park Hut 8, winter 1941", "preset": "balanced", "generate_image": true}' ``` @@ -80,7 +80,7 @@ curl -X POST https://your-app.up.railway.app/api/v1/timepoints/generate/sync \ Run the smoke test workflow manually from GitHub Actions: ```bash -gh workflow run smoke.yml -f target_url=https://your-app.up.railway.app +gh workflow run smoke.yml -f target_url=https://your-domain.example.com ``` --- @@ -150,7 +150,7 @@ Set `CORS_ENABLED=false` when Flash is internal-only and never called from brows The open-source app ships with `NoOpBilling` — all credit checks pass and access is unlimited. The `BillingProvider` protocol in `app/services/billing.py` provides hooks for custom billing integrations. -The deployed version (`timepoint-flash-deploy`) uses a separate billing microservice (`timepoint-billing`) that handles Apple IAP and Stripe payments as its own Railway service with its own PostgreSQL database. The main app proxies billing requests and exposes an internal credits API for the billing service to grant/spend credits after purchases. +The deployed version uses a separate billing microservice that handles Apple IAP and Stripe payments as its own service with its own PostgreSQL database. 
The main app proxies billing requests and exposes an internal credits API for the billing service to grant/spend credits after purchases. --- diff --git a/docs/FIRST-TIME-SETUP.md b/docs/FIRST-TIME-SETUP.md index 5444453..b6e6195 100644 --- a/docs/FIRST-TIME-SETUP.md +++ b/docs/FIRST-TIME-SETUP.md @@ -2,7 +2,7 @@ You're an AI agent with access to a running TIMEPOINT Flash server. This guide shows you how to use it. -**Base URL:** `https://timepoint-flash-deploy-production.up.railway.app` +**Base URL:** `https://flash.timepointai.com` No authentication is required (AUTH_ENABLED=false). All endpoints are open-access. @@ -11,7 +11,7 @@ No authentication is required (AUTH_ENABLED=false). All endpoints are open-acces ## 1. Check the Server ```bash -curl https://timepoint-flash-deploy-production.up.railway.app/health +curl https://flash.timepointai.com/health ``` Expected: @@ -30,7 +30,7 @@ The core operation. Give it a historical moment, get back characters, dialog, re ### Synchronous (simplest) ```bash -curl -X POST https://timepoint-flash-deploy-production.up.railway.app/api/v1/timepoints/generate/sync \ +curl -X POST https://flash.timepointai.com/api/v1/timepoints/generate/sync \ -H "Content-Type: application/json" \ -d '{ "query": "Alan Turing breaks Enigma at Bletchley Park Hut 8, winter 1941", @@ -44,7 +44,7 @@ This blocks for 30-120 seconds and returns the complete scene. 
### Streaming (recommended for UIs) ```bash -curl -X POST https://timepoint-flash-deploy-production.up.railway.app/api/v1/timepoints/generate/stream \ +curl -X POST https://flash.timepointai.com/api/v1/timepoints/generate/stream \ -H "Content-Type: application/json" \ -d '{ "query": "Oppenheimer watches the Trinity test, 5:29 AM July 16 1945", @@ -64,7 +64,7 @@ data: {"event": "done", "progress": 100, "data": {"timepoint_id": "abc123", ...} ### Background (fire and forget) ```bash -curl -X POST https://timepoint-flash-deploy-production.up.railway.app/api/v1/timepoints/generate \ +curl -X POST https://flash.timepointai.com/api/v1/timepoints/generate \ -H "Content-Type: application/json" \ -d '{"query": "Gavrilo Princip at Schiller Deli Sarajevo June 28 1914", "preset": "balanced"}' ``` @@ -96,7 +96,7 @@ Returns immediately with a timepoint ID. Poll `GET /api/v1/timepoints/{id}` unti ## 3. Retrieve a Scene ```bash -curl "https://timepoint-flash-deploy-production.up.railway.app/api/v1/timepoints/{id}?full=true&include_image=true" +curl "https://flash.timepointai.com/api/v1/timepoints/{id}?full=true&include_image=true" ``` - `full=true` — include scene, characters, dialog, relationships @@ -129,7 +129,7 @@ When `AUTH_ENABLED=true`, interaction endpoints require a Bearer JWT and deduct After generating a scene, chat with any character in it: ```bash -curl -X POST https://timepoint-flash-deploy-production.up.railway.app/api/v1/interactions/{timepoint_id}/chat \ +curl -X POST https://flash.timepointai.com/api/v1/interactions/{timepoint_id}/chat \ -H "Content-Type: application/json" \ -d '{ "character": "Oppenheimer", @@ -171,14 +171,14 @@ Jump forward or backward from any scene. 
The new scene preserves characters and **Jump forward:** ```bash -curl -X POST https://timepoint-flash-deploy-production.up.railway.app/api/v1/temporal/{timepoint_id}/next \ +curl -X POST https://flash.timepointai.com/api/v1/temporal/{timepoint_id}/next \ -H "Content-Type: application/json" \ -d '{"units": 1, "unit": "hour"}' ``` **Jump backward:** ```bash -curl -X POST https://timepoint-flash-deploy-production.up.railway.app/api/v1/temporal/{timepoint_id}/prior \ +curl -X POST https://flash.timepointai.com/api/v1/temporal/{timepoint_id}/prior \ -H "Content-Type: application/json" \ -d '{"units": 30, "unit": "minute"}' ``` @@ -205,7 +205,7 @@ GET /api/v1/timepoints?visibility=public GET /api/v1/timepoints?visibility=private # owner only (requires auth) # Set a scene to private -curl -X PATCH https://timepoint-flash-deploy-production.up.railway.app/api/v1/timepoints/{id}/visibility \ +curl -X PATCH https://flash.timepointai.com/api/v1/timepoints/{id}/visibility \ -H "Content-Type: application/json" \ -d '{"visibility": "private"}' @@ -269,7 +269,7 @@ All errors return `{"detail": "Error message"}`. For complete endpoint documentation including auth, credits, and eval: [API.md](API.md) -OpenAPI schema available at: `https://timepoint-flash-deploy-production.up.railway.app/openapi.json` +OpenAPI schema available at: `https://flash.timepointai.com/openapi.json` --- diff --git a/docs/IOS_INTEGRATION.md b/docs/IOS_INTEGRATION.md index dbd68cb..89d1183 100644 --- a/docs/IOS_INTEGRATION.md +++ b/docs/IOS_INTEGRATION.md @@ -134,7 +134,7 @@ When receiving a 402: ## 4. Endpoint Map for iOS MVP -All endpoints are under `/api/v1`. Prefix with your Railway base URL (e.g. `https://your-app.up.railway.app`). +All endpoints are under `/api/v1`. Prefix with your deployment base URL (e.g. `https://your-domain.example.com`). ### Auth (requires `AUTH_ENABLED=true`) @@ -362,7 +362,7 @@ The admin grant endpoint (`POST /credits/admin/grant`) now accepts an optional ` ## 13. 
Billing Hooks -The open-source app includes a `BillingProvider` protocol (`app/services/billing.py`) with a default `NoOpBilling` implementation (unlimited access). The deployed version (`timepoint-flash-deploy`) uses a separate billing microservice that handles Apple IAP and Stripe payments, proxying billing requests through the main app. +The open-source app includes a `BillingProvider` protocol (`app/services/billing.py`) with a default `NoOpBilling` implementation (unlimited access). The deployed version uses a separate billing microservice that handles Apple IAP and Stripe payments, proxying billing requests through the main app. The billing hooks provide: - `check_credits(user_id, cost)` — called before credit-consuming operations