mm65x · Sagargupta16 · Mar 12, 2026 · Mar 12, 2026 · Mar 12, 2026 · Mar 13, 2026
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
@@ -0,0 +1 @@
+* @mm65x
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -65,7 +65,7 @@ Creates a GitHub Release marked as pre-release. Does NOT publish to crates.io.
 ## Code Conventions
 
 - **New JSONL sources**: Implement `JsonlSourceConfig` (~15 lines) and use `JsonlSource<C>` from `source/jsonl_source.rs`
-- **Cline-derived sources**: Use `ClineFormat` from `source/cline_format.rs`
+- **Cline-derived sources**: Implement `ClineSourceConfig` and use `ClineDerivedSource<C>` from `source/cline_format.rs`
 - **SQLite sources**: See `source/opencode.rs` for the pattern — open read-only, busy_timeout, `json_extract` for JSON columns
 - **Timestamps**: Always use `timestamp::parse_timestamp()`, never inline parsing
 - **File discovery**: Each `Source` implements `discover_files()` using helpers from `source/discover.rs` (`collect_by_ext`, `walk_by_ext`). No glob crate — use bounded `read_dir` walking only.

diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tokemon"
-version = "0.2.3"
+version = "0.2.5"
 edition = "2021"
 description = "Unified LLM token usage tracking across all providers"
 license = "MIT"

diff --git a/README.md b/README.md
@@ -207,29 +207,36 @@ make ci            # Run all checks (fmt + lint + test)
 
 ```
 src/
-├── main.rs              # CLI entry, command dispatch, cache-aware parsing
+├── main.rs              # CLI entry and command dispatch
+├── lib.rs               # Library entry point
 ├── cli.rs               # clap argument definitions
 ├── config.rs            # TOML config loading and validation
-├── types.rs             # Core data types (Record, Report, etc.)
+├── types.rs             # Core data types (Record, ModelUsage, etc.)
 ├── error.rs             # Error types
 ├── cache.rs             # SQLite cache layer
+├── pipeline.rs          # Shared data loading orchestration (used by CLI and MCP)
 ├── display.rs           # Name translation (client, model, API provider)
 ├── pacemaker.rs         # Budget tracking and limits
 ├── timestamp.rs         # Shared timestamp parsing
-├── cost.rs              # LiteLLM cost calculation engine
+├── cost.rs              # Pricing engine
 ├── rollup.rs            # Daily/weekly/monthly grouping
 ├── dedup.rs             # Hash-based deduplication
-├── render.rs            # Table and JSON rendering with responsive columns
+├── render/              # Table, CSV, and JSON rendering
+├── tui/                 # Terminal UI dashboard (`tokemon top`)
+│   ├── app.rs           # Core state and event loop
+│   ├── watcher.rs       # Background file modification watcher
+│   ├── settings_state.rs # Configuration settings state
+│   ├── sparkline_data.rs # Rendering sparklines
+│   ├── theme.rs         # TUI color palette
+│   └── widgets/         # TUI components (usage table, summary cards)
 ├── mcp.rs               # MCP server (Model Context Protocol)
 ├── paths.rs             # Platform-specific path resolution
 └── source/
     ├── mod.rs            # Source trait and SourceSet
     ├── discover.rs       # Bounded read_dir file discovery utilities
-    ├── jsonl_source.rs   # Generic JSONL source (4 sources use this)
-    ├── cline_format.rs   # Shared Cline-format parser (3 sources use this)
-    ├── claude_code.rs    # Claude Code parser (structural discovery)
-    ├── codex.rs          # Codex CLI parser (state machine, YYYY/MM/DD nav)
-    └── ...               # One file per source
+    ├── jsonl_source.rs   # Generic JSONL source
+    ├── cline_format.rs   # Shared Cline-format parser
+    └── ...               # One file per provider
 ```
 
 ## License

diff --git a/src/cache.rs b/src/cache.rs
@@ -423,7 +423,19 @@ impl Cache {
             .prepare("SELECT DISTINCT source_file FROM usage_entries WHERE preserved = 0")?;
         let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
 
-        let cached_files: Vec<String> = rows.flatten().collect();
+        let mut skipped = 0u64;
+        let mut cached_files = Vec::new();
+        for row in rows {
+            match row {
+                Ok(file) => cached_files.push(file),
+                Err(_) => skipped += 1,
+            }
+        }
+        if skipped > 0 {
+            eprintln!(
+                "[tokemon] Warning: skipped {skipped} cached rows while checking preserved files"
+            );
+        }
         for file in &cached_files {
             if !discovered_files.contains(file) {
                 self.conn.execute(

diff --git a/src/cost.rs b/src/cost.rs
@@ -107,11 +107,15 @@ impl PricingEngine {
         let mut pricing_cache: HashMap<&str, Option<&ModelPricing>> = HashMap::new();
 
         for entry in entries.iter_mut() {
-            // If entry already has a cost (even $0.00), keep it.
-            // Some(0.0) means "already priced, result was zero" (e.g.
-            // free model or no pricing data). Re-pricing would cause
+            // Skip records that already have a positive cost — these were
+            // priced correctly on a previous run and re-pricing would cause
             // cost fluctuations when records are loaded from cache.
-            if entry.cost_usd.is_some() {
+            //
+            // Records with `Some(0.0)` are treated as *unpriced*: some
+            // source parsers store `cost: 0` when they don't know the
+            // price for a model, so we give the pricing engine a chance
+            // to fill in the real cost.
+            if entry.cost_usd.is_some_and(|c| c > 0.0) {
                 continue;
             }
 
@@ -143,11 +147,11 @@ impl PricingEngine {
         }
     }
 
-    /// Three-level model matching
+    /// Four-level model matching
     fn find_pricing(&self, model: &str) -> Option<&ModelPricing> {
-        // Strip source-level provider prefix (e.g., "vertexai." from Vertex AI detection)
+        // Strip routing prefixes (bedrock/, openai/, vertexai., anthropic.) and the @deploy suffix
         // so that the model name is clean for lookup against litellm pricing data.
-        let model = model.strip_prefix("vertexai.").unwrap_or(model);
+        let model = crate::display::strip_routing_prefix(model);
 
         // 1. Exact match
         if let Some(p) = self.models.get(model) {
@@ -246,7 +250,7 @@ impl PricingEngine {
 }
 
 fn normalize_model_name(model: &str) -> String {
-    let s = model.to_lowercase();
+    let s = crate::display::strip_routing_prefix(model).to_lowercase();
     let stripped = crate::display::strip_date_suffix(&s);
     stripped.replace('.', "-")
 }
@@ -368,4 +372,141 @@ mod tests {
             .expect("should prefix match gpt-4-32k");
         assert_eq!(p2.input_cost_per_token, Some(0.06));
     }
+
+    #[test]
+    fn test_zero_cost_gets_repriced() {
+        use chrono::Utc;
+        use std::borrow::Cow;
+
+        let engine = PricingEngine::parse_pricing(DUMMY_JSON).unwrap();
+
+        let mut records = vec![
+            // Record with cost_usd = Some(0.0) should be re-priced
+            Record {
+                timestamp: Utc::now(),
+                provider: Cow::Borrowed("test"),
+                model: Some("model-a".to_string()),
+                input_tokens: 1000,
+                output_tokens: 500,
+                cache_read_tokens: 0,
+                cache_creation_tokens: 0,
+                thinking_tokens: 0,
+                cost_usd: Some(0.0),
+                message_id: None,
+                request_id: None,
+                session_id: None,
+            },
+            // Record with a positive cost should be kept as-is
+            Record {
+                timestamp: Utc::now(),
+                provider: Cow::Borrowed("test"),
+                model: Some("model-a".to_string()),
+                input_tokens: 1000,
+                output_tokens: 500,
+                cache_read_tokens: 0,
+                cache_creation_tokens: 0,
+                thinking_tokens: 0,
+                cost_usd: Some(99.0),
+                message_id: None,
+                request_id: None,
+                session_id: None,
+            },
+            // Record with cost_usd = None should also be priced
+            Record {
+                timestamp: Utc::now(),
+                provider: Cow::Borrowed("test"),
+                model: Some("model-a".to_string()),
+                input_tokens: 1000,
+                output_tokens: 500,
+                cache_read_tokens: 0,
+                cache_creation_tokens: 0,
+                thinking_tokens: 0,
+                cost_usd: None,
+                message_id: None,
+                request_id: None,
+                session_id: None,
+            },
+        ];
+
+        engine.apply_costs(&mut records);
+
+        // model-a: input=0.001, output=0.002
+        // expected = 1000 * 0.001 + 500 * 0.002 = 1.0 + 1.0 = 2.0
+        let expected_cost = 2.0;
+
+        // Some(0.0) record got re-priced
+        assert_eq!(
+            records[0].cost_usd,
+            Some(expected_cost),
+            "record with cost_usd=Some(0.0) should be re-priced"
+        );
+
+        // Positive cost record kept original value
+        assert_eq!(
+            records[1].cost_usd,
+            Some(99.0),
+            "record with positive cost should not be re-priced"
+        );
+
+        // None record got priced
+        assert_eq!(
+            records[2].cost_usd,
+            Some(expected_cost),
+            "record with cost_usd=None should be priced"
+        );
+    }
+
+    #[test]
+    fn test_find_pricing_cross_provider_same_model() {
+        // The same model accessed via different providers must resolve to identical pricing
+        let json = r#"{
+            "anthropic/claude-3-5-sonnet-20241022": {
+                "input_cost_per_token": 0.003,
+                "output_cost_per_token": 0.015
+            }
+        }"#;
+        let engine = PricingEngine::parse_pricing(json).unwrap();
+
+        let variants = [
+            "claude-3-5-sonnet-20241022",
+            "anthropic/claude-3-5-sonnet-20241022",
+            "vertexai.claude-3-5-sonnet-20241022",
+            "bedrock/anthropic.claude-3-5-sonnet-20241022",
+            "openai/claude-3-5-sonnet-20241022",
+        ];
+
+        for variant in &variants {
+            let pricing = engine.find_pricing(variant);
+            assert!(
+                pricing.is_some(),
+                "find_pricing failed for variant: {variant}"
+            );
+            assert_eq!(
+                pricing.unwrap().input_cost_per_token,
+                Some(0.003),
+                "wrong input cost for variant: {variant}"
+            );
+            assert_eq!(
+                pricing.unwrap().output_cost_per_token,
+                Some(0.015),
+                "wrong output cost for variant: {variant}"
+            );
+        }
+    }
+
+    #[test]
+    fn test_find_pricing_strips_deploy_suffix() {
+        let json = r#"{
+            "gpt-4o": {
+                "input_cost_per_token": 0.0025,
+                "output_cost_per_token": 0.01
+            }
+        }"#;
+        let engine = PricingEngine::parse_pricing(json).unwrap();
+
+        let p = engine
+            .find_pricing("gpt-4o@my-deployment")
+            .expect("should strip @deploy suffix");
+        assert_eq!(p.input_cost_per_token, Some(0.0025));
+    }
 }
diff --git a/src/display.rs b/src/display.rs
@@ -74,7 +74,7 @@ pub fn display_model(raw: &str) -> String {
 /// and dot-based prefixes (`vertexai.`, `anthropic.`).
 ///
 /// Returns a `&str` borrowed from the input — no allocation.
-fn strip_routing_prefix(raw: &str) -> &str {
+pub fn strip_routing_prefix(raw: &str) -> &str {
     // Strip @... deployment suffix
     let raw = raw.split('@').next().unwrap_or(raw);
     // Strip slash-based prefixes (e.g., "bedrock/", "openai/")

diff --git a/src/source/claude_code.rs b/src/source/claude_code.rs
@@ -102,22 +102,30 @@ impl super::Source for ClaudeCodeSource {
         let reader = BufReader::with_capacity(64 * 1024, file);
         let session_id = timestamp::extract_session_id(path);
 
-        let mut error_logged = false;
+        let mut io_errors = 0u64;
+        let mut json_errors = 0u64;
         let entries = reader
             .lines()
-            .map_while(std::result::Result::ok)
-            .filter(|line| line.contains("\"assistant\""))
-            .filter_map(|line| match serde_json::from_str::<ClaudeLine>(&line) {
-                Ok(parsed) => Some(parsed),
+            .filter_map(|r| match r {
+                Ok(line) => Some(line),
                 Err(e) => {
-                    if !error_logged {
+                    if io_errors == 0 {
                         eprintln!(
-                            "[tokemon] Warning: skipped malformed JSON in {}: {}",
+                            "[tokemon] Warning: I/O error reading {}: {}",
                             path.display(),
                             e
                         );
-                        error_logged = true;
                     }
+                    io_errors += 1;
+                    None
+                }
+            })
+            .filter(|line| line.contains("\"assistant\""))
+            .filter_map(|line| {
+                if let Ok(parsed) = serde_json::from_str::<ClaudeLine>(&line) {
+                    Some(parsed)
+                } else {
+                    json_errors += 1;
                     None
                 }
             })
@@ -163,6 +171,19 @@ impl super::Source for ClaudeCodeSource {
             })
             .collect();
 
+        if io_errors > 0 {
+            eprintln!(
+                "[tokemon] Warning: skipped {io_errors} lines in {} due to I/O errors",
+                path.display()
+            );
+        }
+        if json_errors > 0 {
+            eprintln!(
+                "[tokemon] Warning: skipped {json_errors} malformed JSON lines in {}",
+                path.display()
+            );
+        }
+
         Ok(entries)
     }
 }