Skip to content

Commit a225eff

Browse files
authored
Nico/eng 1134 implement toon compression for mcp query results (#3033)
<!-- CURSOR_SUMMARY -->
> [!NOTE]
> MCP tools now return TOON-formatted results with a new compressed infrastructure map; the diagnose tool is renamed, embedded docs and get_source are removed, and SQL/flow resources capture source_file across TS/Py/Rust/Proto.
>
> - **MCP server/tools**:
>   - Add `compressed_map` with lineage-focused `CompressedInfraMap` and TOON output in `get_infra_map` (fuzzy search support).
>   - Rename `diagnose_infrastructure` → `get_issues` and emit TOON.
>   - Make `query_olap` default to `toon` (also supports `table`).
>   - Make `get_stream_sample` default to TOON (also supports JSON).
>   - Remove `get_source` tool and embedded docs (resources API removed); update server routing and tests.
> - **Infra modeling**:
>   - Track `source_file` for `SqlResource` end-to-end (TS/Py SDKs, Rust structs, ClickHouse introspection default, protobuf schema); include in infra map and tests.
>   - Capture source locations for transforms/consumers/functions; expose `InfrastructureSignature::id()`.
> - **Build/docs**:
>   - Drop build-time docs generator; update docs to new tools/flows with TOON examples.
> - **Deps**:
>   - Add `toon-format` and related libraries; refresh lockfiles.
>
> <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 75bf961. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->
1 parent c974f4c commit a225eff

File tree

35 files changed

+1837
-1878
lines changed

35 files changed

+1837
-1878
lines changed

Cargo.lock

Lines changed: 517 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

apps/framework-cli/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ keyring = { version = "3.6", features = ["apple-native", "linux-native"] }
116116
rmcp = { version = "0.8.1", features = ["server", "transport-streamable-http-server"] }
117117
percent-encoding = "2.3.2"
118118
dotenvy = "0.15"
119+
toon-format = "0.4.0"
119120

120121
[dev-dependencies]
121122
clickhouse = { version = "0.14.0", features = ["uuid", "test-util"] }

apps/framework-cli/build.rs

Lines changed: 1 addition & 200 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,7 @@
1-
use regex::Regex;
2-
use serde::{Deserialize, Serialize};
3-
use std::fs;
4-
use std::io::{Result, Write};
5-
use std::path::Path;
6-
use walkdir::WalkDir;
7-
8-
#[derive(Debug, Serialize, Deserialize)]
9-
struct DocMetadata {
10-
title: String,
11-
description: String,
12-
priority: f32,
13-
category: String,
14-
language: String,
15-
}
16-
17-
struct DocResource {
18-
relative_path: String,
19-
metadata: DocMetadata,
20-
}
1+
use std::io::Result;
212

223
fn main() -> Result<()> {
234
println!("cargo:rerun-if-changed=../../packages/protobuf");
24-
println!("cargo:rerun-if-changed=../framework-docs/llm-docs");
255

266
// Pass PostHog API key from environment variable at build time
277
if let Ok(posthog_api_key) = std::env::var("POSTHOG_API_KEY") {
@@ -38,184 +18,5 @@ fn main() -> Result<()> {
3818
.out_dir("src/proto/")
3919
.run_from_script();
4020

41-
// Generate embedded documentation
42-
generate_embedded_docs()?;
43-
44-
Ok(())
45-
}
46-
47-
fn generate_embedded_docs() -> Result<()> {
48-
// Try multiple possible paths since cargo might run from different directories
49-
let possible_paths = vec![
50-
Path::new("../framework-docs/llm-docs"),
51-
Path::new("./apps/framework-docs/llm-docs"),
52-
Path::new("../../apps/framework-docs/llm-docs"),
53-
];
54-
55-
let docs_dir = possible_paths.iter().find(|p| p.exists()).ok_or_else(|| {
56-
std::io::Error::new(
57-
std::io::ErrorKind::NotFound,
58-
format!(
59-
"Could not find llm-docs directory. Tried: {:?}. Current dir: {:?}",
60-
possible_paths,
61-
std::env::current_dir()
62-
),
63-
)
64-
})?;
65-
66-
eprintln!("[build] Using docs_dir: {:?}", docs_dir);
67-
eprintln!("[build] Current directory: {:?}", std::env::current_dir()?);
68-
69-
if !docs_dir.exists() {
70-
eprintln!(
71-
"[build] WARNING: llm-docs directory not found at {:?}",
72-
docs_dir
73-
);
74-
// Create empty generated file
75-
let out_path = Path::new("src/mcp/generated_docs.rs");
76-
std::fs::create_dir_all("src/mcp")?;
77-
let mut file = fs::File::create(out_path)?;
78-
writeln!(file, "// No documentation files found")?;
79-
writeln!(file, "pub const EMBEDDED_DOCS: &[EmbeddedDoc] = &[];")?;
80-
return Ok(());
81-
}
82-
83-
// Discover all markdown files
84-
let mut docs = Vec::new();
85-
let mut processed_count = 0;
86-
let mut failed_count = 0;
87-
88-
for entry in WalkDir::new(docs_dir)
89-
.follow_links(true)
90-
.into_iter()
91-
.filter_map(|e| e.ok())
92-
{
93-
let path = entry.path();
94-
if path.extension().and_then(|s| s.to_str()) == Some("md") {
95-
processed_count += 1;
96-
97-
if let Ok(relative_path) = path.strip_prefix(docs_dir) {
98-
let relative_str = relative_path.to_string_lossy().to_string();
99-
100-
// Skip the index file
101-
if relative_str == "llms.txt" {
102-
continue;
103-
}
104-
105-
// Parse frontmatter
106-
match parse_frontmatter(path) {
107-
Ok(metadata) => {
108-
docs.push(DocResource {
109-
relative_path: relative_str,
110-
metadata,
111-
});
112-
}
113-
Err(e) => {
114-
eprintln!("[build] Failed to parse {:?}: {}", path, e);
115-
failed_count += 1;
116-
}
117-
}
118-
}
119-
}
120-
}
121-
122-
eprintln!(
123-
"[build] Processed {} markdown files, {} succeeded, {} failed",
124-
processed_count,
125-
docs.len(),
126-
failed_count
127-
);
128-
129-
// Sort by priority (highest first)
130-
docs.sort_by(|a, b| {
131-
b.metadata
132-
.priority
133-
.partial_cmp(&a.metadata.priority)
134-
.unwrap_or(std::cmp::Ordering::Equal)
135-
});
136-
137-
// Generate Rust code
138-
let out_path = Path::new("src/mcp/generated_docs.rs");
139-
std::fs::create_dir_all("src/mcp")?;
140-
let mut file = fs::File::create(out_path)?;
141-
142-
writeln!(file, "// This file is automatically generated by build.rs")?;
143-
writeln!(file, "// Do not edit manually\n")?;
144-
writeln!(file, "#[derive(Debug, Clone)]")?;
145-
writeln!(file, "#[allow(dead_code)]")?;
146-
writeln!(file, "pub struct EmbeddedDoc {{")?;
147-
writeln!(file, " pub uri: &'static str,")?;
148-
writeln!(file, " pub name: &'static str,")?;
149-
writeln!(file, " pub title: &'static str,")?;
150-
writeln!(file, " pub description: &'static str,")?;
151-
writeln!(file, " pub content: &'static str,")?;
152-
writeln!(file, " pub priority: f32,")?;
153-
writeln!(file, " pub category: &'static str,")?;
154-
writeln!(file, " pub language: &'static str,")?;
155-
writeln!(file, "}}\n")?;
156-
157-
writeln!(file, "pub const EMBEDDED_DOCS: &[EmbeddedDoc] = &[")?;
158-
159-
for doc in &docs {
160-
let uri = format!(
161-
"moose://docs/{}",
162-
doc.relative_path.trim_end_matches(".md").replace('\\', "/")
163-
);
164-
let name = doc.relative_path.trim_end_matches(".md").replace('\\', "/");
165-
166-
// Calculate path relative to the generated file location (src/mcp/)
167-
// Generated file: apps/framework-cli/src/mcp/generated_docs.rs
168-
// Docs location: apps/framework-docs/llm-docs/...
169-
// Relative path: ../../../framework-docs/llm-docs/...
170-
let relative_include_path =
171-
format!("../../../framework-docs/llm-docs/{}", doc.relative_path);
172-
173-
writeln!(file, " EmbeddedDoc {{")?;
174-
writeln!(file, " uri: {:?},", uri)?;
175-
writeln!(file, " name: {:?},", name)?;
176-
writeln!(file, " title: {:?},", doc.metadata.title)?;
177-
writeln!(file, " description: {:?},", doc.metadata.description)?;
178-
writeln!(
179-
file,
180-
" content: include_str!({:?}),",
181-
relative_include_path
182-
)?;
183-
writeln!(file, " priority: {},", doc.metadata.priority)?;
184-
writeln!(file, " category: {:?},", doc.metadata.category)?;
185-
writeln!(file, " language: {:?},", doc.metadata.language)?;
186-
writeln!(file, " }},")?;
187-
}
188-
189-
writeln!(file, "];")?;
190-
191-
eprintln!(
192-
"[build] Generated {} embedded documentation files",
193-
docs.len()
194-
);
195-
19621
Ok(())
19722
}
198-
199-
fn parse_frontmatter(path: &Path) -> Result<DocMetadata> {
200-
let content = fs::read_to_string(path)?;
201-
202-
// Regex to match YAML frontmatter between --- delimiters (with multiline support)
203-
let frontmatter_re = Regex::new(r"(?s)^---\s*\n(.*?)\n---\s*\n").unwrap();
204-
205-
if let Some(captures) = frontmatter_re.captures(&content) {
206-
let yaml_content = captures.get(1).map_or("", |m| m.as_str());
207-
208-
match serde_yaml::from_str::<DocMetadata>(yaml_content) {
209-
Ok(metadata) => Ok(metadata),
210-
Err(e) => Err(std::io::Error::new(
211-
std::io::ErrorKind::InvalidData,
212-
format!("Failed to parse YAML in {:?}: {}", path, e),
213-
)),
214-
}
215-
} else {
216-
Err(std::io::Error::new(
217-
std::io::ErrorKind::InvalidData,
218-
"No frontmatter found",
219-
))
220-
}
221-
}

apps/framework-cli/src/framework/core/infra_reality_checker.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -815,6 +815,7 @@ mod tests {
815815
let actual_resource = SqlResource {
816816
name: "test_view".to_string(),
817817
database: None,
818+
source_file: None,
818819
setup: vec!["CREATE VIEW test_view AS SELECT 1".to_string()],
819820
teardown: vec!["DROP VIEW test_view".to_string()],
820821
pulls_data_from: vec![],
@@ -824,6 +825,7 @@ mod tests {
824825
let infra_resource = SqlResource {
825826
name: "test_view".to_string(),
826827
database: None,
828+
source_file: None,
827829
setup: vec!["CREATE VIEW test_view AS SELECT 2".to_string()], // Difference here
828830
teardown: vec!["DROP VIEW test_view".to_string()],
829831
pulls_data_from: vec![],

apps/framework-cli/src/framework/core/infrastructure/function_process.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,12 @@ impl FunctionProcess {
7070
name: function.name.clone(),
7171
primitive_type: PrimitiveTypes::Function,
7272
},
73-
metadata: None,
73+
metadata: Some(Metadata {
74+
description: None,
75+
source: Some(super::table::SourceLocation {
76+
file: function.executable.to_string_lossy().to_string(),
77+
}),
78+
}),
7479
}
7580
}
7681

apps/framework-cli/src/framework/core/infrastructure/mod.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,18 @@ pub enum InfrastructureSignature {
5757
}
5858

5959
impl InfrastructureSignature {
60+
/// Get the ID string for any signature variant
61+
pub fn id(&self) -> &str {
62+
match self {
63+
Self::Table { id }
64+
| Self::Topic { id }
65+
| Self::ApiEndpoint { id }
66+
| Self::TopicToTableSyncProcess { id }
67+
| Self::View { id }
68+
| Self::SqlResource { id } => id,
69+
}
70+
}
71+
6072
pub fn to_proto(&self) -> ProtoInfrastructureSignature {
6173
match self {
6274
InfrastructureSignature::Table { id } => {

apps/framework-cli/src/framework/core/infrastructure/sql_resource.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ pub struct SqlResource {
2121
#[serde(skip_serializing_if = "Option::is_none", default)]
2222
pub database: Option<String>,
2323

24+
/// Optional source file path where this SQL resource is defined
25+
#[serde(skip_serializing_if = "Option::is_none", default, alias = "sourceFile")]
26+
pub source_file: Option<String>,
27+
2428
/// A list of SQL commands or script paths executed during the setup phase.
2529
pub setup: Vec<String>,
2630
/// A list of SQL commands or script paths executed during the teardown phase.
@@ -55,6 +59,7 @@ impl SqlResource {
5559
ProtoSqlResource {
5660
name: self.name.clone(),
5761
database: self.database.clone(),
62+
source_file: self.source_file.clone().unwrap_or_default(),
5863
setup: self.setup.clone(),
5964
teardown: self.teardown.clone(),
6065
special_fields: Default::default(),
@@ -68,6 +73,11 @@ impl SqlResource {
6873
Self {
6974
name: proto.name,
7075
database: proto.database,
76+
source_file: if proto.source_file.is_empty() {
77+
None
78+
} else {
79+
Some(proto.source_file)
80+
},
7181
setup: proto.setup,
7282
teardown: proto.teardown,
7383
pulls_data_from: proto
@@ -158,6 +168,7 @@ mod tests {
158168
SqlResource {
159169
name: name.to_string(),
160170
database: None,
171+
source_file: None,
161172
setup: setup.into_iter().map(String::from).collect(),
162173
teardown: teardown.into_iter().map(String::from).collect(),
163174
pulls_data_from: vec![],
@@ -303,6 +314,7 @@ mod tests {
303314
let resource_with_db = SqlResource {
304315
name: "MyView".to_string(),
305316
database: Some("custom".to_string()),
317+
source_file: None,
306318
setup: vec![],
307319
teardown: vec![],
308320
pulls_data_from: vec![],
@@ -314,6 +326,7 @@ mod tests {
314326
let resource_no_db = SqlResource {
315327
name: "MyView".to_string(),
316328
database: None,
329+
source_file: None,
317330
setup: vec![],
318331
teardown: vec![],
319332
pulls_data_from: vec![],
@@ -330,6 +343,7 @@ mod tests {
330343
let resource_no_db = SqlResource {
331344
name: "MyView".to_string(),
332345
database: None,
346+
source_file: None,
333347
setup: vec!["CREATE VIEW MyView AS SELECT * FROM table1".to_string()],
334348
teardown: vec!["DROP VIEW IF EXISTS MyView".to_string()],
335349
pulls_data_from: vec![],
@@ -339,6 +353,7 @@ mod tests {
339353
let resource_with_db = SqlResource {
340354
name: "MyView".to_string(),
341355
database: Some("local".to_string()),
356+
source_file: None,
342357
setup: vec!["CREATE VIEW MyView AS SELECT * FROM table1".to_string()],
343358
teardown: vec!["DROP VIEW IF EXISTS MyView".to_string()],
344359
pulls_data_from: vec![],
@@ -355,6 +370,7 @@ mod tests {
355370
let resource_formatted = SqlResource {
356371
name: "TestView".to_string(),
357372
database: None,
373+
source_file: None,
358374
setup: vec![
359375
"CREATE VIEW IF NOT EXISTS TestView \n AS SELECT\n `primaryKey`,\n `utcTimestamp`,\n `textLength`\n FROM `Bar`\n WHERE `hasText` = true".to_string()
360376
],
@@ -366,6 +382,7 @@ mod tests {
366382
let resource_compact = SqlResource {
367383
name: "TestView".to_string(),
368384
database: None,
385+
source_file: None,
369386
setup: vec![
370387
"CREATE VIEW IF NOT EXISTS TestView AS SELECT primaryKey, utcTimestamp, textLength FROM Bar WHERE hasText = true".to_string()
371388
],

apps/framework-cli/src/framework/core/infrastructure_map.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4714,6 +4714,7 @@ mod diff_sql_resources_tests {
47144714
SqlResource {
47154715
name: name.to_string(),
47164716
database: None,
4717+
source_file: None,
47174718
setup: setup.iter().map(|s| s.to_string()).collect(),
47184719
teardown: teardown.iter().map(|s| s.to_string()).collect(),
47194720
pulls_data_from: vec![],
@@ -4950,6 +4951,7 @@ mod diff_sql_resources_tests {
49504951
let mv_before = SqlResource {
49514952
name: "events_summary_mv".to_string(),
49524953
database: None,
4954+
source_file: None,
49534955
setup: vec!["CREATE MATERIALIZED VIEW events_summary_mv TO events_summary_table AS SELECT id, name FROM events".to_string()],
49544956
teardown: vec!["DROP VIEW events_summary_mv".to_string()],
49554957
pulls_data_from: vec![InfrastructureSignature::Table {
@@ -4963,6 +4965,7 @@ mod diff_sql_resources_tests {
49634965
let mv_after = SqlResource {
49644966
name: "events_summary_mv".to_string(),
49654967
database: None,
4968+
source_file: None,
49664969
setup: vec!["CREATE MATERIALIZED VIEW events_summary_mv TO events_summary_table AS SELECT id, name, timestamp FROM events".to_string()],
49674970
teardown: vec!["DROP VIEW events_summary_mv".to_string()],
49684971
pulls_data_from: vec![InfrastructureSignature::Table {

0 commit comments

Comments
 (0)