From 529637b8a9bd3a2dc7df714840f1efd3efee66a0 Mon Sep 17 00:00:00 2001
From: Tim Delisle
Date: Fri, 14 Nov 2025 21:24:25 -0800
Subject: [PATCH 01/59] fixed templates page

---
 apps/framework-docs-v2/.npmrc                 |  20 +-
 .../content/templates/index.mdx               |  60 +---
 apps/framework-docs-v2/package.json           |   2 +
 .../src/app/[...slug]/page.tsx                |  12 +-
 .../strategy/platform-engineering/page.tsx    |  66 ++++
 .../src/app/templates/layout.tsx              |  31 ++
 .../src/app/templates/page.tsx                |  62 ++++
 .../src/app/templates/templates-side-nav.tsx  | 296 ++++++++++++++++++
 .../src/components/mdx-renderer.tsx           |   2 +
 .../src/components/mdx/command-snippet.tsx    |  42 +++
 .../src/components/mdx/index.ts               |   1 +
 .../src/components/mdx/template-card.tsx      | 155 +++++----
 .../src/components/mdx/template-grid.tsx      | 159 ++--------
 .../src/components/ui/checkbox.tsx            |  30 ++
 .../src/components/ui/command.tsx             |   2 +-
 .../src/components/ui/input.tsx               |   2 +-
 .../src/components/ui/item.tsx                | 164 ++++++++++
 .../src/components/ui/select.tsx              |   2 +-
 apps/framework-docs-v2/src/lib/content.ts     |  12 +-
 apps/framework-docs-v2/src/styles/globals.css |   4 +-
 pnpm-lock.yaml                                |  93 +++++-
 21 files changed, 937 insertions(+), 280 deletions(-)
 create mode 100644 apps/framework-docs-v2/src/app/guides/strategy/platform-engineering/page.tsx
 create mode 100644 apps/framework-docs-v2/src/app/templates/layout.tsx
 create mode 100644 apps/framework-docs-v2/src/app/templates/page.tsx
 create mode 100644 apps/framework-docs-v2/src/app/templates/templates-side-nav.tsx
 create mode 100644 apps/framework-docs-v2/src/components/mdx/command-snippet.tsx
 create mode 100644 apps/framework-docs-v2/src/components/ui/checkbox.tsx
 create mode 100644 apps/framework-docs-v2/src/components/ui/item.tsx

diff --git a/apps/framework-docs-v2/.npmrc b/apps/framework-docs-v2/.npmrc
index afab184d37..be313dafb4 100644
--- a/apps/framework-docs-v2/.npmrc
+++ b/apps/framework-docs-v2/.npmrc
@@ -1,6 +1,20 @@
 # Force all dependencies to be hoisted locally to this app's node_modules
 # This prevents TypeScript from finding React types in nested node_modules
-# This overrides the root .npmrc which prevents hoisting to support multiple React versions
-# Since this app only uses React 19, we can safely hoist everything here
-shamefully-hoist=true
+# This works with the root .npmrc which prevents React from being hoisted to root
+# Since this app only uses React 19, we can safely hoist everything locally here
+#
+# IMPORTANT: We use hoist-pattern instead of shamefully-hoist=true to avoid
+# conflicts with the root hoisting pattern when running pnpm add from this directory
+# (e.g., via shadcn CLI). This hoists everything locally without modifying root structure.
+hoist-pattern[]=* + +# Match root public-hoist-pattern to prevent ERR_PNPM_PUBLIC_HOIST_PATTERN_DIFF +# This ensures compatibility when running pnpm add from this directory +public-hoist-pattern[]=!react +public-hoist-pattern[]=!react-dom +public-hoist-pattern[]=!react/jsx-runtime +public-hoist-pattern[]=!react-dom/server +public-hoist-pattern[]=!react/jsx-dev-runtime +public-hoist-pattern[]=!@types/react +public-hoist-pattern[]=!@types/react-dom diff --git a/apps/framework-docs-v2/content/templates/index.mdx b/apps/framework-docs-v2/content/templates/index.mdx index 29cdba52cf..149114ccc2 100644 --- a/apps/framework-docs-v2/content/templates/index.mdx +++ b/apps/framework-docs-v2/content/templates/index.mdx @@ -5,69 +5,13 @@ order: 2 category: getting-started --- -import { CTACards, CTACard } from "@/components/mdx"; -import { Badge } from "@/components/ui/badge"; -import Link from "next/link"; -import { TemplatesGridServer } from "@/components/mdx"; +import { TemplatesGridServer, CommandSnippet } from "@/components/mdx"; # Templates & Apps Moose provides two ways to get started: **templates** and **demo apps**. Templates are simple skeleton applications that you can initialize with `moose init`, while demo apps are more advanced examples available on GitHub that showcase real-world use cases and integrations. -**Initialize a template:** -```bash filename="Terminal" copy -moose init PROJECT_NAME TEMPLATE_NAME -``` - -**List available templates:** -```bash filename="Terminal" copy -moose template list -``` - -## Popular Apps - - - - - - - - - - ---- + ## Browse Apps and Templates diff --git a/apps/framework-docs-v2/package.json b/apps/framework-docs-v2/package.json index ebb96fdf62..9780a46f10 100644 --- a/apps/framework-docs-v2/package.json +++ b/apps/framework-docs-v2/package.json @@ -21,11 +21,13 @@ "@next/mdx": "^16.0.1", "@radix-ui/react-accordion": "^1.2.11", "@radix-ui/react-avatar": "^1.0.4", + "@radix-ui/react-checkbox": "^1.3.3", "@radix-ui/react-collapsible": "^1.1.11", "@radix-ui/react-dialog": "^1.1.15", "@radix-ui/react-dropdown-menu": "^2.1.15", "@radix-ui/react-label": "^2.1.7", "@radix-ui/react-navigation-menu": "^1.2.13", + "@radix-ui/react-popover": "^1.1.15", "@radix-ui/react-scroll-area": "^1.2.2", "@radix-ui/react-select": "^2.0.0", "@radix-ui/react-separator": "^1.1.7", diff --git a/apps/framework-docs-v2/src/app/[...slug]/page.tsx b/apps/framework-docs-v2/src/app/[...slug]/page.tsx index 35a58dc1d6..cfb3523efc 100644 --- a/apps/framework-docs-v2/src/app/[...slug]/page.tsx +++ b/apps/framework-docs-v2/src/app/[...slug]/page.tsx @@ -19,17 +19,18 @@ export async function generateStaticParams() { const slugs = getAllSlugs(); // Generate params for each slug + // Note: templates is excluded from getAllSlugs() as it is now an explicit page const allParams: { slug: string[] }[] = slugs.map((slug) => ({ slug: slug.split("/"), })); - // Also add section index routes (moosestack, ai, hosting, templates) - // These map to section/index.mdx files + // Also add section index routes (moosestack, ai, hosting, guides) + // Note: templates is now an explicit page, so it's excluded here allParams.push( { slug: ["moosestack"] }, { slug: ["ai"] }, { slug: ["hosting"] }, - { slug: ["templates"] }, + { slug: ["guides"] }, ); return allParams; @@ -81,6 +82,11 @@ export default async function DocPage({ params }: PageProps) { const slug = slugArray.join("/"); + // Templates is now an explicit page, so it should not be handled by this catch-all route + if (slug.startsWith("templates/")) { + 
notFound(); + } + let content; try { content = await parseMarkdownContent(slug); diff --git a/apps/framework-docs-v2/src/app/guides/strategy/platform-engineering/page.tsx b/apps/framework-docs-v2/src/app/guides/strategy/platform-engineering/page.tsx new file mode 100644 index 0000000000..9439ecb4e1 --- /dev/null +++ b/apps/framework-docs-v2/src/app/guides/strategy/platform-engineering/page.tsx @@ -0,0 +1,66 @@ +import { notFound } from "next/navigation"; +import type { Metadata } from "next"; +import { parseMarkdownContent } from "@/lib/content"; +import { TOCNav } from "@/components/navigation/toc-nav"; +import { MDXRenderer } from "@/components/mdx-renderer"; +import { DocBreadcrumbs } from "@/components/navigation/doc-breadcrumbs"; +import { buildDocBreadcrumbs } from "@/lib/breadcrumbs"; + +export const dynamic = "force-dynamic"; + +export async function generateMetadata(): Promise { + try { + const content = await parseMarkdownContent( + "guides/strategy/platform-engineering", + ); + return { + title: + content.frontMatter.title ? + `${content.frontMatter.title} | MooseStack Documentation` + : "Platform Engineering | MooseStack Documentation", + description: + content.frontMatter.description || + "Guide to platform engineering strategy with MooseStack", + }; + } catch (error) { + return { + title: "Platform Engineering | MooseStack Documentation", + description: "Guide to platform engineering strategy with MooseStack", + }; + } +} + +export default async function PlatformEngineeringPage() { + let content; + try { + content = await parseMarkdownContent( + "guides/strategy/platform-engineering", + ); + } catch (error) { + notFound(); + } + + const breadcrumbs = buildDocBreadcrumbs( + "guides/strategy/platform-engineering", + typeof content.frontMatter.title === "string" ? + content.frontMatter.title + : undefined, + ); + + return ( + <> +
+ +
+ {content.isMDX ? + + :
} +
+
+ + + ); +} diff --git a/apps/framework-docs-v2/src/app/templates/layout.tsx b/apps/framework-docs-v2/src/app/templates/layout.tsx new file mode 100644 index 0000000000..25d5d3afb1 --- /dev/null +++ b/apps/framework-docs-v2/src/app/templates/layout.tsx @@ -0,0 +1,31 @@ +import type { ReactNode } from "react"; +import { Suspense } from "react"; +import { TemplatesSideNav } from "./templates-side-nav"; +import { AnalyticsProvider } from "@/components/analytics-provider"; +import { SidebarInset } from "@/components/ui/sidebar"; + +interface TemplatesLayoutProps { + children: ReactNode; +} + +export default async function TemplatesLayout({ + children, +}: TemplatesLayoutProps) { + return ( + +
+ }> + + + +
+ {/* Reserve space for the right TOC on xl+ screens */} +
+ {children} +
+
+
+
+
+ ); +} diff --git a/apps/framework-docs-v2/src/app/templates/page.tsx b/apps/framework-docs-v2/src/app/templates/page.tsx new file mode 100644 index 0000000000..ca7f80b21c --- /dev/null +++ b/apps/framework-docs-v2/src/app/templates/page.tsx @@ -0,0 +1,62 @@ +import { notFound } from "next/navigation"; +import type { Metadata } from "next"; +import { parseMarkdownContent } from "@/lib/content"; +import { TOCNav } from "@/components/navigation/toc-nav"; +import { MDXRenderer } from "@/components/mdx-renderer"; +import { DocBreadcrumbs } from "@/components/navigation/doc-breadcrumbs"; +import { buildDocBreadcrumbs } from "@/lib/breadcrumbs"; + +export const dynamic = "force-dynamic"; + +export async function generateMetadata(): Promise { + try { + const content = await parseMarkdownContent("templates/index"); + return { + title: + content.frontMatter.title ? + `${content.frontMatter.title} | MooseStack Documentation` + : "Templates & Apps | MooseStack Documentation", + description: + content.frontMatter.description || + "Browse templates and demo apps for MooseStack", + }; + } catch (error) { + return { + title: "Templates & Apps | MooseStack Documentation", + description: "Browse templates and demo apps for MooseStack", + }; + } +} + +export default async function TemplatesPage() { + let content; + try { + content = await parseMarkdownContent("templates/index"); + } catch (error) { + notFound(); + } + + const breadcrumbs = buildDocBreadcrumbs( + "templates/index", + typeof content.frontMatter.title === "string" ? + content.frontMatter.title + : undefined, + ); + + return ( + <> +
+ +
+ {content.isMDX ? + + :
} +
+
+ + + ); +} diff --git a/apps/framework-docs-v2/src/app/templates/templates-side-nav.tsx b/apps/framework-docs-v2/src/app/templates/templates-side-nav.tsx new file mode 100644 index 0000000000..dc0d894822 --- /dev/null +++ b/apps/framework-docs-v2/src/app/templates/templates-side-nav.tsx @@ -0,0 +1,296 @@ +"use client"; + +import * as React from "react"; +import { useSearchParams, useRouter, usePathname } from "next/navigation"; +import { + Sidebar, + SidebarContent, + SidebarGroup, + SidebarGroupLabel, + SidebarMenu, + SidebarMenuButton, + SidebarMenuItem, +} from "@/components/ui/sidebar"; +import { Checkbox } from "@/components/ui/checkbox"; +import { Label } from "@/components/ui/label"; +import { IconX } from "@tabler/icons-react"; + +type LanguageFilter = "typescript" | "python" | null; +type CategoryFilter = ("starter" | "framework" | "example")[]; +type TypeFilter = "template" | "app" | null; + +export function TemplatesSideNav() { + const router = useRouter(); + const pathname = usePathname(); + const searchParams = useSearchParams(); + + // Get filter values from URL params + const typeFilter = (searchParams.get("type") as TypeFilter) || null; + const languageFilter = + (searchParams.get("language") as LanguageFilter) || null; + const categoryFilter = React.useMemo(() => { + const categoryParam = searchParams.get("category"); + if (!categoryParam) return []; + return categoryParam + .split(",") + .filter( + (c): c is "starter" | "framework" | "example" => + c === "starter" || c === "framework" || c === "example", + ); + }, [searchParams]); + + const hasActiveFilters = + typeFilter !== null || languageFilter !== null || categoryFilter.length > 0; + + // Update URL params when filters change + const updateFilters = React.useCallback( + (updates: { + type?: TypeFilter; + language?: LanguageFilter; + category?: CategoryFilter; + }) => { + const params = new URLSearchParams(searchParams.toString()); + + if (updates.type !== undefined) { + if (updates.type === null) { + params.delete("type"); + } else { + params.set("type", updates.type); + } + } + + if (updates.language !== undefined) { + if (updates.language === null) { + params.delete("language"); + } else { + params.set("language", updates.language); + } + } + + if (updates.category !== undefined) { + if (updates.category.length === 0) { + params.delete("category"); + } else { + params.set("category", updates.category.join(",")); + } + } + + router.push(`${pathname}?${params.toString()}`); + }, + [router, pathname, searchParams], + ); + + const clearFilters = () => { + updateFilters({ type: null, language: null, category: [] }); + }; + + return ( + + + + Filters + + {/* Type Filter */} + +
+ +
+
+ { + if (checked) { + updateFilters({ type: "template" }); + } else { + updateFilters({ type: null }); + } + }} + /> + +
+
+ { + if (checked) { + updateFilters({ type: "app" }); + } else { + updateFilters({ type: null }); + } + }} + /> + +
+
+
+
+ + {/* Language Filter */} + +
+ +
+
+ { + if (checked) { + updateFilters({ language: "typescript" }); + } else { + updateFilters({ language: null }); + } + }} + /> + +
+
+ { + if (checked) { + updateFilters({ language: "python" }); + } else { + updateFilters({ language: null }); + } + }} + /> + +
+
+
+
+ + {/* Category Filter */} + +
+ +
+
+ { + if (checked) { + updateFilters({ + category: [...categoryFilter, "starter"], + }); + } else { + updateFilters({ + category: categoryFilter.filter( + (c) => c !== "starter", + ), + }); + } + }} + /> + +
+
+ { + if (checked) { + updateFilters({ + category: [...categoryFilter, "framework"], + }); + } else { + updateFilters({ + category: categoryFilter.filter( + (c) => c !== "framework", + ), + }); + } + }} + /> + +
+
+ { + if (checked) { + updateFilters({ + category: [...categoryFilter, "example"], + }); + } else { + updateFilters({ + category: categoryFilter.filter( + (c) => c !== "example", + ), + }); + } + }} + /> + +
+
+
+
+ + {/* Clear Filters Button */} + {hasActiveFilters && ( + + + + Clear Filters + + + )} +
+
+
+
+ ); +} diff --git a/apps/framework-docs-v2/src/components/mdx-renderer.tsx b/apps/framework-docs-v2/src/components/mdx-renderer.tsx index cb30fc5550..3f4ff5ccf7 100644 --- a/apps/framework-docs-v2/src/components/mdx-renderer.tsx +++ b/apps/framework-docs-v2/src/components/mdx-renderer.tsx @@ -27,6 +27,7 @@ import { Security, BreakingChanges, TemplatesGridServer, + CommandSnippet, } from "@/components/mdx"; import { FileTreeFolder, FileTreeFile } from "@/components/mdx/file-tree"; import { CodeEditor } from "@/components/ui/shadcn-io/code-editor"; @@ -120,6 +121,7 @@ export async function MDXRenderer({ source }: MDXRendererProps) { Security, BreakingChanges, TemplatesGridServer, + CommandSnippet, CodeEditor, Separator, Tabs, diff --git a/apps/framework-docs-v2/src/components/mdx/command-snippet.tsx b/apps/framework-docs-v2/src/components/mdx/command-snippet.tsx new file mode 100644 index 0000000000..b43cbaf35b --- /dev/null +++ b/apps/framework-docs-v2/src/components/mdx/command-snippet.tsx @@ -0,0 +1,42 @@ +"use client"; + +import * as React from "react"; +import { + Snippet, + SnippetHeader, + SnippetTabsList, + SnippetTabsTrigger, + SnippetTabsContent, + SnippetCopyButton, +} from "@/components/ui/snippet"; + +interface CommandSnippetProps { + initCommand?: string; + listCommand?: string; + initLabel?: string; + listLabel?: string; +} + +export function CommandSnippet({ + initCommand = "moose init PROJECT_NAME TEMPLATE_NAME", + listCommand = "moose template list", + initLabel = "Init", + listLabel = "List", +}: CommandSnippetProps) { + const [value, setValue] = React.useState("init"); + const currentCommand = value === "init" ? initCommand : listCommand; + + return ( + + + + {initLabel} + {listLabel} + + + + {initCommand} + {listCommand} + + ); +} diff --git a/apps/framework-docs-v2/src/components/mdx/index.ts b/apps/framework-docs-v2/src/components/mdx/index.ts index ebdb8480cf..e34439c4a0 100644 --- a/apps/framework-docs-v2/src/components/mdx/index.ts +++ b/apps/framework-docs-v2/src/components/mdx/index.ts @@ -8,6 +8,7 @@ export { } from "./staggered-card"; export { Callout } from "./callout"; export { LanguageTabs, LanguageTabContent } from "./language-tabs"; +export { CommandSnippet } from "./command-snippet"; export { CodeSnippet } from "./code-snippet"; export { CodeEditorWrapper } from "./code-editor-wrapper"; export { ToggleBlock } from "./toggle-block"; diff --git a/apps/framework-docs-v2/src/components/mdx/template-card.tsx b/apps/framework-docs-v2/src/components/mdx/template-card.tsx index 0e3ccba497..9e78e47774 100644 --- a/apps/framework-docs-v2/src/components/mdx/template-card.tsx +++ b/apps/framework-docs-v2/src/components/mdx/template-card.tsx @@ -11,7 +11,9 @@ import { CardFooter, CardHeader, } from "@/components/ui/card"; -import { IconBrandGithub } from "@tabler/icons-react"; +import { IconBrandGithub, IconRocket } from "@tabler/icons-react"; +import { Button } from "@/components/ui/button"; +import { Separator } from "@/components/ui/separator"; import { Snippet, SnippetCopyButton, @@ -52,12 +54,6 @@ export function TemplateCard({ item, className }: TemplateCardProps) { const template = isTemplate ? (item as TemplateMetadata) : null; const app = !isTemplate ? 
(item as AppMetadata) : null; - const categoryColors = { - starter: "border-blue-200 dark:border-blue-800", - framework: "border-purple-200 dark:border-purple-800", - example: "border-green-200 dark:border-green-800", - }; - const categoryLabels = { starter: "Starter", framework: "Framework", @@ -82,9 +78,6 @@ export function TemplateCard({ item, className }: TemplateCardProps) { @@ -93,11 +86,8 @@ export function TemplateCard({ item, className }: TemplateCardProps) {
{language && ( - - {language === "typescript" ? "TS" : "Python"} + + {language === "typescript" ? "TypeScript" : "Python"} )} {isTemplate && template && ( @@ -111,70 +101,101 @@ export function TemplateCard({ item, className }: TemplateCardProps) { )}
-

+

{isTemplate ? formatTemplateName(name) : name}

- - {description} - - {frameworks.length > 0 && ( -
-

- Frameworks: -

-
- {frameworks.map((framework) => ( - - {framework} - - ))} -
-
- )} - - {features.length > 0 && ( -
-

- Features: -

-
- {features.map((feature) => ( - - {feature} - - ))} -
+ + {description} + {isTemplate && template && ( +
+
)}
{isTemplate && template && ( -
- -
+ <> + {(frameworks.length > 0 || features.length > 0) && ( + <> + +
+ {frameworks.map((framework) => ( + + {framework} + + ))} + {features.map((feature) => ( + + {feature} + + ))} +
+ + )} + )} - {!isTemplate && app && app.blogPost && ( - - Read Blog Post → - + {!isTemplate && app && ( + <> + {app.blogPost && ( + + Read Blog Post → + + )} + {app.blogPost && (frameworks.length > 0 || features.length > 0) && ( + + )} + {(frameworks.length > 0 || features.length > 0) && ( +
+ {frameworks.map((framework) => ( + + {framework} + + ))} + {features.map((feature) => ( + + {feature} + + ))} +
+ )} + )} - - - View on GitHub - +
+ + +
); diff --git a/apps/framework-docs-v2/src/components/mdx/template-grid.tsx b/apps/framework-docs-v2/src/components/mdx/template-grid.tsx index 753ec43fa1..23eb147392 100644 --- a/apps/framework-docs-v2/src/components/mdx/template-grid.tsx +++ b/apps/framework-docs-v2/src/components/mdx/template-grid.tsx @@ -1,11 +1,11 @@ "use client"; import * as React from "react"; +import { useSearchParams } from "next/navigation"; import { cn } from "@/lib/utils"; import { Input } from "@/components/ui/input"; import { Button } from "@/components/ui/button"; import { Badge } from "@/components/ui/badge"; -import { ToggleGroup, ToggleGroupItem } from "@/components/ui/toggle-group"; import { TemplateCard } from "./template-card"; import type { ItemMetadata, TemplateMetadata } from "@/lib/template-types"; import { IconSearch, IconX } from "@tabler/icons-react"; @@ -20,13 +20,33 @@ type CategoryFilter = ("starter" | "framework" | "example")[]; type TypeFilter = "template" | "app" | null; export function TemplateGrid({ items, className }: TemplateGridProps) { + const searchParams = useSearchParams(); const [searchQuery, setSearchQuery] = React.useState(""); - const [languageFilter, setLanguageFilter] = - React.useState(null); - const [categoryFilter, setCategoryFilter] = React.useState( - [], - ); - const [typeFilter, setTypeFilter] = React.useState(null); + + // Read filters from URL params (set by TemplatesSideNav) + const typeFilter = React.useMemo(() => { + const type = searchParams.get("type"); + return (type === "template" || type === "app" ? type : null) as TypeFilter; + }, [searchParams]); + + const languageFilter = React.useMemo(() => { + const language = searchParams.get("language"); + return ( + language === "typescript" || language === "python" ? + language + : null) as LanguageFilter; + }, [searchParams]); + + const categoryFilter = React.useMemo(() => { + const categoryParam = searchParams.get("category"); + if (!categoryParam) return []; + return categoryParam + .split(",") + .filter( + (c): c is "starter" | "framework" | "example" => + c === "starter" || c === "framework" || c === "example", + ) as CategoryFilter; + }, [searchParams]); const filteredItems = React.useMemo(() => { return items.filter((item) => { @@ -88,18 +108,10 @@ export function TemplateGrid({ items, className }: TemplateGridProps) { categoryFilter.length > 0 || typeFilter !== null; - const clearFilters = () => { - setSearchQuery(""); - setLanguageFilter(null); - setCategoryFilter([]); - setTypeFilter(null); - }; - return (
- {/* Filters */} -
- {/* Search */} + {/* Search - kept in main content area */} +
)}
- - {/* Type Filter */} -
- - { - if (value === "" || value === undefined) { - setTypeFilter(null); - } else if (value === "template" || value === "app") { - setTypeFilter(value as TypeFilter); - } - }} - variant="outline" - className="w-full" - > - - Templates - - - Apps - - -
- - {/* Language and Category Filters */} -
-
- - { - if (value === "" || value === undefined) { - setLanguageFilter(null); - } else if (value === "typescript" || value === "python") { - setLanguageFilter(value as LanguageFilter); - } - }} - variant="outline" - className="w-full" - > - - TypeScript - - - Python - - -
- -
- - { - setCategoryFilter(value as CategoryFilter); - }} - variant="outline" - className="w-full" - > - - Starter - - - Framework - - - Example - - -
-
- - {/* Clear filters button */} + {/* Results count */} {hasActiveFilters && ( -
- +
{filteredItems.length} item{filteredItems.length !== 1 ? "s" : ""} diff --git a/apps/framework-docs-v2/src/components/ui/checkbox.tsx b/apps/framework-docs-v2/src/components/ui/checkbox.tsx new file mode 100644 index 0000000000..c450e30dd5 --- /dev/null +++ b/apps/framework-docs-v2/src/components/ui/checkbox.tsx @@ -0,0 +1,30 @@ +"use client"; + +import * as React from "react"; +import * as CheckboxPrimitive from "@radix-ui/react-checkbox"; +import { IconCheck } from "@tabler/icons-react"; + +import { cn } from "@/lib/utils"; + +const Checkbox = React.forwardRef< + React.ElementRef, + React.ComponentPropsWithoutRef +>(({ className, ...props }, ref) => ( + + + + + +)); +Checkbox.displayName = CheckboxPrimitive.Root.displayName; + +export { Checkbox }; diff --git a/apps/framework-docs-v2/src/components/ui/command.tsx b/apps/framework-docs-v2/src/components/ui/command.tsx index 525ebddd2f..6be2a79deb 100644 --- a/apps/framework-docs-v2/src/components/ui/command.tsx +++ b/apps/framework-docs-v2/src/components/ui/command.tsx @@ -66,7 +66,7 @@ const CommandInput = React.forwardRef< >( , + VariantProps { + asChild?: boolean; +} + +const Item = React.forwardRef( + ({ className, variant, size, asChild = false, ...props }, ref) => { + const Comp = asChild ? Slot : "div"; + return ( + + ); + }, +); +Item.displayName = "Item"; + +const ItemGroup = React.forwardRef< + HTMLDivElement, + React.HTMLAttributes +>(({ className, ...props }, ref) => ( +
+)); +ItemGroup.displayName = "ItemGroup"; + +const ItemSeparator = React.forwardRef< + HTMLDivElement, + React.HTMLAttributes +>(({ className, ...props }, ref) => ( +
+)); +ItemSeparator.displayName = "ItemSeparator"; + +const ItemMedia = React.forwardRef< + HTMLDivElement, + React.HTMLAttributes & { + variant?: "default" | "icon" | "image"; + } +>(({ className, variant = "default", ...props }, ref) => ( +
+)); +ItemMedia.displayName = "ItemMedia"; + +const ItemContent = React.forwardRef< + HTMLDivElement, + React.HTMLAttributes +>(({ className, ...props }, ref) => ( +
+)); +ItemContent.displayName = "ItemContent"; + +const ItemTitle = React.forwardRef< + HTMLDivElement, + React.HTMLAttributes +>(({ className, ...props }, ref) => ( +
+)); +ItemTitle.displayName = "ItemTitle"; + +const ItemDescription = React.forwardRef< + HTMLDivElement, + React.HTMLAttributes +>(({ className, ...props }, ref) => ( +
+)); +ItemDescription.displayName = "ItemDescription"; + +const ItemActions = React.forwardRef< + HTMLDivElement, + React.HTMLAttributes +>(({ className, ...props }, ref) => ( +
+)); +ItemActions.displayName = "ItemActions"; + +const ItemHeader = React.forwardRef< + HTMLDivElement, + React.HTMLAttributes +>(({ className, ...props }, ref) => ( +
+)); +ItemHeader.displayName = "ItemHeader"; + +const ItemFooter = React.forwardRef< + HTMLDivElement, + React.HTMLAttributes +>(({ className, ...props }, ref) => ( +
+)); +ItemFooter.displayName = "ItemFooter"; + +export { + Item, + ItemGroup, + ItemSeparator, + ItemMedia, + ItemContent, + ItemTitle, + ItemDescription, + ItemActions, + ItemHeader, + ItemFooter, +}; diff --git a/apps/framework-docs-v2/src/components/ui/select.tsx b/apps/framework-docs-v2/src/components/ui/select.tsx index 9b01fc1fb9..0a7d581cd5 100644 --- a/apps/framework-docs-v2/src/components/ui/select.tsx +++ b/apps/framework-docs-v2/src/components/ui/select.tsx @@ -19,7 +19,7 @@ const SelectTrigger = React.forwardRef< span]:line-clamp-1", + "flex h-9 w-full items-center justify-between whitespace-nowrap rounded-md border border-input bg-card px-3 py-2 text-sm shadow-sm ring-offset-background data-[placeholder]:text-muted-foreground focus:outline-none focus:ring-1 focus:ring-ring disabled:cursor-not-allowed disabled:opacity-50 [&>span]:line-clamp-1", className, )} {...props} diff --git a/apps/framework-docs-v2/src/lib/content.ts b/apps/framework-docs-v2/src/lib/content.ts index 15af3315d9..861e1b7538 100644 --- a/apps/framework-docs-v2/src/lib/content.ts +++ b/apps/framework-docs-v2/src/lib/content.ts @@ -31,7 +31,8 @@ export function getContentFiles(): string[] { /** * Recursively get all markdown files in a directory - * Excludes the 'shared' folder + * Excludes the 'shared' folder and 'templates' folder + * (templates is now an explicit page in the app directory) */ function getAllMarkdownFiles(dir: string, baseDir: string): string[] { const files: string[] = []; @@ -39,11 +40,12 @@ function getAllMarkdownFiles(dir: string, baseDir: string): string[] { for (const entry of entries) { const fullPath = path.join(dir, entry.name); - // Skip the shared folder - if (entry.isDirectory() && entry.name === "shared") { - continue; - } + // Skip the shared folder and templates folder + // (templates is now an explicit page in app directory) if (entry.isDirectory()) { + if (entry.name === "shared" || entry.name === "templates") { + continue; + } files.push(...getAllMarkdownFiles(fullPath, baseDir)); } else if ( entry.isFile() && diff --git a/apps/framework-docs-v2/src/styles/globals.css b/apps/framework-docs-v2/src/styles/globals.css index bcb1670268..2b86243865 100644 --- a/apps/framework-docs-v2/src/styles/globals.css +++ b/apps/framework-docs-v2/src/styles/globals.css @@ -40,9 +40,9 @@ } .dark { - --background: 0 0% 3.9%; + --background: 0 0% 0%; --foreground: 0 0% 98%; - --card: 0 0% 3.9%; + --card: 240 8.9% 3.9%; --card-foreground: 0 0% 98%; --popover: 0 0% 3.9%; --popover-foreground: 0 0% 98%; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 20d90858fb..96a5522d1e 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -62,7 +62,7 @@ importers: devDependencies: '@clickhouse/client': specifier: latest - version: 1.12.1 + version: 1.13.0 '@iarna/toml': specifier: ^3.0.0 version: 3.0.0 @@ -256,6 +256,9 @@ importers: '@radix-ui/react-avatar': specifier: ^1.0.4 version: 1.1.11(@types/react-dom@19.2.2(@types/react@19.2.2))(@types/react@19.2.2)(react-dom@19.2.0(react@19.2.0))(react@19.2.0) + '@radix-ui/react-checkbox': + specifier: ^1.3.3 + version: 1.3.3(@types/react-dom@19.2.2(@types/react@19.2.2))(@types/react@19.2.2)(react-dom@19.2.0(react@19.2.0))(react@19.2.0) '@radix-ui/react-collapsible': specifier: ^1.1.11 version: 1.1.12(@types/react-dom@19.2.2(@types/react@19.2.2))(@types/react@19.2.2)(react-dom@19.2.0(react@19.2.0))(react@19.2.0) @@ -271,6 +274,9 @@ importers: '@radix-ui/react-navigation-menu': specifier: ^1.2.13 version: 
1.2.14(@types/react-dom@19.2.2(@types/react@19.2.2))(@types/react@19.2.2)(react-dom@19.2.0(react@19.2.0))(react@19.2.0) + '@radix-ui/react-popover': + specifier: ^1.1.15 + version: 1.1.15(@types/react-dom@19.2.2(@types/react@19.2.2))(@types/react@19.2.2)(react-dom@19.2.0(react@19.2.0))(react@19.2.0) '@radix-ui/react-scroll-area': specifier: ^1.2.2 version: 1.2.10(@types/react-dom@19.2.2(@types/react@19.2.2))(@types/react@19.2.2)(react-dom@19.2.0(react@19.2.0))(react@19.2.0) @@ -1280,8 +1286,8 @@ packages: '@chevrotain/utils@11.0.3': resolution: {integrity: sha512-YslZMgtJUyuMbZ+aKvfF3x1f5liK4mWNxghFRv7jqRR9C3R3fAOGTTKvxXDa2Y1s9zSbcpuO0cAxDYsc9SrXoQ==} - '@clickhouse/client-common@1.12.1': - resolution: {integrity: sha512-ccw1N6hB4+MyaAHIaWBwGZ6O2GgMlO99FlMj0B0UEGfjxM9v5dYVYql6FpP19rMwrVAroYs/IgX2vyZEBvzQLg==} + '@clickhouse/client-common@1.13.0': + resolution: {integrity: sha512-QlGUMd3EaKkIRLCv0WW8Rw9cOlqhwQPT+ucNWY8eC4UALsMhJLpa0H7Cd7MYc9CEtTv/xlr3IcYw5Tdho4Hr2g==} '@clickhouse/client-common@1.5.0': resolution: {integrity: sha512-U3vDp+PDnNVEv6kia+Mq5ygnlMZzsYU+3TX+0da3XvL926jzYLMBlIvFUxe2+/5k47ySvnINRC/2QxVK7PC2/A==} @@ -1292,8 +1298,8 @@ packages: '@clickhouse/client-web@1.5.0': resolution: {integrity: sha512-21+c2UJ4cx9SPiIWQThCLULb8h/zng0pNrtTwbbnaoCqMbasyRCyRTHs3wRr7fqRUcZ3p9krIPuN0gnJw3GJ6Q==} - '@clickhouse/client@1.12.1': - resolution: {integrity: sha512-7ORY85rphRazqHzImNXMrh4vsaPrpetFoTWpZYueCO2bbO6PXYDXp/GQ4DgxnGIqbWB/Di1Ai+Xuwq2o7DJ36A==} + '@clickhouse/client@1.13.0': + resolution: {integrity: sha512-uK+zqPaJnAoq3QIOvUNbHtbWUhyg2A/aSbdJtrY2+kawp4SMBLcfIbB9ucRv5Yht1CAa3b24CiUlypkmgarukg==} engines: {node: '>=16'} '@clickhouse/client@1.8.1': @@ -2493,6 +2499,19 @@ packages: '@types/react-dom': optional: true + '@radix-ui/react-checkbox@1.3.3': + resolution: {integrity: sha512-wBbpv+NQftHDdG86Qc0pIyXk5IR3tM8Vd0nWLKDcX8nNn4nXFOFwsKuqw2okA/1D/mpaAkmuyndrPJTYDNZtFw==} + peerDependencies: + '@types/react': '*' + '@types/react-dom': '*' + react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc + react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc + peerDependenciesMeta: + '@types/react': + optional: true + '@types/react-dom': + optional: true + '@radix-ui/react-collapsible@1.1.12': resolution: {integrity: sha512-Uu+mSh4agx2ib1uIGPP4/CKNULyajb3p92LsVXmH2EHVMTfZWpll88XJ0j4W0z3f8NK1eYl1+Mf/szHPmcHzyA==} peerDependencies: @@ -2682,6 +2701,19 @@ packages: '@types/react-dom': optional: true + '@radix-ui/react-popover@1.1.15': + resolution: {integrity: sha512-kr0X2+6Yy/vJzLYJUPCZEc8SfQcf+1COFoAqauJm74umQhta9M7lNJHP7QQS3vkvcGLQUbWpMzwrXYwrYztHKA==} + peerDependencies: + '@types/react': '*' + '@types/react-dom': '*' + react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc + react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc + peerDependenciesMeta: + '@types/react': + optional: true + '@types/react-dom': + optional: true + '@radix-ui/react-popper@1.2.8': resolution: {integrity: sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==} peerDependencies: @@ -10104,7 +10136,7 @@ snapshots: '@chevrotain/utils@11.0.3': {} - '@clickhouse/client-common@1.12.1': {} + '@clickhouse/client-common@1.13.0': {} '@clickhouse/client-common@1.5.0': {} @@ -10114,9 +10146,9 @@ snapshots: dependencies: '@clickhouse/client-common': 1.5.0 - '@clickhouse/client@1.12.1': + '@clickhouse/client@1.13.0': dependencies: - '@clickhouse/client-common': 1.12.1 + '@clickhouse/client-common': 1.13.0 '@clickhouse/client@1.8.1': dependencies: @@ -11309,6 +11341,22 @@ snapshots: 
'@types/react': 19.2.2 '@types/react-dom': 19.2.2(@types/react@19.2.2) + '@radix-ui/react-checkbox@1.3.3(@types/react-dom@19.2.2(@types/react@19.2.2))(@types/react@19.2.2)(react-dom@19.2.0(react@19.2.0))(react@19.2.0)': + dependencies: + '@radix-ui/primitive': 1.1.3 + '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.2.2)(react@19.2.0) + '@radix-ui/react-context': 1.1.2(@types/react@19.2.2)(react@19.2.0) + '@radix-ui/react-presence': 1.1.5(@types/react-dom@19.2.2(@types/react@19.2.2))(@types/react@19.2.2)(react-dom@19.2.0(react@19.2.0))(react@19.2.0) + '@radix-ui/react-primitive': 2.1.3(@types/react-dom@19.2.2(@types/react@19.2.2))(@types/react@19.2.2)(react-dom@19.2.0(react@19.2.0))(react@19.2.0) + '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.2.2)(react@19.2.0) + '@radix-ui/react-use-previous': 1.1.1(@types/react@19.2.2)(react@19.2.0) + '@radix-ui/react-use-size': 1.1.1(@types/react@19.2.2)(react@19.2.0) + react: 19.2.0 + react-dom: 19.2.0(react@19.2.0) + optionalDependencies: + '@types/react': 19.2.2 + '@types/react-dom': 19.2.2(@types/react@19.2.2) + '@radix-ui/react-collapsible@1.1.12(@types/react-dom@18.3.7(@types/react@18.3.26))(@types/react@18.3.26)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)': dependencies: '@radix-ui/primitive': 1.1.3 @@ -11704,6 +11752,29 @@ snapshots: '@types/react': 19.2.2 '@types/react-dom': 19.2.2(@types/react@19.2.2) + '@radix-ui/react-popover@1.1.15(@types/react-dom@19.2.2(@types/react@19.2.2))(@types/react@19.2.2)(react-dom@19.2.0(react@19.2.0))(react@19.2.0)': + dependencies: + '@radix-ui/primitive': 1.1.3 + '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.2.2)(react@19.2.0) + '@radix-ui/react-context': 1.1.2(@types/react@19.2.2)(react@19.2.0) + '@radix-ui/react-dismissable-layer': 1.1.11(@types/react-dom@19.2.2(@types/react@19.2.2))(@types/react@19.2.2)(react-dom@19.2.0(react@19.2.0))(react@19.2.0) + '@radix-ui/react-focus-guards': 1.1.3(@types/react@19.2.2)(react@19.2.0) + '@radix-ui/react-focus-scope': 1.1.7(@types/react-dom@19.2.2(@types/react@19.2.2))(@types/react@19.2.2)(react-dom@19.2.0(react@19.2.0))(react@19.2.0) + '@radix-ui/react-id': 1.1.1(@types/react@19.2.2)(react@19.2.0) + '@radix-ui/react-popper': 1.2.8(@types/react-dom@19.2.2(@types/react@19.2.2))(@types/react@19.2.2)(react-dom@19.2.0(react@19.2.0))(react@19.2.0) + '@radix-ui/react-portal': 1.1.9(@types/react-dom@19.2.2(@types/react@19.2.2))(@types/react@19.2.2)(react-dom@19.2.0(react@19.2.0))(react@19.2.0) + '@radix-ui/react-presence': 1.1.5(@types/react-dom@19.2.2(@types/react@19.2.2))(@types/react@19.2.2)(react-dom@19.2.0(react@19.2.0))(react@19.2.0) + '@radix-ui/react-primitive': 2.1.3(@types/react-dom@19.2.2(@types/react@19.2.2))(@types/react@19.2.2)(react-dom@19.2.0(react@19.2.0))(react@19.2.0) + '@radix-ui/react-slot': 1.2.3(@types/react@19.2.2)(react@19.2.0) + '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.2.2)(react@19.2.0) + aria-hidden: 1.2.6 + react: 19.2.0 + react-dom: 19.2.0(react@19.2.0) + react-remove-scroll: 2.7.1(@types/react@19.2.2)(react@19.2.0) + optionalDependencies: + '@types/react': 19.2.2 + '@types/react-dom': 19.2.2(@types/react@19.2.2) + '@radix-ui/react-popper@1.2.8(@types/react-dom@18.3.7(@types/react@18.3.26))(@types/react@18.3.26)(react-dom@18.3.1(react@18.3.1))(react@18.3.1)': dependencies: '@floating-ui/react-dom': 2.1.6(react-dom@18.3.1(react@18.3.1))(react@18.3.1) @@ -15267,7 +15338,7 @@ snapshots: eslint: 8.57.1 eslint-import-resolver-node: 0.3.9 eslint-import-resolver-typescript: 
3.10.1(eslint-plugin-import@2.32.0(eslint@8.57.1))(eslint@8.57.1) - eslint-plugin-import: 2.32.0(@typescript-eslint/parser@5.62.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1) + eslint-plugin-import: 2.32.0(@typescript-eslint/parser@5.62.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1) eslint-plugin-jsx-a11y: 6.10.2(eslint@8.57.1) eslint-plugin-react: 7.37.4(eslint@8.57.1) eslint-plugin-react-hooks: 5.0.0-canary-7118f5dd7-20230705(eslint@8.57.1) @@ -15327,7 +15398,7 @@ snapshots: tinyglobby: 0.2.15 unrs-resolver: 1.11.1 optionalDependencies: - eslint-plugin-import: 2.32.0(@typescript-eslint/parser@5.62.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1) + eslint-plugin-import: 2.32.0(@typescript-eslint/parser@5.62.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1) transitivePeerDependencies: - supports-color @@ -15368,7 +15439,7 @@ snapshots: transitivePeerDependencies: - supports-color - eslint-plugin-import@2.32.0(@typescript-eslint/parser@5.62.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1): + eslint-plugin-import@2.32.0(@typescript-eslint/parser@5.62.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1): dependencies: '@rtsao/scc': 1.1.0 array-includes: 3.1.9 From 7a3aac7942fa2bddcfb3d9aa56e97bee3787d03a Mon Sep 17 00:00:00 2001 From: Tim Delisle Date: Fri, 14 Nov 2025 21:47:46 -0800 Subject: [PATCH 02/59] attempt build fix --- apps/framework-docs-v2/.npmrc | 20 -------------------- apps/framework-docs-v2/tsconfig.json | 7 +++++-- apps/framework-docs/tsconfig.json | 7 +++++-- packages/ts-config/nextjs.json | 13 ++++--------- 4 files changed, 14 insertions(+), 33 deletions(-) delete mode 100644 apps/framework-docs-v2/.npmrc diff --git a/apps/framework-docs-v2/.npmrc b/apps/framework-docs-v2/.npmrc deleted file mode 100644 index be313dafb4..0000000000 --- a/apps/framework-docs-v2/.npmrc +++ /dev/null @@ -1,20 +0,0 @@ -# Force all dependencies to be hoisted locally to this app's node_modules -# This prevents TypeScript from finding React types in nested node_modules -# This works with the root .npmrc which prevents React from being hoisted to root -# Since this app only uses React 19, we can safely hoist everything locally here -# -# IMPORTANT: We use hoist-pattern instead of shamefully-hoist=true to avoid -# conflicts with the root hoisting pattern when running pnpm add from this directory -# (e.g., via shadcn CLI). This hoists everything locally without modifying root structure. 
-hoist-pattern[]=* - -# Match root public-hoist-pattern to prevent ERR_PNPM_PUBLIC_HOIST_PATTERN_DIFF -# This ensures compatibility when running pnpm add from this directory -public-hoist-pattern[]=!react -public-hoist-pattern[]=!react-dom -public-hoist-pattern[]=!react/jsx-runtime -public-hoist-pattern[]=!react-dom/server -public-hoist-pattern[]=!react/jsx-dev-runtime -public-hoist-pattern[]=!@types/react -public-hoist-pattern[]=!@types/react-dom - diff --git a/apps/framework-docs-v2/tsconfig.json b/apps/framework-docs-v2/tsconfig.json index 7a7e4d75f0..29c0f8e1cc 100644 --- a/apps/framework-docs-v2/tsconfig.json +++ b/apps/framework-docs-v2/tsconfig.json @@ -1,5 +1,5 @@ { - "extends": "@repo/ts-config/base.json", + "extends": "@repo/ts-config/nextjs.json", "compilerOptions": { "plugins": [ { @@ -21,7 +21,10 @@ "resolveJsonModule": true, "isolatedModules": true, "jsx": "react-jsx", - "incremental": true + "incremental": true, + "declaration": false, + "declarationMap": false, + "emitDeclarationOnly": false }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], "exclude": ["node_modules"] diff --git a/apps/framework-docs/tsconfig.json b/apps/framework-docs/tsconfig.json index 9b213fb0d0..aa438ff45e 100644 --- a/apps/framework-docs/tsconfig.json +++ b/apps/framework-docs/tsconfig.json @@ -1,5 +1,5 @@ { - "extends": "@repo/ts-config/base.json", + "extends": "@repo/ts-config/nextjs.json", "compilerOptions": { "plugins": [ { @@ -21,7 +21,10 @@ "resolveJsonModule": true, "isolatedModules": true, "jsx": "preserve", - "incremental": true + "incremental": true, + "declaration": false, + "declarationMap": false, + "emitDeclarationOnly": false }, "include": [ "next-env.d.ts", diff --git a/packages/ts-config/nextjs.json b/packages/ts-config/nextjs.json index 6a8050d396..f83274c7ec 100644 --- a/packages/ts-config/nextjs.json +++ b/packages/ts-config/nextjs.json @@ -1,15 +1,10 @@ { "$schema": "https://json.schemastore.org/tsconfig", - "display": "Next.js", + "display": "Next.js App", "extends": "./base.json", "compilerOptions": { - "plugins": [{ "name": "next" }], - "moduleResolution": "NodeNext", - "allowJs": true, - "jsx": "preserve", - "noEmit": true, - "paths": { - "@ui/*": ["../../packages/design-system/*"] - } + "declaration": false, + "declarationMap": false, + "emitDeclarationOnly": false } } From 5a3dceca39517da5587f238f621e8a507addaf78 Mon Sep 17 00:00:00 2001 From: Tim Delisle Date: Sat, 15 Nov 2025 16:05:45 -0800 Subject: [PATCH 03/59] updated template cards --- .../src/components/mdx/template-card.tsx | 156 +++++++++--------- 1 file changed, 78 insertions(+), 78 deletions(-) diff --git a/apps/framework-docs-v2/src/components/mdx/template-card.tsx b/apps/framework-docs-v2/src/components/mdx/template-card.tsx index 9e78e47774..81225fb132 100644 --- a/apps/framework-docs-v2/src/components/mdx/template-card.tsx +++ b/apps/framework-docs-v2/src/components/mdx/template-card.tsx @@ -11,9 +11,8 @@ import { CardFooter, CardHeader, } from "@/components/ui/card"; -import { IconBrandGithub, IconRocket } from "@tabler/icons-react"; +import { IconBrandGithub, IconRocket, IconBook } from "@tabler/icons-react"; import { Button } from "@/components/ui/button"; -import { Separator } from "@/components/ui/separator"; import { Snippet, SnippetCopyButton, @@ -53,6 +52,7 @@ export function TemplateCard({ item, className }: TemplateCardProps) { const isTemplate = item.type === "template"; const template = isTemplate ? (item as TemplateMetadata) : null; const app = !isTemplate ? 
(item as AppMetadata) : null; + const [chipsExpanded, setChipsExpanded] = React.useState(false); const categoryLabels = { starter: "Starter", @@ -74,6 +74,17 @@ export function TemplateCard({ item, className }: TemplateCardProps) { const description = isTemplate ? template!.description : app!.description; const name = isTemplate ? template!.name : app!.name; + // Combine frameworks and features into a single array with type info + const allChips = [ + ...frameworks.map((f) => ({ value: f, type: "framework" as const })), + ...features.map((f) => ({ value: f, type: "feature" as const })), + ]; + + const MAX_VISIBLE_CHIPS = 3; + const visibleChips = + chipsExpanded ? allChips : allChips.slice(0, MAX_VISIBLE_CHIPS); + const hiddenCount = allChips.length - MAX_VISIBLE_CHIPS; + return (
-
- {language && ( - - {language === "typescript" ? "TypeScript" : "Python"} - - )} - {isTemplate && template && ( - - {categoryLabels[template.category]} - - )} - {!isTemplate && ( - - Demo App - - )} +
+ {(() => { + const labels: string[] = []; + if (language) { + labels.push( + language === "typescript" ? "TypeScript" : "Python", + ); + } + if (isTemplate && template) { + labels.push(categoryLabels[template.category]); + } + if (!isTemplate) { + labels.push("Demo App"); + } + return ( + + {labels.join(" • ")} + + ); + })()}

{isTemplate ? formatTemplateName(name) : name}

+ {allChips.length > 0 && ( +
+ {visibleChips.map((chip) => ( + + {chip.value} + + ))} + {!chipsExpanded && hiddenCount > 0 && ( + setChipsExpanded(true)} + > + {hiddenCount} more + + )} + {chipsExpanded && ( + setChipsExpanded(false)} + > + Show less + + )} +
+ )}
@@ -116,66 +164,6 @@ export function TemplateCard({ item, className }: TemplateCardProps) { )} - {isTemplate && template && ( - <> - {(frameworks.length > 0 || features.length > 0) && ( - <> - -
- {frameworks.map((framework) => ( - - {framework} - - ))} - {features.map((feature) => ( - - {feature} - - ))} -
- - )} - - )} - {!isTemplate && app && ( - <> - {app.blogPost && ( - - Read Blog Post → - - )} - {app.blogPost && (frameworks.length > 0 || features.length > 0) && ( - - )} - {(frameworks.length > 0 || features.length > 0) && ( -
- {frameworks.map((framework) => ( - - {framework} - - ))} - {features.map((feature) => ( - - {feature} - - ))} -
- )} - - )}
+ {!isTemplate && app && app.blogPost && ( + + )} + +
+ ); +} diff --git a/apps/framework-docs-v2/src/components/guides/guide-steps-nav.tsx b/apps/framework-docs-v2/src/components/guides/guide-steps-nav.tsx new file mode 100644 index 0000000000..c21d443733 --- /dev/null +++ b/apps/framework-docs-v2/src/components/guides/guide-steps-nav.tsx @@ -0,0 +1,190 @@ +"use client"; + +import * as React from "react"; +import { usePathname, useSearchParams } from "next/navigation"; +import Link from "next/link"; +import { IconChevronLeft, IconChevronRight } from "@tabler/icons-react"; +import { Button } from "@/components/ui/button"; +import { Badge } from "@/components/ui/badge"; +import { useLanguage } from "@/hooks/use-language"; + +interface Step { + slug: string; + stepNumber: number; + title: string; +} + +interface GuideStepsNavProps { + steps: Step[]; + currentSlug: string; + children?: React.ReactNode; +} + +export function GuideStepsNav({ + steps, + currentSlug, + children, +}: GuideStepsNavProps) { + const pathname = usePathname(); + const searchParams = useSearchParams(); + const { language } = useLanguage(); + const [currentStepIndex, setCurrentStepIndex] = React.useState(0); + + // Determine current step from URL hash or default to first step + React.useEffect(() => { + const hash = window.location.hash; + if (hash) { + const stepMatch = hash.match(/step-(\d+)/); + if (stepMatch) { + const stepNum = parseInt(stepMatch[1]!, 10); + const index = steps.findIndex((s) => s.stepNumber === stepNum); + if (index >= 0) { + setCurrentStepIndex(index); + } + } + } + }, [steps]); + + // Update URL hash and show/hide steps when step changes + React.useEffect(() => { + if (steps.length > 0 && currentStepIndex < steps.length) { + const currentStep = steps[currentStepIndex]; + if (currentStep) { + const hasPrevious = currentStepIndex > 0; + const hasNext = currentStepIndex < steps.length - 1; + + // Update URL hash + window.history.replaceState( + null, + "", + `${pathname}${searchParams.toString() ? 
`?${searchParams.toString()}` : ""}#step-${currentStep.stepNumber}`, + ); + + // Show/hide step content + const stepContents = document.querySelectorAll(".step-content"); + stepContents.forEach((content, index) => { + if (index === currentStepIndex) { + content.classList.remove("hidden"); + content.classList.add("block"); + } else { + content.classList.add("hidden"); + content.classList.remove("block"); + } + }); + + // Update card header with current step info + const cardTitle = document.querySelector(".step-card-title"); + const cardBadge = document.querySelector(".step-card-badge"); + const buttonsContainer = document.getElementById( + "step-nav-buttons-container", + ); + if (cardTitle) cardTitle.textContent = currentStep.title; + if (cardBadge) + cardBadge.textContent = currentStep.stepNumber.toString(); + + // Update navigation buttons + if (buttonsContainer) { + buttonsContainer.innerHTML = ` + + + `; + } + } + } + }, [currentStepIndex, steps, pathname, searchParams]); + + if (steps.length === 0) return null; + + const currentStep = steps[currentStepIndex]; + const hasPrevious = currentStepIndex > 0; + const hasNext = currentStepIndex < steps.length - 1; + + const goToStep = (index: number) => { + if (index >= 0 && index < steps.length) { + setCurrentStepIndex(index); + // Scroll to top of steps section + const element = document.getElementById("guide-steps"); + if (element) { + element.scrollIntoView({ behavior: "smooth", block: "start" }); + } + } + }; + + // Expose goToStep to window for button onclick handlers + React.useEffect(() => { + (window as any).__goToStep = goToStep; + return () => { + delete (window as any).__goToStep; + }; + }, [goToStep]); + + const buildUrl = (stepSlug: string) => { + const params = new URLSearchParams(searchParams.toString()); + params.set("lang", language); + return `/${stepSlug}?${params.toString()}`; + }; + + return ( + <> +
+

Implementation Steps

+
+ {steps.map((step, index) => ( + + ))} +
+
+ + {children} + + {/* Step list for navigation */} +
+

All Steps

+
+ {steps.map((step, index) => ( + { + e.preventDefault(); + goToStep(index); + }} + className={`flex items-center gap-3 rounded-md px-3 py-2 text-sm transition-colors ${ + index === currentStepIndex ? + "bg-accent text-accent-foreground" + : "hover:bg-accent/50" + }`} + > + + {step.stepNumber} + + {step.title} + + ))} +
+
+ + ); +} diff --git a/apps/framework-docs-v2/src/components/guides/guide-steps-wrapper.tsx b/apps/framework-docs-v2/src/components/guides/guide-steps-wrapper.tsx new file mode 100644 index 0000000000..f23846b21b --- /dev/null +++ b/apps/framework-docs-v2/src/components/guides/guide-steps-wrapper.tsx @@ -0,0 +1,66 @@ +import { GuideStepsNav } from "./guide-steps-nav"; +import { StepContent } from "./step-content"; +import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; +import { Badge } from "@/components/ui/badge"; + +interface GuideStepsWrapperProps { + steps: Array<{ + slug: string; + stepNumber: number; + title: string; + }>; + stepsWithContent: Array<{ + slug: string; + stepNumber: number; + title: string; + content: string | null; + isMDX: boolean; + }>; + currentSlug: string; +} + +export async function GuideStepsWrapper({ + steps, + stepsWithContent, + currentSlug, +}: GuideStepsWrapperProps) { + // Render all step content on the server + const renderedSteps = await Promise.all( + stepsWithContent.map(async (step, index) => { + if (!step.content) return null; + return ( +
+ +
+ ); + }), + ); + + return ( +
+ + + +
+
+ + {steps[0]?.stepNumber || 1} + + + {steps[0]?.title || "Step 1"} + +
+
+
+
+ +
{renderedSteps}
+
+
+
+ ); +} diff --git a/apps/framework-docs-v2/src/components/guides/guide-steps.tsx b/apps/framework-docs-v2/src/components/guides/guide-steps.tsx new file mode 100644 index 0000000000..ab5f980354 --- /dev/null +++ b/apps/framework-docs-v2/src/components/guides/guide-steps.tsx @@ -0,0 +1,183 @@ +"use client"; + +import * as React from "react"; +import { usePathname, useSearchParams } from "next/navigation"; +import Link from "next/link"; +import { IconChevronLeft, IconChevronRight } from "@tabler/icons-react"; +import { Button } from "@/components/ui/button"; +import { Badge } from "@/components/ui/badge"; +import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; +import { useLanguage } from "@/hooks/use-language"; + +interface Step { + slug: string; + stepNumber: number; + title: string; +} + +interface GuideStepsProps { + steps: Step[]; + renderedSteps: React.ReactElement[]; + currentSlug: string; +} + +export function GuideSteps({ + steps, + renderedSteps, + currentSlug, +}: GuideStepsProps) { + const pathname = usePathname(); + const searchParams = useSearchParams(); + const { language } = useLanguage(); + const [currentStepIndex, setCurrentStepIndex] = React.useState(0); + + // Determine current step from URL hash or default to first step + React.useEffect(() => { + const hash = window.location.hash; + if (hash) { + const stepMatch = hash.match(/step-(\d+)/); + if (stepMatch) { + const stepNum = parseInt(stepMatch[1]!, 10); + const index = steps.findIndex((s) => s.stepNumber === stepNum); + if (index >= 0) { + setCurrentStepIndex(index); + } + } + } + }, [steps]); + + // Update URL hash when step changes + React.useEffect(() => { + if (steps.length > 0 && currentStepIndex < steps.length) { + const currentStep = steps[currentStepIndex]; + if (currentStep) { + window.history.replaceState( + null, + "", + `${pathname}${searchParams.toString() ? `?${searchParams.toString()}` : ""}#step-${currentStep.stepNumber}`, + ); + } + } + }, [currentStepIndex, steps, pathname, searchParams]); + + if (steps.length === 0) return null; + + const currentStep = steps[currentStepIndex]; + const currentRenderedStep = renderedSteps[currentStepIndex]; + const hasPrevious = currentStepIndex > 0; + const hasNext = currentStepIndex < steps.length - 1; + + const goToStep = (index: number) => { + if (index >= 0 && index < steps.length) { + setCurrentStepIndex(index); + // Scroll to top of steps section + const element = document.getElementById("guide-steps"); + if (element) { + element.scrollIntoView({ behavior: "smooth", block: "start" }); + } + } + }; + + const buildUrl = (stepSlug: string) => { + const params = new URLSearchParams(searchParams.toString()); + params.set("lang", language); + return `/${stepSlug}?${params.toString()}`; + }; + + return ( +
+
+

Implementation Steps

+
+ {steps.map((step, index) => ( + + ))} +
+
+ + + +
+
+ {currentStep.stepNumber} + {currentStep.title} +
+
+ + +
+
+
+ +
+ {renderedSteps.map((stepContent, index) => ( +
+ {stepContent || ( +
+ Step content not available +
+ )} +
+ ))} +
+
+
+ + {/* Step list for navigation */} +
+

All Steps

+
+ {steps.map((step, index) => ( + { + e.preventDefault(); + goToStep(index); + }} + className={`flex items-center gap-3 rounded-md px-3 py-2 text-sm transition-colors ${ + index === currentStepIndex ? + "bg-accent text-accent-foreground" + : "hover:bg-accent/50" + }`} + > + + {step.stepNumber} + + {step.title} + + ))} +
+
+
+ ); +} diff --git a/apps/framework-docs-v2/src/components/guides/step-content.tsx b/apps/framework-docs-v2/src/components/guides/step-content.tsx new file mode 100644 index 0000000000..b64c151cfc --- /dev/null +++ b/apps/framework-docs-v2/src/components/guides/step-content.tsx @@ -0,0 +1,22 @@ +import { MDXRenderer } from "@/components/mdx-renderer"; + +interface StepContentProps { + content: string; + isMDX: boolean; +} + +export async function StepContent({ content, isMDX }: StepContentProps) { + if (!content) { + return ( +
Step content not available
+ ); + } + + return ( +
+ {isMDX ? + + :
} +
+ ); +} diff --git a/apps/framework-docs-v2/src/components/navigation/side-nav.tsx b/apps/framework-docs-v2/src/components/navigation/side-nav.tsx index fb1380ed5f..e3b5ef66bf 100644 --- a/apps/framework-docs-v2/src/components/navigation/side-nav.tsx +++ b/apps/framework-docs-v2/src/components/navigation/side-nav.tsx @@ -9,9 +9,11 @@ import { Sidebar, SidebarContent, SidebarGroup, + SidebarGroupContent, SidebarGroupLabel, SidebarMenu, SidebarMenuAction, + SidebarMenuBadge, SidebarMenuButton, SidebarMenuItem, SidebarMenuSub, @@ -174,10 +176,21 @@ function NavItemComponent({ item }: { item: NavPage }) { }, [hasChildren, item.children, pathname]); const defaultOpen = isActive || hasActiveDescendant; + const [isOpen, setIsOpen] = React.useState(defaultOpen); + + // Update open state when active state changes + React.useEffect(() => { + setIsOpen(isActive || hasActiveDescendant); + }, [isActive, hasActiveDescendant]); if (hasChildren) { return ( - + @@ -224,6 +237,91 @@ function NavItemComponent({ item }: { item: NavPage }) { ); } +function NestedNavItemComponent({ + item, + pathname, + searchParams, + language, +}: { + item: NavPage; + pathname: string; + searchParams: URLSearchParams; + language: string; +}) { + const childHasChildren = item.children && item.children.length > 0; + const childHref = (() => { + const params = new URLSearchParams(searchParams.toString()); + params.set("lang", language); + return `/${item.slug}?${params.toString()}`; + })(); + const childIsActive = pathname === `/${item.slug}`; + + // Recursively check if any descendant is active + const checkDescendant = (children: NavItem[]): boolean => { + return children.some((c) => { + if (c.type === "page") { + if (pathname === `/${c.slug}`) return true; + if (c.children) return checkDescendant(c.children); + } + return false; + }); + }; + const hasActiveDescendant = + childHasChildren ? checkDescendant(item.children!) : false; + const defaultOpen = childIsActive || hasActiveDescendant; + const [isOpen, setIsOpen] = React.useState(defaultOpen); + + React.useEffect(() => { + setIsOpen(childIsActive || hasActiveDescendant); + }, [childIsActive, hasActiveDescendant]); + + if (childHasChildren) { + return ( + + + + + {item.icon && } + {item.title} + + + + + + Toggle + + + + + {renderNavChildren( + item.children!, + pathname, + searchParams, + language, + )} + + + + + ); + } + + return ( + + + + {item.icon && } + {item.title} + + + + ); +} + function renderNavChildren( children: NavItem[], pathname: string, @@ -231,169 +329,19 @@ function renderNavChildren( language: string, ): React.ReactNode[] { const elements: React.ReactNode[] = []; - let currentGroup: NavPage[] = []; - let currentLabel: string | null = null; - - const flushGroup = () => { - if (currentGroup.length > 0) { - currentGroup.forEach((child: NavPage) => { - const childHasChildren = child.children && child.children.length > 0; - const childHref = (() => { - const params = new URLSearchParams(searchParams.toString()); - params.set("lang", language); - return `/${child.slug}?${params.toString()}`; - })(); - const childIsActive = pathname === `/${child.slug}`; - - // Recursively check if any descendant is active - const checkDescendant = (children: NavItem[]): boolean => { - return children.some((c) => { - if (c.type === "page") { - if (pathname === `/${c.slug}`) return true; - if (c.children) return checkDescendant(c.children); - } - return false; - }); - }; - const hasActiveDescendant = - childHasChildren ? checkDescendant(child.children!) 
: false; - const defaultOpen = childIsActive || hasActiveDescendant; - - if (childHasChildren) { - // Render nested collapsible item - using same pattern as top-level - // SidebarMenuSubItem needs relative positioning for SidebarMenuAction - elements.push( - - - - - {child.icon && } - {child.title} - - - - - - Toggle - - - - - {renderNavChildren( - child.children!, - pathname, - searchParams, - language, - )} - - - - , - ); - } else { - // Render simple link for leaf nodes - elements.push( - - - - {child.icon && } - {child.title} - - - , - ); - } - }); - currentGroup = []; - } - }; children.forEach((child) => { - if (child.type === "separator") { - flushGroup(); - currentLabel = null; - } else if (child.type === "label") { - flushGroup(); - currentLabel = child.title; - } else if (child.type === "section") { - flushGroup(); - // Check if any item in the section is active to determine default open state - const hasActiveItem = child.items.some((item) => { - if (item.type === "page") { - return pathname === `/${item.slug}`; - } - return false; - }); - - // Render collapsible section within the submenu - // We need to render the trigger and items as siblings, not nested - const sectionItems: React.ReactNode[] = []; - child.items.forEach((item) => { - if (item.type === "page") { - const itemHref = (() => { - const params = new URLSearchParams(searchParams.toString()); - params.set("lang", language); - return `/${item.slug}?${params.toString()}`; - })(); - const itemIsActive = pathname === `/${item.slug}`; - sectionItems.push( - - - - {item.icon && } - {item.title} - - - , - ); - } - }); - - elements.push( - - - - - - {child.icon && ( - - )} - - {child.title} - - - - - - - - {sectionItems} - - , - ); - } else if (child.type === "page") { - if (currentLabel && currentGroup.length === 0) { - // Add label before first item in group - elements.push( - - {currentLabel} - , - ); - } - currentGroup.push(child); - } + if (child.type !== "page") return; + elements.push( + , + ); }); - flushGroup(); + return elements; } diff --git a/apps/framework-docs-v2/src/config/navigation.ts b/apps/framework-docs-v2/src/config/navigation.ts index b387df8438..e1348ca9e9 100644 --- a/apps/framework-docs-v2/src/config/navigation.ts +++ b/apps/framework-docs-v2/src/config/navigation.ts @@ -1,3 +1,4 @@ +import * as React from "react"; import type { Language } from "@/lib/content-types"; import { IconChartArea, @@ -877,259 +878,85 @@ const guidesNavigationConfig: NavigationConfig = [ items: [ { type: "page", - slug: "guides/applications/performant-dashboards/overview", + slug: "guides/applications/performant-dashboards/guide-overview", title: "Performant Dashboards", icon: IconChartLine, languages: ["typescript", "python"], children: [ { type: "page", - slug: "guides/applications/performant-dashboards/guide-overview", - title: "Overview", + slug: "guides/applications/performant-dashboards/existing-oltp-db", + title: "From Existing OLTP DB", languages: ["typescript", "python"], }, { - type: "section", - title: "Existing OLTP DB", - items: [ - { - type: "page", - slug: "guides/applications/performant-dashboards/existing-oltp-db/overview", - title: "Implementation Overview", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/applications/performant-dashboards/existing-oltp-db/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/applications/performant-dashboards/existing-oltp-db/1-setup-connection", - title: "Setup Connection", - 
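As a reference for the `checkDescendant` helpers added to side-nav.tsx above: collapsible nav items derive their default-open state by recursively searching their child pages for the current route. A minimal standalone sketch of that check follows; the simplified `NavItem` shape and the sample slugs are illustrative only (the real types in `src/config/navigation.ts` also carry titles, icons, and language filters).

```ts
// Simplified, illustrative NavItem shape.
type NavItem =
  | { type: "page"; slug: string; children?: NavItem[] }
  | { type: "separator" }
  | { type: "label"; title: string };

// Mirrors the recursive descendant check in side-nav.tsx: a parent is open by
// default when the current pathname matches it or any page beneath it.
function hasActiveDescendant(items: NavItem[], pathname: string): boolean {
  return items.some((item) => {
    if (item.type !== "page") return false;
    if (pathname === `/${item.slug}`) return true;
    return item.children ? hasActiveDescendant(item.children, pathname) : false;
  });
}

// Example using slugs from the flattened guides config:
const children: NavItem[] = [
  { type: "page", slug: "guides/applications/performant-dashboards/existing-oltp-db" },
  { type: "page", slug: "guides/applications/performant-dashboards/new-application" },
];

console.log(
  hasActiveDescendant(children, "/guides/applications/performant-dashboards/existing-oltp-db"),
); // true
console.log(hasActiveDescendant(children, "/guides")); // false
```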
languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/applications/performant-dashboards/existing-oltp-db/2-create-materialized-view", - title: "Create Materialized View", - languages: ["typescript", "python"], - }, - ], - }, - { - type: "section", + type: "page", + slug: "guides/applications/performant-dashboards/new-application", title: "New Application", - items: [ - { - type: "page", - slug: "guides/applications/performant-dashboards/new-application/overview", - title: "Implementation Overview", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/applications/performant-dashboards/new-application/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/applications/performant-dashboards/new-application/1-initialize-project", - title: "Initialize Project", - languages: ["typescript", "python"], - }, - ], + languages: ["typescript", "python"], }, ], }, { type: "page", - slug: "guides/applications/in-app-chat-analytics/overview", + slug: "guides/applications/in-app-chat-analytics/guide-overview", title: "In-App Chat Analytics", icon: IconMessageChatbot, languages: ["typescript", "python"], children: [ { type: "page", - slug: "guides/applications/in-app-chat-analytics/guide-overview", - title: "Overview", - languages: ["typescript", "python"], - }, - { - type: "section", + slug: "guides/applications/in-app-chat-analytics/existing-chat-system", title: "Existing Chat System", - items: [ - { - type: "page", - slug: "guides/applications/in-app-chat-analytics/existing-chat-system/overview", - title: "Implementation Overview", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/applications/in-app-chat-analytics/existing-chat-system/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/applications/in-app-chat-analytics/existing-chat-system/1-integrate-event-tracking", - title: "Integrate Event Tracking", - languages: ["typescript", "python"], - }, - ], + languages: ["typescript", "python"], }, { - type: "section", + type: "page", + slug: "guides/applications/in-app-chat-analytics/new-chat-feature", title: "New Chat Feature", - items: [ - { - type: "page", - slug: "guides/applications/in-app-chat-analytics/new-chat-feature/overview", - title: "Implementation Overview", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/applications/in-app-chat-analytics/new-chat-feature/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/applications/in-app-chat-analytics/new-chat-feature/1-setup-chat-schema", - title: "Setup Chat Schema", - languages: ["typescript", "python"], - }, - ], + languages: ["typescript", "python"], }, ], }, { type: "page", - slug: "guides/applications/automated-reports/overview", + slug: "guides/applications/automated-reports/guide-overview", title: "Automated Reports", icon: IconFileReport, languages: ["typescript", "python"], children: [ { type: "page", - slug: "guides/applications/automated-reports/guide-overview", - title: "Overview", - languages: ["typescript", "python"], - }, - { - type: "section", + slug: "guides/applications/automated-reports/scheduled-reports", title: "Scheduled Reports", - items: [ - { - type: "page", - slug: "guides/applications/automated-reports/scheduled-reports/overview", - title: "Implementation Overview", - languages: ["typescript", "python"], - }, - { 
- type: "page", - slug: "guides/applications/automated-reports/scheduled-reports/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/applications/automated-reports/scheduled-reports/1-create-report-template", - title: "Create Report Template", - languages: ["typescript", "python"], - }, - ], + languages: ["typescript", "python"], }, { - type: "section", + type: "page", + slug: "guides/applications/automated-reports/event-driven-reports", title: "Event-Driven Reports", - items: [ - { - type: "page", - slug: "guides/applications/automated-reports/event-driven-reports/overview", - title: "Implementation Overview", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/applications/automated-reports/event-driven-reports/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/applications/automated-reports/event-driven-reports/1-setup-event-triggers", - title: "Setup Event Triggers", - languages: ["typescript", "python"], - }, - ], + languages: ["typescript", "python"], }, ], }, { type: "page", - slug: "guides/applications/going-to-production/overview", + slug: "guides/applications/going-to-production/guide-overview", title: "Going to Production", icon: IconCloudUpload, languages: ["typescript", "python"], children: [ { type: "page", - slug: "guides/applications/going-to-production/guide-overview", - title: "Overview", - languages: ["typescript", "python"], - }, - { - type: "section", + slug: "guides/applications/going-to-production/local-development", title: "Local Development", - items: [ - { - type: "page", - slug: "guides/applications/going-to-production/local-development/overview", - title: "Implementation Overview", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/applications/going-to-production/local-development/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/applications/going-to-production/local-development/1-prepare-environment", - title: "Prepare Environment", - languages: ["typescript", "python"], - }, - ], + languages: ["typescript", "python"], }, { - type: "section", + type: "page", + slug: "guides/applications/going-to-production/staging-environment", title: "Staging Environment", - items: [ - { - type: "page", - slug: "guides/applications/going-to-production/staging-environment/overview", - title: "Implementation Overview", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/applications/going-to-production/staging-environment/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/applications/going-to-production/staging-environment/1-deploy-infrastructure", - title: "Deploy Infrastructure", - languages: ["typescript", "python"], - }, - ], + languages: ["typescript", "python"], }, ], }, @@ -1141,214 +968,70 @@ const guidesNavigationConfig: NavigationConfig = [ items: [ { type: "page", - slug: "guides/data-management/migrations/overview", + slug: "guides/data-management/migrations/guide-overview", title: "Migrations", icon: IconDatabaseImport, languages: ["typescript", "python"], children: [ { type: "page", - slug: "guides/data-management/migrations/guide-overview", - title: "Overview", - languages: ["typescript", "python"], - }, - { - type: "section", + slug: "guides/data-management/migrations/schema-changes", title: "Schema 
Changes", - items: [ - { - type: "page", - slug: "guides/data-management/migrations/schema-changes/overview", - title: "Implementation Overview", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-management/migrations/schema-changes/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-management/migrations/schema-changes/1-create-migration-script", - title: "Create Migration Script", - languages: ["typescript", "python"], - }, - ], + languages: ["typescript", "python"], }, { - type: "section", + type: "page", + slug: "guides/data-management/migrations/data-migration", title: "Data Migration", - items: [ - { - type: "page", - slug: "guides/data-management/migrations/data-migration/overview", - title: "Implementation Overview", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-management/migrations/data-migration/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-management/migrations/data-migration/1-backup-existing-data", - title: "Backup Existing Data", - languages: ["typescript", "python"], - }, - ], + languages: ["typescript", "python"], }, { - type: "section", + type: "page", + slug: "guides/data-management/migrations/version-upgrades", title: "Version Upgrades", - items: [ - { - type: "page", - slug: "guides/data-management/migrations/version-upgrades/overview", - title: "Implementation Overview", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-management/migrations/version-upgrades/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-management/migrations/version-upgrades/1-review-changelog", - title: "Review Changelog", - languages: ["typescript", "python"], - }, - ], + languages: ["typescript", "python"], }, ], }, { type: "page", - slug: "guides/data-management/impact-analysis/overview", + slug: "guides/data-management/impact-analysis/guide-overview", title: "Impact Analysis", icon: IconChartDots, languages: ["typescript", "python"], children: [ { type: "page", - slug: "guides/data-management/impact-analysis/guide-overview", - title: "Overview", - languages: ["typescript", "python"], - }, - { - type: "section", + slug: "guides/data-management/impact-analysis/schema-changes", title: "Schema Changes", - items: [ - { - type: "page", - slug: "guides/data-management/impact-analysis/schema-changes/overview", - title: "Implementation Overview", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-management/impact-analysis/schema-changes/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-management/impact-analysis/schema-changes/1-identify-dependencies", - title: "Identify Dependencies", - languages: ["typescript", "python"], - }, - ], + languages: ["typescript", "python"], }, { - type: "section", + type: "page", + slug: "guides/data-management/impact-analysis/query-changes", title: "Query Changes", - items: [ - { - type: "page", - slug: "guides/data-management/impact-analysis/query-changes/overview", - title: "Implementation Overview", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-management/impact-analysis/query-changes/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - 
slug: "guides/data-management/impact-analysis/query-changes/1-analyze-query-performance", - title: "Analyze Query Performance", - languages: ["typescript", "python"], - }, - ], + languages: ["typescript", "python"], }, ], }, { type: "page", - slug: "guides/data-management/change-data-capture/overview", + slug: "guides/data-management/change-data-capture/guide-overview", title: "Change Data Capture", icon: IconBolt, languages: ["typescript", "python"], children: [ { type: "page", - slug: "guides/data-management/change-data-capture/guide-overview", - title: "Overview", - languages: ["typescript", "python"], - }, - { - type: "section", + slug: "guides/data-management/change-data-capture/database-cdc", title: "Database CDC", - items: [ - { - type: "page", - slug: "guides/data-management/change-data-capture/database-cdc/overview", - title: "Implementation Overview", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-management/change-data-capture/database-cdc/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-management/change-data-capture/database-cdc/1-enable-cdc-logging", - title: "Enable CDC Logging", - languages: ["typescript", "python"], - }, - ], + languages: ["typescript", "python"], }, { - type: "section", + type: "page", + slug: "guides/data-management/change-data-capture/application-events", title: "Application Events", - items: [ - { - type: "page", - slug: "guides/data-management/change-data-capture/application-events/overview", - title: "Implementation Overview", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-management/change-data-capture/application-events/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-management/change-data-capture/application-events/1-implement-event-emitter", - title: "Implement Event Emitter", - languages: ["typescript", "python"], - }, - ], + languages: ["typescript", "python"], }, ], }, @@ -1360,280 +1043,112 @@ const guidesNavigationConfig: NavigationConfig = [ items: [ { type: "page", - slug: "guides/data-warehousing/customer-data-platform/overview", + slug: "guides/data-warehousing/customer-data-platform/guide-overview", title: "Customer Data Platform", icon: IconUsers, languages: ["typescript", "python"], children: [ { type: "page", - slug: "guides/data-warehousing/customer-data-platform/guide-overview", - title: "Overview", - languages: ["typescript", "python"], - }, - { - type: "section", + slug: "guides/data-warehousing/customer-data-platform/existing-customer-data", title: "Existing Customer Data", - items: [ - { - type: "page", - slug: "guides/data-warehousing/customer-data-platform/existing-customer-data/overview", - title: "Implementation Overview", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-warehousing/customer-data-platform/existing-customer-data/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-warehousing/customer-data-platform/existing-customer-data/1-consolidate-data-sources", - title: "Consolidate Data Sources", - languages: ["typescript", "python"], - }, - ], + languages: ["typescript", "python"], }, { - type: "section", + type: "page", + slug: "guides/data-warehousing/customer-data-platform/multi-source-integration", title: "Multi-Source Integration", - items: [ - { - type: "page", - slug: 
"guides/data-warehousing/customer-data-platform/multi-source-integration/overview", - title: "Implementation Overview", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-warehousing/customer-data-platform/multi-source-integration/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-warehousing/customer-data-platform/multi-source-integration/1-setup-connectors", - title: "Setup Connectors", - languages: ["typescript", "python"], - }, - ], + languages: ["typescript", "python"], }, ], }, { type: "page", - slug: "guides/data-warehousing/operational-analytics/overview", + slug: "guides/data-warehousing/operational-analytics/guide-overview", title: "Operational Analytics", icon: IconChartBarOff, languages: ["typescript", "python"], children: [ { type: "page", - slug: "guides/data-warehousing/operational-analytics/application-metrics/overview", + slug: "guides/data-warehousing/operational-analytics/application-metrics", title: "Application Metrics", languages: ["typescript", "python"], - children: [ - { - type: "page", - slug: "guides/data-warehousing/operational-analytics/application-metrics/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-warehousing/operational-analytics/application-metrics/1-instrument-application", - title: "Instrument Application", - languages: ["typescript", "python"], - }, - ], }, { type: "page", - slug: "guides/data-warehousing/operational-analytics/infrastructure-monitoring/overview", + slug: "guides/data-warehousing/operational-analytics/infrastructure-monitoring", title: "Infrastructure Monitoring", languages: ["typescript", "python"], - children: [ - { - type: "page", - slug: "guides/data-warehousing/operational-analytics/infrastructure-monitoring/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-warehousing/operational-analytics/infrastructure-monitoring/1-collect-system-metrics", - title: "Collect System Metrics", - languages: ["typescript", "python"], - }, - ], }, ], }, { type: "page", - slug: "guides/data-warehousing/startup-metrics/overview", + slug: "guides/data-warehousing/startup-metrics/guide-overview", title: "Startup Metrics", icon: IconChartBar, languages: ["typescript", "python"], children: [ { type: "page", - slug: "guides/data-warehousing/startup-metrics/product-metrics/overview", + slug: "guides/data-warehousing/startup-metrics/product-metrics", title: "Product Metrics", languages: ["typescript", "python"], - children: [ - { - type: "page", - slug: "guides/data-warehousing/startup-metrics/product-metrics/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-warehousing/startup-metrics/product-metrics/1-define-kpis", - title: "Define KPIs", - languages: ["typescript", "python"], - }, - ], }, { type: "page", - slug: "guides/data-warehousing/startup-metrics/business-metrics/overview", + slug: "guides/data-warehousing/startup-metrics/business-metrics", title: "Business Metrics", languages: ["typescript", "python"], - children: [ - { - type: "page", - slug: "guides/data-warehousing/startup-metrics/business-metrics/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-warehousing/startup-metrics/business-metrics/1-setup-revenue-tracking", - title: "Setup Revenue 
Tracking", - languages: ["typescript", "python"], - }, - ], }, ], }, { type: "page", - slug: "guides/data-warehousing/connectors/overview", + slug: "guides/data-warehousing/connectors/guide-overview", title: "Connectors", icon: IconStack, languages: ["typescript", "python"], children: [ { type: "page", - slug: "guides/data-warehousing/connectors/database-connector/overview", + slug: "guides/data-warehousing/connectors/database-connector", title: "Database Connector", languages: ["typescript", "python"], - children: [ - { - type: "page", - slug: "guides/data-warehousing/connectors/database-connector/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-warehousing/connectors/database-connector/1-configure-connection", - title: "Configure Connection", - languages: ["typescript", "python"], - }, - ], }, { type: "page", - slug: "guides/data-warehousing/connectors/api-connector/overview", + slug: "guides/data-warehousing/connectors/api-connector", title: "API Connector", languages: ["typescript", "python"], - children: [ - { - type: "page", - slug: "guides/data-warehousing/connectors/api-connector/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-warehousing/connectors/api-connector/1-setup-authentication", - title: "Setup Authentication", - languages: ["typescript", "python"], - }, - ], }, { type: "page", - slug: "guides/data-warehousing/connectors/custom-connector/overview", + slug: "guides/data-warehousing/connectors/custom-connector", title: "Custom Connector", languages: ["typescript", "python"], - children: [ - { - type: "page", - slug: "guides/data-warehousing/connectors/custom-connector/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-warehousing/connectors/custom-connector/1-create-connector-class", - title: "Create Connector Class", - languages: ["typescript", "python"], - }, - ], }, ], }, { type: "page", - slug: "guides/data-warehousing/pipelines/overview", + slug: "guides/data-warehousing/pipelines/guide-overview", title: "Pipelines", icon: IconRoute, languages: ["typescript", "python"], children: [ { type: "page", - slug: "guides/data-warehousing/pipelines/etl-pipeline/overview", + slug: "guides/data-warehousing/pipelines/etl-pipeline", title: "ETL Pipeline", languages: ["typescript", "python"], - children: [ - { - type: "page", - slug: "guides/data-warehousing/pipelines/etl-pipeline/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-warehousing/pipelines/etl-pipeline/1-extract-data", - title: "Extract Data", - languages: ["typescript", "python"], - }, - ], }, { type: "page", - slug: "guides/data-warehousing/pipelines/streaming-pipeline/overview", + slug: "guides/data-warehousing/pipelines/streaming-pipeline", title: "Streaming Pipeline", languages: ["typescript", "python"], - children: [ - { - type: "page", - slug: "guides/data-warehousing/pipelines/streaming-pipeline/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/data-warehousing/pipelines/streaming-pipeline/1-setup-stream-source", - title: "Setup Stream Source", - languages: ["typescript", "python"], - }, - ], }, ], }, @@ -1645,99 +1160,43 @@ const guidesNavigationConfig: NavigationConfig = [ items: [ { type: "page", - slug: 
"guides/methodology/data-as-code/overview", + slug: "guides/methodology/data-as-code/guide-overview", title: "Data as Code", icon: IconCode, languages: ["typescript", "python"], children: [ { type: "page", - slug: "guides/methodology/data-as-code/version-control-setup/overview", + slug: "guides/methodology/data-as-code/version-control-setup", title: "Version Control Setup", languages: ["typescript", "python"], - children: [ - { - type: "page", - slug: "guides/methodology/data-as-code/version-control-setup/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/methodology/data-as-code/version-control-setup/1-initialize-repository", - title: "Initialize Repository", - languages: ["typescript", "python"], - }, - ], }, { type: "page", - slug: "guides/methodology/data-as-code/cicd-integration/overview", + slug: "guides/methodology/data-as-code/cicd-integration", title: "CI/CD Integration", languages: ["typescript", "python"], - children: [ - { - type: "page", - slug: "guides/methodology/data-as-code/cicd-integration/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/methodology/data-as-code/cicd-integration/1-create-pipeline-config", - title: "Create Pipeline Config", - languages: ["typescript", "python"], - }, - ], }, ], }, { type: "page", - slug: "guides/methodology/dora-for-data/overview", + slug: "guides/methodology/dora-for-data/guide-overview", title: "DORA for Data", icon: IconTrendingUp, languages: ["typescript", "python"], children: [ { type: "page", - slug: "guides/methodology/dora-for-data/deployment-frequency/overview", + slug: "guides/methodology/dora-for-data/deployment-frequency", title: "Deployment Frequency", languages: ["typescript", "python"], - children: [ - { - type: "page", - slug: "guides/methodology/dora-for-data/deployment-frequency/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/methodology/dora-for-data/deployment-frequency/1-measure-current-frequency", - title: "Measure Current Frequency", - languages: ["typescript", "python"], - }, - ], }, { type: "page", - slug: "guides/methodology/dora-for-data/lead-time/overview", + slug: "guides/methodology/dora-for-data/lead-time", title: "Lead Time", languages: ["typescript", "python"], - children: [ - { - type: "page", - slug: "guides/methodology/dora-for-data/lead-time/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/methodology/dora-for-data/lead-time/1-track-change-lifecycle", - title: "Track Change Lifecycle", - languages: ["typescript", "python"], - }, - ], }, ], }, @@ -1749,197 +1208,85 @@ const guidesNavigationConfig: NavigationConfig = [ items: [ { type: "page", - slug: "guides/strategy/ai-enablement/overview", + slug: "guides/strategy/ai-enablement/guide-overview", title: "AI Enablement", icon: IconBrain, languages: ["typescript", "python"], children: [ { type: "page", - slug: "guides/strategy/ai-enablement/llm-integration/overview", + slug: "guides/strategy/ai-enablement/llm-integration", title: "LLM Integration", languages: ["typescript", "python"], - children: [ - { - type: "page", - slug: "guides/strategy/ai-enablement/llm-integration/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/strategy/ai-enablement/llm-integration/1-choose-llm-provider", - title: "Choose LLM 
Provider", - languages: ["typescript", "python"], - }, - ], }, { type: "page", - slug: "guides/strategy/ai-enablement/vector-search/overview", + slug: "guides/strategy/ai-enablement/vector-search", title: "Vector Search", languages: ["typescript", "python"], - children: [ - { - type: "page", - slug: "guides/strategy/ai-enablement/vector-search/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/strategy/ai-enablement/vector-search/1-setup-vector-database", - title: "Setup Vector Database", - languages: ["typescript", "python"], - }, - ], }, ], }, { type: "page", - slug: "guides/strategy/data-foundation/overview", + slug: "guides/strategy/data-foundation/guide-overview", title: "Data Foundation", icon: IconDatabase, languages: ["typescript", "python"], children: [ { type: "page", - slug: "guides/strategy/data-foundation/greenfield-project/overview", + slug: "guides/strategy/data-foundation/greenfield-project", title: "Greenfield Project", languages: ["typescript", "python"], - children: [ - { - type: "page", - slug: "guides/strategy/data-foundation/greenfield-project/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/strategy/data-foundation/greenfield-project/1-design-data-architecture", - title: "Design Data Architecture", - languages: ["typescript", "python"], - }, - ], }, { type: "page", - slug: "guides/strategy/data-foundation/legacy-system-migration/overview", + slug: "guides/strategy/data-foundation/legacy-system-migration", title: "Legacy System Migration", languages: ["typescript", "python"], - children: [ - { - type: "page", - slug: "guides/strategy/data-foundation/legacy-system-migration/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/strategy/data-foundation/legacy-system-migration/1-assess-current-state", - title: "Assess Current State", - languages: ["typescript", "python"], - }, - ], }, ], }, { type: "page", - slug: "guides/strategy/platform-engineering/overview", + slug: "guides/strategy/platform-engineering/guide-overview", title: "Platform Engineering", icon: IconServer, languages: ["typescript", "python"], children: [ { type: "page", - slug: "guides/strategy/platform-engineering/internal-platform/overview", + slug: "guides/strategy/platform-engineering/internal-platform", title: "Internal Platform", languages: ["typescript", "python"], - children: [ - { - type: "page", - slug: "guides/strategy/platform-engineering/internal-platform/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/strategy/platform-engineering/internal-platform/1-define-platform-scope", - title: "Define Platform Scope", - languages: ["typescript", "python"], - }, - ], }, { type: "page", - slug: "guides/strategy/platform-engineering/self-service-tools/overview", + slug: "guides/strategy/platform-engineering/self-service-tools", title: "Self-Service Tools", languages: ["typescript", "python"], - children: [ - { - type: "page", - slug: "guides/strategy/platform-engineering/self-service-tools/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/strategy/platform-engineering/self-service-tools/1-create-developer-portal", - title: "Create Developer Portal", - languages: ["typescript", "python"], - }, - ], }, ], }, { type: "page", - slug: 
"guides/strategy/olap-evaluation/overview", + slug: "guides/strategy/olap-evaluation/guide-overview", title: "OLAP Evaluation", icon: IconDatabase, languages: ["typescript", "python"], children: [ { type: "page", - slug: "guides/strategy/olap-evaluation/performance-requirements/overview", + slug: "guides/strategy/olap-evaluation/performance-requirements", title: "Performance Requirements", languages: ["typescript", "python"], - children: [ - { - type: "page", - slug: "guides/strategy/olap-evaluation/performance-requirements/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/strategy/olap-evaluation/performance-requirements/1-benchmark-queries", - title: "Benchmark Queries", - languages: ["typescript", "python"], - }, - ], }, { type: "page", - slug: "guides/strategy/olap-evaluation/scale-requirements/overview", + slug: "guides/strategy/olap-evaluation/scale-requirements", title: "Scale Requirements", languages: ["typescript", "python"], - children: [ - { - type: "page", - slug: "guides/strategy/olap-evaluation/scale-requirements/requirements", - title: "Requirements", - languages: ["typescript", "python"], - }, - { - type: "page", - slug: "guides/strategy/olap-evaluation/scale-requirements/1-estimate-data-volume", - title: "Estimate Data Volume", - languages: ["typescript", "python"], - }, - ], }, ], }, diff --git a/apps/framework-docs-v2/src/lib/content.ts b/apps/framework-docs-v2/src/lib/content.ts index 861e1b7538..9097d01027 100644 --- a/apps/framework-docs-v2/src/lib/content.ts +++ b/apps/framework-docs-v2/src/lib/content.ts @@ -244,3 +244,52 @@ export function getAllSlugs(): string[] { const uniqueSlugs = Array.from(new Set(slugs)); return uniqueSlugs; } + +/** + * Discover step files in a directory + * Returns step files matching the pattern: {number}-{name}.mdx + * Sorted by step number + */ +export function discoverStepFiles(slug: string): Array<{ + slug: string; + stepNumber: number; + title: string; +}> { + const dirPath = path.join(CONTENT_ROOT, slug); + + if (!fs.existsSync(dirPath) || !fs.statSync(dirPath).isDirectory()) { + return []; + } + + const entries = fs.readdirSync(dirPath, { withFileTypes: true }); + const steps: Array<{ slug: string; stepNumber: number; title: string }> = []; + + for (const entry of entries) { + if (!entry.isFile()) continue; + + // Match pattern: {number}-{name}.mdx + const stepMatch = entry.name.match(/^(\d+)-(.+)\.mdx$/); + if (!stepMatch) continue; + + const stepNumber = parseInt(stepMatch[1]!, 10); + const stepName = stepMatch[2]; + if (!stepName) continue; + + const stepSlug = `${slug}/${entry.name.replace(/\.mdx$/, "")}`; + + // Read front matter to get title + const filePath = path.join(dirPath, entry.name); + const fileContents = fs.readFileSync(filePath, "utf8"); + const { data } = matter(fileContents); + const frontMatter = data as FrontMatter; + + steps.push({ + slug: stepSlug, + stepNumber, + title: (frontMatter.title as string) || stepName.replace(/-/g, " "), + }); + } + + // Sort by step number + return steps.sort((a, b) => a.stepNumber - b.stepNumber); +} From 227bd93cecf64284dfaac640dd1b378c4974ce98 Mon Sep 17 00:00:00 2001 From: Tim Delisle Date: Sun, 16 Nov 2025 17:35:51 -0800 Subject: [PATCH 07/59] fixed some styles --- apps/framework-docs-v2/package.json | 2 +- .../src/components/navigation/side-nav.tsx | 2 - .../src/components/navigation/toc-nav.tsx | 115 +++++++++++++++++- .../src/components/ui/popover.tsx | 33 +++++ pnpm-lock.yaml | 2 +- 5 files changed, 148 
insertions(+), 6 deletions(-) create mode 100644 apps/framework-docs-v2/src/components/ui/popover.tsx diff --git a/apps/framework-docs-v2/package.json b/apps/framework-docs-v2/package.json index 9780a46f10..14453c6d43 100644 --- a/apps/framework-docs-v2/package.json +++ b/apps/framework-docs-v2/package.json @@ -29,7 +29,7 @@ "@radix-ui/react-navigation-menu": "^1.2.13", "@radix-ui/react-popover": "^1.1.15", "@radix-ui/react-scroll-area": "^1.2.2", - "@radix-ui/react-select": "^2.0.0", + "@radix-ui/react-select": "^2.2.6", "@radix-ui/react-separator": "^1.1.7", "@radix-ui/react-slot": "^1.2.3", "@radix-ui/react-tabs": "^1.1.12", diff --git a/apps/framework-docs-v2/src/components/navigation/side-nav.tsx b/apps/framework-docs-v2/src/components/navigation/side-nav.tsx index e3b5ef66bf..7df49d05a1 100644 --- a/apps/framework-docs-v2/src/components/navigation/side-nav.tsx +++ b/apps/framework-docs-v2/src/components/navigation/side-nav.tsx @@ -9,11 +9,9 @@ import { Sidebar, SidebarContent, SidebarGroup, - SidebarGroupContent, SidebarGroupLabel, SidebarMenu, SidebarMenuAction, - SidebarMenuBadge, SidebarMenuButton, SidebarMenuItem, SidebarMenuSub, diff --git a/apps/framework-docs-v2/src/components/navigation/toc-nav.tsx b/apps/framework-docs-v2/src/components/navigation/toc-nav.tsx index 0be4c2b7f6..f0fa5352f4 100644 --- a/apps/framework-docs-v2/src/components/navigation/toc-nav.tsx +++ b/apps/framework-docs-v2/src/components/navigation/toc-nav.tsx @@ -1,9 +1,29 @@ "use client"; import { useEffect, useState } from "react"; +import { usePathname } from "next/navigation"; import { cn } from "@/lib/utils"; import type { Heading } from "@/lib/content-types"; -import { IconExternalLink } from "@tabler/icons-react"; +import { + IconExternalLink, + IconPlus, + IconInfoCircle, +} from "@tabler/icons-react"; +import { Button } from "@/components/ui/button"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/components/ui/select"; +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from "@/components/ui/tooltip"; +import { Label } from "../ui/label"; interface TOCNavProps { headings: Heading[]; @@ -15,6 +35,10 @@ interface TOCNavProps { export function TOCNav({ headings, helpfulLinks }: TOCNavProps) { const [activeId, setActiveId] = useState(""); + const [scope, setScope] = useState<"initiative" | "project">("initiative"); + const pathname = usePathname(); + const isGuidePage = + pathname?.startsWith("/guides/") && pathname !== "/guides"; useEffect(() => { if (headings.length === 0) return; @@ -123,7 +147,7 @@ export function TOCNav({ headings, helpfulLinks }: TOCNavProps) { } return ( -
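The `isGuidePage` flag added to `TOCNav` above is a plain pathname predicate; restated as a standalone function (illustrative only, in the component it is derived inline from `usePathname()`), it behaves like this:

```ts
// Standalone restatement of the isGuidePage check added to TOCNav.
const isGuidePage = (pathname: string | null): boolean =>
  !!pathname?.startsWith("/guides/") && pathname !== "/guides";

console.log(isGuidePage("/guides/data-management/migrations/guide-overview")); // true
console.log(isGuidePage("/guides"));     // false -- the guides index itself
console.log(isGuidePage("/moosestack")); // false
console.log(isGuidePage(null));          // false
```

Note that the explicit `!== "/guides"` guard is technically redundant given the trailing slash in `startsWith("/guides/")`, but it makes the intent explicit.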
+ } + > + + + ); + }); return (
diff --git a/apps/framework-docs-v2/src/components/navigation/toc-nav.tsx b/apps/framework-docs-v2/src/components/navigation/toc-nav.tsx index f0fa5352f4..dabd27899c 100644 --- a/apps/framework-docs-v2/src/components/navigation/toc-nav.tsx +++ b/apps/framework-docs-v2/src/components/navigation/toc-nav.tsx @@ -153,9 +153,9 @@ export function TOCNav({ headings, helpfulLinks }: TOCNavProps) {

On this page

); @@ -453,7 +454,7 @@ export function MDXPre({ children, ...props }: MDXCodeBlockProps) { props["data-rehype-pretty-code-title"] || props["data-filename"] || props["title"]; // Also check for title prop directly - const hasCopy = props["data-copy"] !== undefined; + const hasCopy = props["data-copy"] !== "false"; const isShell = SHELL_LANGUAGES.has(language); const isConfigFile = CONFIG_LANGUAGES.has(language); @@ -473,7 +474,7 @@ export function MDXPre({ children, ...props }: MDXCodeBlockProps) { code={codeText} language={language} filename={filename || undefined} - copyButton={true} + copyButton={hasCopy} />
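The `data-copy` handling in these hunks moves from "attribute present" to "not explicitly disabled": `hasCopy` previously tracked whether `data-copy` existed while the rendered `copyButton` was hardcoded to `true`; now the button follows `hasCopy` and is only hidden when the attribute is the string `"false"`. A small sketch of the resulting rule, using a hypothetical `showCopyButton` helper over a plain props record:

```ts
// Sketch of the revised data-copy rule in MDXPre/MDXCode (illustrative only):
// the copy button defaults to on and is only turned off when data-copy is
// explicitly the string "false", however that attribute is produced upstream.
function showCopyButton(props: Record<string, unknown>): boolean {
  return props["data-copy"] !== "false";
}

console.log(showCopyButton({}));                       // true  -- default: button shown
console.log(showCopyButton({ "data-copy": "false" })); // false -- explicitly opted out
console.log(showCopyButton({ "data-copy": "" }));      // true  -- present but not "false"
```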
); @@ -506,7 +507,7 @@ export function MDXPre({ children, ...props }: MDXCodeBlockProps) { } // If filename is provided and no copy attribute, use animated CodeEditor - if (filename && !hasCopy) { + if (filename && props["data-copy"] === undefined) { // Determine if this is a terminal based on language const isTerminalLang = SHELL_LANGUAGES.has(language); return ( @@ -531,7 +532,7 @@ export function MDXPre({ children, ...props }: MDXCodeBlockProps) { code={codeText} language={language || "typescript"} filename={filename || undefined} - copyButton={true} + copyButton={hasCopy} />
); @@ -578,6 +579,7 @@ export function MDXCode({ children, className, ...props }: MDXCodeProps) { // Config files use CodeSnippet const filename = props["data-rehype-pretty-code-title"] || props["data-filename"]; + const hasCopy = props["data-copy"] !== "false"; return (
@@ -585,19 +587,20 @@ export function MDXCode({ children, className, ...props }: MDXCodeProps) { code={codeText} language={language} filename={filename} - copyButton={true} + copyButton={hasCopy} />
); } // Default to CodeSnippet for editable code blocks + const hasCopy = props["data-copy"] !== "false"; return (
); diff --git a/apps/framework-docs-v2/src/lib/github-stars.ts b/apps/framework-docs-v2/src/lib/github-stars.ts index 09c821bedf..c2764d5c45 100644 --- a/apps/framework-docs-v2/src/lib/github-stars.ts +++ b/apps/framework-docs-v2/src/lib/github-stars.ts @@ -7,15 +7,23 @@ import { unstable_cache } from "next/cache"; */ async function fetchGitHubStars(): Promise { try { + const headers: HeadersInit = { + // GitHub API requires a user-agent + "User-Agent": "MooseDocs", + }; + + // Add Authorization header with token if available to increase rate limit + // Without token: 60 requests/hour + // With token: 5,000 requests/hour + const githubToken = process.env.GITHUB_TOKEN; + if (githubToken) { + headers.Authorization = `token ${githubToken}`; + } + const response = await fetch( "https://api.github.com/repos/514-labs/moose", { - headers: { - // GitHub API requires a user-agent - "User-Agent": "MooseDocs", - // Optional: Add Authorization header with token to increase rate limit - // Authorization: `token ${process.env.GITHUB_TOKEN}`, - }, + headers, }, ); From 3115c6145c79e9ec9f5f09d7e960c54af8e7dabc Mon Sep 17 00:00:00 2001 From: Tim Date: Thu, 20 Nov 2025 17:42:58 -0800 Subject: [PATCH 14/59] adding library back in --- apps/framework-docs-v2/package.json | 1 + pnpm-lock.yaml | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/apps/framework-docs-v2/package.json b/apps/framework-docs-v2/package.json index dd9895136c..1ec487751a 100644 --- a/apps/framework-docs-v2/package.json +++ b/apps/framework-docs-v2/package.json @@ -67,6 +67,7 @@ "sonner": "^2.0.7", "tailwind-merge": "^2.6.0", "tailwindcss-animate": "^1.0.7", + "unist-util-visit": "^5.0.0", "zod": "^3.25.76" }, "devDependencies": { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 4d7945029e..e7726b6930 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -65,7 +65,7 @@ importers: devDependencies: '@clickhouse/client': specifier: latest - version: 1.13.0 + version: 1.14.0 '@iarna/toml': specifier: ^3.0.0 version: 3.0.0 @@ -397,6 +397,9 @@ importers: tailwindcss-animate: specifier: ^1.0.7 version: 1.0.7(tailwindcss@3.4.18(tsx@4.20.6)(yaml@2.8.1)) + unist-util-visit: + specifier: ^5.0.0 + version: 5.0.0 zod: specifier: ^3.25.76 version: 3.25.76 @@ -1351,8 +1354,8 @@ packages: '@chevrotain/utils@11.0.3': resolution: {integrity: sha512-YslZMgtJUyuMbZ+aKvfF3x1f5liK4mWNxghFRv7jqRR9C3R3fAOGTTKvxXDa2Y1s9zSbcpuO0cAxDYsc9SrXoQ==} - '@clickhouse/client-common@1.13.0': - resolution: {integrity: sha512-QlGUMd3EaKkIRLCv0WW8Rw9cOlqhwQPT+ucNWY8eC4UALsMhJLpa0H7Cd7MYc9CEtTv/xlr3IcYw5Tdho4Hr2g==} + '@clickhouse/client-common@1.14.0': + resolution: {integrity: sha512-CyUcv2iCkZ1A++vmOSufYRpHR3aAWVfbrWed7ATzf0yyx/BW/2SEqlL07vBpSRa3BIkQe/DSOHVv8JkWZpUOwQ==} '@clickhouse/client-common@1.5.0': resolution: {integrity: sha512-U3vDp+PDnNVEv6kia+Mq5ygnlMZzsYU+3TX+0da3XvL926jzYLMBlIvFUxe2+/5k47ySvnINRC/2QxVK7PC2/A==} @@ -1363,8 +1366,8 @@ packages: '@clickhouse/client-web@1.5.0': resolution: {integrity: sha512-21+c2UJ4cx9SPiIWQThCLULb8h/zng0pNrtTwbbnaoCqMbasyRCyRTHs3wRr7fqRUcZ3p9krIPuN0gnJw3GJ6Q==} - '@clickhouse/client@1.13.0': - resolution: {integrity: sha512-uK+zqPaJnAoq3QIOvUNbHtbWUhyg2A/aSbdJtrY2+kawp4SMBLcfIbB9ucRv5Yht1CAa3b24CiUlypkmgarukg==} + '@clickhouse/client@1.14.0': + resolution: {integrity: sha512-co2spjR7wZoZ3Ck0H/jv76bpiuO3oJHtOmq9/gxFiod2DcT9NFg01u/hXcG8MJFnEJuMB6e3vGqS6IOnLwHqRw==} engines: {node: '>=16'} '@clickhouse/client@1.8.1': @@ -10319,7 +10322,7 @@ snapshots: '@chevrotain/utils@11.0.3': {} - 
'@clickhouse/client-common@1.13.0': {} + '@clickhouse/client-common@1.14.0': {} '@clickhouse/client-common@1.5.0': {} @@ -10329,9 +10332,9 @@ snapshots: dependencies: '@clickhouse/client-common': 1.5.0 - '@clickhouse/client@1.13.0': + '@clickhouse/client@1.14.0': dependencies: - '@clickhouse/client-common': 1.13.0 + '@clickhouse/client-common': 1.14.0 '@clickhouse/client@1.8.1': dependencies: From eff9086c0ca0240f4b29c880e14cfeafe60824f2 Mon Sep 17 00:00:00 2001 From: Tim Date: Tue, 25 Nov 2025 17:12:29 -0800 Subject: [PATCH 15/59] fix side nav --- apps/framework-docs-v2/public/sitemap-0.xml | 302 +++++++++--------- .../src/components/navigation/side-nav.tsx | 35 +- .../src/components/ui/sidebar.tsx | 19 ++ 3 files changed, 194 insertions(+), 162 deletions(-) diff --git a/apps/framework-docs-v2/public/sitemap-0.xml b/apps/framework-docs-v2/public/sitemap-0.xml index 31972a45f9..88aaa0ea28 100644 --- a/apps/framework-docs-v2/public/sitemap-0.xml +++ b/apps/framework-docs-v2/public/sitemap-0.xml @@ -1,154 +1,154 @@ -https://docs.fiveonefour.com/ai2025-11-21T01:39:42.116Zdaily0.7 -https://docs.fiveonefour.com/ai/data-collection-policy2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/ai/demos/context2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/ai/demos/dlqs2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/ai/demos/egress2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/ai/demos/index2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/ai/demos/ingest2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/ai/demos/model-data2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/ai/demos/mvs2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/ai/getting-started/claude2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/ai/getting-started/cursor2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/ai/getting-started/index2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/ai/getting-started/other-clients2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/ai/getting-started/vs-code2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/ai/getting-started/windsurf2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/ai/guides/clickhouse-chat2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/ai/guides/clickhouse-proj2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/ai/guides/from-template2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/ai/guides/index2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/ai/index2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/ai/overview2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/ai/reference/cli-reference2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/ai/reference/index2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/ai/reference/mcp-json-reference2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/ai/reference/tool-reference2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/hosting2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/hosting/deployment2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/hosting/getting-started2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/hosting/index2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/hosting/overview2025-11-21T01:39:42.117Zdaily0.7 
-https://docs.fiveonefour.com/moosestack2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/apis/admin-api2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/apis/analytics-api2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/apis/auth2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/apis/index2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/apis/ingest-api2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/apis/openapi-sdk2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/apis/trigger-api2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/app-api-frameworks/express2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/app-api-frameworks/fastapi2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/app-api-frameworks/fastify2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/app-api-frameworks/index2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/app-api-frameworks/koa2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/app-api-frameworks/nextjs2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/app-api-frameworks/raw-nodejs2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/changelog2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/configuration2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/contribution/documentation2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/contribution/framework2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/data-modeling2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/data-sources2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/deploying/configuring-moose-for-cloud2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/deploying/deploying-on-an-offline-server2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/deploying/deploying-on-ecs2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/deploying/deploying-on-kubernetes2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/deploying/deploying-with-docker-compose2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/deploying/index2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/deploying/monitoring2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/deploying/packaging-moose-for-deployment2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/deploying/preparing-clickhouse-redpanda2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/getting-started/from-clickhouse2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/getting-started/index2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/getting-started/quickstart2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/help/index2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/help/minimum-requirements2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/help/troubleshooting2025-11-21T01:39:42.117Zdaily0.7 -https://docs.fiveonefour.com/moosestack/in-your-stack2025-11-21T01:39:42.117Zdaily0.7 
[sitemap.xml diff condensed: the generated <url> entries (loc/lastmod/changefreq/priority markup lost in extraction) are re-emitted with lastmod bumped from 2025-11-21T01:39:42.117Z to 2025-11-21T02:43:16.693Z and the file reordered; the affected URLs span the /ai, /hosting, /moosestack, /guides and /templates sections. The remaining added (+) entries continue below.]
+https://docs.fiveonefour.com/guides/data-warehousing/pipelines/guide-overview2025-11-21T02:43:16.693Zdaily0.7 +https://docs.fiveonefour.com/guides/data-warehousing/startup-metrics2025-11-21T02:43:16.693Zdaily0.7 +https://docs.fiveonefour.com/guides/data-warehousing/startup-metrics/guide-overview2025-11-21T02:43:16.693Zdaily0.7 +https://docs.fiveonefour.com/guides/methodology/data-as-code2025-11-21T02:43:16.693Zdaily0.7 +https://docs.fiveonefour.com/guides/methodology/data-as-code/guide-overview2025-11-21T02:43:16.693Zdaily0.7 +https://docs.fiveonefour.com/guides/methodology/dora-for-data2025-11-21T02:43:16.693Zdaily0.7 +https://docs.fiveonefour.com/guides/methodology/dora-for-data/guide-overview2025-11-21T02:43:16.693Zdaily0.7 +https://docs.fiveonefour.com/guides/strategy/ai-enablement2025-11-21T02:43:16.693Zdaily0.7 +https://docs.fiveonefour.com/guides/strategy/ai-enablement/guide-overview2025-11-21T02:43:16.693Zdaily0.7 +https://docs.fiveonefour.com/guides/strategy/data-foundation2025-11-21T02:43:16.693Zdaily0.7 +https://docs.fiveonefour.com/guides/strategy/data-foundation/guide-overview2025-11-21T02:43:16.693Zdaily0.7 +https://docs.fiveonefour.com/guides/strategy/olap-evaluation2025-11-21T02:43:16.693Zdaily0.7 +https://docs.fiveonefour.com/guides/strategy/olap-evaluation/guide-overview2025-11-21T02:43:16.693Zdaily0.7 +https://docs.fiveonefour.com/guides/strategy/platform-engineering2025-11-21T02:43:16.693Zdaily0.7 +https://docs.fiveonefour.com/guides/strategy/platform-engineering/guide-overview2025-11-21T02:43:16.693Zdaily0.7 +https://docs.fiveonefour.com/guides2025-11-21T02:43:16.693Zdaily0.7 +https://docs.fiveonefour.com2025-11-21T02:43:16.693Zdaily0.7 +https://docs.fiveonefour.com/templates2025-11-21T02:43:16.693Zdaily0.7 \ No newline at end of file diff --git a/apps/framework-docs-v2/src/components/navigation/side-nav.tsx b/apps/framework-docs-v2/src/components/navigation/side-nav.tsx index 7df49d05a1..11ce273d6c 100644 --- a/apps/framework-docs-v2/src/components/navigation/side-nav.tsx +++ b/apps/framework-docs-v2/src/components/navigation/side-nav.tsx @@ -17,6 +17,7 @@ import { SidebarMenuSub, SidebarMenuSubButton, SidebarMenuSubItem, + SidebarMenuSubLabel, } from "@/components/ui/sidebar"; import { Collapsible, @@ -327,18 +328,30 @@ function renderNavChildren( language: string, ): React.ReactNode[] { const elements: React.ReactNode[] = []; + let isFirstLabel = true; - children.forEach((child) => { - if (child.type !== "page") return; - elements.push( - , - ); + children.forEach((child, index) => { + if (child.type === "label") { + elements.push( + + {child.title} + , + ); + isFirstLabel = false; + } else if (child.type === "separator") { + // Separators are handled via label spacing - skip rendering them + return; + } else if (child.type === "page") { + elements.push( + , + ); + } }); return elements; diff --git a/apps/framework-docs-v2/src/components/ui/sidebar.tsx b/apps/framework-docs-v2/src/components/ui/sidebar.tsx index 23f6ea7503..0e424297cd 100644 --- a/apps/framework-docs-v2/src/components/ui/sidebar.tsx +++ b/apps/framework-docs-v2/src/components/ui/sidebar.tsx @@ -753,6 +753,24 @@ const SidebarMenuSubButton = React.forwardRef< }); SidebarMenuSubButton.displayName = "SidebarMenuSubButton"; +const SidebarMenuSubLabel = React.forwardRef< + HTMLLIElement, + React.ComponentProps<"li"> & { isFirst?: boolean } +>(({ className, isFirst = false, ...props }, ref) => ( +
  • +)); +SidebarMenuSubLabel.displayName = "SidebarMenuSubLabel"; + export { Sidebar, SidebarContent, @@ -773,6 +791,7 @@ export { SidebarMenuSub, SidebarMenuSubButton, SidebarMenuSubItem, + SidebarMenuSubLabel, SidebarProvider, SidebarRail, SidebarSeparator, From 1439d75b966a26fad4b09251e37b65f81c6c7b0e Mon Sep 17 00:00:00 2001 From: Tim Date: Tue, 25 Nov 2025 17:23:48 -0800 Subject: [PATCH 16/59] update to codeblocks and attributes --- .../src/components/mdx-renderer.tsx | 48 +-- .../src/components/mdx/index.ts | 3 + .../src/components/mdx/server-code-block.tsx | 297 ++++++++++++++++++ .../src/components/mdx/server-figure.tsx | 119 +++++++ .../src/components/mdx/shell-snippet.tsx | 36 +++ .../src/lib/rehype-code-meta.ts | 141 +++++++++ 6 files changed, 607 insertions(+), 37 deletions(-) create mode 100644 apps/framework-docs-v2/src/components/mdx/server-code-block.tsx create mode 100644 apps/framework-docs-v2/src/components/mdx/server-figure.tsx create mode 100644 apps/framework-docs-v2/src/components/mdx/shell-snippet.tsx create mode 100644 apps/framework-docs-v2/src/lib/rehype-code-meta.ts diff --git a/apps/framework-docs-v2/src/components/mdx-renderer.tsx b/apps/framework-docs-v2/src/components/mdx-renderer.tsx index b560603590..9e7e80d5b6 100644 --- a/apps/framework-docs-v2/src/components/mdx-renderer.tsx +++ b/apps/framework-docs-v2/src/components/mdx-renderer.tsx @@ -36,16 +36,16 @@ import { Tabs, TabsList, TabsTrigger, TabsContent } from "@/components/ui/tabs"; import { Badge } from "@/components/ui/badge"; import { IconTerminal, IconFileCode } from "@tabler/icons-react"; import { - MDXPre, - MDXCode, - MDXFigure, -} from "@/components/mdx/code-block-wrapper"; + ServerCodeBlock, + ServerInlineCode, +} from "@/components/mdx/server-code-block"; +import { ServerFigure } from "@/components/mdx/server-figure"; import Link from "next/link"; import remarkGfm from "remark-gfm"; import rehypeSlug from "rehype-slug"; import rehypeAutolinkHeadings from "rehype-autolink-headings"; import rehypePrettyCode from "rehype-pretty-code"; -import { visit } from "unist-util-visit"; +import { rehypeCodeMeta } from "@/lib/rehype-code-meta"; interface MDXRendererProps { source: string; @@ -125,10 +125,10 @@ export async function MDXRenderer({ source }: MDXRendererProps) { SourceCodeLink, Link, - figure: MDXFigure, - // wrap with not-prose class - pre: MDXPre, - code: MDXCode, + // Code block handling - server-side rendered + figure: ServerFigure, + pre: ServerCodeBlock, + code: ServerInlineCode, }; return ( @@ -148,34 +148,8 @@ export async function MDXRenderer({ source }: MDXRendererProps) { keepBackground: false, }, ], - // Custom plugin to extract copy attribute from metadata and set it on pre element - () => { - return (tree: any) => { - visit(tree, "element", (node: any) => { - // Check pre elements and their code children for metadata - if (node.tagName === "pre" && node.children) { - for (const child of node.children) { - if (child.tagName === "code" && child.data?.meta) { - const codeMetaString = child.data.meta as string; - // Parse copy attribute: copy="false", copy="true", or just "copy" (which means true) - const copyFalseMatch = - codeMetaString.match(/copy=["']?false["']?/); - - if (copyFalseMatch) { - // copy="false" or copy=false - hide copy button - if (!node.properties) { - node.properties = {}; - } - node.properties["data-copy"] = "false"; - } - // If copy="true" or just "copy", don't set anything (default behavior shows copy button) - break; // Only process first code child - } - } 
- } - }); - }; - }, + // Generic plugin to extract all meta attributes as data-* props + rehypeCodeMeta, ], }, }} diff --git a/apps/framework-docs-v2/src/components/mdx/index.ts b/apps/framework-docs-v2/src/components/mdx/index.ts index e34439c4a0..5320abeeac 100644 --- a/apps/framework-docs-v2/src/components/mdx/index.ts +++ b/apps/framework-docs-v2/src/components/mdx/index.ts @@ -11,6 +11,9 @@ export { LanguageTabs, LanguageTabContent } from "./language-tabs"; export { CommandSnippet } from "./command-snippet"; export { CodeSnippet } from "./code-snippet"; export { CodeEditorWrapper } from "./code-editor-wrapper"; +export { ShellSnippet } from "./shell-snippet"; +export { ServerCodeBlock, ServerInlineCode } from "./server-code-block"; +export { ServerFigure } from "./server-figure"; export { ToggleBlock } from "./toggle-block"; export { BulletPointsCard, diff --git a/apps/framework-docs-v2/src/components/mdx/server-code-block.tsx b/apps/framework-docs-v2/src/components/mdx/server-code-block.tsx new file mode 100644 index 0000000000..c5f5f9b7f6 --- /dev/null +++ b/apps/framework-docs-v2/src/components/mdx/server-code-block.tsx @@ -0,0 +1,297 @@ +import React from "react"; +import { cn } from "@/lib/utils"; +import { CodeSnippet } from "./code-snippet"; +import { CodeEditorWrapper } from "./code-editor-wrapper"; +import { ShellSnippet } from "./shell-snippet"; +import { extractTextContent } from "@/lib/extract-text-content"; + +// Shell languages that should use terminal styling +const SHELL_LANGUAGES = new Set([ + "bash", + "sh", + "shell", + "zsh", + "fish", + "powershell", + "cmd", +]); + +// Config/data file languages that should always use static CodeSnippet +const CONFIG_LANGUAGES = new Set([ + "toml", + "yaml", + "yml", + "json", + "jsonc", + "ini", + "properties", + "config", +]); + +/** + * Props interface for server-side code block + * All data-* attributes from markdown are available here + */ +export interface ServerCodeBlockProps + extends React.HTMLAttributes { + // Standard rehype-pretty-code attributes + "data-language"?: string; + "data-theme"?: string; + "data-rehype-pretty-code-fragment"?: string; + "data-rehype-pretty-code-title"?: string; + + // Custom attributes from markdown meta + "data-filename"?: string; + "data-copy"?: string; + "data-variant"?: string; + "data-duration"?: string; + "data-delay"?: string; + "data-writing"?: string; + "data-linenumbers"?: string; + + children?: React.ReactNode; +} + +/** + * Extracts the language from data attributes or className + */ +function getLanguage(props: ServerCodeBlockProps): string { + const dataLang = props["data-language"]; + if (dataLang) { + return dataLang.toLowerCase(); + } + + if (typeof props.className === "string") { + const match = props.className.match(/language-(\w+)/); + if (match?.[1]) { + return match[1].toLowerCase(); + } + } + + return ""; +} + +/** + * Find the code element in children + */ +function findCodeElement( + node: React.ReactNode, + depth = 0, +): React.ReactElement | undefined { + if (depth > 10) return undefined; + + if (Array.isArray(node)) { + for (const item of node) { + const found = findCodeElement(item, depth + 1); + if (found) return found; + } + return undefined; + } + + if (!React.isValidElement(node)) return undefined; + + const nodeType = node.type; + const nodeProps = (node.props as Record) || {}; + + if (nodeType === React.Fragment && nodeProps.children) { + return findCodeElement(nodeProps.children as React.ReactNode, depth + 1); + } + + if (typeof nodeType === "string" && 
nodeType === "code") { + return node; + } + + if (nodeProps.children) { + return findCodeElement(nodeProps.children as React.ReactNode, depth + 1); + } + + return undefined; +} + +/** + * Server-side code block component + * + * Extracts all code block attributes and routes to the appropriate + * client-side component based on language and attributes. + */ +export function ServerCodeBlock({ + children, + ...props +}: ServerCodeBlockProps): React.ReactElement { + // Check if this is a code block processed by rehype-pretty-code + const isCodeBlock = props["data-rehype-pretty-code-fragment"] !== undefined; + + if (!isCodeBlock) { + // Not a code block, render as regular pre element + const { className, ...restProps } = props; + return ( +
    +        {children}
    +      
    + ); + } + + // Extract code content + const codeElement = findCodeElement(children); + const codeText = + codeElement ? + extractTextContent( + (codeElement.props as Record) + .children as React.ReactNode, + ).trim() + : extractTextContent(children).trim(); + + // Extract all attributes (supports multiple sources for backwards compat) + const language = getLanguage(props); + + // Filename: check title (from rehype-pretty-code), filename, or direct title + const filename = + props["data-rehype-pretty-code-title"] || + props["data-filename"] || + ((props as Record)["title"] as string | undefined); + + // Copy button: defaults to true unless explicitly set to "false" + const showCopy = props["data-copy"] !== "false"; + + // Variant: "terminal" or "ide" + const variant = props["data-variant"] as "terminal" | "ide" | undefined; + + // Animation settings + const duration = + props["data-duration"] ? parseFloat(props["data-duration"]) : undefined; + const delay = + props["data-delay"] ? parseFloat(props["data-delay"]) : undefined; + const writing = props["data-writing"] !== "false"; + + // Line numbers: defaults to true unless explicitly set to "false" + const lineNumbers = props["data-linenumbers"] !== "false"; + + // Determine component type based on language and attributes + const isShell = SHELL_LANGUAGES.has(language); + const isConfigFile = CONFIG_LANGUAGES.has(language); + + // Routing logic: + // 1. Config files → Always static CodeSnippet (never animated) + // 2. Shell + filename + copy=false → Animated CodeEditorWrapper (terminal style) + // 3. Shell (all other cases) → ShellSnippet (copyable Terminal tab UI) + // 4. Non-shell + filename + no copy attr → Animated CodeEditorWrapper + // 5. Default → Static CodeSnippet + + // Config files always use static CodeSnippet (never animated) + if (isConfigFile) { + return ( +
    + +
    + ); + } + + // Shell commands: Use animated terminal only when explicitly copy=false with filename + // Otherwise, always use ShellSnippet (the Terminal tab UI with copy button) + if (isShell) { + // Only use animated terminal when explicitly no copy button wanted + if (filename && props["data-copy"] === "false") { + return ( +
    + +
    + ); + } + + // All other shell commands use ShellSnippet (Terminal tab with copy) + return ( +
    + +
    + ); + } + + // Non-shell: animate if filename present and copy not explicitly set + const shouldAnimate = filename && props["data-copy"] === undefined; + + if (shouldAnimate) { + return ( +
    + +
    + ); + } + + // Default: static CodeSnippet + return ( +
    + +
    + ); +} + +/** + * Server-side inline code component + */ +export function ServerInlineCode({ + children, + className, + ...props +}: React.HTMLAttributes): React.ReactElement { + const isCodeBlock = + className?.includes("language-") || + (props as Record)["data-language"]; + + if (isCodeBlock) { + // This is a code block that should be handled by ServerCodeBlock + // This is a fallback for when code is not wrapped in pre + const language = getLanguage(props as ServerCodeBlockProps); + const codeText = extractTextContent(children).trim(); + + return ( +
    + +
    + ); + } + + // Inline code - simple styled element + return ( + + {children} + + ); +} diff --git a/apps/framework-docs-v2/src/components/mdx/server-figure.tsx b/apps/framework-docs-v2/src/components/mdx/server-figure.tsx new file mode 100644 index 0000000000..b9d7a08744 --- /dev/null +++ b/apps/framework-docs-v2/src/components/mdx/server-figure.tsx @@ -0,0 +1,119 @@ +import React from "react"; + +interface MDXFigureProps extends React.HTMLAttributes { + "data-rehype-pretty-code-figure"?: string; + children?: React.ReactNode; +} + +/** + * Extracts text content from a React node (for figcaption titles) + */ +function extractTextFromNode(node: React.ReactNode): string { + if (typeof node === "string") { + return node; + } + if (typeof node === "number") { + return String(node); + } + if (Array.isArray(node)) { + return node.map(extractTextFromNode).join(""); + } + if (React.isValidElement(node)) { + const props = node.props as Record; + return extractTextFromNode(props.children as React.ReactNode); + } + return ""; +} + +/** + * Server-side component that handles figure wrapper from rehype-pretty-code + * Extracts the title from figcaption and passes it to the pre element + */ +export function ServerFigure({ + children, + ...props +}: MDXFigureProps): React.ReactElement { + // Only handle code block figures + // data-rehype-pretty-code-figure is present (even if empty string) for code blocks + if (props["data-rehype-pretty-code-figure"] === undefined) { + return
    {children}
    ; + } + + // For code blocks, extract figcaption title and pass to pre + const childrenArray = React.Children.toArray(children); + + // Find figcaption and pre elements + let figcaption: React.ReactElement | null = null; + let preElement: React.ReactElement | null = null; + + childrenArray.forEach((child) => { + if (React.isValidElement(child)) { + const childType = child.type; + const childProps = (child.props as Record) || {}; + + // Check if it's a native HTML element by checking if type is a string + if (typeof childType === "string") { + if (childType === "figcaption") { + figcaption = child; + } else if (childType === "pre") { + preElement = child; + } + } else { + // For React components (like ServerCodeBlock) + // Check if it has code block attributes + const hasCodeBlockAttrs = + childProps["data-rehype-pretty-code-fragment"] !== undefined || + childProps["data-language"] !== undefined || + childProps["data-theme"] !== undefined; + + // If it has code block attributes, it's the pre element + if (hasCodeBlockAttrs || !preElement) { + preElement = child; + } + } + } + }); + + // Extract filename from figcaption (title from markdown) + const figcaptionTitle = + figcaption ? + extractTextFromNode( + (figcaption.props as Record) + .children as React.ReactNode, + ).trim() + : undefined; + + const preProps = + preElement ? (preElement.props as Record) || {} : {}; + + // Prioritize figcaption title (from markdown title="...") over any existing attributes + const filename = + figcaptionTitle || + (preProps["data-rehype-pretty-code-title"] as string | undefined) || + (preProps["data-filename"] as string | undefined); + + // If we have a pre element, ensure the filename is set on both attributes + if (preElement) { + const hasCodeBlockAttrs = + preProps["data-language"] !== undefined || + preProps["data-theme"] !== undefined; + const fragmentValue = + preProps["data-rehype-pretty-code-fragment"] !== undefined ? + preProps["data-rehype-pretty-code-fragment"] + : hasCodeBlockAttrs ? "" + : undefined; + + const updatedPre = React.cloneElement(preElement, { + ...preProps, + "data-filename": filename || undefined, + "data-rehype-pretty-code-title": filename || undefined, + ...(fragmentValue !== undefined ? 
+ { "data-rehype-pretty-code-fragment": fragmentValue } + : {}), + }); + return <>{updatedPre}; + } + + // Fallback: render children + return <>{children}; +} diff --git a/apps/framework-docs-v2/src/components/mdx/shell-snippet.tsx b/apps/framework-docs-v2/src/components/mdx/shell-snippet.tsx new file mode 100644 index 0000000000..1ad348c608 --- /dev/null +++ b/apps/framework-docs-v2/src/components/mdx/shell-snippet.tsx @@ -0,0 +1,36 @@ +"use client"; + +import React from "react"; +import { + Snippet, + SnippetCopyButton, + SnippetHeader, + SnippetTabsContent, + SnippetTabsList, + SnippetTabsTrigger, +} from "@/components/ui/snippet"; + +interface ShellSnippetProps { + code: string; + language: string; +} + +/** + * Client component for shell/terminal code snippets + * Displays with "Terminal" label and copy button + */ +export function ShellSnippet({ code, language }: ShellSnippetProps) { + const [value, setValue] = React.useState("terminal"); + + return ( + + + + Terminal + + + + {code} + + ); +} diff --git a/apps/framework-docs-v2/src/lib/rehype-code-meta.ts b/apps/framework-docs-v2/src/lib/rehype-code-meta.ts new file mode 100644 index 0000000000..ed64837837 --- /dev/null +++ b/apps/framework-docs-v2/src/lib/rehype-code-meta.ts @@ -0,0 +1,141 @@ +import { visit } from "unist-util-visit"; + +/** + * Generic rehype plugin that extracts all code block meta attributes + * and sets them as data-* attributes on the pre element. + * + * Supports: + * - key="value" or key='value' (quoted values) + * - key=value (unquoted values) + * - key (flag-style, sets data-key="true") + * + * Examples: + * ```ts filename="example.ts" copy + * ```bash variant="terminal" duration=3 delay=0.5 + * ```python copy=false lineNumbers + */ + +interface HastElement { + type: "element"; + tagName: string; + properties?: Record; + children?: HastNode[]; + data?: Record; +} + +interface HastText { + type: "text"; + value: string; +} + +type HastNode = HastElement | HastText | { type: string }; + +interface HastRoot { + type: "root"; + children: HastNode[]; +} + +export function rehypeCodeMeta() { + return (tree: HastRoot) => { + visit(tree, "element", (node: HastElement) => { + // Only process pre elements with code children + if (node.tagName !== "pre" || !node.children) { + return; + } + + for (const child of node.children) { + if ( + child.type === "element" && + (child as HastElement).tagName === "code" && + (child as HastElement).data?.meta + ) { + const meta = (child as HastElement).data?.meta as string; + const attributes = parseMetaString(meta); + + // Ensure properties object exists + if (!node.properties) { + node.properties = {}; + } + + // Set each parsed attribute as a data-* attribute + for (const [key, value] of Object.entries(attributes)) { + // Use lowercase keys with data- prefix + const dataKey = `data-${key.toLowerCase()}`; + node.properties[dataKey] = value; + } + + // Only process the first code child + break; + } + } + }); + }; +} + +/** + * Parses a code block meta string into key-value pairs + * + * Handles: + * - key="value" or key='value' + * - key=value (no quotes) + * - key (flag, becomes "true") + */ +function parseMetaString(meta: string): Record { + const attributes: Record = {}; + + if (!meta || typeof meta !== "string") { + return attributes; + } + + // Regex patterns for different attribute formats + // Pattern 1: key="value" or key='value' (quoted) + const quotedPattern = /(\w+)=["']([^"']*)["']/g; + // Pattern 2: key=value (unquoted, stops at whitespace) + const unquotedPattern = 
/(\w+)=([^\s"']+)/g; + // Pattern 3: standalone key (flag-style) + const flagPattern = /(?:^|\s)(\w+)(?=\s|$)/g; + + // Track which parts of the string we've processed + let processed = meta; + + // First, extract quoted values + let match: RegExpExecArray | null = quotedPattern.exec(meta); + while (match !== null) { + const key = match[1]; + const value = match[2]; + if (key) { + attributes[key] = value ?? ""; + // Mark as processed by replacing with spaces + processed = processed.replace(match[0], " ".repeat(match[0].length)); + } + match = quotedPattern.exec(meta); + } + + // Then, extract unquoted values from remaining string + match = unquotedPattern.exec(processed); + while (match !== null) { + const key = match[1]; + const value = match[2]; + if (key && !attributes[key]) { + attributes[key] = value ?? ""; + } + match = unquotedPattern.exec(processed); + } + + // Finally, extract flags from remaining string + // Reset processed to only include non-key=value parts + const remainingParts = processed.split(/\w+=\S+/).join(" "); + match = flagPattern.exec(remainingParts); + while (match !== null) { + const key = match[1]; + // Only add if not already set + if (key && !attributes[key]) { + attributes[key] = "true"; + } + match = flagPattern.exec(remainingParts); + } + + return attributes; +} + +export default rehypeCodeMeta; From a0d6c2eedc348a71339ebab5b5bddaf434dc2feb Mon Sep 17 00:00:00 2001 From: Tim Date: Tue, 25 Nov 2025 17:31:25 -0800 Subject: [PATCH 17/59] fix build --- .../src/components/mdx/server-figure.tsx | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/apps/framework-docs-v2/src/components/mdx/server-figure.tsx b/apps/framework-docs-v2/src/components/mdx/server-figure.tsx index b9d7a08744..75b1a7621a 100644 --- a/apps/framework-docs-v2/src/components/mdx/server-figure.tsx +++ b/apps/framework-docs-v2/src/components/mdx/server-figure.tsx @@ -75,13 +75,13 @@ export function ServerFigure({ }); // Extract filename from figcaption (title from markdown) - const figcaptionTitle = - figcaption ? - extractTextFromNode( - (figcaption.props as Record) - .children as React.ReactNode, - ).trim() - : undefined; + let figcaptionTitle: string | undefined; + if (figcaption !== null) { + const figcaptionProps = figcaption.props as Record; + figcaptionTitle = extractTextFromNode( + figcaptionProps.children as React.ReactNode, + ).trim(); + } const preProps = preElement ? (preElement.props as Record) || {} : {}; From 781fe2b614e1f1ebca29d67b4d2dd0673df23e63 Mon Sep 17 00:00:00 2001 From: Tim Date: Wed, 26 Nov 2025 20:25:30 -0800 Subject: [PATCH 18/59] added the filetree component --- .../content/moosestack/configuration.mdx | 19 +-- .../src/components/mdx/file-tree.tsx | 123 +++++++++++++++--- 2 files changed, 117 insertions(+), 25 deletions(-) diff --git a/apps/framework-docs-v2/content/moosestack/configuration.mdx b/apps/framework-docs-v2/content/moosestack/configuration.mdx index 28702959eb..9b8a03d18b 100644 --- a/apps/framework-docs-v2/content/moosestack/configuration.mdx +++ b/apps/framework-docs-v2/content/moosestack/configuration.mdx @@ -4,7 +4,7 @@ description: Configure your MooseStack project order: 1 --- -import { Callout } from "@/components/mdx"; +import { Callout, FileTree } from "@/components/mdx"; # Project Configuration @@ -117,14 +117,15 @@ MOOSE_
    __=value ### Complete Example **File structure:** -``` -my-moose-project/ -├── .env # Base config -├── .env.dev # Dev overrides -├── .env.prod # Prod overrides -├── .env.local # Local secrets (gitignored) -└── moose.config.toml # Structured config -``` + + + + + + + + + **.env** (committed): ```bash diff --git a/apps/framework-docs-v2/src/components/mdx/file-tree.tsx b/apps/framework-docs-v2/src/components/mdx/file-tree.tsx index 52343f95cd..36d9602df2 100644 --- a/apps/framework-docs-v2/src/components/mdx/file-tree.tsx +++ b/apps/framework-docs-v2/src/components/mdx/file-tree.tsx @@ -1,37 +1,128 @@ "use client"; -import React from "react"; +import * as React from "react"; +import { IconChevronRight, IconFile, IconFolder } from "@tabler/icons-react"; +import { + Collapsible, + CollapsibleContent, + CollapsibleTrigger, +} from "@/components/ui/collapsible"; +import { cn } from "@/lib/utils"; + +// ============================================================================ +// FileTree Root +// ============================================================================ interface FileTreeProps { children: React.ReactNode; + className?: string; } +/** + * FileTree component for MDX documentation + * + * Usage in MDX: + * ```mdx + * + * + * + * + * + * + * + * + * ``` + */ +export function FileTree({ children, className }: FileTreeProps) { + return ( +
    +
      {children}
    +
    + ); +} + +// ============================================================================ +// FileTreeFolder +// ============================================================================ + interface FileTreeFolderProps { name: string; children?: React.ReactNode; + defaultOpen?: boolean; } -interface FileTreeFileProps { - name: string; +export function FileTreeFolder({ + name, + children, + defaultOpen = true, +}: FileTreeFolderProps) { + return ( +
  • + + + + + +
      + {children} +
    +
    +
    +
  • + ); } -export function FileTree({ children }: FileTreeProps) { - return
    {children}
    ; +// ============================================================================ +// FileTreeFile +// ============================================================================ + +interface FileTreeFileProps { + name: string; } -export function FileTreeFolder({ name, children }: FileTreeFolderProps) { +export function FileTreeFile({ name }: FileTreeFileProps) { return ( -
    -
    {name}/
    -
    {children}
    -
    +
  • +
    svg]:size-4 [&>svg]:shrink-0", + )} + > + + {name} +
    +
  • ); } -export function FileTreeFile({ name }: FileTreeFileProps) { - return
    {name}
    ; -} +// ============================================================================ +// Attach sub-components for dot notation +// ============================================================================ -// Attach sub-components to FileTree for nested usage -(FileTree as any).Folder = FileTreeFolder; -(FileTree as any).File = FileTreeFile; +FileTree.Folder = FileTreeFolder; +FileTree.File = FileTreeFile; From b95377700dc205930a9720f99a2b836b9ef3c815 Mon Sep 17 00:00:00 2001 From: Tim Date: Fri, 28 Nov 2025 11:43:49 -0800 Subject: [PATCH 19/59] fix some biome errors and styles --- .../src/components/mdx/code-snippet.tsx | 324 +++++++++++++++++- .../src/components/mdx/inline-code.tsx | 87 +++++ .../src/components/mdx/server-code-block.tsx | 172 +++++++++- .../src/lib/rehype-code-meta.ts | 178 +++++++--- 4 files changed, 696 insertions(+), 65 deletions(-) create mode 100644 apps/framework-docs-v2/src/components/mdx/inline-code.tsx diff --git a/apps/framework-docs-v2/src/components/mdx/code-snippet.tsx b/apps/framework-docs-v2/src/components/mdx/code-snippet.tsx index 3316156f11..538a48d884 100644 --- a/apps/framework-docs-v2/src/components/mdx/code-snippet.tsx +++ b/apps/framework-docs-v2/src/components/mdx/code-snippet.tsx @@ -11,12 +11,23 @@ import { CodeBlockContent, } from "@/components/ui/shadcn-io/code-block"; +/** + * Parsed substring highlight with optional occurrence filter + */ +interface SubstringHighlight { + pattern: string; + occurrences?: number[]; +} + interface CodeSnippetProps { code: string; language?: string; filename?: string; copyButton?: boolean; lineNumbers?: boolean; + highlightLines?: number[]; + highlightStrings?: SubstringHighlight[]; + isAnsi?: boolean; className?: string; } @@ -59,14 +70,299 @@ function CopyButton({ ); } +/** + * Parse ANSI escape codes and convert to styled HTML + */ +function parseAnsi(text: string): string { + const colors: Record = { + 30: "color: #000", + 31: "color: #c00", + 32: "color: #0a0", + 33: "color: #a50", + 34: "color: #00a", + 35: "color: #a0a", + 36: "color: #0aa", + 37: "color: #aaa", + 90: "color: #555", + 91: "color: #f55", + 92: "color: #5f5", + 93: "color: #ff5", + 94: "color: #55f", + 95: "color: #f5f", + 96: "color: #5ff", + 97: "color: #fff", + }; + + const bgColors: Record = { + 40: "background-color: #000", + 41: "background-color: #c00", + 42: "background-color: #0a0", + 43: "background-color: #a50", + 44: "background-color: #00a", + 45: "background-color: #a0a", + 46: "background-color: #0aa", + 47: "background-color: #aaa", + 100: "background-color: #555", + 101: "background-color: #f55", + 102: "background-color: #5f5", + 103: "background-color: #ff5", + 104: "background-color: #55f", + 105: "background-color: #f5f", + 106: "background-color: #5ff", + 107: "background-color: #fff", + }; + + // biome-ignore lint/complexity/useRegexLiterals: Using constructor to avoid control character lint error + const ansiPattern = new RegExp("\\x1b\\[([0-9;]*)m", "g"); + let result = ""; + let lastIndex = 0; + let currentStyles: string[] = []; + + let match = ansiPattern.exec(text); + while (match !== null) { + const textBefore = text.slice(lastIndex, match.index); + if (textBefore) { + const escapedText = textBefore + .replace(/&/g, "&") + .replace(//g, ">"); + + if (currentStyles.length > 0) { + result += `${escapedText}`; + } else { + result += escapedText; + } + } + + const codes = match[1] ? 
match[1].split(";").map(Number) : [0]; + + for (const code of codes) { + if (code === 0) { + currentStyles = []; + } else if (code === 1) { + currentStyles.push("font-weight: bold"); + } else if (code === 2) { + currentStyles.push("opacity: 0.75"); + } else if (code === 3) { + currentStyles.push("font-style: italic"); + } else if (code === 4) { + currentStyles.push("text-decoration: underline"); + } else if (code === 9) { + currentStyles.push("text-decoration: line-through"); + } else if (colors[code]) { + currentStyles.push(colors[code]); + } else if (bgColors[code]) { + currentStyles.push(bgColors[code]); + } + } + + lastIndex = ansiPattern.lastIndex; + match = ansiPattern.exec(text); + } + + const remainingText = text.slice(lastIndex); + if (remainingText) { + const escapedText = remainingText + .replace(/&/g, "&") + .replace(//g, ">"); + + if (currentStyles.length > 0) { + result += `${escapedText}`; + } else { + result += escapedText; + } + } + + return result; +} + +/** + * Custom CodeBlockContent that supports line and substring highlighting + */ +function HighlightedCodeBlockContent({ + code, + language, + highlightLines, + highlightStrings, +}: { + code: string; + language: string; + highlightLines: number[]; + highlightStrings: SubstringHighlight[]; +}) { + const [highlightedCode, setHighlightedCode] = React.useState(""); + const [isLoading, setIsLoading] = React.useState(true); + + React.useEffect(() => { + const loadHighlightedCode = async () => { + try { + const { codeToHtml } = await import("shiki"); + + const languageMap: Record = { + gitignore: "text", + env: "text", + dotenv: "text", + }; + const mappedLanguage = languageMap[language.toLowerCase()] || language; + + const html = await codeToHtml(code, { + lang: mappedLanguage, + themes: { + light: "vitesse-light", + dark: "vitesse-dark", + }, + transformers: [ + { + line(node, line) { + // Add highlighted class to specified lines + if (highlightLines.includes(line)) { + this.addClassToHast(node, "highlighted"); + } + }, + }, + ], + }); + + // Apply substring highlighting if needed + let finalHtml = html; + if (highlightStrings.length > 0) { + finalHtml = applySubstringHighlighting(html, highlightStrings); + } + + setHighlightedCode(finalHtml); + setIsLoading(false); + } catch { + // Fallback + try { + const { codeToHtml } = await import("shiki"); + const html = await codeToHtml(code, { + lang: "text", + themes: { + light: "vitesse-light", + dark: "vitesse-dark", + }, + }); + setHighlightedCode(html); + } catch { + const lines = code.split("\n"); + const html = `
    ${lines.map((line) => `${line.replace(//g, ">")}`).join("\n")}
    `; + setHighlightedCode(html); + } + setIsLoading(false); + } + }; + + loadHighlightedCode(); + }, [code, language, highlightLines, highlightStrings]); + + if (isLoading) { + return ( +
    +        
    +          {code.split("\n").map((line, i) => (
    +            // biome-ignore lint/suspicious/noArrayIndexKey: Static code lines have no unique ID
    +            
    +              {line}
    +            
    +          ))}
    +        
    +      
    + ); + } + + return ( +
    + ); +} + +function escapeRegExp(string: string): string { + return string.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); +} + +function applySubstringHighlighting( + html: string, + highlightStrings: SubstringHighlight[], +): string { + let result = html; + + for (const { pattern, occurrences } of highlightStrings) { + const escapedPattern = escapeRegExp(pattern); + let occurrenceCount = 0; + + // Replace pattern occurrences, respecting occurrence filter + result = result.replace( + new RegExp(`(?<=>)([^<]*?)${escapedPattern}`, "g"), + (match, prefix) => { + occurrenceCount++; + const shouldHighlight = + !occurrences || occurrences.includes(occurrenceCount); + + if (shouldHighlight) { + return `>${prefix}${pattern}`; + } + return match; + }, + ); + } + + return result; +} + export function CodeSnippet({ code, language = "typescript", filename, copyButton = true, lineNumbers = true, + highlightLines = [], + highlightStrings = [], + isAnsi = false, className, }: CodeSnippetProps) { + // For ANSI blocks, render with ANSI parsing + if (isAnsi) { + const lines = code.split("\n"); + return ( +
    + {copyButton && } + {filename && ( +
    + {filename} +
    + )} +
    +
    +            
    +              {lines.map((line, i) => (
    +                // biome-ignore lint/suspicious/noArrayIndexKey: Static code lines have no unique ID
    +                
    +                  
    +                
    +              ))}
    +            
    +          
    +
    +
    + ); + } + + // Check if we need custom highlighting + const needsCustomHighlighting = + highlightLines.length > 0 || highlightStrings.length > 0; + return (
    - - {item.code} - + {needsCustomHighlighting ? + + : + {item.code} + + } )} diff --git a/apps/framework-docs-v2/src/components/mdx/inline-code.tsx b/apps/framework-docs-v2/src/components/mdx/inline-code.tsx new file mode 100644 index 0000000000..374c8a4f49 --- /dev/null +++ b/apps/framework-docs-v2/src/components/mdx/inline-code.tsx @@ -0,0 +1,87 @@ +"use client"; + +import * as React from "react"; +import { cn } from "@/lib/utils"; + +interface InlineCodeProps { + code: string; + language: string; + className?: string; +} + +const darkModeStyles = cn( + "dark:[&_.shiki]:!text-[var(--shiki-dark)]", + "dark:[&_.shiki_span]:!text-[var(--shiki-dark)]", +); + +/** + * Inline code with syntax highlighting + * Used for the Nextra-style `code{:lang}` syntax + */ +export function InlineCode({ code, language, className }: InlineCodeProps) { + const [highlightedCode, setHighlightedCode] = React.useState(""); + const [isLoading, setIsLoading] = React.useState(true); + + React.useEffect(() => { + const loadHighlightedCode = async () => { + try { + const { codeToHtml } = await import("shiki"); + + const html = await codeToHtml(code, { + lang: language, + themes: { + light: "vitesse-light", + dark: "vitesse-dark", + }, + }); + + // Extract just the code content, removing the pre/code wrapper + // The output is usually:
    ...
    + const match = html.match(/]*>([\s\S]*)<\/code>/); + if (match?.[1]) { + // Remove the line span wrapper for inline display + const content = match[1].replace( + /([\s\S]*?)<\/span>/g, + "$1", + ); + setHighlightedCode(content); + } else { + setHighlightedCode(code); + } + setIsLoading(false); + } catch { + // Fallback to plain text + setHighlightedCode(code); + setIsLoading(false); + } + }; + + loadHighlightedCode(); + }, [code, language]); + + if (isLoading) { + return ( + + {code} + + ); + } + + return ( + + ); +} diff --git a/apps/framework-docs-v2/src/components/mdx/server-code-block.tsx b/apps/framework-docs-v2/src/components/mdx/server-code-block.tsx index c5f5f9b7f6..bf523147a7 100644 --- a/apps/framework-docs-v2/src/components/mdx/server-code-block.tsx +++ b/apps/framework-docs-v2/src/components/mdx/server-code-block.tsx @@ -3,6 +3,7 @@ import { cn } from "@/lib/utils"; import { CodeSnippet } from "./code-snippet"; import { CodeEditorWrapper } from "./code-editor-wrapper"; import { ShellSnippet } from "./shell-snippet"; +import { InlineCode } from "./inline-code"; import { extractTextContent } from "@/lib/extract-text-content"; // Shell languages that should use terminal styling @@ -28,6 +29,14 @@ const CONFIG_LANGUAGES = new Set([ "config", ]); +/** + * Parsed substring highlight with optional occurrence filter + */ +interface SubstringHighlight { + pattern: string; + occurrences?: number[]; +} + /** * Props interface for server-side code block * All data-* attributes from markdown are available here @@ -48,6 +57,14 @@ export interface ServerCodeBlockProps "data-delay"?: string; "data-writing"?: string; "data-linenumbers"?: string; + "data-showlinenumbers"?: string; + + // Line and substring highlighting (Nextra-style) + "data-highlight-lines"?: string; + "data-highlight-strings"?: string; + + // Animation flag (Nextra extension) + "data-animate"?: string; children?: React.ReactNode; } @@ -108,11 +125,69 @@ function findCodeElement( return undefined; } +/** + * Parse line highlight specification into array of line numbers + * Handles: "1", "1,4-5", "1-3,7,9-11" + */ +function parseLineHighlights(spec: string | undefined): number[] { + if (!spec) return []; + + const lines: number[] = []; + const parts = spec.split(","); + + for (const part of parts) { + const trimmed = part.trim(); + if (trimmed.includes("-")) { + const [start, end] = trimmed.split("-").map((n) => parseInt(n, 10)); + if ( + start !== undefined && + end !== undefined && + !isNaN(start) && + !isNaN(end) + ) { + for (let i = start; i <= end; i++) { + lines.push(i); + } + } + } else { + const num = parseInt(trimmed, 10); + if (!isNaN(num)) { + lines.push(num); + } + } + } + + return lines; +} + +/** + * Parse substring highlights from JSON string + */ +function parseSubstringHighlights( + jsonStr: string | undefined, +): SubstringHighlight[] { + if (!jsonStr) return []; + + try { + return JSON.parse(jsonStr) as SubstringHighlight[]; + } catch { + return []; + } +} + /** * Server-side code block component * * Extracts all code block attributes and routes to the appropriate * client-side component based on language and attributes. 
+ * + * Supports Nextra-style syntax: + * - ```js {1,4-5} → Line highlighting + * - ```js /useState/ → Substring highlighting + * - ```js copy → Copy button + * - ```js showLineNumbers→ Line numbers + * - ```js filename="x" → File header + * - ```js animate → Animated typing effect */ export function ServerCodeBlock({ children, @@ -156,29 +231,60 @@ export function ServerCodeBlock({ // Variant: "terminal" or "ide" const variant = props["data-variant"] as "terminal" | "ide" | undefined; - // Animation settings + // Animation settings - explicit animate flag takes precedence + const animateFlag = props["data-animate"]; + const shouldAnimate = animateFlag === "true"; + const shouldNotAnimate = animateFlag === "false"; + const duration = props["data-duration"] ? parseFloat(props["data-duration"]) : undefined; const delay = props["data-delay"] ? parseFloat(props["data-delay"]) : undefined; const writing = props["data-writing"] !== "false"; - // Line numbers: defaults to true unless explicitly set to "false" - const lineNumbers = props["data-linenumbers"] !== "false"; + // Line numbers: support both linenumbers and showlinenumbers + const lineNumbersFlag = + props["data-showlinenumbers"] ?? props["data-linenumbers"]; + const lineNumbers = lineNumbersFlag !== "false"; + + // Highlighting + const highlightLines = parseLineHighlights(props["data-highlight-lines"]); + const highlightStrings = parseSubstringHighlights( + props["data-highlight-strings"], + ); // Determine component type based on language and attributes const isShell = SHELL_LANGUAGES.has(language); const isConfigFile = CONFIG_LANGUAGES.has(language); + const isAnsi = language === "ansi"; + + // ANSI blocks render as plain text with ANSI escape code handling + if (isAnsi) { + return ( +
    + +
    + ); + } // Routing logic: - // 1. Config files → Always static CodeSnippet (never animated) - // 2. Shell + filename + copy=false → Animated CodeEditorWrapper (terminal style) - // 3. Shell (all other cases) → ShellSnippet (copyable Terminal tab UI) - // 4. Non-shell + filename + no copy attr → Animated CodeEditorWrapper - // 5. Default → Static CodeSnippet - - // Config files always use static CodeSnippet (never animated) - if (isConfigFile) { + // 1. Config files → Always static CodeSnippet (never animated unless explicit) + // 2. Explicit animate flag → Use CodeEditorWrapper + // 3. Explicit animate=false → Use CodeSnippet + // 4. Shell + filename + copy=false → Animated CodeEditorWrapper (terminal style) + // 5. Shell (all other cases) → ShellSnippet (copyable Terminal tab UI) + // 6. Non-shell + filename + no copy attr + no animate=false → Animated CodeEditorWrapper + // 7. Default → Static CodeSnippet + + // Config files use static CodeSnippet unless explicitly animated + if (isConfigFile && !shouldAnimate) { return (
    +
    + ); + } + + // Explicit animate flag + if (shouldAnimate) { + return ( +
    +
    ); } // Shell commands: Use animated terminal only when explicitly copy=false with filename + // and animate flag is not explicitly false // Otherwise, always use ShellSnippet (the Terminal tab UI with copy button) if (isShell) { // Only use animated terminal when explicitly no copy button wanted - if (filename && props["data-copy"] === "false") { + if (filename && props["data-copy"] === "false" && !shouldNotAnimate) { return (
    ); @@ -255,6 +385,8 @@ export function ServerCodeBlock({ /** * Server-side inline code component + * + * Supports Nextra-style inline highlighting: `code{:lang}` */ export function ServerInlineCode({ children, @@ -282,6 +414,18 @@ export function ServerInlineCode({ ); } + // Check for inline code with language hint: `code{:lang}` + const textContent = + typeof children === "string" ? children : extractTextContent(children); + const inlineLangMatch = textContent.match(/^(.+)\{:(\w+)\}$/); + + if (inlineLangMatch) { + const [, code, lang] = inlineLangMatch; + if (code && lang) { + return ; + } + } + // Inline code - simple styled element return ( 0) { + node.properties["data-highlight-strings"] = JSON.stringify( + parsed.highlightStrings, + ); + } + // Only process the first code child break; } @@ -73,69 +90,148 @@ export function rehypeCodeMeta() { } /** - * Parses a code block meta string into key-value pairs + * Parsed substring highlight with optional occurrence filter + */ +interface SubstringHighlight { + pattern: string; + occurrences?: number[]; // undefined = all occurrences +} + +/** + * Result of parsing the meta string + */ +interface ParsedMeta { + attributes: Record; + highlightLines: string | null; // e.g., "1,4-5" + highlightStrings: SubstringHighlight[]; +} + +/** + * Parses a code block meta string into key-value pairs, line highlights, + * and substring highlights. * * Handles: * - key="value" or key='value' * - key=value (no quotes) * - key (flag, becomes "true") + * - {1,4-5} (line highlighting) + * - /substring/ (substring highlighting) + * - /substring/1 or /substring/1-3 or /substring/1,3 (occurrence filtering) */ -function parseMetaString(meta: string): Record { - const attributes: Record = {}; +function parseMetaString(meta: string): ParsedMeta { + const result: ParsedMeta = { + attributes: {}, + highlightLines: null, + highlightStrings: [], + }; if (!meta || typeof meta !== "string") { - return attributes; + return result; } - // Regex patterns for different attribute formats - // Pattern 1: key="value" or key='value' (quoted) - const quotedPattern = /(\w+)=["']([^"']*)["']/g; - // Pattern 2: key=value (unquoted, stops at whitespace) - const unquotedPattern = /(\w+)=([^\s"']+)/g; - // Pattern 3: standalone key (flag-style) - const flagPattern = /(?:^|\s)(\w+)(?=\s|$)/g; - - // Track which parts of the string we've processed let processed = meta; - // First, extract quoted values - let match: RegExpExecArray | null = quotedPattern.exec(meta); - while (match !== null) { + // 1. Extract line highlighting: {1,4-5} + const lineHighlightMatch = processed.match(/\{([^}]+)\}/); + if (lineHighlightMatch?.[1]) { + result.highlightLines = lineHighlightMatch[1]; + processed = processed.replace(lineHighlightMatch[0], " "); + } + + // 2. Extract substring highlighting: /pattern/ or /pattern/occurrences + // Pattern: /[^/]+/(?:\d+(?:-\d+)?(?:,\d+(?:-\d+)?)*)? 
+ const substringPattern = /\/([^/]+)\/(\d+(?:-\d+)?(?:,\d+(?:-\d+)?)*)?/g; + let substringMatch = substringPattern.exec(processed); + while (substringMatch !== null) { + const pattern = substringMatch[1]; + const occurrenceSpec = substringMatch[2]; + + if (pattern) { + const highlight: SubstringHighlight = { pattern }; + + if (occurrenceSpec) { + highlight.occurrences = parseOccurrenceSpec(occurrenceSpec); + } + + result.highlightStrings.push(highlight); + } + substringMatch = substringPattern.exec(processed); + } + + // Remove substring patterns from processed string for attribute parsing + processed = processed.replace(substringPattern, " "); + + // 3. Extract quoted values: key="value" or key='value' + const quotedPattern = /(\w+)=["']([^"']*)["']/g; + for (const match of meta.matchAll(quotedPattern)) { const key = match[1]; const value = match[2]; if (key) { - attributes[key] = value ?? ""; - // Mark as processed by replacing with spaces + result.attributes[key] = value ?? ""; processed = processed.replace(match[0], " ".repeat(match[0].length)); } - match = quotedPattern.exec(meta); } - // Then, extract unquoted values from remaining string - match = unquotedPattern.exec(processed); - while (match !== null) { + // 4. Extract unquoted values: key=value + const unquotedPattern = /(\w+)=([^\s"'{}\/]+)/g; + for (const match of processed.matchAll(unquotedPattern)) { const key = match[1]; const value = match[2]; - if (key && !attributes[key]) { - attributes[key] = value ?? ""; + if (key && !result.attributes[key]) { + result.attributes[key] = value ?? ""; } - match = unquotedPattern.exec(processed); } - // Finally, extract flags from remaining string + // 5. Extract flags (standalone words) // Reset processed to only include non-key=value parts - const remainingParts = processed.split(/\w+=\S+/).join(" "); - match = flagPattern.exec(remainingParts); - while (match !== null) { + const remainingParts = processed + .replace(/\w+=\S+/g, " ") + .replace(/\{[^}]*\}/g, " ") + .replace(/\/[^/]+\/\S*/g, " "); + const flagPattern = /(?:^|\s)(\w+)(?=\s|$)/g; + for (const match of remainingParts.matchAll(flagPattern)) { const key = match[1]; - // Only add if not already set - if (key && !attributes[key]) { - attributes[key] = "true"; + if (key && !result.attributes[key]) { + result.attributes[key] = "true"; + } + } + + return result; +} + +/** + * Parse occurrence specification like "1", "1-3", "1,3", "1-3,5" + * Returns array of 1-indexed occurrence numbers + */ +function parseOccurrenceSpec(spec: string): number[] { + const occurrences: number[] = []; + const parts = spec.split(","); + + for (const part of parts) { + const trimmed = part.trim(); + if (trimmed.includes("-")) { + // Range: "1-3" + const [start, end] = trimmed.split("-").map((n) => parseInt(n, 10)); + if ( + start !== undefined && + end !== undefined && + !Number.isNaN(start) && + !Number.isNaN(end) + ) { + for (let i = start; i <= end; i++) { + occurrences.push(i); + } + } + } else { + // Single number: "1" + const num = parseInt(trimmed, 10); + if (!Number.isNaN(num)) { + occurrences.push(num); + } } - match = flagPattern.exec(remainingParts); } - return attributes; + return occurrences; } export default rehypeCodeMeta; From 3634a954a04c360540c5387ba1e5f2663a10d6b7 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Sun, 16 Nov 2025 20:29:08 -0700 Subject: [PATCH 20/59] on cluster (#2944) > [!NOTE] > Introduce full ClickHouse cluster support: new table `cluster` config, ON CLUSTER DDL across migrations/ops, validations, TS/Py SDK 
updates, new templates and E2E tests, CI jobs, and docs updates. > > - **OLAP/CLI core**: > - Add `cluster_name` to `Table`/inframap (proto, serde) and show in CLI. > - Generate `ON CLUSTER` in CREATE/ALTER/DROP/SETTINGS/SAMPLE/INDEX DDL and pass cluster through all operations. > - Do not treat `cluster_name` as schema-diff; preserve from infra map during reconcile. > - Validate databases and clusters in plans/migrations; improved error messages. > - Handle replicated engines: auto-inject params in dev when no cluster; support cluster-aware replication. > - Update migration schema to allow structured `engine` objects. > - **SDKs (TypeScript/Python)**: > - Add optional `cluster` to OLAP table config; serialize to internal model. > - Validate that `cluster` cannot be combined with explicit `keeperPath/replicaName` (TS) or `keeper_path/replica_name` (Py). > - Add unit tests for cluster validation. > - **E2E & Templates**: > - New templates `templates/typescript-cluster` and `templates/python-cluster` with clustered/non-clustered tables. > - Add E2E tests (`apps/framework-cli-e2e/test/cluster.test.ts`) for infra map, CH clusters, XML generation, and table creation. > - CI: add `test-e2e-cluster-typescript` and `test-e2e-cluster-python` jobs; adjust path filters. > - **Infra & Misc**: > - Docker compose supports mounting `clickhouse_clusters.xml` when provided. > - **Docs**: > - Update configuration and model-table docs to document clusters, replication modes, and deployment guidance; note in planned migrations. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit a201fdd6f29a05cec6f1fda437730c03a1046e86. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). --- .github/workflows/test-framework-cli.yaml | 142 +++++- apps/framework-cli-e2e/test/cluster.test.ts | 463 ++++++++++++++++++ apps/framework-cli-e2e/test/constants.ts | 4 + apps/framework-cli/src/cli.rs | 6 + .../src/cli/display/infrastructure.rs | 5 + apps/framework-cli/src/cli/local_webserver.rs | 1 + .../framework-cli/src/cli/routines/migrate.rs | 349 ++++++++++--- apps/framework-cli/src/cli/routines/mod.rs | 10 +- .../framework/core/infra_reality_checker.rs | 1 + .../framework/core/infrastructure/table.rs | 6 + .../src/framework/core/infrastructure_map.rs | 9 + .../core/partial_infrastructure_map.rs | 4 + apps/framework-cli/src/framework/core/plan.rs | 111 +++++ .../src/framework/core/plan_validator.rs | 331 +++++++++++++ .../src/framework/data_model/model.rs | 1 + .../src/framework/python/generate.rs | 11 + .../src/framework/typescript/generate.rs | 11 + .../olap/clickhouse/diff_strategy.rs | 181 +++++++ .../infrastructure/olap/clickhouse/mapper.rs | 1 + .../src/infrastructure/olap/clickhouse/mod.rs | 288 +++++++++-- .../infrastructure/olap/clickhouse/model.rs | 2 + .../infrastructure/olap/clickhouse/queries.rs | 381 +++++++++++++- .../src/infrastructure/olap/ddl_ordering.rs | 34 ++ .../src/utilities/migration_plan_schema.json | 9 +- .../src/utilities/prod-docker-compose.yml.hbs | 3 + .../content/moosestack/configuration.mdx | 11 + .../content/moosestack/olap/model-table.mdx | 107 +++- .../llm-docs/python/table-setup.md | 86 ++++ .../llm-docs/typescript/table-setup.md | 74 +++ .../src/pages/moose/configuration.mdx | 11 + .../src/pages/moose/olap/model-table.mdx | 104 +++- .../pages/moose/olap/planned-migrations.mdx | 2 + packages/protobuf/infrastructure_map.proto | 3 + .../py-moose-lib/moose_lib/dmv2/olap_table.py | 29 ++ 
packages/py-moose-lib/moose_lib/internal.py | 3 + .../tests/test_cluster_validation.py | 86 ++++ packages/ts-moose-lib/src/dmv2/internal.ts | 3 + .../ts-moose-lib/src/dmv2/sdk/olapTable.ts | 21 + .../tests/cluster-validation.test.ts | 121 +++++ templates/python-cluster/README.md | 3 + templates/python-cluster/app/__init__.py | 0 .../python-cluster/app/ingest/__init__.py | 0 templates/python-cluster/app/ingest/models.py | 96 ++++ templates/python-cluster/app/main.py | 1 + templates/python-cluster/moose.config.toml | 62 +++ templates/python-cluster/requirements.txt | 7 + templates/python-cluster/setup.py | 13 + templates/python-cluster/template.config.toml | 22 + templates/typescript-cluster/README.md | 3 + .../typescript-cluster/moose.config.toml | 65 +++ templates/typescript-cluster/package.json | 26 + templates/typescript-cluster/src/index.ts | 1 + .../typescript-cluster/src/ingest/models.ts | 77 +++ .../typescript-cluster/template.config.toml | 22 + templates/typescript-cluster/tsconfig.json | 16 + 55 files changed, 3289 insertions(+), 150 deletions(-) create mode 100644 apps/framework-cli-e2e/test/cluster.test.ts create mode 100644 packages/py-moose-lib/tests/test_cluster_validation.py create mode 100644 packages/ts-moose-lib/tests/cluster-validation.test.ts create mode 100644 templates/python-cluster/README.md create mode 100644 templates/python-cluster/app/__init__.py create mode 100644 templates/python-cluster/app/ingest/__init__.py create mode 100644 templates/python-cluster/app/ingest/models.py create mode 100644 templates/python-cluster/app/main.py create mode 100644 templates/python-cluster/moose.config.toml create mode 100644 templates/python-cluster/requirements.txt create mode 100644 templates/python-cluster/setup.py create mode 100644 templates/python-cluster/template.config.toml create mode 100644 templates/typescript-cluster/README.md create mode 100644 templates/typescript-cluster/moose.config.toml create mode 100644 templates/typescript-cluster/package.json create mode 100644 templates/typescript-cluster/src/index.ts create mode 100644 templates/typescript-cluster/src/ingest/models.ts create mode 100644 templates/typescript-cluster/template.config.toml create mode 100644 templates/typescript-cluster/tsconfig.json diff --git a/.github/workflows/test-framework-cli.yaml b/.github/workflows/test-framework-cli.yaml index b318935553..ef36fb0486 100644 --- a/.github/workflows/test-framework-cli.yaml +++ b/.github/workflows/test-framework-cli.yaml @@ -43,8 +43,8 @@ jobs: "^\.github/workflows/test-framework-cli\.yaml" "^apps/framework-cli-e2e/" "^apps/framework-cli/" - "^templates/python/" - "^templates/typescript/" + "^templates/python" + "^templates/typescript" "^packages/" "Cargo.lock" "pnpm-lock.yaml" @@ -716,6 +716,138 @@ jobs: run: | cat ~/.moose/*-cli.log + test-e2e-cluster-typescript: + needs: + [detect-changes, check, test-cli, test-ts-moose-lib, test-py-moose-lib] + if: needs.detect-changes.outputs.should_run == 'true' + name: Test E2E Cluster Support - TypeScript (Node 20) + runs-on: ubuntu-latest + permissions: + contents: read + env: + RUST_BACKTRACE: full + steps: + - name: Install Protoc (Needed for Temporal) + uses: arduino/setup-protoc@v3 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + version: "23.x" + + - name: Checkout + uses: actions/checkout@v4 + with: + repository: ${{ github.event.pull_request.head.repo.full_name }} + ref: ${{ github.event.pull_request.head.sha }} + + # Login to Docker hub to get higher rate limits when moose pulls images + - name: 
Login to Docker Hub + uses: ./.github/actions/docker-login + with: + op-service-account-token: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }} + + - uses: pnpm/action-setup@v4 + + - name: Install node + uses: actions/setup-node@v4 + with: + node-version: 20 + cache: "pnpm" + + - name: Get system info + id: system + run: | + echo "version=$(lsb_release -rs)" >> $GITHUB_OUTPUT + echo "distro=$(lsb_release -is)" >> $GITHUB_OUTPUT + + - uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + toolchain: stable + cache: true + cache-shared-key: ${{ runner.os }}-${{ steps.system.outputs.distro }}-${{ steps.system.outputs.version }}-${{ runner.arch }}-rust + cache-on-failure: true + cache-all-crates: true + cache-workspace-crates: true + + - name: Run TypeScript Cluster E2E Tests + run: pnpm install --frozen-lockfile && pnpm --filter=framework-cli-e2e run test -- --grep "TypeScript Cluster Template" + env: + MOOSE_TELEMETRY_ENABLED: false + + - name: Inspect Logs + if: always() + run: | + cat ~/.moose/*-cli.log + + test-e2e-cluster-python: + needs: + [detect-changes, check, test-cli, test-ts-moose-lib, test-py-moose-lib] + if: needs.detect-changes.outputs.should_run == 'true' + name: Test E2E Cluster Support - Python (Python 3.13) + runs-on: ubuntu-latest + permissions: + contents: read + env: + RUST_BACKTRACE: full + steps: + - name: Install Protoc (Needed for Temporal) + uses: arduino/setup-protoc@v3 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + version: "23.x" + + - name: Checkout + uses: actions/checkout@v4 + with: + repository: ${{ github.event.pull_request.head.repo.full_name }} + ref: ${{ github.event.pull_request.head.sha }} + + # Login to Docker hub to get higher rate limits when moose pulls images + - name: Login to Docker Hub + uses: ./.github/actions/docker-login + with: + op-service-account-token: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }} + + - uses: pnpm/action-setup@v4 + + - name: Install node + uses: actions/setup-node@v4 + with: + node-version: 20 + cache: "pnpm" + + - name: Get system info + id: system + run: | + echo "version=$(lsb_release -rs)" >> $GITHUB_OUTPUT + echo "distro=$(lsb_release -is)" >> $GITHUB_OUTPUT + + - uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + toolchain: stable + cache: true + cache-shared-key: ${{ runner.os }}-${{ steps.system.outputs.distro }}-${{ steps.system.outputs.version }}-${{ runner.arch }}-rust + cache-on-failure: true + cache-all-crates: true + cache-workspace-crates: true + + - name: Setup Python 3.13 + uses: actions/setup-python@v4 + with: + python-version: "3.13" + + - name: Upgrade Python build tools + run: pip install --upgrade pip setuptools wheel + + - name: Run Python Cluster E2E Tests + run: pnpm install --frozen-lockfile && pnpm --filter=framework-cli-e2e run test -- --grep "Python Cluster Template" + env: + MOOSE_TELEMETRY_ENABLED: false + + - name: Inspect Logs + if: always() + run: | + cat ~/.moose/*-cli.log + lints: needs: detect-changes if: needs.detect-changes.outputs.should_run == 'true' @@ -778,6 +910,8 @@ jobs: test-e2e-python-tests, test-e2e-backward-compatibility-typescript, test-e2e-backward-compatibility-python, + test-e2e-cluster-typescript, + test-e2e-cluster-python, lints, ] if: always() @@ -807,6 +941,8 @@ jobs: [[ "${{ needs.test-e2e-python-tests.result }}" == "failure" ]] || \ [[ "${{ needs.test-e2e-backward-compatibility-typescript.result }}" == "failure" ]] || \ [[ "${{ needs.test-e2e-backward-compatibility-python.result }}" == "failure" ]] || \ + [[ "${{ needs.test-e2e-cluster-typescript.result }}" == 
"failure" ]] || \ + [[ "${{ needs.test-e2e-cluster-python.result }}" == "failure" ]] || \ [[ "${{ needs.lints.result }}" == "failure" ]]; then echo "One or more required jobs failed" exit 1 @@ -822,6 +958,8 @@ jobs: [[ "${{ needs.test-e2e-python-tests.result }}" == "success" ]] && \ [[ "${{ needs.test-e2e-backward-compatibility-typescript.result }}" == "success" ]] && \ [[ "${{ needs.test-e2e-backward-compatibility-python.result }}" == "success" ]] && \ + [[ "${{ needs.test-e2e-cluster-typescript.result }}" == "success" ]] && \ + [[ "${{ needs.test-e2e-cluster-python.result }}" == "success" ]] && \ [[ "${{ needs.lints.result }}" == "success" ]]; then echo "All required jobs succeeded" exit 0 diff --git a/apps/framework-cli-e2e/test/cluster.test.ts b/apps/framework-cli-e2e/test/cluster.test.ts new file mode 100644 index 0000000000..8397a75e59 --- /dev/null +++ b/apps/framework-cli-e2e/test/cluster.test.ts @@ -0,0 +1,463 @@ +/// +/// +/// +/** + * Cluster Support E2E Tests + * + * Tests the ON CLUSTER functionality for ClickHouse tables in MooseStack. + * + * The tests verify: + * 1. Tables are created with ON CLUSTER clause when cluster is specified + * 2. ClickHouse clusters are properly configured from moose.config.toml + * 3. cluster_name appears correctly in the infrastructure map + * 4. Mixed environments (some tables with cluster, some without) work correctly + * 5. Both TypeScript and Python SDKs support cluster configuration + * 6. ReplicatedMergeTree with explicit keeper_path/replica_name (no cluster) works correctly + * 7. ReplicatedMergeTree with auto-injected params (ClickHouse Cloud mode) works correctly + */ + +import { spawn, ChildProcess } from "child_process"; +import { expect } from "chai"; +import * as fs from "fs"; +import * as path from "path"; +import { promisify } from "util"; +import { createClient } from "@clickhouse/client"; + +// Import constants and utilities +import { + TIMEOUTS, + SERVER_CONFIG, + TEMPLATE_NAMES, + APP_NAMES, + CLICKHOUSE_CONFIG, +} from "./constants"; + +import { + waitForServerStart, + createTempTestDirectory, + setupTypeScriptProject, + setupPythonProject, + cleanupTestSuite, + performGlobalCleanup, + cleanupClickhouseData, + waitForInfrastructureReady, +} from "./utils"; + +const execAsync = promisify(require("child_process").exec); + +const CLI_PATH = path.resolve(__dirname, "../../../target/debug/moose-cli"); +const MOOSE_LIB_PATH = path.resolve( + __dirname, + "../../../packages/ts-moose-lib", +); +const MOOSE_PY_LIB_PATH = path.resolve( + __dirname, + "../../../packages/py-moose-lib", +); + +// Admin API key hash for authentication +const TEST_ADMIN_HASH = + "deadbeefdeadbeefdeadbeefdeadbeef.0123456789abcdef0123456789abcdef"; + +/** + * Query ClickHouse to verify cluster configuration + */ +async function verifyClustersInClickHouse( + expectedClusters: string[], +): Promise { + const client = createClient({ + url: CLICKHOUSE_CONFIG.url, + username: CLICKHOUSE_CONFIG.username, + password: CLICKHOUSE_CONFIG.password, + }); + + try { + const result = await client.query({ + query: "SELECT DISTINCT cluster FROM system.clusters ORDER BY cluster", + format: "JSONEachRow", + }); + + const clusters = await result.json<{ cluster: string }>(); + const clusterNames = clusters.map((row) => row.cluster); + + console.log("Clusters found in ClickHouse:", clusterNames); + + for (const expected of expectedClusters) { + expect( + clusterNames, + `Cluster '${expected}' should be configured in ClickHouse`, + ).to.include(expected); + } + } finally { + await 
client.close(); + } +} + +/** + * Query inframap to verify cluster_name is set correctly + */ +async function verifyInfraMapClusters( + expectedTables: { name: string; cluster: string | null }[], +): Promise { + const response = await fetch(`${SERVER_CONFIG.url}/admin/inframap`, { + headers: { + Authorization: `Bearer ${TEST_ADMIN_HASH}`, + }, + }); + + if (!response.ok) { + const errorText = await response.text(); + throw new Error( + `inframap endpoint returned ${response.status}: ${errorText}`, + ); + } + + const response_data = await response.json(); + console.log("InfraMap response:", JSON.stringify(response_data, null, 2)); + + // Handle both direct format and wrapped format + const infraMap = response_data.infra_map || response_data; + + expect(infraMap.tables, "InfraMap should have tables field").to.exist; + + console.log("InfraMap tables:", Object.keys(infraMap.tables)); + + for (const expectedTable of expectedTables) { + const tableKey = `local_${expectedTable.name}`; + const table = infraMap.tables[tableKey]; + + expect(table, `Table ${expectedTable.name} should exist in inframap`).to + .exist; + + // Normalize undefined to null for comparison (undefined means field not present) + const actualCluster = + table.cluster_name === undefined ? null : table.cluster_name; + expect( + actualCluster, + `Table ${expectedTable.name} should have correct cluster_name`, + ).to.equal(expectedTable.cluster); + } +} + +/** + * Verify that the clickhouse_clusters.xml file was generated + */ +function verifyClusterXmlGenerated(projectDir: string): void { + const clusterXmlPath = path.join( + projectDir, + ".moose/clickhouse_clusters.xml", + ); + + expect( + fs.existsSync(clusterXmlPath), + "clickhouse_clusters.xml should be generated in .moose directory", + ).to.be.true; + + const xmlContent = fs.readFileSync(clusterXmlPath, "utf-8"); + console.log("Generated cluster XML:", xmlContent); + + // Verify XML contains expected cluster definitions + expect(xmlContent).to.include(""); + expect(xmlContent).to.include(""); + expect(xmlContent).to.include(""); + expect(xmlContent).to.include(""); + expect(xmlContent).to.include(""); +} + +/** + * Verify table exists in ClickHouse + * + * Note: ON CLUSTER is a DDL execution directive and is NOT stored in the table schema. + * SHOW CREATE TABLE will never display ON CLUSTER, even if it was used during creation. + * To verify cluster support, we rely on: + * 1. The inframap showing cluster_name (preserved in our state) + * 2. 
The table being successfully created (which would fail if cluster was misconfigured) + */ +async function verifyTableExists(tableName: string): Promise { + const client = createClient({ + url: CLICKHOUSE_CONFIG.url, + username: CLICKHOUSE_CONFIG.username, + password: CLICKHOUSE_CONFIG.password, + database: CLICKHOUSE_CONFIG.database, + }); + + try { + const result = await client.query({ + query: `SELECT name, engine FROM system.tables WHERE database = '${CLICKHOUSE_CONFIG.database}' AND name = '${tableName}'`, + format: "JSONEachRow", + }); + + const rows = await result.json<{ name: string; engine: string }>(); + expect( + rows.length, + `Table ${tableName} should exist in ClickHouse`, + ).to.equal(1); + console.log(`Table ${tableName} exists with engine: ${rows[0].engine}`); + } finally { + await client.close(); + } +} + +/** + * Configuration for cluster template tests + */ +interface ClusterTestConfig { + language: "typescript" | "python"; + templateName: string; + appName: string; + projectDirSuffix: string; + displayName: string; +} + +const CLUSTER_CONFIGS: ClusterTestConfig[] = [ + { + language: "typescript", + templateName: TEMPLATE_NAMES.TYPESCRIPT_CLUSTER, + appName: APP_NAMES.TYPESCRIPT_CLUSTER, + projectDirSuffix: "ts-cluster", + displayName: "TypeScript Cluster Template", + }, + { + language: "python", + templateName: TEMPLATE_NAMES.PYTHON_CLUSTER, + appName: APP_NAMES.PYTHON_CLUSTER, + projectDirSuffix: "py-cluster", + displayName: "Python Cluster Template", + }, +]; + +/** + * Creates a test suite for a specific cluster template configuration + */ +const createClusterTestSuite = (config: ClusterTestConfig) => { + describe(config.displayName, function () { + let devProcess: ChildProcess | null = null; + let TEST_PROJECT_DIR: string; + + before(async function () { + this.timeout(TIMEOUTS.TEST_SETUP_MS); + + // Verify CLI exists + try { + await fs.promises.access(CLI_PATH, fs.constants.F_OK); + } catch (err) { + console.error( + `CLI not found at ${CLI_PATH}. It should be built in the pretest step.`, + ); + throw err; + } + + // Create temporary directory for this test + TEST_PROJECT_DIR = createTempTestDirectory(config.projectDirSuffix); + + // Setup project based on language + if (config.language === "typescript") { + await setupTypeScriptProject( + TEST_PROJECT_DIR, + config.templateName, + CLI_PATH, + MOOSE_LIB_PATH, + config.appName, + "npm", + ); + } else { + await setupPythonProject( + TEST_PROJECT_DIR, + config.templateName, + CLI_PATH, + MOOSE_PY_LIB_PATH, + config.appName, + ); + } + + // Start dev server + console.log("Starting dev server..."); + const devEnv = + config.language === "python" ? 
+ { + ...process.env, + VIRTUAL_ENV: path.join(TEST_PROJECT_DIR, ".venv"), + PATH: `${path.join(TEST_PROJECT_DIR, ".venv", "bin")}:${process.env.PATH}`, + } + : { ...process.env }; + + devProcess = spawn(CLI_PATH, ["dev"], { + stdio: "pipe", + cwd: TEST_PROJECT_DIR, + env: devEnv, + }); + + await waitForServerStart( + devProcess, + TIMEOUTS.SERVER_STARTUP_MS, + SERVER_CONFIG.startupMessage, + SERVER_CONFIG.url, + ); + console.log("Server started, cleaning up old data..."); + await cleanupClickhouseData(); + console.log("Waiting for infrastructure to be ready..."); + await waitForInfrastructureReady(); + console.log("All components ready, starting tests..."); + }); + + after(async function () { + this.timeout(TIMEOUTS.CLEANUP_MS); + await cleanupTestSuite(devProcess, TEST_PROJECT_DIR, config.appName, { + logPrefix: config.displayName, + }); + }); + + it("should create tables with ON CLUSTER clauses", async function () { + this.timeout(TIMEOUTS.SCHEMA_VALIDATION_MS); + + // Verify all tables were created in ClickHouse + const client = createClient({ + url: CLICKHOUSE_CONFIG.url, + username: CLICKHOUSE_CONFIG.username, + password: CLICKHOUSE_CONFIG.password, + database: CLICKHOUSE_CONFIG.database, + }); + + try { + const result = await client.query({ + query: + "SELECT name FROM system.tables WHERE database = 'local' AND name IN ('TableA', 'TableB', 'TableC', 'TableD', 'TableE') ORDER BY name", + format: "JSONEachRow", + }); + + const tables = await result.json<{ name: string }>(); + const tableNames = tables.map((t) => t.name); + + expect(tableNames).to.include("TableA"); + expect(tableNames).to.include("TableB"); + expect(tableNames).to.include("TableC"); + expect(tableNames).to.include("TableD"); + expect(tableNames).to.include("TableE"); + } finally { + await client.close(); + } + }); + + it("should configure ClickHouse clusters from moose.config.toml", async function () { + this.timeout(TIMEOUTS.SCHEMA_VALIDATION_MS); + await verifyClustersInClickHouse(["cluster_a", "cluster_b"]); + }); + + it("should generate clickhouse_clusters.xml file", async function () { + verifyClusterXmlGenerated(TEST_PROJECT_DIR); + }); + + it("should show correct cluster_name in inframap", async function () { + this.timeout(TIMEOUTS.SCHEMA_VALIDATION_MS); + + await verifyInfraMapClusters([ + { name: "TableA", cluster: "cluster_a" }, + { name: "TableB", cluster: "cluster_b" }, + { name: "TableC", cluster: null }, + { name: "TableD", cluster: null }, + { name: "TableE", cluster: null }, + ]); + }); + + it("should create tables successfully with cluster configuration", async function () { + this.timeout(TIMEOUTS.SCHEMA_VALIDATION_MS); + + // Verify tables were created successfully + // (If cluster was misconfigured, table creation would have failed) + await verifyTableExists("TableA"); + await verifyTableExists("TableB"); + await verifyTableExists("TableC"); + await verifyTableExists("TableD"); + await verifyTableExists("TableE"); + }); + + it("should create TableD with explicit keeper args and no cluster", async function () { + this.timeout(TIMEOUTS.SCHEMA_VALIDATION_MS); + + // Verify TableD was created with explicit keeper_path and replica_name + const client = createClient({ + url: CLICKHOUSE_CONFIG.url, + username: CLICKHOUSE_CONFIG.username, + password: CLICKHOUSE_CONFIG.password, + database: CLICKHOUSE_CONFIG.database, + }); + + try { + const result = await client.query({ + query: "SHOW CREATE TABLE local.TableD", + format: "JSONEachRow", + }); + + const data = await result.json<{ statement: string }>(); + 
const createStatement = data[0].statement; + + // Verify it's ReplicatedMergeTree + expect(createStatement).to.include("ReplicatedMergeTree"); + // Verify it has explicit keeper path + expect(createStatement).to.include( + "/clickhouse/tables/{database}/{table}", + ); + // Verify it has explicit replica name + expect(createStatement).to.include("{replica}"); + // Verify it does NOT have ON CLUSTER (since no cluster is specified) + expect(createStatement).to.not.include("ON CLUSTER"); + } finally { + await client.close(); + } + }); + + it("should create TableE with auto-injected params (ClickHouse Cloud mode)", async function () { + this.timeout(TIMEOUTS.SCHEMA_VALIDATION_MS); + + // Verify TableE was created with ReplicatedMergeTree and auto-injected params + const client = createClient({ + url: CLICKHOUSE_CONFIG.url, + username: CLICKHOUSE_CONFIG.username, + password: CLICKHOUSE_CONFIG.password, + database: CLICKHOUSE_CONFIG.database, + }); + + try { + const result = await client.query({ + query: "SHOW CREATE TABLE local.TableE", + format: "JSONEachRow", + }); + + const data = await result.json<{ statement: string }>(); + const createStatement = data[0].statement; + + console.log(`TableE CREATE statement: ${createStatement}`); + + // Verify it's ReplicatedMergeTree + expect(createStatement).to.include("ReplicatedMergeTree"); + // Verify it has auto-injected params (Moose injects these in dev mode) + expect(createStatement).to.match(/ReplicatedMergeTree\(/); + // Verify it does NOT have ON CLUSTER (no cluster specified) + expect(createStatement).to.not.include("ON CLUSTER"); + } finally { + await client.close(); + } + }); + }); +}; + +// Global setup to clean Docker state from previous runs (useful for local dev) +// Github hosted runners start with a clean slate. 
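+// Both hooks delegate to performGlobalCleanup, so stale Docker state is cleared before the
+// cluster suites run and again once they finish.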
+before(async function () { + this.timeout(TIMEOUTS.GLOBAL_CLEANUP_MS); + await performGlobalCleanup( + "Running global setup for cluster tests - cleaning Docker state from previous runs...", + ); +}); + +// Global cleanup to ensure no hanging processes +after(async function () { + this.timeout(TIMEOUTS.GLOBAL_CLEANUP_MS); + await performGlobalCleanup(); +}); + +// Test suite for Cluster Support +describe("Cluster Support E2E Tests", function () { + // Generate test suites for each cluster configuration + CLUSTER_CONFIGS.forEach(createClusterTestSuite); +}); diff --git a/apps/framework-cli-e2e/test/constants.ts b/apps/framework-cli-e2e/test/constants.ts index f6eba08eac..1b1641fd2e 100644 --- a/apps/framework-cli-e2e/test/constants.ts +++ b/apps/framework-cli-e2e/test/constants.ts @@ -96,6 +96,8 @@ export const TEMPLATE_NAMES = { TYPESCRIPT_TESTS: "typescript-tests", PYTHON_DEFAULT: "python", PYTHON_TESTS: "python-tests", + TYPESCRIPT_CLUSTER: "typescript-cluster", + PYTHON_CLUSTER: "python-cluster", } as const; export const APP_NAMES = { @@ -103,4 +105,6 @@ export const APP_NAMES = { TYPESCRIPT_TESTS: "moose-ts-tests-app", PYTHON_DEFAULT: "moose-py-default-app", PYTHON_TESTS: "moose-py-tests-app", + TYPESCRIPT_CLUSTER: "moose-ts-cluster-app", + PYTHON_CLUSTER: "moose-py-cluster-app", } as const; diff --git a/apps/framework-cli/src/cli.rs b/apps/framework-cli/src/cli.rs index c632407248..87c7e78a0d 100644 --- a/apps/framework-cli/src/cli.rs +++ b/apps/framework-cli/src/cli.rs @@ -252,7 +252,13 @@ fn override_project_config_from_url( ) })?; + let clusters = project.clickhouse_config.clusters.clone(); + let additional_databases = project.clickhouse_config.additional_databases.clone(); + project.clickhouse_config = clickhouse_config; + project.clickhouse_config.clusters = clusters; + project.clickhouse_config.additional_databases = additional_databases; + info!( "Overriding project ClickHouse config from CLI: database = {}", project.clickhouse_config.db_name diff --git a/apps/framework-cli/src/cli/display/infrastructure.rs b/apps/framework-cli/src/cli/display/infrastructure.rs index 14859fc927..64769ea83f 100644 --- a/apps/framework-cli/src/cli/display/infrastructure.rs +++ b/apps/framework-cli/src/cli/display/infrastructure.rs @@ -236,6 +236,11 @@ fn format_table_display( details.push(format!("Order by: {}", table.order_by)); } + // Cluster section (if present) + if let Some(ref cluster) = table.cluster_name { + details.push(format!("Cluster: {}", cluster)); + } + // Engine section (if present) if let Some(ref engine) = table.engine { details.push(format!("Engine: {}", Into::::into(engine.clone()))); diff --git a/apps/framework-cli/src/cli/local_webserver.rs b/apps/framework-cli/src/cli/local_webserver.rs index 36ab73027a..87e3a8ed90 100644 --- a/apps/framework-cli/src/cli/local_webserver.rs +++ b/apps/framework-cli/src/cli/local_webserver.rs @@ -3556,6 +3556,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, } } diff --git a/apps/framework-cli/src/cli/routines/migrate.rs b/apps/framework-cli/src/cli/routines/migrate.rs index 5f5bb4da96..8eb2e87bfd 100644 --- a/apps/framework-cli/src/cli/routines/migrate.rs +++ b/apps/framework-cli/src/cli/routines/migrate.rs @@ -6,7 +6,7 @@ use crate::framework::core::infrastructure::table::Table; use crate::framework::core::infrastructure_map::InfrastructureMap; use crate::framework::core::migration_plan::MigrationPlan; use crate::framework::core::state_storage::{StateStorage, StateStorageBuilder}; -use 
crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; +use crate::infrastructure::olap::clickhouse::config::{ClickHouseConfig, ClusterConfig}; use crate::infrastructure::olap::clickhouse::IgnorableOperation; use crate::infrastructure::olap::clickhouse::{ check_ready, create_client, ConfiguredDBClient, SerializableOlapOperation, @@ -196,120 +196,226 @@ fn report_drift(drift: &DriftStatus) { } } -/// Validates that all table databases specified in operations are configured -fn validate_table_databases( +/// Validates that all table databases and clusters specified in operations are configured +fn validate_table_databases_and_clusters( operations: &[SerializableOlapOperation], primary_database: &str, additional_databases: &[String], + clusters: &Option>, ) -> Result<()> { let mut invalid_tables = Vec::new(); - - // Helper to validate a database option - let mut validate_db = |db_opt: &Option, table_name: &str| { + let mut invalid_clusters = Vec::new(); + + // Get configured cluster names + let cluster_names: Vec = clusters + .as_ref() + .map(|cs| cs.iter().map(|c| c.name.clone()).collect()) + .unwrap_or_default(); + + log::info!("Configured cluster names: {:?}", cluster_names); + + // Helper to validate database and cluster options + let mut validate = |db_opt: &Option, cluster_opt: &Option, table_name: &str| { + log::info!( + "Validating table '{}' with cluster: {:?}", + table_name, + cluster_opt + ); + // Validate database if let Some(db) = db_opt { if db != primary_database && !additional_databases.contains(db) { invalid_tables.push((table_name.to_string(), db.clone())); } } + // Validate cluster + if let Some(cluster) = cluster_opt { + log::info!( + "Checking if cluster '{}' is in {:?}", + cluster, + cluster_names + ); + // Fail if cluster is not in the configured list (or if list is empty) + if cluster_names.is_empty() || !cluster_names.contains(cluster) { + log::info!("Cluster '{}' not found in configured clusters!", cluster); + invalid_clusters.push((table_name.to_string(), cluster.clone())); + } + } }; for operation in operations { match operation { SerializableOlapOperation::CreateTable { table } => { - validate_db(&table.database, &table.name); + validate(&table.database, &table.cluster_name, &table.name); } - SerializableOlapOperation::DropTable { table, database } => { - validate_db(database, table); + SerializableOlapOperation::DropTable { + table, + database, + cluster_name, + } => { + validate(database, cluster_name, table); } SerializableOlapOperation::AddTableColumn { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::DropTableColumn { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::ModifyTableColumn { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::RenameTableColumn { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::ModifyTableSettings { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::ModifyTableTtl { - table, database, .. 
+ table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::AddTableIndex { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::DropTableIndex { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::ModifySampleBy { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::RemoveSampleBy { - table, database, .. + table, + database, + cluster_name, + .. } => { - validate_db(database, table); + validate(database, cluster_name, table); } SerializableOlapOperation::RawSql { .. } => { - // RawSql doesn't reference specific tables/databases, skip validation + // RawSql doesn't reference specific tables/databases/clusters, skip validation } } } - if !invalid_tables.is_empty() { - let mut error_message = String::from( - "One or more tables specify databases that are not configured in moose.config.toml:\n\n" - ); + // Build error message if we found any issues + let has_errors = !invalid_tables.is_empty() || !invalid_clusters.is_empty(); + if has_errors { + let mut error_message = String::new(); - for (table_name, database) in &invalid_tables { - error_message.push_str(&format!( - " • Table '{}' specifies database '{}'\n", - table_name, database - )); - } + // Report database errors + if !invalid_tables.is_empty() { + error_message.push_str( + "One or more tables specify databases that are not configured in moose.config.toml:\n\n", + ); - error_message - .push_str("\nTo fix this, add the missing database(s) to your moose.config.toml:\n\n"); - error_message.push_str("[clickhouse_config]\n"); - error_message.push_str(&format!("db_name = \"{}\"\n", primary_database)); - error_message.push_str("additional_databases = ["); + for (table_name, database) in &invalid_tables { + error_message.push_str(&format!( + " • Table '{}' specifies database '{}'\n", + table_name, database + )); + } - let mut all_databases: Vec = additional_databases.to_vec(); - for (_, db) in &invalid_tables { - if !all_databases.contains(db) { - all_databases.push(db.clone()); + error_message.push_str( + "\nTo fix this, add the missing database(s) to your moose.config.toml:\n\n", + ); + error_message.push_str("[clickhouse_config]\n"); + error_message.push_str(&format!("db_name = \"{}\"\n", primary_database)); + error_message.push_str("additional_databases = ["); + + let mut all_databases: Vec = additional_databases.to_vec(); + for (_, db) in &invalid_tables { + if !all_databases.contains(db) { + all_databases.push(db.clone()); + } } + all_databases.sort(); + + let db_list = all_databases + .iter() + .map(|db| format!("\"{}\"", db)) + .collect::>() + .join(", "); + error_message.push_str(&db_list); + error_message.push_str("]\n"); } - all_databases.sort(); - let db_list = all_databases - .iter() - .map(|db| format!("\"{}\"", db)) - .collect::>() - .join(", "); - error_message.push_str(&db_list); - error_message.push_str("]\n"); + // Report cluster errors + if !invalid_clusters.is_empty() { + if !invalid_tables.is_empty() { + error_message.push('\n'); + } + + error_message.push_str( + "One or more tables specify clusters that are not configured in moose.config.toml:\n\n", + ); + + for 
(table_name, cluster) in &invalid_clusters { + error_message.push_str(&format!( + " • Table '{}' specifies cluster '{}'\n", + table_name, cluster + )); + } + + error_message.push_str( + "\nTo fix this, add the missing cluster(s) to your moose.config.toml:\n\n", + ); + + // Only show the missing clusters in the error message, not the already configured ones + let mut missing_clusters: Vec = invalid_clusters + .iter() + .map(|(_, cluster)| cluster.clone()) + .collect(); + missing_clusters.sort(); + missing_clusters.dedup(); + + for cluster in &missing_clusters { + error_message.push_str("[[clickhouse_config.clusters]]\n"); + error_message.push_str(&format!("name = \"{}\"\n\n", cluster)); + } + } anyhow::bail!(error_message); } @@ -341,11 +447,16 @@ async fn execute_operations( migration_plan.operations.len() ); - // Validate that all table databases are configured - validate_table_databases( + // Validate that all table databases and clusters are configured + log::info!( + "Validating operations against config. Clusters: {:?}", + project.clickhouse_config.clusters + ); + validate_table_databases_and_clusters( &migration_plan.operations, &project.clickhouse_config.db_name, &project.clickhouse_config.additional_databases, + &project.clickhouse_config.clusters, )?; let is_dev = !project.is_production; @@ -659,6 +770,7 @@ mod tests { engine_params_hash: None, table_settings: None, table_ttl_setting: None, + cluster_name: None, } } @@ -964,7 +1076,7 @@ mod tests { }]; // Primary database matches - should pass - let result = validate_table_databases(&operations, "local", &[]); + let result = validate_table_databases_and_clusters(&operations, "local", &[], &None); assert!(result.is_ok()); // Database in additional_databases - should pass @@ -973,7 +1085,12 @@ mod tests { let operations = vec![SerializableOlapOperation::CreateTable { table: table_analytics, }]; - let result = validate_table_databases(&operations, "local", &["analytics".to_string()]); + let result = validate_table_databases_and_clusters( + &operations, + "local", + &["analytics".to_string()], + &None, + ); assert!(result.is_ok()); } @@ -985,7 +1102,7 @@ mod tests { let operations = vec![SerializableOlapOperation::CreateTable { table }]; // Database not in config - should fail - let result = validate_table_databases(&operations, "local", &[]); + let result = validate_table_databases_and_clusters(&operations, "local", &[], &None); assert!(result.is_err()); let err = result.unwrap_err().to_string(); assert!(err.contains("unconfigured_db")); @@ -999,6 +1116,7 @@ mod tests { SerializableOlapOperation::DropTable { table: "test".to_string(), database: Some("bad_db".to_string()), + cluster_name: None, }, SerializableOlapOperation::AddTableColumn { table: "test".to_string(), @@ -1015,6 +1133,7 @@ mod tests { }, after_column: None, database: Some("bad_db".to_string()), + cluster_name: None, }, SerializableOlapOperation::ModifyTableColumn { table: "test".to_string(), @@ -1041,10 +1160,11 @@ mod tests { ttl: None, }, database: Some("another_bad_db".to_string()), + cluster_name: None, }, ]; - let result = validate_table_databases(&operations, "local", &[]); + let result = validate_table_databases_and_clusters(&operations, "local", &[], &None); assert!(result.is_err()); let err = result.unwrap_err().to_string(); // Should report both bad databases @@ -1060,7 +1180,108 @@ mod tests { description: "test".to_string(), }]; - let result = validate_table_databases(&operations, "local", &[]); + let result = 
validate_table_databases_and_clusters(&operations, "local", &[], &None); assert!(result.is_ok()); } + + #[test] + fn test_validate_cluster_valid() { + let mut table = create_test_table("users"); + table.cluster_name = Some("my_cluster".to_string()); + + let operations = vec![SerializableOlapOperation::CreateTable { + table: table.clone(), + }]; + + let clusters = Some(vec![ClusterConfig { + name: "my_cluster".to_string(), + }]); + + // Cluster is configured - should pass + let result = validate_table_databases_and_clusters(&operations, "local", &[], &clusters); + assert!(result.is_ok()); + } + + #[test] + fn test_validate_cluster_invalid() { + let mut table = create_test_table("users"); + table.cluster_name = Some("unconfigured_cluster".to_string()); + + let operations = vec![SerializableOlapOperation::CreateTable { table }]; + + let clusters = Some(vec![ + ClusterConfig { + name: "my_cluster".to_string(), + }, + ClusterConfig { + name: "another_cluster".to_string(), + }, + ]); + + // Cluster not in config - should fail and show available clusters + let result = validate_table_databases_and_clusters(&operations, "local", &[], &clusters); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("unconfigured_cluster"), + "Error should mention the invalid cluster" + ); + assert!( + err.contains("moose.config.toml"), + "Error should reference config file" + ); + } + + #[test] + fn test_validate_cluster_no_clusters_configured() { + let mut table = create_test_table("users"); + table.cluster_name = Some("some_cluster".to_string()); + + let operations = vec![SerializableOlapOperation::CreateTable { table }]; + + // No clusters configured but table references one - should fail + let result = validate_table_databases_and_clusters(&operations, "local", &[], &None); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("some_cluster")); + } + + #[test] + fn test_validate_both_database_and_cluster_invalid() { + let mut table = create_test_table("users"); + table.database = Some("bad_db".to_string()); + table.cluster_name = Some("bad_cluster".to_string()); + + let operations = vec![SerializableOlapOperation::CreateTable { table }]; + + let clusters = Some(vec![ClusterConfig { + name: "good_cluster".to_string(), + }]); + + // Both database and cluster invalid - should report both errors + let result = validate_table_databases_and_clusters(&operations, "local", &[], &clusters); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("bad_db")); + assert!(err.contains("bad_cluster")); + } + + #[test] + fn test_validate_cluster_in_drop_table_operation() { + let operations = vec![SerializableOlapOperation::DropTable { + table: "users".to_string(), + database: None, + cluster_name: Some("unconfigured_cluster".to_string()), + }]; + + let clusters = Some(vec![ClusterConfig { + name: "my_cluster".to_string(), + }]); + + // DropTable with invalid cluster - should fail + let result = validate_table_databases_and_clusters(&operations, "local", &[], &clusters); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("unconfigured_cluster")); + } } diff --git a/apps/framework-cli/src/cli/routines/mod.rs b/apps/framework-cli/src/cli/routines/mod.rs index 49492b11c8..38a49280b8 100644 --- a/apps/framework-cli/src/cli/routines/mod.rs +++ b/apps/framework-cli/src/cli/routines/mod.rs @@ -1181,8 +1181,16 @@ pub async fn remote_gen_migration( }, ); + // 
Validate the plan before generating migration files + let plan = InfraPlan { + target_infra_map: local_infra_map.clone(), + changes, + }; + + plan_validator::validate(project, &plan)?; + let db_migration = - MigrationPlan::from_infra_plan(&changes, &project.clickhouse_config.db_name)?; + MigrationPlan::from_infra_plan(&plan.changes, &project.clickhouse_config.db_name)?; Ok(MigrationPlanWithBeforeAfter { remote_state: remote_infra_map, diff --git a/apps/framework-cli/src/framework/core/infra_reality_checker.rs b/apps/framework-cli/src/framework/core/infra_reality_checker.rs index bf665073fb..80b1f6458b 100644 --- a/apps/framework-cli/src/framework/core/infra_reality_checker.rs +++ b/apps/framework-cli/src/framework/core/infra_reality_checker.rs @@ -420,6 +420,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, } } diff --git a/apps/framework-cli/src/framework/core/infrastructure/table.rs b/apps/framework-cli/src/framework/core/infrastructure/table.rs index f27f6bbcdf..03e358cbdb 100644 --- a/apps/framework-cli/src/framework/core/infrastructure/table.rs +++ b/apps/framework-cli/src/framework/core/infrastructure/table.rs @@ -299,6 +299,9 @@ pub struct Table { /// Table-level TTL expression (without leading 'TTL') #[serde(skip_serializing_if = "Option::is_none", default)] pub table_ttl_setting: Option, + /// Optional cluster name for ON CLUSTER support in ClickHouse + #[serde(skip_serializing_if = "Option::is_none", default)] + pub cluster_name: Option, } impl Table { @@ -472,6 +475,7 @@ impl Table { .or_else(|| self.compute_non_alterable_params_hash()), table_settings: self.table_settings.clone().unwrap_or_default(), table_ttl_setting: self.table_ttl_setting.clone(), + cluster_name: self.cluster_name.clone(), metadata: MessageField::from_option(self.metadata.as_ref().map(|m| { infrastructure_map::Metadata { description: m.description.clone().unwrap_or_default(), @@ -578,6 +582,7 @@ impl Table { .collect(), database: proto.database, table_ttl_setting: proto.table_ttl_setting, + cluster_name: proto.cluster_name, } } } @@ -1643,6 +1648,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; assert_eq!(table1.id(DEFAULT_DATABASE_NAME), "local_users"); diff --git a/apps/framework-cli/src/framework/core/infrastructure_map.rs b/apps/framework-cli/src/framework/core/infrastructure_map.rs index d5be6a00a4..622b20de46 100644 --- a/apps/framework-cli/src/framework/core/infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/infrastructure_map.rs @@ -1854,6 +1854,11 @@ impl InfrastructureMap { // Detect engine change (e.g., MergeTree -> ReplacingMergeTree) let engine_changed = table.engine != target_table.engine; + // Note: We intentionally do NOT check for cluster_name changes here. + // cluster_name is a deployment directive (how to run DDL), not a schema property. + // The inframap will be updated with the new cluster_name value, and future DDL + // operations will use it, but changing cluster_name doesn't trigger operations. 
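+        // reconcile_with_reality applies the same rule: cluster_name is carried over from the
+        // infra map rather than read back from ClickHouse system tables.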
+ let order_by_change = if order_by_changed { OrderByChange { before: table.order_by.clone(), @@ -1894,6 +1899,7 @@ impl InfrastructureMap { // since ClickHouse requires the full column definition when modifying TTL // Only process changes if there are actual differences to report + // Note: cluster_name changes are intentionally excluded - they don't trigger operations if !column_changes.is_empty() || order_by_changed || partition_by_changed @@ -3017,6 +3023,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let after = Table { @@ -3072,6 +3079,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let diff = compute_table_columns_diff(&before, &after); @@ -3247,6 +3255,7 @@ mod diff_tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, } } diff --git a/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs b/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs index 2ec987f84c..0359633d6b 100644 --- a/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs @@ -256,6 +256,9 @@ struct PartialTable { /// Optional database name for multi-database support #[serde(default)] pub database: Option, + /// Optional cluster name for ON CLUSTER support + #[serde(default)] + pub cluster: Option, } /// Represents a topic definition from user code before it's converted into a complete [`Topic`]. @@ -699,6 +702,7 @@ impl PartialInfrastructureMap { indexes: partial_table.indexes.clone(), table_ttl_setting, database: partial_table.database.clone(), + cluster_name: partial_table.cluster.clone(), }; Ok((table.id(default_database), table)) }) diff --git a/apps/framework-cli/src/framework/core/plan.rs b/apps/framework-cli/src/framework/core/plan.rs index ee87431c67..76b8b3e7c8 100644 --- a/apps/framework-cli/src/framework/core/plan.rs +++ b/apps/framework-cli/src/framework/core/plan.rs @@ -172,6 +172,12 @@ pub async fn reconcile_with_reality( // that might have authentication parameters. table.engine_params_hash = infra_map_table.engine_params_hash.clone(); + // Keep the cluster_name from the infra map because it cannot be reliably detected + // from ClickHouse's system tables. The ON CLUSTER clause is only used during + // DDL execution and is not stored in the table schema. While it appears in + // system.distributed_ddl_queue, those entries are ephemeral and get cleaned up. + table.cluster_name = infra_map_table.cluster_name.clone(); + reconciled_map .tables .insert(reality_table.id(&reconciled_map.default_database), table); @@ -456,6 +462,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, } } @@ -917,4 +924,108 @@ mod tests { // but they don't directly use clickhouse_config.db_name. // The bug in ENG-1160 is specifically about default_database being hardcoded to "local". 
} + + #[tokio::test] + async fn test_reconcile_preserves_cluster_name() { + // Create a test table with a cluster name + let mut table = create_test_table("clustered_table"); + table.cluster_name = Some("test_cluster".to_string()); + + // Create mock OLAP client with the table (but cluster_name will be lost in reality) + let mut table_from_reality = table.clone(); + table_from_reality.cluster_name = None; // ClickHouse system.tables doesn't preserve this + + let mock_client = MockOlapClient { + tables: vec![table_from_reality], + }; + + // Create infrastructure map with the table including cluster_name + let mut infra_map = InfrastructureMap::default(); + infra_map + .tables + .insert(table.id(DEFAULT_DATABASE_NAME), table.clone()); + + // Create test project + let project = create_test_project(); + + let target_table_names = HashSet::new(); + // Reconcile the infrastructure map + let reconciled = + reconcile_with_reality(&project, &infra_map, &target_table_names, mock_client) + .await + .unwrap(); + + // The reconciled map should preserve cluster_name from the infra map + assert_eq!(reconciled.tables.len(), 1); + let reconciled_table = reconciled.tables.values().next().unwrap(); + assert_eq!( + reconciled_table.cluster_name, + Some("test_cluster".to_string()), + "cluster_name should be preserved from infra map" + ); + } + + #[tokio::test] + async fn test_reconcile_with_reality_mismatched_table_preserves_cluster() { + // Create a table that exists in both places but with different schemas + let mut infra_table = create_test_table("mismatched_table"); + infra_table.cluster_name = Some("production_cluster".to_string()); + + let mut reality_table = create_test_table("mismatched_table"); + // Reality table has no cluster_name (as ClickHouse doesn't preserve it) + reality_table.cluster_name = None; + // Add a column difference to make them mismatched + reality_table + .columns + .push(crate::framework::core::infrastructure::table::Column { + name: "extra_col".to_string(), + data_type: crate::framework::core::infrastructure::table::ColumnType::String, + required: true, + unique: false, + primary_key: false, + default: None, + annotations: vec![], + comment: None, + ttl: None, + }); + + // Create mock OLAP client with the reality table + let mock_client = MockOlapClient { + tables: vec![reality_table.clone()], + }; + + // Create infrastructure map with the infra table + let mut infra_map = InfrastructureMap::default(); + infra_map + .tables + .insert(infra_table.id(DEFAULT_DATABASE_NAME), infra_table.clone()); + + // Create test project + let project = create_test_project(); + + let target_table_names = HashSet::new(); + // Reconcile the infrastructure map + let reconciled = + reconcile_with_reality(&project, &infra_map, &target_table_names, mock_client) + .await + .unwrap(); + + // The reconciled map should still have the table + assert_eq!(reconciled.tables.len(), 1); + let reconciled_table = reconciled.tables.values().next().unwrap(); + + // The cluster_name should be preserved from the infra map + assert_eq!( + reconciled_table.cluster_name, + Some("production_cluster".to_string()), + "cluster_name should be preserved from infra map even when schema differs" + ); + + // But the columns should be updated from reality + assert_eq!( + reconciled_table.columns.len(), + reality_table.columns.len(), + "columns should be updated from reality" + ); + } } diff --git a/apps/framework-cli/src/framework/core/plan_validator.rs b/apps/framework-cli/src/framework/core/plan_validator.rs index 
65f4862187..7863ec27d6 100644 --- a/apps/framework-cli/src/framework/core/plan_validator.rs +++ b/apps/framework-cli/src/framework/core/plan_validator.rs @@ -10,11 +10,68 @@ pub enum ValidationError { #[error("Table validation failed: {0}")] TableValidation(String), + + #[error("Cluster validation failed: {0}")] + ClusterValidation(String), +} + +/// Validates that all tables with cluster_name reference clusters defined in the config +fn validate_cluster_references(project: &Project, plan: &InfraPlan) -> Result<(), ValidationError> { + let defined_clusters = project.clickhouse_config.clusters.as_ref(); + + // Get all cluster names from the defined clusters + let cluster_names: Vec = defined_clusters + .map(|clusters| clusters.iter().map(|c| c.name.clone()).collect()) + .unwrap_or_default(); + + // Check all tables in the target infrastructure map + for table in plan.target_infra_map.tables.values() { + if let Some(cluster_name) = &table.cluster_name { + // If table has a cluster_name, verify it's defined in the config + if cluster_names.is_empty() { + // No clusters defined in config but table references one + return Err(ValidationError::ClusterValidation(format!( + "Table '{}' references cluster '{}', but no clusters are defined in moose.config.toml.\n\ + \n\ + To fix this, add the cluster definition to your config:\n\ + \n\ + [[clickhouse_config.clusters]]\n\ + name = \"{}\"\n", + table.name, cluster_name, cluster_name + ))); + } else if !cluster_names.contains(cluster_name) { + // Table references a cluster that's not defined + return Err(ValidationError::ClusterValidation(format!( + "Table '{}' references cluster '{}', which is not defined in moose.config.toml.\n\ + \n\ + Available clusters: {}\n\ + \n\ + To fix this, either:\n\ + 1. Add the cluster to your config:\n\ + [[clickhouse_config.clusters]]\n\ + name = \"{}\"\n\ + \n\ + 2. Or change the table to use an existing cluster: {}\n", + table.name, + cluster_name, + cluster_names.join(", "), + cluster_name, + cluster_names.join(", ") + ))); + } + // Cluster is defined, continue validation + } + } + + Ok(()) } pub fn validate(project: &Project, plan: &InfraPlan) -> Result<(), ValidationError> { stream::validate_changes(project, &plan.changes.streaming_engine_changes)?; + // Validate cluster references + validate_cluster_references(project, plan)?; + // Check for validation errors in OLAP changes for change in &plan.changes.olap_changes { if let OlapChange::Table(TableChange::ValidationError { message, .. 
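The validator above boils down to a simple membership rule. A minimal, standalone sketch of that rule, using simplified stand-in types rather than the crate's real `Project`/`InfraPlan`/`ValidationError`:

```rust
// Sketch only: mirrors the checks in validate_cluster_references with plain types.
fn check_cluster_reference(
    table: &str,
    cluster: Option<&str>,
    defined_clusters: &[String],
) -> Result<(), String> {
    match cluster {
        // Tables without a cluster_name never need validation.
        None => Ok(()),
        Some(c) if defined_clusters.iter().any(|d| d == c) => Ok(()),
        Some(c) if defined_clusters.is_empty() => Err(format!(
            "Table '{table}' references cluster '{c}', but no clusters are defined in moose.config.toml"
        )),
        Some(c) => Err(format!(
            "Table '{table}' references cluster '{c}', which is not defined. Available clusters: {}",
            defined_clusters.join(", ")
        )),
    }
}

fn main() {
    let defined = vec!["cluster_a".to_string(), "cluster_b".to_string()];
    assert!(check_cluster_reference("events", Some("cluster_a"), &defined).is_ok());
    assert!(check_cluster_reference("events", None, &[]).is_ok());
    assert!(check_cluster_reference("events", Some("cluster_c"), &defined).is_err());
}
```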
}) = change { @@ -24,3 +81,277 @@ pub fn validate(project: &Project, plan: &InfraPlan) -> Result<(), ValidationErr Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::framework::core::infrastructure::table::{Column, ColumnType, OrderBy, Table}; + use crate::framework::core::infrastructure_map::{ + InfrastructureMap, PrimitiveSignature, PrimitiveTypes, + }; + use crate::framework::core::partial_infrastructure_map::LifeCycle; + use crate::framework::core::plan::InfraPlan; + use crate::framework::versions::Version; + use crate::infrastructure::olap::clickhouse::config::{ClickHouseConfig, ClusterConfig}; + use crate::project::{Project, ProjectFeatures}; + use std::collections::HashMap; + use std::path::PathBuf; + + fn create_test_project(clusters: Option>) -> Project { + Project { + language: crate::framework::languages::SupportedLanguages::Typescript, + redpanda_config: crate::infrastructure::stream::kafka::models::KafkaConfig::default(), + clickhouse_config: ClickHouseConfig { + db_name: "local".to_string(), + user: "default".to_string(), + password: "".to_string(), + use_ssl: false, + host: "localhost".to_string(), + host_port: 18123, + native_port: 9000, + host_data_path: None, + additional_databases: vec![], + clusters, + }, + http_server_config: crate::cli::local_webserver::LocalWebserverConfig::default(), + redis_config: crate::infrastructure::redis::redis_client::RedisConfig::default(), + git_config: crate::utilities::git::GitConfig::default(), + temporal_config: + crate::infrastructure::orchestration::temporal::TemporalConfig::default(), + state_config: crate::project::StateConfig::default(), + migration_config: crate::project::MigrationConfig::default(), + language_project_config: crate::project::LanguageProjectConfig::default(), + project_location: PathBuf::from("/test"), + is_production: false, + supported_old_versions: HashMap::new(), + jwt: None, + authentication: crate::project::AuthenticationConfig::default(), + features: ProjectFeatures::default(), + load_infra: None, + typescript_config: crate::project::TypescriptConfig::default(), + source_dir: crate::project::default_source_dir(), + } + } + + fn create_test_table(name: &str, cluster_name: Option) -> Table { + Table { + name: name.to_string(), + columns: vec![Column { + name: "id".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: true, + default: None, + annotations: vec![], + comment: None, + ttl: None, + }], + order_by: OrderBy::Fields(vec!["id".to_string()]), + partition_by: None, + sample_by: None, + engine: None, + version: Some(Version::from_string("1.0.0".to_string())), + source_primitive: PrimitiveSignature { + name: name.to_string(), + primitive_type: PrimitiveTypes::DataModel, + }, + metadata: None, + life_cycle: LifeCycle::FullyManaged, + engine_params_hash: None, + table_settings: None, + indexes: vec![], + database: None, + table_ttl_setting: None, + cluster_name, + } + } + + fn create_test_plan(tables: Vec) -> InfraPlan { + let mut table_map = HashMap::new(); + for table in tables { + table_map.insert(format!("local_{}", table.name), table); + } + + InfraPlan { + target_infra_map: InfrastructureMap { + default_database: "local".to_string(), + tables: table_map, + topics: HashMap::new(), + api_endpoints: HashMap::new(), + views: HashMap::new(), + topic_to_table_sync_processes: HashMap::new(), + topic_to_topic_sync_processes: HashMap::new(), + function_processes: HashMap::new(), + block_db_processes: 
crate::framework::core::infrastructure::olap_process::OlapProcess {}, + consumption_api_web_server: crate::framework::core::infrastructure::consumption_webserver::ConsumptionApiWebServer {}, + orchestration_workers: HashMap::new(), + sql_resources: HashMap::new(), + workflows: HashMap::new(), + web_apps: HashMap::new(), + }, + changes: Default::default(), + } + } + + #[test] + fn test_validate_no_clusters_defined_but_table_references_one() { + let project = create_test_project(None); + let table = create_test_table("test_table", Some("test_cluster".to_string())); + let plan = create_test_plan(vec![table]); + + let result = validate(&project, &plan); + + assert!(result.is_err()); + match result { + Err(ValidationError::ClusterValidation(msg)) => { + assert!(msg.contains("test_table")); + assert!(msg.contains("test_cluster")); + assert!(msg.contains("no clusters are defined")); + } + _ => panic!("Expected ClusterValidation error"), + } + } + + #[test] + fn test_validate_table_references_undefined_cluster() { + let project = create_test_project(Some(vec![ + ClusterConfig { + name: "cluster_a".to_string(), + }, + ClusterConfig { + name: "cluster_b".to_string(), + }, + ])); + let table = create_test_table("test_table", Some("cluster_c".to_string())); + let plan = create_test_plan(vec![table]); + + let result = validate(&project, &plan); + + assert!(result.is_err()); + match result { + Err(ValidationError::ClusterValidation(msg)) => { + assert!(msg.contains("test_table")); + assert!(msg.contains("cluster_c")); + assert!(msg.contains("cluster_a")); + assert!(msg.contains("cluster_b")); + } + _ => panic!("Expected ClusterValidation error"), + } + } + + #[test] + fn test_validate_table_references_valid_cluster() { + let project = create_test_project(Some(vec![ClusterConfig { + name: "test_cluster".to_string(), + }])); + let table = create_test_table("test_table", Some("test_cluster".to_string())); + let plan = create_test_plan(vec![table]); + + let result = validate(&project, &plan); + + assert!(result.is_ok()); + } + + #[test] + fn test_validate_table_with_no_cluster_is_allowed() { + let project = create_test_project(Some(vec![ClusterConfig { + name: "test_cluster".to_string(), + }])); + let table = create_test_table("test_table", None); + let plan = create_test_plan(vec![table]); + + let result = validate(&project, &plan); + + assert!(result.is_ok()); + } + + #[test] + fn test_validate_multiple_tables_different_clusters() { + let project = create_test_project(Some(vec![ + ClusterConfig { + name: "cluster_a".to_string(), + }, + ClusterConfig { + name: "cluster_b".to_string(), + }, + ])); + let table1 = create_test_table("table1", Some("cluster_a".to_string())); + let table2 = create_test_table("table2", Some("cluster_b".to_string())); + let plan = create_test_plan(vec![table1, table2]); + + let result = validate(&project, &plan); + + assert!(result.is_ok()); + } + + #[test] + fn test_validate_empty_clusters_list() { + let project = create_test_project(Some(vec![])); + let table = create_test_table("test_table", Some("test_cluster".to_string())); + let plan = create_test_plan(vec![table]); + + let result = validate(&project, &plan); + + assert!(result.is_err()); + match result { + Err(ValidationError::ClusterValidation(msg)) => { + assert!(msg.contains("test_table")); + assert!(msg.contains("test_cluster")); + } + _ => panic!("Expected ClusterValidation error"), + } + } + + // Helper to create a table with a specific engine + fn create_table_with_engine( + name: &str, + cluster_name: Option, + 
engine: Option, + ) -> Table { + Table { + name: name.to_string(), + columns: vec![Column { + name: "id".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: true, + default: None, + annotations: vec![], + comment: None, + ttl: None, + }], + order_by: OrderBy::Fields(vec!["id".to_string()]), + partition_by: None, + sample_by: None, + engine, + version: Some(Version::from_string("1.0.0".to_string())), + source_primitive: PrimitiveSignature { + name: name.to_string(), + primitive_type: PrimitiveTypes::DataModel, + }, + metadata: None, + life_cycle: LifeCycle::FullyManaged, + engine_params_hash: None, + table_settings: None, + indexes: vec![], + database: None, + table_ttl_setting: None, + cluster_name, + } + } + + #[test] + fn test_non_replicated_engine_without_cluster_succeeds() { + use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; + + let project = create_test_project(None); + let table = create_table_with_engine("test_table", None, Some(ClickhouseEngine::MergeTree)); + let plan = create_test_plan(vec![table]); + + let result = validate(&project, &plan); + + assert!(result.is_ok()); + } +} diff --git a/apps/framework-cli/src/framework/data_model/model.rs b/apps/framework-cli/src/framework/data_model/model.rs index f8918853d1..c4b2ac1435 100644 --- a/apps/framework-cli/src/framework/data_model/model.rs +++ b/apps/framework-cli/src/framework/data_model/model.rs @@ -70,6 +70,7 @@ impl DataModel { indexes: vec![], database: None, // Database defaults to global config table_ttl_setting: None, + cluster_name: None, }; // Compute hash that includes both engine params and database diff --git a/apps/framework-cli/src/framework/python/generate.rs b/apps/framework-cli/src/framework/python/generate.rs index b97842eac8..4cdc3f653f 100644 --- a/apps/framework-cli/src/framework/python/generate.rs +++ b/apps/framework-cli/src/framework/python/generate.rs @@ -1065,6 +1065,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_python(&tables, None); @@ -1158,6 +1159,7 @@ foo_table = OlapTable[Foo]("Foo", OlapConfig( indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_python(&tables, None); @@ -1276,6 +1278,7 @@ nested_array_table = OlapTable[NestedArray]("NestedArray", OlapConfig( indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_python(&tables, None); @@ -1354,6 +1357,7 @@ user_table = OlapTable[User]("User", OlapConfig( indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_python(&tables, None); @@ -1411,6 +1415,7 @@ user_table = OlapTable[User]("User", OlapConfig( indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_python(&tables, None); @@ -1479,6 +1484,7 @@ user_table = OlapTable[User]("User", OlapConfig( indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_python(&tables, None); @@ -1550,6 +1556,7 @@ user_table = OlapTable[User]("User", OlapConfig( indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_python(&tables, None); @@ -1632,6 +1639,7 @@ user_table = OlapTable[User]("User", OlapConfig( indexes: vec![], database: None, table_ttl_setting: Some("timestamp + INTERVAL 90 DAY DELETE".to_string()), + cluster_name: None, }]; let result = 
tables_to_python(&tables, None); @@ -1696,6 +1704,7 @@ user_table = OlapTable[User]("User", OlapConfig( ], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_python(&tables, None); @@ -1760,6 +1769,7 @@ user_table = OlapTable[User]("User", OlapConfig( table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_python(&tables, None); @@ -1813,6 +1823,7 @@ user_table = OlapTable[User]("User", OlapConfig( indexes: vec![], database: Some("analytics_db".to_string()), table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_python(&tables, None); diff --git a/apps/framework-cli/src/framework/typescript/generate.rs b/apps/framework-cli/src/framework/typescript/generate.rs index 7e9e556260..3a89d3645f 100644 --- a/apps/framework-cli/src/framework/typescript/generate.rs +++ b/apps/framework-cli/src/framework/typescript/generate.rs @@ -1002,6 +1002,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_typescript(&tables, None); @@ -1083,6 +1084,7 @@ export const UserTable = new OlapTable("User", { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_typescript(&tables, None); @@ -1133,6 +1135,7 @@ export const UserTable = new OlapTable("User", { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_typescript(&tables, None); @@ -1202,6 +1205,7 @@ export const UserTable = new OlapTable("User", { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_typescript(&tables, None); @@ -1246,6 +1250,7 @@ export const UserTable = new OlapTable("User", { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_typescript(&tables, None); @@ -1322,6 +1327,7 @@ export const UserTable = new OlapTable("User", { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_typescript(&tables, None); @@ -1384,6 +1390,7 @@ export const UserTable = new OlapTable("User", { ], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_typescript(&tables, None); @@ -1454,6 +1461,7 @@ export const UserTable = new OlapTable("User", { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_typescript(&tables, None); @@ -1531,6 +1539,7 @@ export const TaskTable = new OlapTable("Task", { indexes: vec![], database: None, table_ttl_setting: Some("timestamp + INTERVAL 90 DAY DELETE".to_string()), + cluster_name: None, }]; let result = tables_to_typescript(&tables, None); @@ -1597,6 +1606,7 @@ export const TaskTable = new OlapTable("Task", { table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_typescript(&tables, None); @@ -1644,6 +1654,7 @@ export const TaskTable = new OlapTable("Task", { indexes: vec![], database: Some("analytics_db".to_string()), table_ttl_setting: None, + cluster_name: None, }]; let result = tables_to_typescript(&tables, None); diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs index da040b6700..4e27e2e2b3 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs +++ 
b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs @@ -472,6 +472,12 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { })]; } + // Note: cluster_name changes are intentionally NOT treated as requiring drop+create. + // cluster_name is a deployment directive (how to run DDL) rather than a schema property + // (what the table looks like). When cluster_name changes, future DDL operations will + // automatically use the new cluster_name via the ON CLUSTER clause, but the table + // itself doesn't need to be recreated. + // Check if PARTITION BY has changed let partition_by_changed = partition_by_change.before != partition_by_change.after; if partition_by_changed { @@ -687,6 +693,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, } } @@ -1493,6 +1500,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; assert!(ClickHouseTableDiffStrategy::is_s3queue_table(&s3_table)); @@ -1621,4 +1629,177 @@ mod tests { error_msg.contains("INSERT INTO target_db.my_table SELECT * FROM source_db.my_table") ); } + + #[test] + fn test_cluster_change_from_none_to_some() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Change cluster from None to Some + before.cluster_name = None; + after.cluster_name = Some("test_cluster".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // cluster_name is a deployment directive, not a schema property + // Changing it should not trigger any operations + assert_eq!(changes.len(), 0); + } + + #[test] + fn test_cluster_change_from_some_to_none() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Change cluster from Some to None + before.cluster_name = Some("test_cluster".to_string()); + after.cluster_name = None; + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // cluster_name is a deployment directive, not a schema property + // Changing it should not trigger any operations + assert_eq!(changes.len(), 0); + } + + #[test] + fn test_cluster_change_between_different_clusters() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Change cluster from one to another + before.cluster_name = Some("cluster_a".to_string()); + after.cluster_name = Some("cluster_b".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = 
PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // cluster_name is a deployment directive, not a schema property + // Changing it should not trigger any operations + assert_eq!(changes.len(), 0); + } + + #[test] + fn test_no_cluster_change_both_none() { + let strategy = ClickHouseTableDiffStrategy; + + let before = create_test_table("test", vec!["id".to_string()], false); + let after = create_test_table("test", vec!["id".to_string()], false); + + // Both None - no cluster change + assert_eq!(before.cluster_name, None); + assert_eq!(after.cluster_name, None); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Should not trigger a validation error - no changes at all + assert_eq!(changes.len(), 0); + } + + #[test] + fn test_no_cluster_change_both_same() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Both have the same cluster + before.cluster_name = Some("test_cluster".to_string()); + after.cluster_name = Some("test_cluster".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Should not trigger a validation error - no changes at all + assert_eq!(changes.len(), 0); + } } diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs index 9cf81a5c1a..a22012beb3 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs @@ -354,6 +354,7 @@ pub fn std_table_to_clickhouse_table(table: &Table) -> Result, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, /// Add a column to a table AddTableColumn { @@ -129,6 +131,8 @@ pub enum SerializableOlapOperation { after_column: Option, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, /// Drop a column from a table DropTableColumn { @@ -138,6 +142,8 @@ pub enum SerializableOlapOperation { column_name: String, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, /// Modify a column in a table ModifyTableColumn { @@ -149,6 +155,8 @@ pub enum SerializableOlapOperation { after_column: Column, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, RenameTableColumn { /// The table containing the column @@ 
-159,6 +167,8 @@ pub enum SerializableOlapOperation { after_column_name: String, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, /// Modify table settings using ALTER TABLE MODIFY SETTING ModifyTableSettings { @@ -170,6 +180,8 @@ pub enum SerializableOlapOperation { after_settings: Option>, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, /// Modify or remove table-level TTL ModifyTableTtl { @@ -178,29 +190,39 @@ pub enum SerializableOlapOperation { after: Option, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, AddTableIndex { table: String, index: TableIndex, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, DropTableIndex { table: String, index_name: String, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, ModifySampleBy { table: String, expression: String, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, RemoveSampleBy { table: String, /// The database containing the table (None means use primary database) database: Option, + /// Optional cluster name for ON CLUSTER support + cluster_name: Option, }, RawSql { /// The SQL statements to execute @@ -436,41 +458,78 @@ pub async fn execute_atomic_operation( SerializableOlapOperation::CreateTable { table } => { execute_create_table(db_name, table, client, is_dev).await?; } - SerializableOlapOperation::DropTable { table, database } => { - execute_drop_table(db_name, table, database.as_deref(), client).await?; + SerializableOlapOperation::DropTable { + table, + database, + cluster_name, + } => { + execute_drop_table( + db_name, + table, + database.as_deref(), + cluster_name.as_deref(), + client, + ) + .await?; } SerializableOlapOperation::AddTableColumn { table, column, after_column, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); - execute_add_table_column(target_db, table, column, after_column, client).await?; + execute_add_table_column( + target_db, + table, + column, + after_column, + cluster_name.as_deref(), + client, + ) + .await?; } SerializableOlapOperation::DropTableColumn { table, column_name, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); - execute_drop_table_column(target_db, table, column_name, client).await?; + execute_drop_table_column( + target_db, + table, + column_name, + cluster_name.as_deref(), + client, + ) + .await?; } SerializableOlapOperation::ModifyTableColumn { table, before_column, after_column, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); - execute_modify_table_column(target_db, table, before_column, after_column, client) - .await?; + execute_modify_table_column( + target_db, + table, + before_column, + after_column, + cluster_name.as_deref(), + client, + ) + .await?; } SerializableOlapOperation::RenameTableColumn { table, before_column_name, after_column_name, database, + cluster_name, } => 
{ let target_db = database.as_deref().unwrap_or(db_name); execute_rename_table_column( @@ -478,6 +537,7 @@ pub async fn execute_atomic_operation( table, before_column_name, after_column_name, + cluster_name.as_deref(), client, ) .await?; @@ -487,6 +547,7 @@ pub async fn execute_atomic_operation( before_settings, after_settings, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); execute_modify_table_settings( @@ -494,6 +555,7 @@ pub async fn execute_atomic_operation( table, before_settings, after_settings, + cluster_name.as_deref(), client, ) .await?; @@ -503,16 +565,24 @@ pub async fn execute_atomic_operation( before: _, after, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); // Build ALTER TABLE ... [REMOVE TTL | MODIFY TTL expr] + let cluster_clause = cluster_name + .as_ref() + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); let sql = if let Some(expr) = after { format!( - "ALTER TABLE `{}`.`{}` MODIFY TTL {}", - target_db, table, expr + "ALTER TABLE `{}`.`{}`{} MODIFY TTL {}", + target_db, table, cluster_clause, expr ) } else { - format!("ALTER TABLE `{}`.`{}` REMOVE TTL", target_db, table) + format!( + "ALTER TABLE `{}`.`{}`{} REMOVE TTL", + target_db, table, cluster_clause + ) }; run_query(&sql, client).await.map_err(|e| { ClickhouseChangesError::ClickhouseClient { @@ -525,29 +595,51 @@ pub async fn execute_atomic_operation( table, index, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); - execute_add_table_index(target_db, table, index, client).await?; + execute_add_table_index(target_db, table, index, cluster_name.as_deref(), client) + .await?; } SerializableOlapOperation::DropTableIndex { table, index_name, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); - execute_drop_table_index(target_db, table, index_name, client).await?; + execute_drop_table_index( + target_db, + table, + index_name, + cluster_name.as_deref(), + client, + ) + .await?; } SerializableOlapOperation::ModifySampleBy { table, expression, database, + cluster_name, } => { let target_db = database.as_deref().unwrap_or(db_name); - execute_modify_sample_by(target_db, table, expression, client).await?; + execute_modify_sample_by( + target_db, + table, + expression, + cluster_name.as_deref(), + client, + ) + .await?; } - SerializableOlapOperation::RemoveSampleBy { table, database } => { + SerializableOlapOperation::RemoveSampleBy { + table, + database, + cluster_name, + } => { let target_db = database.as_deref().unwrap_or(db_name); - execute_remove_sample_by(target_db, table, client).await?; + execute_remove_sample_by(target_db, table, cluster_name.as_deref(), client).await?; } SerializableOlapOperation::RawSql { sql, description } => { execute_raw_sql(sql, description, client).await?; @@ -580,6 +672,7 @@ async fn execute_add_table_index( db_name: &str, table_name: &str, index: &TableIndex, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { let args = if index.arguments.is_empty() { @@ -587,10 +680,14 @@ async fn execute_add_table_index( } else { format!("({})", index.arguments.join(", ")) }; + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); let sql = format!( - "ALTER TABLE `{}`.`{}` ADD INDEX `{}` {} TYPE {}{} GRANULARITY {}", + "ALTER TABLE `{}`.`{}`{} ADD INDEX `{}` {} TYPE {}{} GRANULARITY {}", db_name, table_name, + cluster_clause, index.name, 
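Every executor in this hunk builds the same optional clause from `cluster_name`. A self-contained sketch of that pattern and the SQL it yields (the db, table, column, and cluster names here are invented for illustration):

```rust
// Sketch of the shared ON CLUSTER injection pattern used by the execute_* helpers.
fn cluster_clause(cluster_name: Option<&str>) -> String {
    cluster_name
        .map(|c| format!(" ON CLUSTER {}", c))
        .unwrap_or_default()
}

fn main() {
    let with_cluster = format!(
        "ALTER TABLE `{}`.`{}`{} DROP COLUMN IF EXISTS `legacy_col`",
        "local",
        "events",
        cluster_clause(Some("my_cluster"))
    );
    assert_eq!(
        with_cluster,
        "ALTER TABLE `local`.`events` ON CLUSTER my_cluster DROP COLUMN IF EXISTS `legacy_col`"
    );

    let without_cluster = format!(
        "ALTER TABLE `{}`.`{}`{} DROP COLUMN IF EXISTS `legacy_col`",
        "local",
        "events",
        cluster_clause(None)
    );
    assert_eq!(
        without_cluster,
        "ALTER TABLE `local`.`events` DROP COLUMN IF EXISTS `legacy_col`"
    );
}
```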
index.expression, index.index_type, @@ -609,11 +706,15 @@ async fn execute_drop_table_index( db_name: &str, table_name: &str, index_name: &str, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); let sql = format!( - "ALTER TABLE `{}`.`{}` DROP INDEX `{}`", - db_name, table_name, index_name + "ALTER TABLE `{}`.`{}`{} DROP INDEX `{}`", + db_name, table_name, cluster_clause, index_name ); run_query(&sql, client) .await @@ -627,11 +728,15 @@ async fn execute_modify_sample_by( db_name: &str, table_name: &str, expression: &str, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); let sql = format!( - "ALTER TABLE `{}`.`{}` MODIFY SAMPLE BY {}", - db_name, table_name, expression + "ALTER TABLE `{}`.`{}`{} MODIFY SAMPLE BY {}", + db_name, table_name, cluster_clause, expression ); run_query(&sql, client) .await @@ -644,11 +749,15 @@ async fn execute_modify_sample_by( async fn execute_remove_sample_by( db_name: &str, table_name: &str, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); let sql = format!( - "ALTER TABLE `{}`.`{}` REMOVE SAMPLE BY", - db_name, table_name + "ALTER TABLE `{}`.`{}`{} REMOVE SAMPLE BY", + db_name, table_name, cluster_clause ); run_query(&sql, client) .await @@ -662,12 +771,13 @@ async fn execute_drop_table( db_name: &str, table_name: &str, table_database: Option<&str>, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { log::info!("Executing DropTable: {:?}", table_name); // Use table's database if specified, otherwise use global database let target_database = table_database.unwrap_or(db_name); - let drop_query = drop_table_query(target_database, table_name)?; + let drop_query = drop_table_query(target_database, table_name, cluster_name)?; run_query(&drop_query, client) .await .map_err(|e| ClickhouseChangesError::ClickhouseClient { @@ -687,6 +797,7 @@ async fn execute_add_table_column( table_name: &str, column: &Column, after_column: &Option, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { log::info!( @@ -705,10 +816,15 @@ async fn execute_add_table_column( .map(|d| format!(" DEFAULT {}", d)) .unwrap_or_default(); + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); + let add_column_query = format!( - "ALTER TABLE `{}`.`{}` ADD COLUMN `{}` {}{} {}", + "ALTER TABLE `{}`.`{}`{} ADD COLUMN `{}` {}{} {}", db_name, table_name, + cluster_clause, clickhouse_column.name, column_type_string, default_clause, @@ -731,6 +847,7 @@ async fn execute_drop_table_column( db_name: &str, table_name: &str, column_name: &str, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { log::info!( @@ -738,9 +855,12 @@ async fn execute_drop_table_column( table_name, column_name ); + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); let drop_column_query = format!( - "ALTER TABLE `{}`.`{}` DROP COLUMN IF EXISTS `{}`", - db_name, table_name, column_name + "ALTER TABLE `{}`.`{}`{} DROP COLUMN IF EXISTS `{}`", + db_name, 
table_name, cluster_clause, column_name ); log::debug!("Dropping column: {}", drop_column_query); run_query(&drop_column_query, client).await.map_err(|e| { @@ -763,6 +883,7 @@ async fn execute_modify_table_column( table_name: &str, before_column: &Column, after_column: &Column, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { // Check if only the comment has changed @@ -790,11 +911,26 @@ async fn execute_modify_table_column( let clickhouse_column = std_column_to_clickhouse_column(after_column.clone())?; if let Some(ref comment) = clickhouse_column.comment { - execute_modify_column_comment(db_name, table_name, after_column, comment, client) - .await?; + execute_modify_column_comment( + db_name, + table_name, + after_column, + comment, + cluster_name, + client, + ) + .await?; } else { // If the new comment is None, we still need to update to remove the old comment - execute_modify_column_comment(db_name, table_name, after_column, "", client).await?; + execute_modify_column_comment( + db_name, + table_name, + after_column, + "", + cluster_name, + client, + ) + .await?; } return Ok(()); } @@ -820,6 +956,7 @@ data_type_changed: {data_type_changed}, default_changed: {default_changed}, requ &clickhouse_column, removing_default, removing_ttl, + cluster_name, )?; // Execute all statements in order @@ -845,6 +982,7 @@ async fn execute_modify_column_comment( table_name: &str, column: &Column, comment: &str, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { log::info!( @@ -854,7 +992,7 @@ async fn execute_modify_column_comment( ); let modify_comment_query = - build_modify_column_comment_sql(db_name, table_name, &column.name, comment)?; + build_modify_column_comment_sql(db_name, table_name, &column.name, comment, cluster_name)?; log::debug!("Modifying column comment: {}", modify_comment_query); run_query(&modify_comment_query, client) @@ -872,25 +1010,30 @@ fn build_modify_column_sql( ch_col: &ClickHouseColumn, removing_default: bool, removing_ttl: bool, + cluster_name: Option<&str>, ) -> Result, ClickhouseChangesError> { let column_type_string = basic_field_type_to_string(&ch_col.column_type)?; + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); + let mut statements = vec![]; // Add REMOVE DEFAULT statement if needed // ClickHouse doesn't allow mixing column properties with REMOVE clauses if removing_default { statements.push(format!( - "ALTER TABLE `{}`.`{}` MODIFY COLUMN `{}` REMOVE DEFAULT", - db_name, table_name, ch_col.name + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN `{}` REMOVE DEFAULT", + db_name, table_name, cluster_clause, ch_col.name )); } // Add REMOVE TTL statement if needed if removing_ttl { statements.push(format!( - "ALTER TABLE `{}`.`{}` MODIFY COLUMN `{}` REMOVE TTL", - db_name, table_name, ch_col.name + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN `{}` REMOVE TTL", + db_name, table_name, cluster_clause, ch_col.name )); } @@ -919,9 +1062,10 @@ fn build_modify_column_sql( let main_sql = if let Some(ref comment) = ch_col.comment { let escaped_comment = comment.replace('\'', "''"); format!( - "ALTER TABLE `{}`.`{}` MODIFY COLUMN IF EXISTS `{}` {}{}{} COMMENT '{}'", + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN IF EXISTS `{}` {}{}{} COMMENT '{}'", db_name, table_name, + cluster_clause, ch_col.name, column_type_string, default_clause, @@ -930,8 +1074,14 @@ fn build_modify_column_sql( ) } else { format!( - "ALTER TABLE `{}`.`{}` MODIFY COLUMN IF EXISTS 
`{}` {}{}{}", - db_name, table_name, ch_col.name, column_type_string, default_clause, ttl_clause + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN IF EXISTS `{}` {}{}{}", + db_name, + table_name, + cluster_clause, + ch_col.name, + column_type_string, + default_clause, + ttl_clause ) }; statements.push(main_sql); @@ -944,11 +1094,15 @@ fn build_modify_column_comment_sql( table_name: &str, column_name: &str, comment: &str, + cluster_name: Option<&str>, ) -> Result { let escaped_comment = comment.replace('\'', "''"); + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); Ok(format!( - "ALTER TABLE `{}`.`{}` MODIFY COLUMN `{}` COMMENT '{}'", - db_name, table_name, column_name, escaped_comment + "ALTER TABLE `{}`.`{}`{} MODIFY COLUMN `{}` COMMENT '{}'", + db_name, table_name, cluster_clause, column_name, escaped_comment )) } @@ -958,6 +1112,7 @@ async fn execute_modify_table_settings( table_name: &str, before_settings: &Option>, after_settings: &Option>, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { use std::collections::HashMap; @@ -990,8 +1145,12 @@ async fn execute_modify_table_settings( // Execute MODIFY SETTING if there are settings to modify if !settings_to_modify.is_empty() { - let alter_settings_query = - alter_table_modify_settings_query(db_name, table_name, &settings_to_modify)?; + let alter_settings_query = alter_table_modify_settings_query( + db_name, + table_name, + &settings_to_modify, + cluster_name, + )?; log::debug!("Modifying table settings: {}", alter_settings_query); run_query(&alter_settings_query, client) @@ -1004,8 +1163,12 @@ async fn execute_modify_table_settings( // Execute RESET SETTING if there are settings to reset if !settings_to_reset.is_empty() { - let reset_settings_query = - alter_table_reset_settings_query(db_name, table_name, &settings_to_reset)?; + let reset_settings_query = alter_table_reset_settings_query( + db_name, + table_name, + &settings_to_reset, + cluster_name, + )?; log::debug!("Resetting table settings: {}", reset_settings_query); run_query(&reset_settings_query, client) @@ -1025,6 +1188,7 @@ async fn execute_rename_table_column( table_name: &str, before_column_name: &str, after_column_name: &str, + cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { log::info!( @@ -1033,8 +1197,11 @@ async fn execute_rename_table_column( before_column_name, after_column_name ); + let cluster_clause = cluster_name + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); let rename_column_query = format!( - "ALTER TABLE `{db_name}`.`{table_name}` RENAME COLUMN `{before_column_name}` TO `{after_column_name}`" + "ALTER TABLE `{db_name}`.`{table_name}`{cluster_clause} RENAME COLUMN `{before_column_name}` TO `{after_column_name}`" ); log::debug!("Renaming column: {}", rename_column_query); run_query(&rename_column_query, client).await.map_err(|e| { @@ -1719,6 +1886,7 @@ impl OlapOperations for ConfiguredDBClient { debug!("Could not extract engine from CREATE TABLE query, falling back to system.tables engine column"); engine.as_str().try_into().ok() }; + let engine_params_hash = engine_parsed .as_ref() .map(|e: &ClickhouseEngine| e.non_alterable_params_hash()); @@ -1765,6 +1933,10 @@ impl OlapOperations for ConfiguredDBClient { indexes, database: Some(database), table_ttl_setting, + // cluster_name is always None from introspection because ClickHouse doesn't store + // the ON CLUSTER clause - it's only used during DDL 
execution and isn't persisted + // in system tables. Users must manually specify cluster in their table configs. + cluster_name: None, }; debug!("Created table object: {:?}", table); @@ -2283,7 +2455,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra }; let ch_after = std_column_to_clickhouse_column(after_column).unwrap(); - let sqls = build_modify_column_sql("db", "table", &ch_after, false, false).unwrap(); + let sqls = build_modify_column_sql("db", "table", &ch_after, false, false, None).unwrap(); assert_eq!(sqls.len(), 1); assert_eq!( @@ -2315,8 +2487,8 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra }; // Use the pure SQL builder for comment-only update - let sql = - build_modify_column_comment_sql("db", "table", &after_column.name, "new").unwrap(); + let sql = build_modify_column_comment_sql("db", "table", &after_column.name, "new", None) + .unwrap(); assert_eq!( sql, "ALTER TABLE `db`.`table` MODIFY COLUMN `status` COMMENT 'new'" @@ -2344,7 +2516,8 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra let clickhouse_column = std_column_to_clickhouse_column(column).unwrap(); let sqls = - build_modify_column_sql("test_db", "users", &clickhouse_column, false, false).unwrap(); + build_modify_column_sql("test_db", "users", &clickhouse_column, false, false, None) + .unwrap(); assert_eq!(sqls.len(), 1); assert_eq!( @@ -2370,8 +2543,15 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra ttl: None, }; - let sqls = build_modify_column_sql("test_db", "test_table", &sample_hash_col, false, false) - .unwrap(); + let sqls = build_modify_column_sql( + "test_db", + "test_table", + &sample_hash_col, + false, + false, + None, + ) + .unwrap(); assert_eq!(sqls.len(), 1); // The fix ensures xxHash64(_id) is NOT quoted - if it were quoted, ClickHouse would treat it as a string literal @@ -2392,8 +2572,9 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra ttl: None, }; - let sqls = build_modify_column_sql("test_db", "test_table", &created_at_col, false, false) - .unwrap(); + let sqls = + build_modify_column_sql("test_db", "test_table", &created_at_col, false, false, None) + .unwrap(); assert_eq!(sqls.len(), 1); // The fix ensures now() is NOT quoted @@ -2415,7 +2596,8 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra }; let sqls = - build_modify_column_sql("test_db", "test_table", &status_col, false, false).unwrap(); + build_modify_column_sql("test_db", "test_table", &status_col, false, false, None) + .unwrap(); assert_eq!(sqls.len(), 1); // String literals should preserve their quotes @@ -2739,6 +2921,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra table_settings: None, indexes: vec![], database: None, + cluster_name: None, table_ttl_setting: Some("created_at + INTERVAL 30 DAY".to_string()), }; @@ -2804,6 +2987,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra table_settings: None, indexes: vec![], database: None, + cluster_name: None, table_ttl_setting: Some("created_at + INTERVAL 30 DAY".to_string()), }; diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs index a51e6c656f..fbc134ea65 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs @@ -657,6 +657,8 @@ 
pub struct ClickHouseTable { pub indexes: Vec, /// Optional TTL expression at table level (without leading 'TTL') pub table_ttl_setting: Option, + /// Optional cluster name for ON CLUSTER support + pub cluster_name: Option, } impl ClickHouseTable { diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs index 7bd972a70f..ab4fceaae6 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs @@ -121,7 +121,8 @@ pub fn create_alias_for_table( } static CREATE_TABLE_TEMPLATE: &str = r#" -CREATE TABLE IF NOT EXISTS `{{db_name}}`.`{{table_name}}` +CREATE TABLE IF NOT EXISTS `{{db_name}}`.`{{table_name}}`{{#if cluster_name}} +ON CLUSTER {{cluster_name}}{{/if}} ( {{#each fields}} `{{field_name}}` {{{field_type}}} {{field_nullable}}{{#if field_default}} DEFAULT {{{field_default}}}{{/if}}{{#if field_comment}} COMMENT '{{{field_comment}}}'{{/if}}{{#if field_ttl}} TTL {{{field_ttl}}}{{/if}}{{#unless @last}}, {{/unless}}{{/each}}{{#if has_indexes}}, {{#each indexes}}{{this}}{{#unless @last}}, {{/unless}}{{/each}}{{/if}} @@ -2011,12 +2012,14 @@ fn build_summing_merge_tree_ddl(columns: &Option>) -> String { /// Build replication parameters for replicated engines /// /// When keeper_path and replica_name are None: -/// - In dev mode: Injects default parameters for local development using a static table name hash -/// - In production: Returns empty parameters to let ClickHouse use automatic configuration -/// (ClickHouse Cloud or server-configured defaults) +/// - Dev without cluster: Injects table_name-based paths (ON CLUSTER absent, {uuid} won't work) +/// - Dev with cluster: Returns empty params (ON CLUSTER present, ClickHouse uses {uuid}) +/// - Prod with cluster: Returns empty params (ON CLUSTER present, ClickHouse uses {uuid}) +/// - Prod without cluster: Returns empty params (ClickHouse Cloud handles defaults) fn build_replication_params( keeper_path: &Option, replica_name: &Option, + cluster_name: &Option, engine_name: &str, table_name: &str, is_dev: bool, @@ -2026,18 +2029,20 @@ fn build_replication_params( Ok(vec![format!("'{}'", path), format!("'{}'", name)]) } (None, None) => { - if is_dev { - // In dev mode, inject default parameters for local ClickHouse - // Use table name to ensure unique paths per table, avoiding conflicts + // The {uuid} macro only works with ON CLUSTER queries + // Only dev without cluster needs explicit params + if is_dev && cluster_name.is_none() { + // Dev mode without cluster: inject table_name-based paths // {shard}, {replica}, and {database} macros are configured in docker-compose - // Note: {uuid} macro only works with ON CLUSTER queries, so we use table name instead Ok(vec![ format!("'/clickhouse/tables/{{database}}/{{shard}}/{}'", table_name), "'{replica}'".to_string(), ]) } else { - // In production, return empty parameters - let ClickHouse handle defaults - // This works for ClickHouse Cloud and properly configured servers + // All other cases: return empty parameters + // - Dev with cluster: ON CLUSTER present → ClickHouse uses {uuid} + // - Prod with cluster: ON CLUSTER present → ClickHouse uses {uuid} + // - Prod without cluster: ClickHouse Cloud handles defaults Ok(vec![]) } } @@ -2054,12 +2059,14 @@ fn build_replication_params( fn build_replicated_merge_tree_ddl( keeper_path: &Option, replica_name: &Option, + cluster_name: &Option, table_name: &str, is_dev: bool, ) -> Result { let 
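The decision described in the comments above (explicit keeper args win; otherwise only dev-without-cluster injects table-name based paths) can be summarized in a few lines. This is a simplified sketch, not the crate's function, and it omits the error the real code returns when only one of keeper_path/replica_name is set:

```rust
// Simplified decision table for replication parameters (sketch, not the real impl).
fn replication_params_sketch(
    keeper_path: Option<&str>,
    replica_name: Option<&str>,
    cluster_name: Option<&str>,
    table_name: &str,
    is_dev: bool,
) -> Vec<String> {
    match (keeper_path, replica_name) {
        // Explicit keeper args always win, with or without a cluster.
        (Some(path), Some(replica)) => vec![format!("'{}'", path), format!("'{}'", replica)],
        // Dev without a cluster: inject table-name based paths, since the {uuid}
        // macro only resolves for ON CLUSTER DDL.
        (None, None) if is_dev && cluster_name.is_none() => vec![
            format!("'/clickhouse/tables/{{database}}/{{shard}}/{}'", table_name),
            "'{replica}'".to_string(),
        ],
        // Everything else (any mode with a cluster, or prod without one): no explicit
        // params, letting ClickHouse fall back to {uuid} paths or Cloud defaults.
        _ => vec![],
    }
}

fn main() {
    assert_eq!(replication_params_sketch(None, None, None, "events", true).len(), 2);
    assert!(replication_params_sketch(None, None, Some("my_cluster"), "events", true).is_empty());
    assert!(replication_params_sketch(None, None, None, "events", false).is_empty());
}
```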
params = build_replication_params( keeper_path, replica_name, + cluster_name, "ReplicatedMergeTree", table_name, is_dev, @@ -2068,9 +2075,11 @@ fn build_replicated_merge_tree_ddl( } /// Generate DDL for ReplicatedReplacingMergeTree engine +#[allow(clippy::too_many_arguments)] fn build_replicated_replacing_merge_tree_ddl( keeper_path: &Option, replica_name: &Option, + cluster_name: &Option, ver: &Option, is_deleted: &Option, order_by_empty: bool, @@ -2093,6 +2102,7 @@ fn build_replicated_replacing_merge_tree_ddl( let mut params = build_replication_params( keeper_path, replica_name, + cluster_name, "ReplicatedReplacingMergeTree", table_name, is_dev, @@ -2115,12 +2125,14 @@ fn build_replicated_replacing_merge_tree_ddl( fn build_replicated_aggregating_merge_tree_ddl( keeper_path: &Option, replica_name: &Option, + cluster_name: &Option, table_name: &str, is_dev: bool, ) -> Result { let params = build_replication_params( keeper_path, replica_name, + cluster_name, "ReplicatedAggregatingMergeTree", table_name, is_dev, @@ -2135,6 +2147,7 @@ fn build_replicated_aggregating_merge_tree_ddl( fn build_replicated_summing_merge_tree_ddl( keeper_path: &Option, replica_name: &Option, + cluster_name: &Option, columns: &Option>, table_name: &str, is_dev: bool, @@ -2142,6 +2155,7 @@ fn build_replicated_summing_merge_tree_ddl( let mut params = build_replication_params( keeper_path, replica_name, + cluster_name, "ReplicatedSummingMergeTree", table_name, is_dev, @@ -2181,7 +2195,13 @@ pub fn create_table_query( ClickhouseEngine::ReplicatedMergeTree { keeper_path, replica_name, - } => build_replicated_merge_tree_ddl(keeper_path, replica_name, &table.name, is_dev)?, + } => build_replicated_merge_tree_ddl( + keeper_path, + replica_name, + &table.cluster_name, + &table.name, + is_dev, + )?, ClickhouseEngine::ReplicatedReplacingMergeTree { keeper_path, replica_name, @@ -2190,6 +2210,7 @@ pub fn create_table_query( } => build_replicated_replacing_merge_tree_ddl( keeper_path, replica_name, + &table.cluster_name, ver, is_deleted, table.order_by.is_empty(), @@ -2202,6 +2223,7 @@ pub fn create_table_query( } => build_replicated_aggregating_merge_tree_ddl( keeper_path, replica_name, + &table.cluster_name, &table.name, is_dev, )?, @@ -2212,6 +2234,7 @@ pub fn create_table_query( } => build_replicated_summing_merge_tree_ddl( keeper_path, replica_name, + &table.cluster_name, columns, &table.name, is_dev, @@ -2409,6 +2432,7 @@ pub fn create_table_query( let template_context = json!({ "db_name": db_name, "table_name": table.name, + "cluster_name": table.cluster_name.as_deref(), "fields": builds_field_context(&table.columns)?, "has_fields": !table.columns.is_empty(), "has_indexes": has_indexes, @@ -2439,28 +2463,33 @@ pub fn create_table_query( } pub static DROP_TABLE_TEMPLATE: &str = r#" -DROP TABLE IF EXISTS `{{db_name}}`.`{{table_name}}`; +DROP TABLE IF EXISTS `{{db_name}}`.`{{table_name}}`{{#if cluster_name}} ON CLUSTER {{cluster_name}} SYNC{{/if}}; "#; -pub fn drop_table_query(db_name: &str, table_name: &str) -> Result { +pub fn drop_table_query( + db_name: &str, + table_name: &str, + cluster_name: Option<&str>, +) -> Result { let mut reg = Handlebars::new(); reg.register_escape_fn(no_escape); let context = json!({ "db_name": db_name, "table_name": table_name, + "cluster_name": cluster_name, }); Ok(reg.render_template(DROP_TABLE_TEMPLATE, &context)?) 
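A hypothetical use of the updated `drop_table_query`, assuming a `local` database, an `events` table, and a cluster named `my_cluster`: with a cluster the rendered statement gains `ON CLUSTER ... SYNC`, without one it stays a plain `DROP TABLE`.

```rust
// Would-be test body inside this crate (db/table/cluster names above are made up);
// exact surrounding whitespace comes from the Handlebars template.
let clustered = drop_table_query("local", "events", Some("my_cluster")).unwrap();
assert!(clustered.contains("DROP TABLE IF EXISTS `local`.`events` ON CLUSTER my_cluster SYNC"));

let plain = drop_table_query("local", "events", None).unwrap();
assert!(plain.contains("DROP TABLE IF EXISTS `local`.`events`"));
assert!(!plain.contains("ON CLUSTER"));
```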
} pub static ALTER_TABLE_MODIFY_SETTINGS_TEMPLATE: &str = r#" -ALTER TABLE `{{db_name}}`.`{{table_name}}` +ALTER TABLE `{{db_name}}`.`{{table_name}}`{{#if cluster_name}} ON CLUSTER {{cluster_name}}{{/if}} MODIFY SETTING {{settings}}; "#; pub static ALTER_TABLE_RESET_SETTINGS_TEMPLATE: &str = r#" -ALTER TABLE `{{db_name}}`.`{{table_name}}` +ALTER TABLE `{{db_name}}`.`{{table_name}}`{{#if cluster_name}} ON CLUSTER {{cluster_name}}{{/if}} RESET SETTING {{settings}}; "#; @@ -2469,6 +2498,7 @@ pub fn alter_table_modify_settings_query( db_name: &str, table_name: &str, settings: &std::collections::HashMap, + cluster_name: Option<&str>, ) -> Result { if settings.is_empty() { return Err(ClickhouseError::InvalidParameters { @@ -2495,6 +2525,7 @@ pub fn alter_table_modify_settings_query( "db_name": db_name, "table_name": table_name, "settings": settings_str, + "cluster_name": cluster_name, }); Ok(reg.render_template(ALTER_TABLE_MODIFY_SETTINGS_TEMPLATE, &context)?) @@ -2505,6 +2536,7 @@ pub fn alter_table_reset_settings_query( db_name: &str, table_name: &str, setting_names: &[String], + cluster_name: Option<&str>, ) -> Result { if setting_names.is_empty() { return Err(ClickhouseError::InvalidParameters { @@ -2521,6 +2553,7 @@ pub fn alter_table_reset_settings_query( "db_name": db_name, "table_name": table_name, "settings": settings_str, + "cluster_name": cluster_name, }); Ok(reg.render_template(ALTER_TABLE_RESET_SETTINGS_TEMPLATE, &context)?) @@ -2917,6 +2950,7 @@ mod tests { table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -2954,6 +2988,7 @@ PRIMARY KEY (`id`) table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -2990,6 +3025,7 @@ ENGINE = MergeTree table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3049,6 +3085,7 @@ ENGINE = MergeTree table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3089,6 +3126,7 @@ ENGINE = MergeTree table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3128,6 +3166,7 @@ ORDER BY (`id`) "#; table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }; let result = create_table_query("test_db", table, false); @@ -3174,6 +3213,7 @@ ORDER BY (`id`) "#; table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3236,6 +3276,7 @@ ORDER BY (`id`) "#; table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3277,6 +3318,7 @@ ORDER BY (`id`) "#; table_settings: None, table_ttl_setting: None, indexes: vec![], + cluster_name: None, }; let result = create_table_query("test_db", table, false); @@ -3432,6 +3474,7 @@ ORDER BY (`id`) "#; table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3497,6 +3540,7 @@ ORDER BY (`id`) "#; table_settings: Some(settings), indexes: vec![], table_ttl_setting: None, + cluster_name: None, }; let query = 
create_table_query("test_db", table, false).unwrap(); @@ -3969,6 +4013,7 @@ SETTINGS keeper_path = '/clickhouse/s3queue/test_table', mode = 'unordered', s3q table_settings: None, indexes: vec![], table_ttl_setting: None, + cluster_name: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -4485,4 +4530,310 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; _ => panic!("Expected ReplacingMergeTree"), } } + + #[test] + fn test_create_table_with_cluster_includes_on_cluster() { + let table = ClickHouseTable { + version: Some(Version::from_string("1".to_string())), + name: "test_table".to_string(), + columns: vec![ClickHouseColumn { + name: "id".to_string(), + column_type: ClickHouseColumnType::ClickhouseInt(ClickHouseInt::Int32), + required: true, + primary_key: true, + unique: false, + default: None, + comment: None, + ttl: None, + }], + order_by: OrderBy::Fields(vec![]), + partition_by: None, + sample_by: None, + engine: ClickhouseEngine::ReplicatedMergeTree { + keeper_path: None, + replica_name: None, + }, + table_settings: None, + indexes: vec![], + table_ttl_setting: None, + cluster_name: Some("test_cluster".to_string()), + }; + + let query = create_table_query("test_db", table, false).unwrap(); + + // Should include ON CLUSTER clause + assert!( + query.contains("ON CLUSTER test_cluster"), + "Query should contain ON CLUSTER clause" + ); + + // ON CLUSTER should come after CREATE TABLE but before column definitions + let create_idx = query.find("CREATE TABLE").unwrap(); + let on_cluster_idx = query.find("ON CLUSTER").unwrap(); + let engine_idx = query.find("ENGINE").unwrap(); + + assert!( + create_idx < on_cluster_idx && on_cluster_idx < engine_idx, + "ON CLUSTER should be between CREATE TABLE and ENGINE" + ); + } + + #[test] + fn test_create_table_without_cluster_no_on_cluster() { + let table = ClickHouseTable { + version: Some(Version::from_string("1".to_string())), + name: "test_table".to_string(), + columns: vec![ClickHouseColumn { + name: "id".to_string(), + column_type: ClickHouseColumnType::ClickhouseInt(ClickHouseInt::Int32), + required: true, + primary_key: true, + unique: false, + default: None, + comment: None, + ttl: None, + }], + order_by: OrderBy::Fields(vec![]), + partition_by: None, + sample_by: None, + engine: ClickhouseEngine::MergeTree, + table_settings: None, + indexes: vec![], + table_ttl_setting: None, + cluster_name: None, + }; + + let query = create_table_query("test_db", table, false).unwrap(); + + // Should NOT include ON CLUSTER clause + assert!( + !query.contains("ON CLUSTER"), + "Query should not contain ON CLUSTER clause when cluster_name is None" + ); + } + + #[test] + fn test_drop_table_with_cluster() { + let cluster_name = Some("test_cluster"); + let query = drop_table_query("test_db", "test_table", cluster_name).unwrap(); + + // Should include ON CLUSTER clause + assert!( + query.contains("ON CLUSTER test_cluster"), + "DROP query should contain ON CLUSTER clause" + ); + + // Should have SYNC (when using ON CLUSTER) + assert!( + query.contains("SYNC"), + "DROP query should contain SYNC with ON CLUSTER" + ); + + // Should have DROP TABLE + assert!(query.contains("DROP TABLE")); + } + + #[test] + fn test_drop_table_without_cluster() { + let cluster_name = None; + let query = drop_table_query("test_db", "test_table", cluster_name).unwrap(); + + // Should NOT include ON CLUSTER clause + assert!( + !query.contains("ON CLUSTER"), + "DROP query should not contain ON CLUSTER clause when cluster_name is None" + ); + + // 
Should NOT have SYNC (only needed with ON CLUSTER) + assert!( + !query.contains("SYNC"), + "DROP query should not contain SYNC without ON CLUSTER" + ); + + // Should still have DROP TABLE + assert!(query.contains("DROP TABLE")); + } + + #[test] + fn test_alter_table_modify_setting_with_cluster() { + use std::collections::HashMap; + + let mut settings = HashMap::new(); + settings.insert("index_granularity".to_string(), "4096".to_string()); + settings.insert("ttl_only_drop_parts".to_string(), "1".to_string()); + + let query = alter_table_modify_settings_query( + "test_db", + "test_table", + &settings, + Some("test_cluster"), + ) + .unwrap(); + + assert!( + query.contains("ON CLUSTER test_cluster"), + "MODIFY SETTING query should contain ON CLUSTER clause" + ); + assert!(query.contains("ALTER TABLE")); + assert!(query.contains("MODIFY SETTING")); + } + + #[test] + fn test_alter_table_add_column_with_cluster() { + let column = ClickHouseColumn { + name: "new_col".to_string(), + column_type: ClickHouseColumnType::String, + required: false, + primary_key: false, + unique: false, + default: None, + comment: None, + ttl: None, + }; + + let cluster_clause = Some("test_cluster") + .map(|c| format!(" ON CLUSTER {}", c)) + .unwrap_or_default(); + + let query = format!( + "ALTER TABLE `test_db`.`test_table`{} ADD COLUMN `{}` String FIRST", + cluster_clause, column.name + ); + + assert!( + query.contains("ON CLUSTER test_cluster"), + "ADD COLUMN query should contain ON CLUSTER clause" + ); + assert!(query.contains("ALTER TABLE")); + assert!(query.contains("ADD COLUMN")); + } + + #[test] + fn test_replication_params_dev_no_cluster_no_keeper_args_auto_injects() { + let result = build_replication_params( + &None, + &None, + &None, + "ReplicatedMergeTree", + "test_table", + true, // is_dev + ); + + assert!(result.is_ok()); + let params = result.unwrap(); + // Should auto-inject params in dev mode + assert_eq!(params.len(), 2); + assert!(params[0].contains("/clickhouse/tables/")); + assert!(params[1].contains("{replica}")); + } + + #[test] + fn test_replication_params_dev_with_cluster_no_keeper_args_succeeds() { + let result = build_replication_params( + &None, + &None, + &Some("test_cluster".to_string()), + "ReplicatedMergeTree", + "test_table", + true, // is_dev + ); + + assert!(result.is_ok()); + let params = result.unwrap(); + // Dev with cluster: should return empty params (let CH use {uuid} with ON CLUSTER) + assert_eq!(params.len(), 0); + } + + #[test] + fn test_replication_params_dev_no_cluster_with_keeper_args_succeeds() { + let result = build_replication_params( + &Some("/clickhouse/tables/{database}/{table}".to_string()), + &Some("{replica}".to_string()), + &None, + "ReplicatedMergeTree", + "test_table", + true, // is_dev + ); + + assert!(result.is_ok()); + let params = result.unwrap(); + assert_eq!(params.len(), 2); + assert_eq!(params[0], "'/clickhouse/tables/{database}/{table}'"); + assert_eq!(params[1], "'{replica}'"); + } + + #[test] + fn test_replication_params_prod_no_cluster_no_keeper_args_succeeds() { + let result = build_replication_params( + &None, + &None, + &None, + "ReplicatedMergeTree", + "test_table", + false, // is_dev = false (production) + ); + + assert!(result.is_ok()); + let params = result.unwrap(); + // Should return empty params for ClickHouse Cloud + assert_eq!(params.len(), 0); + } + + #[test] + fn test_replication_params_dev_with_cluster_and_keeper_args_succeeds() { + let result = build_replication_params( + &Some("/clickhouse/tables/{database}/{table}".to_string()), + 
&Some("{replica}".to_string()), + &Some("test_cluster".to_string()), + "ReplicatedMergeTree", + "test_table", + true, // is_dev + ); + + assert!(result.is_ok()); + let params = result.unwrap(); + // Should use explicit params, not auto-inject + assert_eq!(params.len(), 2); + assert_eq!(params[0], "'/clickhouse/tables/{database}/{table}'"); + assert_eq!(params[1], "'{replica}'"); + } + + #[test] + fn test_replication_params_prod_with_cluster_no_keeper_args_empty() { + let result = build_replication_params( + &None, + &None, + &Some("test_cluster".to_string()), + "ReplicatedMergeTree", + "test_table", + false, // is_dev = false (production) + ); + + assert!(result.is_ok()); + let params = result.unwrap(); + // Prod with cluster: should return empty params (let CH use {uuid} with ON CLUSTER) + assert_eq!(params.len(), 0); + } + + #[test] + fn test_replication_params_mismatched_keeper_args_fails() { + // Only keeper_path, no replica_name + let result = build_replication_params( + &Some("/clickhouse/tables/{database}/{table}".to_string()), + &None, + &Some("test_cluster".to_string()), + "ReplicatedMergeTree", + "test_table", + true, + ); + + assert!(result.is_err()); + let err = result.unwrap_err(); + match err { + ClickhouseError::InvalidParameters { message } => { + assert!(message.contains("requires both keeper_path and replica_name")); + } + _ => panic!("Expected InvalidParameters error"), + } + } } diff --git a/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs b/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs index 493ec82196..f5237e175a 100644 --- a/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs +++ b/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs @@ -181,6 +181,7 @@ impl AtomicOlapOperation { } => SerializableOlapOperation::DropTable { table: table.name.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), }, AtomicOlapOperation::AddTableColumn { table, @@ -192,6 +193,7 @@ impl AtomicOlapOperation { column: column.clone(), after_column: after_column.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), }, AtomicOlapOperation::DropTableColumn { table, @@ -201,6 +203,7 @@ impl AtomicOlapOperation { table: table.name.clone(), column_name: column_name.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), }, AtomicOlapOperation::ModifyTableColumn { table, @@ -212,6 +215,7 @@ impl AtomicOlapOperation { before_column: before_column.clone(), after_column: after_column.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), }, AtomicOlapOperation::ModifyTableSettings { table, @@ -223,6 +227,7 @@ impl AtomicOlapOperation { before_settings: before_settings.clone(), after_settings: after_settings.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), }, AtomicOlapOperation::ModifyTableTtl { table, @@ -234,12 +239,14 @@ impl AtomicOlapOperation { before: before.clone(), after: after.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), }, AtomicOlapOperation::AddTableIndex { table, index, .. 
} => { SerializableOlapOperation::AddTableIndex { table: table.name.clone(), index: index.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), } } AtomicOlapOperation::DropTableIndex { @@ -248,6 +255,7 @@ impl AtomicOlapOperation { table: table.name.clone(), index_name: index_name.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), }, AtomicOlapOperation::ModifySampleBy { table, expression, .. @@ -255,11 +263,13 @@ impl AtomicOlapOperation { table: table.name.clone(), expression: expression.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), }, AtomicOlapOperation::RemoveSampleBy { table, .. } => { SerializableOlapOperation::RemoveSampleBy { table: table.name.clone(), database: table.database.clone(), + cluster_name: table.cluster_name.clone(), } } AtomicOlapOperation::PopulateMaterializedView { @@ -1311,6 +1321,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create some atomic operations @@ -1385,6 +1396,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create table B - depends on table A @@ -1407,6 +1419,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create view C - depends on table B @@ -1501,6 +1514,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create table B - target for materialized view @@ -1523,6 +1537,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create view C - depends on table B @@ -1637,6 +1652,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let view = View { @@ -1793,6 +1809,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let table_b = Table { @@ -1814,6 +1831,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let table_c = Table { @@ -1835,6 +1853,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Test operations @@ -1925,6 +1944,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let table_b = Table { @@ -1946,6 +1966,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let table_c = Table { @@ -1967,6 +1988,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let table_d = Table { @@ -1988,6 +2010,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let table_e = Table { @@ -2009,6 +2032,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let op_create_a = AtomicOlapOperation::CreateTable { @@ -2162,6 +2186,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create table B - target for materialized view @@ -2184,6 +2209,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create SQL resource for a materialized view @@ -2305,6 +2331,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create table B - target for materialized view @@ -2327,6 +2354,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: 
None, + cluster_name: None, }; // Create SQL resource for a materialized view @@ -2453,6 +2481,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let table_b = Table { @@ -2474,6 +2503,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create SQL resource for materialized view @@ -2680,6 +2710,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create a column @@ -2789,6 +2820,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create operations with signatures that work with the current implementation @@ -2908,6 +2940,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; let after_table = Table { @@ -2952,6 +2985,7 @@ mod tests { indexes: vec![], database: None, table_ttl_setting: None, + cluster_name: None, }; // Create column changes (remove old_column, add new_column) diff --git a/apps/framework-cli/src/utilities/migration_plan_schema.json b/apps/framework-cli/src/utilities/migration_plan_schema.json index 85ba7da9f4..a9794af48c 100644 --- a/apps/framework-cli/src/utilities/migration_plan_schema.json +++ b/apps/framework-cli/src/utilities/migration_plan_schema.json @@ -201,8 +201,13 @@ } }, "engine": { - "type": ["string", "null"], - "default": null + "anyOf": [ + { "type": "string" }, + { "type": "object" }, + { "type": "null" } + ], + "default": null, + "description": "Table engine configuration. Can be a simple string (e.g., 'MergeTree') or an object for complex engines (e.g., ReplicatedMergeTree with parameters)" }, "version": { "anyOf": [ diff --git a/apps/framework-cli/src/utilities/prod-docker-compose.yml.hbs b/apps/framework-cli/src/utilities/prod-docker-compose.yml.hbs index 1dcaf89134..823862a003 100644 --- a/apps/framework-cli/src/utilities/prod-docker-compose.yml.hbs +++ b/apps/framework-cli/src/utilities/prod-docker-compose.yml.hbs @@ -151,6 +151,9 @@ services: {{/if}} - clickhouse-0-logs:/var/log/clickhouse-server/ - clickhouse-0-users:/etc/clickhouse-server/users.d +{{#if clickhouse_clusters_file}} + - "{{clickhouse_clusters_file}}:/etc/clickhouse-server/config.d/clusters.xml:ro" +{{/if}} environment: - CLICKHOUSE_DB=${DB_NAME:-local} - CLICKHOUSE_USER=${CLICKHOUSE_USER:-panda} diff --git a/apps/framework-docs-v2/content/moosestack/configuration.mdx b/apps/framework-docs-v2/content/moosestack/configuration.mdx index 9b8a03d18b..2236777903 100644 --- a/apps/framework-docs-v2/content/moosestack/configuration.mdx +++ b/apps/framework-docs-v2/content/moosestack/configuration.mdx @@ -234,6 +234,17 @@ native_port = 9000 # Optional list of additional databases to create on startup (Default: []) # additional_databases = ["analytics", "staging"] +# ClickHouse cluster configuration for replicated tables (optional) +# Define clusters for use with ON CLUSTER DDL operations and distributed tables +# In local dev, Moose creates single-node clusters. In production, names must match your ClickHouse remote_servers config. +# +# Note: Cluster names are deployment directives that control HOW Moose runs DDL (via ON CLUSTER), +# not schema properties. Changing cluster names in your table configs won't trigger table recreation. 
+# [[clickhouse_config.clusters]] +# name = "default" +# [[clickhouse_config.clusters]] +# name = "my_cluster" + # HTTP server configuration for local development [http_server_config] # Host to bind the webserver to (Default: "localhost") diff --git a/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx b/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx index 865fcdc908..b0f98c4c28 100644 --- a/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx +++ b/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx @@ -1070,18 +1070,111 @@ cloud_replicated = OlapTable[Record]("cloud_records", OlapConfig( - -The `keeper_path` and `replica_name` parameters are **optional** for replicated engines: +##### Configuring Replication -- **Omit both parameters** (recommended): Moose uses smart defaults that work in both ClickHouse Cloud and self-managed environments. The default path pattern `/clickhouse/tables/{uuid}/{shard}` with replica `{replica}` works automatically with Atomic databases (default in modern ClickHouse). - -- **Provide custom paths**: You can still specify both parameters explicitly if you need custom replication paths for your self-managed cluster. +Replicated engines support three configuration approaches. Choose the one that fits your deployment: -**Note**: Both parameters must be provided together, or both omitted. The `{uuid}`, `{shard}`, and `{replica}` macros are automatically substituted by ClickHouse at runtime. +###### Default -For more details, see the [ClickHouse documentation on data replication](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication). +Omit all replication parameters. Moose uses smart defaults that work in both ClickHouse Cloud and self-managed environments: + + + +```ts filename="DefaultReplication.ts" copy +const table = new OlapTable("my_table", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"] + // No keeper_path, replica_name, or cluster needed +}); +``` + + +```py filename="DefaultReplication.py" copy +table = OlapTable[Record]("my_table", OlapConfig( + engine=ReplicatedMergeTreeEngine(), # No parameters + order_by_fields=["id"] +)) +``` + + + +Moose auto-injects: `/clickhouse/tables/{database}/{shard}/{table_name}` and `{replica}` in local development. ClickHouse Cloud uses its own patterns automatically. 
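+As a rough illustration, the engine clause Moose produces for this table in local dev is expected to look approximately like the following (a sketch only: it assumes the default `local` database and a single string `id` column, and the exact formatting may differ; `{database}`, `{shard}`, and `{replica}` are ClickHouse macros expanded at runtime):
+
+```sql
+-- Illustrative sketch of the auto-injected replication parameters in local dev
+CREATE TABLE `local`.`my_table`
+(
+    `id` String
+)
+ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/{shard}/my_table', '{replica}')
+ORDER BY (`id`);
+```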
+ +###### Cluster + +For multi-node deployments, specify a cluster name to use `ON CLUSTER` DDL operations: + + + +```ts filename="ClusterReplication.ts" copy +const table = new OlapTable("my_table", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"], + cluster: "default" // References cluster from moose.config.toml +}); +``` + + +```py filename="ClusterReplication.py" copy +table = OlapTable[Record]("my_table", OlapConfig( + engine=ReplicatedMergeTreeEngine(), + order_by_fields=["id"], + cluster="default" # References cluster from moose.config.toml +)) +``` + + + +**Configuration in `moose.config.toml`:** +```toml +[[clickhouse_config.clusters]] +name = "default" +``` + +**Use when:** +- Running multi-node self-managed ClickHouse with cluster configuration +- Need `ON CLUSTER` DDL for distributed operations + +###### Replication Paths + +For custom replication topology, specify both `keeper_path` and `replica_name`: + + + +```ts filename="ExplicitReplication.ts" copy +const table = new OlapTable("my_table", { + engine: ClickHouseEngines.ReplicatedMergeTree, + keeperPath: "/clickhouse/tables/{database}/{shard}/my_table", + replicaName: "{replica}", + orderByFields: ["id"] +}); +``` + + +```py filename="ExplicitReplication.py" copy +table = OlapTable[Record]("my_table", OlapConfig( + engine=ReplicatedMergeTreeEngine( + keeper_path="/clickhouse/tables/{database}/{shard}/my_table", + replica_name="{replica}" + ), + order_by_fields=["id"] +)) +``` + + + +**Use when:** +- Need custom replication paths for advanced configurations +- Both parameters must be provided together + + +**Cannot mix approaches:** Specifying both `cluster` and explicit `keeper_path`/`replica_name` will cause an error. Choose one approach. + +**Cluster is a deployment directive:** Changing `cluster` won't recreate your table—it only affects future DDL operations. +For more details, see the [ClickHouse documentation on data replication](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication). 
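+For reference, when `cluster` is set the DDL Moose runs is expected to take roughly this shape (a sketch, not the exact output: it assumes the `default` cluster, the `local` database, and a single string `id` column):
+
+```sql
+-- Creation: ON CLUSTER sits between the table name and the column list,
+-- and no explicit keeper arguments are passed to the engine
+CREATE TABLE `local`.`my_table` ON CLUSTER default
+(
+    `id` String
+)
+ENGINE = ReplicatedMergeTree
+ORDER BY (`id`);
+
+-- Subsequent DDL (settings changes, drops, etc.) carries the same clause
+ALTER TABLE `local`.`my_table` ON CLUSTER default
+MODIFY SETTING ttl_only_drop_parts = 1;
+
+DROP TABLE `local`.`my_table` ON CLUSTER default SYNC;
+```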
+ ### Irregular column names and Python Aliases diff --git a/apps/framework-docs/llm-docs/python/table-setup.md b/apps/framework-docs/llm-docs/python/table-setup.md index 81820cc42c..9bbb2b1e1e 100644 --- a/apps/framework-docs/llm-docs/python/table-setup.md +++ b/apps/framework-docs/llm-docs/python/table-setup.md @@ -404,6 +404,92 @@ Available replicated engines: - `ReplicatedAggregatingMergeTreeEngine` - Replicated with aggregation - `ReplicatedSummingMergeTreeEngine` - Replicated with summation +### Cluster-Aware Replicated Tables + +For multi-node ClickHouse deployments, you can specify a cluster name to use `ON CLUSTER` DDL operations: + +```python +from moose_lib import OlapTable, OlapConfig +from moose_lib.blocks import ReplicatedMergeTreeEngine +from pydantic import BaseModel +from datetime import datetime + +class ReplicatedData(BaseModel): + id: str + data: str + timestamp: datetime + +# Replicated table on a cluster +clustered_table = OlapTable[ReplicatedData]( + "ClusteredTable", + OlapConfig( + order_by_fields=["id"], + engine=ReplicatedMergeTreeEngine(), + cluster="default" # References cluster from moose.config.toml + ) +) +``` + +**Configuration in `moose.config.toml`:** +```toml +[[clickhouse_config.clusters]] +name = "default" +``` + +**When to omit all parameters (recommended):** +- ✅ **ClickHouse Cloud** - Platform manages replication automatically +- ✅ **Local development** - Moose auto-injects params: `/clickhouse/tables/{database}/{shard}/{table_name}` +- ✅ **Most production deployments** - Works out of the box + +**When to use `cluster`:** +- ✅ Multi-node self-managed ClickHouse with cluster configuration +- ✅ Need `ON CLUSTER` DDL for distributed operations +- ✅ Works without explicit `keeper_path`/`replica_name` parameters + +**When to use explicit `keeper_path`/`replica_name`:** +- ✅ Custom replication topology required +- ✅ Advanced ZooKeeper/Keeper configuration +- ✅ Specific self-managed deployment requirements + +**Important:** Cannot specify both `cluster` and explicit `keeper_path`/`replica_name` - choose one approach. + +**Local Development:** Moose configures cluster names to point to your local ClickHouse instance, letting you develop with `ON CLUSTER` DDL without running multiple nodes. + +**Production:** Cluster names must match your ClickHouse `remote_servers` configuration. + +#### Understanding `cluster` as a Deployment Directive + +The `cluster` field is a **deployment directive** that controls HOW Moose runs DDL operations, not WHAT the table looks like: + +- **Changing `cluster` won't recreate your table** - it only affects future DDL operations (CREATE, ALTER, etc.) +- **ClickHouse doesn't store cluster information** - the `ON CLUSTER` clause is only used during DDL execution +- **`moose init --from-remote` & `moose db pull` cannot detect cluster names** - ClickHouse system tables don't preserve this information + +**If you're importing existing tables that were created with `ON CLUSTER`:** +1. Run `moose init --from-remote` to generate your table definitions +2. Manually add `cluster="your_cluster_name"` to the generated table configs +3. 
Future migrations and DDL operations will correctly use `ON CLUSTER` + +**Example workflow:** +```python +# After moose init --from-remote generates this: +my_table = OlapTable[MySchema]( + "MyTable", + OlapConfig( + order_by_fields=["id"] + ) +) + +# Manually add cluster if you know it was created with ON CLUSTER: +my_table = OlapTable[MySchema]( + "MyTable", + OlapConfig( + order_by_fields=["id"], + cluster="my_cluster" # Add this line + ) +) +``` + ### S3Queue Engine Tables The S3Queue engine enables automatic processing of files from S3 buckets as they arrive. diff --git a/apps/framework-docs/llm-docs/typescript/table-setup.md b/apps/framework-docs/llm-docs/typescript/table-setup.md index e28f14c84e..59714c4a45 100644 --- a/apps/framework-docs/llm-docs/typescript/table-setup.md +++ b/apps/framework-docs/llm-docs/typescript/table-setup.md @@ -237,6 +237,80 @@ export const ReplicatedDedup = new OlapTable("ReplicatedDedup" **Note**: The `keeperPath` and `replicaName` parameters are optional: - **Self-managed ClickHouse**: Both parameters are required for configuring ZooKeeper/ClickHouse Keeper paths - **ClickHouse Cloud / Boreal**: Omit both parameters - the platform manages replication automatically + +### Cluster-Aware Replicated Tables + +For multi-node ClickHouse deployments, you can specify a cluster name to use `ON CLUSTER` DDL operations: + +```typescript +import { OlapTable, ClickHouseEngines, Key } from '@514labs/moose-lib'; + +interface ReplicatedSchema { + id: Key; + data: string; + timestamp: Date; +} + +// Replicated table on a cluster +export const ClusteredTable = new OlapTable("ClusteredTable", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"], + cluster: "default" // References cluster from moose.config.toml +}); +``` + +**Configuration in `moose.config.toml`:** +```toml +[[clickhouse_config.clusters]] +name = "default" +``` + +**When to omit all parameters (recommended):** +- ✅ **ClickHouse Cloud** - Platform manages replication automatically +- ✅ **Local development** - Moose auto-injects params: `/clickhouse/tables/{database}/{shard}/{table_name}` +- ✅ **Most production deployments** - Works out of the box + +**When to use `cluster`:** +- ✅ Multi-node self-managed ClickHouse with cluster configuration +- ✅ Need `ON CLUSTER` DDL for distributed operations +- ✅ Works without explicit `keeperPath`/`replicaName` parameters + +**When to use explicit `keeperPath`/`replicaName`:** +- ✅ Custom replication topology required +- ✅ Advanced ZooKeeper/Keeper configuration +- ✅ Specific self-managed deployment requirements + +**Important:** Cannot specify both `cluster` and explicit `keeperPath`/`replicaName` - choose one approach. + +**Local Development:** Moose configures cluster names to point to your local ClickHouse instance, letting you develop with `ON CLUSTER` DDL without running multiple nodes. + +**Production:** Cluster names must match your ClickHouse `remote_servers` configuration. + +#### Understanding `cluster` as a Deployment Directive + +The `cluster` field is a **deployment directive** that controls HOW Moose runs DDL operations, not WHAT the table looks like: + +- **Changing `cluster` won't recreate your table** - it only affects future DDL operations (CREATE, ALTER, etc.) 
+- **ClickHouse doesn't store cluster information** - the `ON CLUSTER` clause is only used during DDL execution +- **`moose init --from-remote` & `moose db pull` cannot detect cluster names** - ClickHouse system tables don't preserve this information + +**If you're importing existing tables that were created with `ON CLUSTER`:** +1. Run `moose init --from-remote` to generate your table definitions +2. Manually add `cluster: "your_cluster_name"` to the generated table configs +3. Future migrations and DDL operations will correctly use `ON CLUSTER` + +**Example workflow:** +```typescript +// After moose init --from-remote generates this: +export const MyTable = new OlapTable("MyTable", { + orderByFields: ["id"] +}); + +// Manually add cluster if you know it was created with ON CLUSTER: +export const MyTable = new OlapTable("MyTable", { + orderByFields: ["id"], + cluster: "my_cluster" // Add this line +}); ``` ### S3Queue Engine Tables diff --git a/apps/framework-docs/src/pages/moose/configuration.mdx b/apps/framework-docs/src/pages/moose/configuration.mdx index d271969c23..92fcd7416e 100644 --- a/apps/framework-docs/src/pages/moose/configuration.mdx +++ b/apps/framework-docs/src/pages/moose/configuration.mdx @@ -243,6 +243,17 @@ native_port = 9000 # Optional list of additional databases to create on startup (Default: []) # additional_databases = ["analytics", "staging"] +# ClickHouse cluster configuration for replicated tables (optional) +# Define clusters for use with ON CLUSTER DDL operations and distributed tables +# In local dev, Moose creates single-node clusters. In production, names must match your ClickHouse remote_servers config. +# +# Note: Cluster names are deployment directives that control HOW Moose runs DDL (via ON CLUSTER), +# not schema properties. Changing cluster names in your table configs won't trigger table recreation. +# [[clickhouse_config.clusters]] +# name = "default" +# [[clickhouse_config.clusters]] +# name = "my_cluster" + # HTTP server configuration for local development [http_server_config] # Host to bind the webserver to (Default: "localhost") diff --git a/apps/framework-docs/src/pages/moose/olap/model-table.mdx b/apps/framework-docs/src/pages/moose/olap/model-table.mdx index 300fcc55b8..a4082712f6 100644 --- a/apps/framework-docs/src/pages/moose/olap/model-table.mdx +++ b/apps/framework-docs/src/pages/moose/olap/model-table.mdx @@ -1050,18 +1050,108 @@ cloud_replicated = OlapTable[Record]("cloud_records", OlapConfig( ``` - -The `keeper_path` and `replica_name` parameters are **optional** for replicated engines: +##### Configuring Replication -- **Omit both parameters** (recommended): Moose uses smart defaults that work in both ClickHouse Cloud and self-managed environments. The default path pattern `/clickhouse/tables/{uuid}/{shard}` with replica `{replica}` works automatically with Atomic databases (default in modern ClickHouse). - -- **Provide custom paths**: You can still specify both parameters explicitly if you need custom replication paths for your self-managed cluster. +Replicated engines support three configuration approaches. Choose the one that fits your deployment: -**Note**: Both parameters must be provided together, or both omitted. The `{uuid}`, `{shard}`, and `{replica}` macros are automatically substituted by ClickHouse at runtime. +###### Default -For more details, see the [ClickHouse documentation on data replication](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication). +Omit all replication parameters. 
Moose uses smart defaults that work in both ClickHouse Cloud and self-managed environments: + + +```ts filename="DefaultReplication.ts" copy +const table = new OlapTable("my_table", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"] + // No keeper_path, replica_name, or cluster needed +}); +``` + + + +```py filename="DefaultReplication.py" copy +table = OlapTable[Record]("my_table", OlapConfig( + engine=ReplicatedMergeTreeEngine(), # No parameters + order_by_fields=["id"] +)) +``` + + +Moose auto-injects: `/clickhouse/tables/{database}/{shard}/{table_name}` and `{replica}` in local development. ClickHouse Cloud uses its own patterns automatically. + +###### Cluster + +For multi-node deployments, specify a cluster name to use `ON CLUSTER` DDL operations: + + +```ts filename="ClusterReplication.ts" copy +const table = new OlapTable("my_table", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"], + cluster: "default" // References cluster from moose.config.toml +}); +``` + + + +```py filename="ClusterReplication.py" copy +table = OlapTable[Record]("my_table", OlapConfig( + engine=ReplicatedMergeTreeEngine(), + order_by_fields=["id"], + cluster="default" # References cluster from moose.config.toml +)) +``` + + +**Configuration in `moose.config.toml`:** +```toml +[[clickhouse_config.clusters]] +name = "default" +``` + +**Use when:** +- Running multi-node self-managed ClickHouse with cluster configuration +- Need `ON CLUSTER` DDL for distributed operations + +###### Replication Paths + +For custom replication topology, specify both `keeper_path` and `replica_name`: + + +```ts filename="ExplicitReplication.ts" copy +const table = new OlapTable("my_table", { + engine: ClickHouseEngines.ReplicatedMergeTree, + keeperPath: "/clickhouse/tables/{database}/{shard}/my_table", + replicaName: "{replica}", + orderByFields: ["id"] +}); +``` + + + +```py filename="ExplicitReplication.py" copy +table = OlapTable[Record]("my_table", OlapConfig( + engine=ReplicatedMergeTreeEngine( + keeper_path="/clickhouse/tables/{database}/{shard}/my_table", + replica_name="{replica}" + ), + order_by_fields=["id"] +)) +``` + + +**Use when:** +- Need custom replication paths for advanced configurations +- Both parameters must be provided together + + +**Cannot mix approaches:** Specifying both `cluster` and explicit `keeper_path`/`replica_name` will cause an error. Choose one approach. + +**Cluster is a deployment directive:** Changing `cluster` won't recreate your table -— it only affects future DDL operations. +For more details, see the [ClickHouse documentation on data replication](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication). + ### Irregular column names and Python Aliases diff --git a/apps/framework-docs/src/pages/moose/olap/planned-migrations.mdx b/apps/framework-docs/src/pages/moose/olap/planned-migrations.mdx index 5ccea10388..cb31d39a09 100644 --- a/apps/framework-docs/src/pages/moose/olap/planned-migrations.mdx +++ b/apps/framework-docs/src/pages/moose/olap/planned-migrations.mdx @@ -84,6 +84,8 @@ You will commit the entire `migrations/` directory to version control, and Moose Moose makes some assumptions about your schema changes, such as renaming a column instead of dropping and adding. You can modify the plan to override these assumptions. + +Note: The `cluster` field controls which ClickHouse cluster Moose uses for `ON CLUSTER` DDL operations. 
It's a deployment directive, not a schema property, so changing it won't trigger table recreation. Open `plan.yaml` in your PR. Operations are ordered (teardown first, then setup) to avoid dependency issues. Review like regular code. You can also edit the plan to override the default assumptions Moose makes. diff --git a/packages/protobuf/infrastructure_map.proto b/packages/protobuf/infrastructure_map.proto index 3fea0ba852..36cdaa2920 100644 --- a/packages/protobuf/infrastructure_map.proto +++ b/packages/protobuf/infrastructure_map.proto @@ -151,6 +151,9 @@ message Table { // Optional database name for multi-database support // When not specified, uses the global ClickHouse config database optional string database = 17; + + // Optional cluster name for ON CLUSTER support in ClickHouse + optional string cluster_name = 18; } // Structured representation of ORDER BY to support either explicit fields diff --git a/packages/py-moose-lib/moose_lib/dmv2/olap_table.py b/packages/py-moose-lib/moose_lib/dmv2/olap_table.py index d5abfdf5f4..16d4097c7a 100644 --- a/packages/py-moose-lib/moose_lib/dmv2/olap_table.py +++ b/packages/py-moose-lib/moose_lib/dmv2/olap_table.py @@ -121,6 +121,10 @@ class OlapConfig(BaseModel): life_cycle: Determines how changes in code will propagate to the resources. settings: Optional table-level settings that can be modified with ALTER TABLE MODIFY SETTING. These are alterable settings that can be changed without recreating the table. + cluster: Optional cluster name for ON CLUSTER support in ClickHouse. + Use this to enable replicated tables across ClickHouse clusters. + The cluster must be defined in moose.config.toml (dev environment only). + Example: cluster="prod_cluster" """ order_by_fields: list[str] = [] order_by_expression: Optional[str] = None @@ -133,6 +137,8 @@ class OlapConfig(BaseModel): settings: Optional[dict[str, str]] = None # Optional table-level TTL expression (without leading 'TTL') ttl: Optional[str] = None + # Optional cluster name for ON CLUSTER support in ClickHouse + cluster: Optional[str] = None # Optional secondary/data-skipping indexes class TableIndex(BaseModel): @@ -228,6 +234,29 @@ def __init__(self, name: str, config: OlapConfig = OlapConfig(), **kwargs): ) _tables[registry_key] = self + # Validate cluster and explicit replication params are not both specified + if config.cluster: + from moose_lib.blocks import ( + ReplicatedMergeTreeEngine, + ReplicatedReplacingMergeTreeEngine, + ReplicatedAggregatingMergeTreeEngine, + ReplicatedSummingMergeTreeEngine, + ) + + if isinstance(config.engine, ( + ReplicatedMergeTreeEngine, + ReplicatedReplacingMergeTreeEngine, + ReplicatedAggregatingMergeTreeEngine, + ReplicatedSummingMergeTreeEngine, + )): + if config.engine.keeper_path is not None or config.engine.replica_name is not None: + raise ValueError( + f"OlapTable {name}: Cannot specify both 'cluster' and explicit replication params " + f"('keeper_path' or 'replica_name'). " + f"Use 'cluster' for auto-injected params, or use explicit 'keeper_path' and " + f"'replica_name' without 'cluster'." 
+ ) + # Check if using legacy enum-based engine configuration if config.engine and isinstance(config.engine, ClickHouseEngines): logger = Logger(action="OlapTable") diff --git a/packages/py-moose-lib/moose_lib/internal.py b/packages/py-moose-lib/moose_lib/internal.py index ab4e806fa9..1bd6ab7fba 100644 --- a/packages/py-moose-lib/moose_lib/internal.py +++ b/packages/py-moose-lib/moose_lib/internal.py @@ -198,6 +198,7 @@ class TableConfig(BaseModel): metadata: Optional metadata for the table. life_cycle: Lifecycle management setting for the table. table_settings: Optional table-level settings that can be modified with ALTER TABLE MODIFY SETTING. + cluster: Optional cluster name for ON CLUSTER support in ClickHouse. """ model_config = model_config @@ -214,6 +215,7 @@ class TableConfig(BaseModel): indexes: list[OlapConfig.TableIndex] = [] ttl: Optional[str] = None database: Optional[str] = None + cluster: Optional[str] = None class TopicConfig(BaseModel): @@ -696,6 +698,7 @@ def to_infra_map() -> dict: indexes=table.config.indexes, ttl=table.config.ttl, database=table.config.database, + cluster=table.config.cluster, ) for name, stream in get_streams().items(): diff --git a/packages/py-moose-lib/tests/test_cluster_validation.py b/packages/py-moose-lib/tests/test_cluster_validation.py new file mode 100644 index 0000000000..0073c804ba --- /dev/null +++ b/packages/py-moose-lib/tests/test_cluster_validation.py @@ -0,0 +1,86 @@ +"""Tests for OlapTable cluster validation.""" + +import pytest +from moose_lib import OlapTable, OlapConfig, MergeTreeEngine, ReplicatedMergeTreeEngine +from pydantic import BaseModel + + +class SampleModel(BaseModel): + """Test model for cluster validation tests.""" + + id: str + value: int + + +def test_cluster_only_is_allowed(): + """Test that specifying only cluster works.""" + table = OlapTable[SampleModel]( + "TestClusterOnly", + OlapConfig( + engine=MergeTreeEngine(), + order_by_fields=["id"], + cluster="test_cluster", + ), + ) + assert table is not None + + +def test_explicit_params_only_is_allowed(): + """Test that specifying explicit keeper_path and replica_name without cluster works.""" + table = OlapTable[SampleModel]( + "TestExplicitOnly", + OlapConfig( + engine=ReplicatedMergeTreeEngine( + keeper_path="/clickhouse/tables/{database}/{table}", + replica_name="{replica}", + ), + order_by_fields=["id"], + ), + ) + assert table is not None + + +def test_cluster_and_explicit_params_raises_error(): + """Test that specifying both cluster and explicit keeper_path/replica_name raises an error.""" + with pytest.raises( + ValueError, + match=r"Cannot specify both 'cluster' and explicit replication params", + ): + OlapTable[SampleModel]( + "TestBothClusterAndExplicit", + OlapConfig( + engine=ReplicatedMergeTreeEngine( + keeper_path="/clickhouse/tables/{database}/{table}", + replica_name="{replica}", + ), + order_by_fields=["id"], + cluster="test_cluster", + ), + ) + + +def test_non_replicated_engine_with_cluster_is_allowed(): + """Test that non-replicated engines can have a cluster specified.""" + table = OlapTable[SampleModel]( + "TestMergeTreeWithCluster", + OlapConfig( + engine=MergeTreeEngine(), + order_by_fields=["id"], + cluster="test_cluster", + ), + ) + assert table is not None + + +def test_replicated_engine_without_cluster_or_explicit_params_is_allowed(): + """Test that ReplicatedMergeTree without cluster or explicit params works (ClickHouse Cloud mode).""" + table = OlapTable[SampleModel]( + "TestCloudMode", + OlapConfig( + engine=ReplicatedMergeTreeEngine(), + 
order_by_fields=["id"], + # No cluster, no keeper_path, no replica_name + ), + ) + assert table is not None + diff --git a/packages/ts-moose-lib/src/dmv2/internal.ts b/packages/ts-moose-lib/src/dmv2/internal.ts index f4852cc39f..060adc12fc 100644 --- a/packages/ts-moose-lib/src/dmv2/internal.ts +++ b/packages/ts-moose-lib/src/dmv2/internal.ts @@ -210,6 +210,8 @@ interface TableJson { ttl?: string; /** Optional database name for multi-database support. */ database?: string; + /** Optional cluster name for ON CLUSTER support. */ + cluster?: string; } /** * Represents a target destination for data flow, typically a stream. @@ -726,6 +728,7 @@ export const toInfraMap = (registry: typeof moose_internal) => { })) || [], ttl: table.config.ttl, database: table.config.database, + cluster: table.config.cluster, }; }); diff --git a/packages/ts-moose-lib/src/dmv2/sdk/olapTable.ts b/packages/ts-moose-lib/src/dmv2/sdk/olapTable.ts index 513dda5584..2ed3871038 100644 --- a/packages/ts-moose-lib/src/dmv2/sdk/olapTable.ts +++ b/packages/ts-moose-lib/src/dmv2/sdk/olapTable.ts @@ -226,6 +226,13 @@ export type BaseOlapConfig = ( * When not specified, uses the global ClickHouse config database. */ database?: string; + /** + * Optional cluster name for ON CLUSTER support. + * Use this to enable replicated tables across ClickHouse clusters. + * The cluster must be defined in config.toml (dev environment only). + * Example: cluster: "prod_cluster" + */ + cluster?: string; }; /** @@ -527,6 +534,20 @@ export class OlapTable extends TypedBase> { ); } + // Validate cluster and explicit replication params are not both specified + const hasCluster = typeof (resolvedConfig as any).cluster === "string"; + const hasKeeperPath = + typeof (resolvedConfig as any).keeperPath === "string"; + const hasReplicaName = + typeof (resolvedConfig as any).replicaName === "string"; + + if (hasCluster && (hasKeeperPath || hasReplicaName)) { + throw new Error( + `OlapTable ${name}: Cannot specify both 'cluster' and explicit replication params ('keeperPath' or 'replicaName'). 
` + + `Use 'cluster' for auto-injected params, or use explicit 'keeperPath' and 'replicaName' without 'cluster'.`, + ); + } + super(name, resolvedConfig, schema, columns, validators); this.name = name; diff --git a/packages/ts-moose-lib/tests/cluster-validation.test.ts b/packages/ts-moose-lib/tests/cluster-validation.test.ts new file mode 100644 index 0000000000..6f3feea84c --- /dev/null +++ b/packages/ts-moose-lib/tests/cluster-validation.test.ts @@ -0,0 +1,121 @@ +import { expect } from "chai"; +import { OlapTable, ClickHouseEngines } from "../src/index"; +import { IJsonSchemaCollection } from "typia/src/schemas/json/IJsonSchemaCollection"; +import { Column } from "../src/dataModels/dataModelTypes"; + +interface TestModel { + id: string; + value: number; +} + +// Mock schema and columns for testing +const createMockSchema = (): IJsonSchemaCollection.IV3_1 => ({ + version: "3.1", + components: { schemas: {} }, + schemas: [{ type: "object", properties: {} }], +}); + +const createMockColumns = (fields: string[]): Column[] => + fields.map((field) => ({ + name: field as any, + data_type: "String" as any, + required: true, + unique: false, + primary_key: false, + default: null, + ttl: null, + annotations: [], + })); + +// Helper function to create OlapTable with mock schema for testing +const createTestOlapTable = (name: string, config: any) => { + return new OlapTable( + name, + config, + createMockSchema(), + createMockColumns(["id", "value"]), + ); +}; + +describe("OlapTable Cluster Validation", () => { + it("should allow cluster without explicit replication params", () => { + expect(() => { + createTestOlapTable("TestClusterOnly", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"], + cluster: "test_cluster", + }); + }).to.not.throw(); + }); + + it("should allow explicit keeperPath and replicaName without cluster", () => { + expect(() => { + createTestOlapTable("TestExplicitOnly", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"], + keeperPath: "/clickhouse/tables/{database}/{table}", + replicaName: "{replica}", + }); + }).to.not.throw(); + }); + + it("should throw error when both cluster and keeperPath are specified", () => { + expect(() => { + createTestOlapTable("TestBothClusterAndKeeper", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"], + cluster: "test_cluster", + keeperPath: "/clickhouse/tables/{database}/{table}", + }); + }).to.throw( + /Cannot specify both 'cluster' and explicit replication params/, + ); + }); + + it("should throw error when both cluster and replicaName are specified", () => { + expect(() => { + createTestOlapTable("TestBothClusterAndReplica", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"], + cluster: "test_cluster", + replicaName: "{replica}", + }); + }).to.throw( + /Cannot specify both 'cluster' and explicit replication params/, + ); + }); + + it("should throw error when cluster, keeperPath, and replicaName are all specified", () => { + expect(() => { + createTestOlapTable("TestAll", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"], + cluster: "test_cluster", + keeperPath: "/clickhouse/tables/{database}/{table}", + replicaName: "{replica}", + }); + }).to.throw( + /Cannot specify both 'cluster' and explicit replication params/, + ); + }); + + it("should allow non-replicated engines with cluster", () => { + expect(() => { + createTestOlapTable("TestMergeTreeWithCluster", { + engine: ClickHouseEngines.MergeTree, + orderByFields: ["id"], + 
cluster: "test_cluster", + }); + }).to.not.throw(); + }); + + it("should allow ReplicatedMergeTree without cluster or explicit params (ClickHouse Cloud mode)", () => { + expect(() => { + createTestOlapTable("TestCloudMode", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"], + // No cluster, no keeperPath, no replicaName + }); + }).to.not.throw(); + }); +}); diff --git a/templates/python-cluster/README.md b/templates/python-cluster/README.md new file mode 100644 index 0000000000..3f4a552400 --- /dev/null +++ b/templates/python-cluster/README.md @@ -0,0 +1,3 @@ +# Python Cluster Test Template + +This is a test-only template for E2E testing of ClickHouse cluster support in MooseStack. diff --git a/templates/python-cluster/app/__init__.py b/templates/python-cluster/app/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/templates/python-cluster/app/ingest/__init__.py b/templates/python-cluster/app/ingest/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/templates/python-cluster/app/ingest/models.py b/templates/python-cluster/app/ingest/models.py new file mode 100644 index 0000000000..40f013c4ad --- /dev/null +++ b/templates/python-cluster/app/ingest/models.py @@ -0,0 +1,96 @@ +""" +Test models for ClickHouse cluster support +""" + +from moose_lib import Key, OlapTable, OlapConfig, ReplicatedMergeTreeEngine, MergeTreeEngine +from pydantic import BaseModel + + +# Table using cluster_a +class TableA(BaseModel): + id: Key[str] + value: str + timestamp: float + + +# Table using cluster_b +class TableB(BaseModel): + id: Key[str] + count: int + timestamp: float + + +# Table without cluster (for mixed testing) +class TableC(BaseModel): + id: Key[str] + data: str + timestamp: float + + +# Table with explicit keeper args but no cluster +class TableD(BaseModel): + id: Key[str] + metric: int + timestamp: float + + +# Table with ReplicatedMergeTree but no cluster or explicit params (ClickHouse Cloud mode) +class TableE(BaseModel): + id: Key[str] + status: str + timestamp: float + + +# OLAP Tables + +# table_a: Uses cluster_a with ReplicatedMergeTree +table_a = OlapTable[TableA]( + "TableA", + OlapConfig( + order_by_fields=["id"], + engine=ReplicatedMergeTreeEngine(), + cluster="cluster_a", + ), +) + +# table_b: Uses cluster_b with ReplicatedMergeTree +table_b = OlapTable[TableB]( + "TableB", + OlapConfig( + order_by_fields=["id"], + engine=ReplicatedMergeTreeEngine(), + cluster="cluster_b", + ), +) + +# TableC: No cluster, uses plain MergeTree (not replicated) +table_c = OlapTable[TableC]( + "TableC", + OlapConfig( + order_by_fields=["id"], + engine=MergeTreeEngine(), + ), +) + +# TableD: ReplicatedMergeTree with explicit keeper args, no cluster +table_d = OlapTable[TableD]( + "TableD", + OlapConfig( + order_by_fields=["id"], + engine=ReplicatedMergeTreeEngine( + keeper_path="/clickhouse/tables/{database}/{table}", + replica_name="{replica}", + ), + ), +) + +# TableE: ReplicatedMergeTree with auto-injected params (ClickHouse Cloud mode) +table_e = OlapTable[TableE]( + "TableE", + OlapConfig( + order_by_fields=["id"], + engine=ReplicatedMergeTreeEngine(), + # No cluster, no keeper_path, no replica_name - Moose will auto-inject in dev + ), +) + diff --git a/templates/python-cluster/app/main.py b/templates/python-cluster/app/main.py new file mode 100644 index 0000000000..43c27928fc --- /dev/null +++ b/templates/python-cluster/app/main.py @@ -0,0 +1 @@ +from app.ingest import models diff --git a/templates/python-cluster/moose.config.toml 
b/templates/python-cluster/moose.config.toml new file mode 100644 index 0000000000..bb3d1ecb55 --- /dev/null +++ b/templates/python-cluster/moose.config.toml @@ -0,0 +1,62 @@ +language = "Python" +source_dir = "app" + +[redpanda_config] +broker = "localhost:19092" +message_timeout_ms = 1000 +retention_ms = 30000 +replication_factor = 1 + +[clickhouse_config] +db_name = "local" +user = "panda" +password = "pandapass" +use_ssl = false +host = "localhost" +host_port = 18123 +native_port = 9000 + +# Define two clusters for testing ON CLUSTER support +[[clickhouse_config.clusters]] +name = "cluster_a" + +[[clickhouse_config.clusters]] +name = "cluster_b" + +[http_server_config] +host = "localhost" +port = 4000 +management_port = 5001 + +[redis_config] +url = "redis://127.0.0.1:6379" +key_prefix = "MS" + +[git_config] +main_branch_name = "main" + +[temporal_config] +db_user = "temporal" +db_password = "temporal" +db_port = 5432 +temporal_port = 7233 +temporal_version = "1.22.3" +admin_tools_version = "1.22.3" +ui_version = "2.21.3" +ui_port = 8080 +ui_cors_origins = "http://localhost:3000" +config_path = "config/dynamicconfig/development-sql.yaml" +postgresql_version = "13" + +[supported_old_versions] + +[authentication] +admin_api_key = "445fd4696cfc5c49e28995c4aba05de44303a112" + +[features] +olap = true +streaming_engine = false +workflows = false +data_model_v2 = true +apis = false + diff --git a/templates/python-cluster/requirements.txt b/templates/python-cluster/requirements.txt new file mode 100644 index 0000000000..df5e13942d --- /dev/null +++ b/templates/python-cluster/requirements.txt @@ -0,0 +1,7 @@ +kafka-python-ng==2.2.2 +clickhouse-connect==0.7.16 +requests==2.32.4 +moose-cli +moose-lib +faker +sqlglot[rs]>=27.16.3 \ No newline at end of file diff --git a/templates/python-cluster/setup.py b/templates/python-cluster/setup.py new file mode 100644 index 0000000000..36c813a049 --- /dev/null +++ b/templates/python-cluster/setup.py @@ -0,0 +1,13 @@ + +from setuptools import setup +import os + +requirements_path = os.path.join(os.path.dirname(__file__), "requirements.txt") +with open(requirements_path, "r") as f: + requirements = f.read().splitlines() + +setup( + name='py', + version='0.0', + install_requires=requirements, +) diff --git a/templates/python-cluster/template.config.toml b/templates/python-cluster/template.config.toml new file mode 100644 index 0000000000..a0e8e09bb4 --- /dev/null +++ b/templates/python-cluster/template.config.toml @@ -0,0 +1,22 @@ +language = "python" +description = "Test-only template: Python project for testing ClickHouse cluster support" +post_install_print = """ +Test Template: ClickHouse Cluster Support + +This template is designed for E2E testing of ON CLUSTER functionality. + +--------------------------------------------------------- + +📂 Go to your project directory: + $ cd {project_dir} + +📦 Install Dependencies: + $ pip install -e . + +🛠️ Start dev server: + $ moose dev +""" +default_sloan_telemetry="standard" + +visible=false + diff --git a/templates/typescript-cluster/README.md b/templates/typescript-cluster/README.md new file mode 100644 index 0000000000..ad92d41ca6 --- /dev/null +++ b/templates/typescript-cluster/README.md @@ -0,0 +1,3 @@ +# TypeScript Cluster Test Template + +This is a test-only template for E2E testing of ClickHouse cluster support in MooseStack. 
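+
+To sanity-check the cluster wiring, start `moose dev` and query the bundled ClickHouse (a quick sketch; it assumes the single-node dev setup described in this template's `moose.config.toml`, where both clusters are expected to be registered as single-node clusters):
+
+```sql
+-- Both test clusters should appear, each with a single local replica
+SELECT cluster, shard_num, replica_num, host_name
+FROM system.clusters
+WHERE cluster IN ('cluster_a', 'cluster_b');
+```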
diff --git a/templates/typescript-cluster/moose.config.toml b/templates/typescript-cluster/moose.config.toml new file mode 100644 index 0000000000..1f121c2045 --- /dev/null +++ b/templates/typescript-cluster/moose.config.toml @@ -0,0 +1,65 @@ +language = "Typescript" +source_dir = "src" + +[typescript_config] +package_manager = "npm" + +[redpanda_config] +broker = "localhost:19092" +message_timeout_ms = 1000 +retention_ms = 30000 +replication_factor = 1 + +[clickhouse_config] +db_name = "local" +user = "panda" +password = "pandapass" +use_ssl = false +host = "localhost" +host_port = 18123 +native_port = 9000 + +# Define two clusters for testing ON CLUSTER support +[[clickhouse_config.clusters]] +name = "cluster_a" + +[[clickhouse_config.clusters]] +name = "cluster_b" + +[http_server_config] +host = "localhost" +port = 4000 +management_port = 5001 + +[redis_config] +url = "redis://127.0.0.1:6379" +key_prefix = "MS" + +[git_config] +main_branch_name = "main" + +[temporal_config] +db_user = "temporal" +db_password = "temporal" +db_port = 5432 +temporal_port = 7233 +temporal_version = "1.22.3" +admin_tools_version = "1.22.3" +ui_version = "2.21.3" +ui_port = 8080 +ui_cors_origins = "http://localhost:3000" +config_path = "config/dynamicconfig/development-sql.yaml" +postgresql_version = "13" + +[supported_old_versions] + +[authentication] +admin_api_key = "445fd4696cfc5c49e28995c4aba05de44303a112" + +[features] +olap = true +streaming_engine = false +workflows = false +data_model_v2 = true +apis = false + diff --git a/templates/typescript-cluster/package.json b/templates/typescript-cluster/package.json new file mode 100644 index 0000000000..2612f6cd2c --- /dev/null +++ b/templates/typescript-cluster/package.json @@ -0,0 +1,26 @@ +{ + "name": "moose-cluster-test-app", + "version": "0.0", + "description": "Test template for ClickHouse cluster support", + "scripts": { + "moose": "moose-cli", + "build": "moose-cli build --docker", + "dev": "moose-cli dev" + }, + "dependencies": { + "@514labs/moose-lib": "latest", + "ts-patch": "^3.3.0", + "tsconfig-paths": "^4.2.0", + "typia": "^9.6.1" + }, + "devDependencies": { + "@types/node": "^22.10.0", + "tsx": "^4.19.2", + "typescript": "^5.7.2" + }, + "pnpm": { + "onlyBuiltDependencies": [ + "@confluentinc/kafka-javascript" + ] + } +} diff --git a/templates/typescript-cluster/src/index.ts b/templates/typescript-cluster/src/index.ts new file mode 100644 index 0000000000..f263549a1c --- /dev/null +++ b/templates/typescript-cluster/src/index.ts @@ -0,0 +1 @@ +import "./ingest/models"; diff --git a/templates/typescript-cluster/src/ingest/models.ts b/templates/typescript-cluster/src/ingest/models.ts new file mode 100644 index 0000000000..a1dc4744fd --- /dev/null +++ b/templates/typescript-cluster/src/ingest/models.ts @@ -0,0 +1,77 @@ +import { OlapTable, Key, ClickHouseEngines } from "@514labs/moose-lib"; + +/** + * Test models for ClickHouse cluster support + */ + +/** Table using cluster_a */ +export interface TableA { + id: Key; + value: string; + timestamp: number; +} + +/** Table using cluster_b */ +export interface TableB { + id: Key; + count: number; + timestamp: number; +} + +/** Table without cluster (for mixed testing) */ +export interface TableC { + id: Key; + data: string; + timestamp: number; +} + +/** Table with explicit keeper args but no cluster */ +export interface TableD { + id: Key; + metric: number; + timestamp: number; +} + +/** Table with ReplicatedMergeTree but no cluster or explicit params (ClickHouse Cloud mode) */ +export interface 
TableE { + id: Key; + status: string; + timestamp: number; +} + +/** OLAP Tables */ + +// TableA: Uses cluster_a with ReplicatedMergeTree +export const tableA = new OlapTable("TableA", { + orderByFields: ["id"], + engine: ClickHouseEngines.ReplicatedMergeTree, + cluster: "cluster_a", +}); + +// TableB: Uses cluster_b with ReplicatedMergeTree +export const tableB = new OlapTable("TableB", { + orderByFields: ["id"], + engine: ClickHouseEngines.ReplicatedMergeTree, + cluster: "cluster_b", +}); + +// TableC: No cluster, uses plain MergeTree (not replicated) +export const tableC = new OlapTable("TableC", { + orderByFields: ["id"], + engine: ClickHouseEngines.MergeTree, +}); + +// TableD: ReplicatedMergeTree with explicit keeper args, no cluster +export const tableD = new OlapTable("TableD", { + orderByFields: ["id"], + engine: ClickHouseEngines.ReplicatedMergeTree, + keeperPath: "/clickhouse/tables/{database}/{table}", + replicaName: "{replica}", +}); + +// TableE: ReplicatedMergeTree with auto-injected params (ClickHouse Cloud mode) +export const tableE = new OlapTable("TableE", { + orderByFields: ["id"], + engine: ClickHouseEngines.ReplicatedMergeTree, + // No cluster, no keeperPath, no replicaName - Moose will auto-inject in dev +}); diff --git a/templates/typescript-cluster/template.config.toml b/templates/typescript-cluster/template.config.toml new file mode 100644 index 0000000000..10ff95831a --- /dev/null +++ b/templates/typescript-cluster/template.config.toml @@ -0,0 +1,22 @@ +language = "typescript" +description = "Test-only template: TypeScript project for testing ClickHouse cluster support" +post_install_print = """ +Test Template: ClickHouse Cluster Support + +This template is designed for E2E testing of ON CLUSTER functionality. + +--------------------------------------------------------- + +📂 Go to your project directory: + $ cd {project_dir} + +📦 Install Dependencies: + $ npm install + +🛠️ Start dev server: + $ moose dev +""" +default_sloan_telemetry="standard" + +visible=false + diff --git a/templates/typescript-cluster/tsconfig.json b/templates/typescript-cluster/tsconfig.json new file mode 100644 index 0000000000..36598821c7 --- /dev/null +++ b/templates/typescript-cluster/tsconfig.json @@ -0,0 +1,16 @@ +{ + "compilerOptions": { + "outDir": "dist", + "esModuleInterop": true, + "plugins": [ + { + "transform": "./node_modules/@514labs/moose-lib/dist/compilerPlugin.js", + "transformProgram": true + }, + { + "transform": "typia/lib/transform" + } + ], + "strictNullChecks": true + } +} From 0600701b07b813e7eb2284db56f4edcb1e07d416 Mon Sep 17 00:00:00 2001 From: fiveonefour-github-bot Date: Mon, 17 Nov 2025 12:25:12 -0500 Subject: [PATCH 21/59] Add release notes for November 14, 2025 (#2997) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Auto-generated release notes for November 14, 2025. This PR adds: - New release notes file: `2025-11-14.mdx` - Updated `_meta.tsx` with new entry - Updated `index.mdx` with link to new release notes The release notes were automatically generated from commits across the moosestack, registry, and commercial repositories. 🤖 Generated with [Claude Code](https://claude.com/claude-code) --- > [!NOTE] > Adds 2025-11-14 release notes and updates navigation to include the new entry. > > - **Release Notes (2025-11-14)**: > - **New template**: TypeScript MCP project with AI chat, data catalog discovery, monorepo; includes `moose dev` and `pnpm` workflow. 
> - **Data types**: ClickHouse `Enum16` support with example (HTTP status codes). > - **Runtime registry**: Programmatic resource access functions (`getTables()`, `getTable()`, `getApis()`, `getApi()`, `getStreams()`, `getStream()`, `getWorkflows()`, `getWorkflow()`), with dynamic routing example using `DeadLetterQueue`. > - **Other improvements**: Docs search, automatic DB context detection, clearer `[CompilerPlugin]` logs, deep health endpoint (`/_moose_internal/health`), consistent CORS. > - **Bug fixes**: DB-qualified table handling, JWT env var names, serverless migrations, migration generation with `--url`, schema compatibility for JSON/nested/FixedString. > - **Docs navigation**: > - Add `2025-11-14` entry to `release-notes/_meta.tsx` and link in `release-notes/index.mdx`. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 3a4e38333f23858b598d29a576662562a6dc1420. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). Co-authored-by: Release Notes Bot --- .../src/pages/release-notes/2025-11-14.mdx | 134 ++++++++++++++++++ .../src/pages/release-notes/_meta.tsx | 3 + .../src/pages/release-notes/index.mdx | 1 + 3 files changed, 138 insertions(+) create mode 100644 apps/framework-docs/src/pages/release-notes/2025-11-14.mdx diff --git a/apps/framework-docs/src/pages/release-notes/2025-11-14.mdx b/apps/framework-docs/src/pages/release-notes/2025-11-14.mdx new file mode 100644 index 0000000000..1ba271eff2 --- /dev/null +++ b/apps/framework-docs/src/pages/release-notes/2025-11-14.mdx @@ -0,0 +1,134 @@ +--- +title: November 14, 2025 +description: Release notes for November 14, 2025 +--- + +import { Callout } from "@/components"; + +# November 14, 2025 + + +- **New:** TypeScript MCP template with AI chat integration and data catalog discovery +- **New:** [Enum16 data type](#enum16-data-type-support) +- **New:** [Registry functions](#registry-functions-for-programmatic-resource-access) to inspect and access Moose resources at runtime + + +## TypeScript MCP template with AI chat integration + +New project template for building AI-powered data applications with Model Context Protocol (MCP) server integration. +Create projects that include analytical APIs, MCP server tools, and a web interface with AI chat capabilities. + +The template includes: +- **AI chat interface** for natural language data exploration +- **Data catalog discovery** tool for automatic schema detection +- **Monorepo structure** with unified dependency management + +```bash filename="Terminal" copy +# Create a new project with the TypeScript MCP template +moose create my-ai-app --template typescript-mcp + +cd my-ai-app + +# Start the Moose dev server +moose dev + +# In a new terminal, start the web app +cd packages/web-app +pnpm install +pnpm dev +``` + +The MCP server automatically detects database context and +provides readonly access to ClickHouse data with query limits up to 1000 rows. + +## Enum16 data type support + +Support for ClickHouse Enum16 data type with values from -32,768 to 32,767 (compared to Enum8's -128 to 127). +Use Enum16 for HTTP status codes, business identifiers, or any enum values exceeding Enum8's range. 
+
+```typescript filename="datamodels/ApiRequestLog.ts" copy
+import { Key } from "@514labs/moose-lib";
+
+// Define HTTP status codes with Enum16 to support values like 404, 500
+export enum HttpStatusCode {
+  "OK" = 200,
+  "Created" = 201,
+  "BadRequest" = 400,
+  "Unauthorized" = 401,
+  "NotFound" = 404,
+  "InternalServerError" = 500,
+  "BadGateway" = 502,
+  "ServiceUnavailable" = 503
+}
+
+export interface ApiRequestLog {
+  id: Key<string>;
+  endpoint: string;
+  method: string;
+  status_code: HttpStatusCode; // Uses Enum16
+  response_time_ms: number;
+  timestamp: Date;
+}
+```
+
+## Registry functions for programmatic resource access
+
+New registry functions allow programmatic access to all registered Moose resources at runtime.
+Dynamically route stream messages to different tables based on runtime conditions, or build custom tooling that inspects your Moose application structure.
+
+```typescript filename="app/streams/eventRouter.ts" copy
+import { getTable, getStream, DeadLetterQueue } from "@514labs/moose-lib";
+
+interface IncomingEvent {
+  eventType: string;
+  userId: string;
+  data: any;
+}
+
+// Create dead letter queue for failed routing
+const dlq = new DeadLetterQueue<IncomingEvent>("routing_dlq");
+
+// Get the incoming events stream and add dynamic routing consumer
+const incomingStream = getStream("incoming_events");
+
+incomingStream?.addConsumer(async (event) => {
+  // Dynamically determine target table based on event type
+  const targetTableName = `${event.eventType}_events`;
+  const targetTable = getTable(targetTableName);
+
+  if (targetTable) {
+    // Route to the appropriate table
+    await targetTable.insert([{
+      userId: event.userId,
+      timestamp: new Date(),
+      ...event.data
+    }]);
+  } else {
+    // Send to dead letter queue if table doesn't exist
+    await dlq.send({
+      originalRecord: event,
+      errorMessage: `No table found for event type: ${event.eventType}`,
+      errorType: "RoutingError",
+      failedAt: new Date(),
+      source: "transform"
+    });
+  }
+});
+```
+
+Available registry functions: `getTables()`, `getTable()`, `getApis()`, `getApi()`, `getStreams()`, `getStream()`, `getWorkflows()`, `getWorkflow()` in both TypeScript and Python.
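+
+The registry getters can also back small introspection utilities. The snippet below is a minimal sketch of that idea, assuming `getTables()` and `getWorkflows()` return iterables of the registered resource objects and that each object exposes a `name` property; the file path is only illustrative.
+
+```typescript filename="app/scripts/catalog.ts" copy
+import { getTables, getWorkflows } from "@514labs/moose-lib";
+
+// Log a simple catalog of the resources registered in this Moose app.
+// Assumes the registry getters return iterables of resource objects
+// that expose a `name` property.
+export function printCatalog(): void {
+  for (const table of getTables()) {
+    console.log(`table: ${table.name}`);
+  }
+  for (const workflow of getWorkflows()) {
+    console.log(`workflow: ${workflow.name}`);
+  }
+}
+```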
+
+## Other Features and Improvements
+- **Documentation search** – Command palette-style search for guides and API references
+- **MCP template setup** – Monorepo structure, updated getting-started instructions to use `pnpm` and `moose dev`
+- **Automatic database context detection** – MCP server uses ClickHouse's `currentDatabase()` for simpler setup
+- **Better logging** – `[CompilerPlugin]` prefix in dev terminal messages for clearer debugging
+- **Deep health monitoring** – Concurrent health checks for Redis, ClickHouse, Redpanda, and Consumption API with `/_moose_internal/health` endpoint
+- **CORS headers** – Applied consistently across all API endpoints including error responses
+
+## Bug Fixes
+- **Database-qualified table handling** – Support for database prefixes in SQL queries across TypeScript and Python
+- **JWT environment variables** – Fixed auth docs to use correct names (`MOOSE_JWT__SECRET`, `MOOSE_JWT__ISSUER`, `MOOSE_JWT__AUDIENCE`)
+- **Serverless migrations** – Fixed table reconciliation with remote ClickHouse databases
+- **Migration generation** – Fixed `moose generate migration --url` to work with Moose servers
+- **Schema compatibility** – Fixed incorrect breaking change detection for JSON, nested structures, and FixedString columns
diff --git a/apps/framework-docs/src/pages/release-notes/_meta.tsx b/apps/framework-docs/src/pages/release-notes/_meta.tsx
index 4ec6a4d59e..62de8f3493 100644
--- a/apps/framework-docs/src/pages/release-notes/_meta.tsx
+++ b/apps/framework-docs/src/pages/release-notes/_meta.tsx
@@ -7,6 +7,9 @@ const rawMeta = {
       breadcrumb: false,
     },
   },
+  "2025-11-14": {
+    title: "November 14, 2025",
+  },
   "2025-11-07": {
     title: "November 7, 2025",
   },
diff --git a/apps/framework-docs/src/pages/release-notes/index.mdx b/apps/framework-docs/src/pages/release-notes/index.mdx
index 70aad817cb..84eac07c63 100644
--- a/apps/framework-docs/src/pages/release-notes/index.mdx
+++ b/apps/framework-docs/src/pages/release-notes/index.mdx
@@ -12,6 +12,7 @@ Welcome to the Moose, Sloan, and Boreal release notes. Here you'll find informat
 
 ## Latest Updates
 
+* [November 14, 2025](/release-notes/2025-11-14)
 * [November 7, 2025](/release-notes/2025-11-07)
 * [November 1, 2025](/release-notes/2025-11-01)
 * [October 24, 2025](/release-notes/2025-10-24)

From f8570c5f03af219d20ef3cd0fd6ddacd843e8278 Mon Sep 17 00:00:00 2001
From: Lucio Franco 
Date: Mon, 17 Nov 2025 12:40:46 -0500
Subject: [PATCH 22/59] ENG-1251: Refactor ClickHouse diagnostics into reusable
 module (#2985)

Extract diagnostic providers from MCP tool into shared infrastructure module
to enable reuse across CLI, MCP, and other consumers.

---
> [!NOTE]
> Extracts a reusable ClickHouse diagnostics module with providers and orchestration, and refactors the MCP infra_issues tool to consume it.
>
> - **ClickHouse Diagnostics (new shared module)**:
>   - Adds `infrastructure/olap/clickhouse/diagnostics` with reusable providers: `MutationDiagnostic`, `PartsDiagnostic`, `MergeDiagnostic`, `ErrorStatsDiagnostic`, `S3QueueDiagnostic`, `ReplicationDiagnostic`, `MergeFailureDiagnostic`, `StoppedOperationsDiagnostic`.
>   - Introduces orchestration API: `DiagnosticProvider` trait, `DiagnosticRequest`, `DiagnosticOptions`, `run_diagnostics`, and shared types (`Component`, `Issue`, `Severity`, `DiagnosticError`, `DiagnosticOutput`).
>   - Providers refactored to expose `parse_*` helpers; unify error handling (`DiagnosticError`) and timeouts; add tests and mocks.
> - **MCP Tool (infra_issues)**: > - Rewrites tool to use the shared diagnostics orchestration; simplifies provider management and removes inline provider impls (e.g., deleted `stopped_operations.rs`). > - Updates parameter parsing, severity mapping, error types, output, and tests accordingly. > - **ClickHouse module**: > - Exposes `pub mod diagnostics;` for reuse across CLI/MCP. > - **Core minor**: > - Small robustness tweaks in `framework/core/infrastructure/table.rs` (`engine_hash` typing, hash update via `as_str().as_bytes()`). > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit a60635640ac64d085d0fcbb1bb4804a6c297d3fb. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). --- .../framework/core/infrastructure/table.rs | 5 +- .../olap/clickhouse/diagnostics}/errors.rs | 134 ++- .../clickhouse/diagnostics}/merge_failures.rs | 121 ++- .../olap/clickhouse/diagnostics}/merges.rs | 134 ++- .../olap/clickhouse/diagnostics/mod.rs | 938 ++++++++++++++++++ .../olap/clickhouse/diagnostics}/mutations.rs | 134 ++- .../olap/clickhouse/diagnostics}/parts.rs | 135 ++- .../clickhouse/diagnostics}/replication.rs | 299 +++--- .../olap/clickhouse/diagnostics}/s3queue.rs | 135 ++- .../diagnostics/stopped_operations.rs | 273 +++++ .../src/infrastructure/olap/clickhouse/mod.rs | 1 + .../src/mcp/tools/infra_issues/mod.rs | 698 ++++--------- .../tools/infra_issues/stopped_operations.rs | 210 ---- 13 files changed, 2075 insertions(+), 1142 deletions(-) rename apps/framework-cli/src/{mcp/tools/infra_issues => infrastructure/olap/clickhouse/diagnostics}/errors.rs (70%) rename apps/framework-cli/src/{mcp/tools/infra_issues => infrastructure/olap/clickhouse/diagnostics}/merge_failures.rs (70%) rename apps/framework-cli/src/{mcp/tools/infra_issues => infrastructure/olap/clickhouse/diagnostics}/merges.rs (68%) create mode 100644 apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mod.rs rename apps/framework-cli/src/{mcp/tools/infra_issues => infrastructure/olap/clickhouse/diagnostics}/mutations.rs (75%) rename apps/framework-cli/src/{mcp/tools/infra_issues => infrastructure/olap/clickhouse/diagnostics}/parts.rs (70%) rename apps/framework-cli/src/{mcp/tools/infra_issues => infrastructure/olap/clickhouse/diagnostics}/replication.rs (77%) rename apps/framework-cli/src/{mcp/tools/infra_issues => infrastructure/olap/clickhouse/diagnostics}/s3queue.rs (71%) create mode 100644 apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/stopped_operations.rs delete mode 100644 apps/framework-cli/src/mcp/tools/infra_issues/stopped_operations.rs diff --git a/apps/framework-cli/src/framework/core/infrastructure/table.rs b/apps/framework-cli/src/framework/core/infrastructure/table.rs index 03e358cbdb..51b5d28982 100644 --- a/apps/framework-cli/src/framework/core/infrastructure/table.rs +++ b/apps/framework-cli/src/framework/core/infrastructure/table.rs @@ -335,7 +335,8 @@ impl Table { use sha2::{Digest, Sha256}; // Combine engine hash and database into a single hash - let engine_hash = self.engine.as_ref().map(|e| e.non_alterable_params_hash()); + let engine_hash: Option = + self.engine.as_ref().map(|e| e.non_alterable_params_hash()); // If we have neither engine hash nor database, return None if engine_hash.is_none() && self.database.is_none() { @@ -347,7 +348,7 @@ impl Table { // Include engine params hash if it exists if let Some(ref hash) = engine_hash { - hasher.update(hash.as_bytes()); + 
hasher.update(hash.as_str().as_bytes()); } // Include database field diff --git a/apps/framework-cli/src/mcp/tools/infra_issues/errors.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/errors.rs similarity index 70% rename from apps/framework-cli/src/mcp/tools/infra_issues/errors.rs rename to apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/errors.rs index 6a71927c2c..1bfc932f23 100644 --- a/apps/framework-cli/src/mcp/tools/infra_issues/errors.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/errors.rs @@ -3,7 +3,7 @@ use log::debug; use serde_json::{json, Map, Value}; -use super::{Component, DiagnoseError, DiagnosticProvider, Issue, Severity}; +use super::{Component, DiagnosticError, DiagnosticProvider, Issue, Severity}; use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; @@ -12,64 +12,37 @@ use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; const DIAGNOSTIC_QUERY_TIMEOUT_SECS: u64 = 30; /// Diagnostic provider for checking system-wide errors -pub struct ErrorStatsDiagnostic; - -#[async_trait::async_trait] -impl DiagnosticProvider for ErrorStatsDiagnostic { - fn name(&self) -> &str { - "ErrorStatsDiagnostic" - } - - fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { - // Error stats are system-wide, not component-specific - // This should be run separately outside the component loop - false +/// +/// Use `ErrorStatsDiagnostic::new()` or `Default::default()` to construct. +#[derive(Default)] +pub struct ErrorStatsDiagnostic(()); + +impl ErrorStatsDiagnostic { + /// Create a new ErrorStatsDiagnostic provider + pub const fn new() -> Self { + Self(()) } - fn is_system_wide(&self) -> bool { - true - } - - async fn diagnose( - &self, + /// Parse the ClickHouse JSON response and extract error statistics issues + /// + /// # Arguments + /// * `json_response` - The raw JSON string from ClickHouse + /// * `component` - The component being diagnosed (used for system-wide context) + /// + /// # Returns + /// Vector of issues found in the response + pub fn parse_response( + json_response: &str, component: &Component, - _engine: Option<&ClickhouseEngine>, - config: &ClickHouseConfig, - _since: Option<&str>, - ) -> Result, DiagnoseError> { - let client = ClickHouseClient::new(config) - .map_err(|e| DiagnoseError::ClickHouseConnection(format!("{}", e)))?; + ) -> Result, DiagnosticError> { + let json_value: Value = serde_json::from_str(json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; - // Get recent errors with significant counts - let query = "SELECT - name, - value, - last_error_time, - last_error_message - FROM system.errors - WHERE value > 0 - ORDER BY value DESC - LIMIT 10 - FORMAT JSON"; - - debug!("Executing errors query: {}", query); - - let result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? 
- .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; - - let json_response: Value = serde_json::from_str(&result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; - - let data = json_response + let data = json_value .get("data") .and_then(|v| v.as_array()) .ok_or_else(|| { - DiagnoseError::ParseError("Missing 'data' field in response".to_string()) + DiagnosticError::ParseError("Missing 'data' field in response".to_string()) })?; let mut issues = Vec::new(); @@ -114,7 +87,10 @@ impl DiagnosticProvider for ErrorStatsDiagnostic { source: "system.errors".to_string(), component: component.clone(), error_type: "system_error".to_string(), - message: format!("Error '{}' occurred {} times. Last: {}", name, value, last_error_message), + message: format!( + "Error '{}' occurred {} times. Last: {}", + name, value, last_error_message + ), details, suggested_action: "Review error pattern and recent query logs. Check ClickHouse server logs for more details.".to_string(), related_queries: vec![ @@ -127,3 +103,55 @@ impl DiagnosticProvider for ErrorStatsDiagnostic { Ok(issues) } } + +#[async_trait::async_trait] +impl DiagnosticProvider for ErrorStatsDiagnostic { + fn name(&self) -> &str { + "ErrorStatsDiagnostic" + } + + fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { + // Error stats are system-wide, not component-specific + // This should be run separately outside the component loop + false + } + + fn is_system_wide(&self) -> bool { + true + } + + async fn diagnose( + &self, + component: &Component, + _engine: Option<&ClickhouseEngine>, + config: &ClickHouseConfig, + _since: Option<&str>, + ) -> Result, DiagnosticError> { + let client = ClickHouseClient::new(config) + .map_err(|e| DiagnosticError::ConnectionFailed(format!("{}", e)))?; + + // Get recent errors with significant counts + let query = "SELECT + name, + value, + last_error_time, + last_error_message + FROM system.errors + WHERE value > 0 + ORDER BY value DESC + LIMIT 10 + FORMAT JSON"; + + debug!("Executing errors query: {}", query); + + let result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? 
+ .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + Self::parse_response(&result, component) + } +} diff --git a/apps/framework-cli/src/mcp/tools/infra_issues/merge_failures.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/merge_failures.rs similarity index 70% rename from apps/framework-cli/src/mcp/tools/infra_issues/merge_failures.rs rename to apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/merge_failures.rs index e97e9d7485..79db903db6 100644 --- a/apps/framework-cli/src/mcp/tools/infra_issues/merge_failures.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/merge_failures.rs @@ -3,7 +3,7 @@ use log::debug; use serde_json::{json, Map, Value}; -use super::{Component, DiagnoseError, DiagnosticProvider, Issue, Severity}; +use super::{Component, DiagnosticError, DiagnosticProvider, Issue, Severity}; use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; @@ -12,55 +12,35 @@ use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; const DIAGNOSTIC_QUERY_TIMEOUT_SECS: u64 = 30; /// Diagnostic provider for checking merge failures from system.metrics -pub struct MergeFailureDiagnostic; - -#[async_trait::async_trait] -impl DiagnosticProvider for MergeFailureDiagnostic { - fn name(&self) -> &str { - "merge_failures" - } - - fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { - // Merge failures from system.metrics are system-wide, not component-specific - // This should be run separately outside the component loop - false - } - - fn is_system_wide(&self) -> bool { - true +/// +/// Use `MergeFailureDiagnostic::new()` or `Default::default()` to construct. +#[derive(Default)] +pub struct MergeFailureDiagnostic(()); + +impl MergeFailureDiagnostic { + /// Create a new MergeFailureDiagnostic provider + pub const fn new() -> Self { + Self(()) } - async fn diagnose( - &self, + /// Parse the ClickHouse JSON response and extract merge failure issues + /// + /// # Arguments + /// * `json_response` - The raw JSON string from ClickHouse + /// * `component` - The component being diagnosed (used for system-wide context) + /// * `db_name` - Database name for generating related queries + /// + /// # Returns + /// Vector of issues found in the response + pub fn parse_response( + json_response: &str, component: &Component, - _engine: Option<&ClickhouseEngine>, - config: &ClickHouseConfig, - _since: Option<&str>, - ) -> Result, DiagnoseError> { - let client = ClickHouseClient::new(config) - .map_err(|e| DiagnoseError::ClickHouseConnection(format!("{}", e)))?; - - let mut issues = Vec::new(); - - // Check system.metrics for background merge failures - // Note: This is a system-wide metric, not per-table, but we report it per-table for context - let metrics_query = - "SELECT value FROM system.metrics WHERE metric = 'FailedBackgroundMerges' FORMAT JSON"; - - debug!("Executing merge failure metrics query: {}", metrics_query); + db_name: &str, + ) -> Result, DiagnosticError> { + let json_value: Value = serde_json::from_str(json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; - let result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(metrics_query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? 
- .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; - - let json_response: Value = serde_json::from_str(&result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; - - let failed_merges = json_response + let failed_merges = json_value .get("data") .and_then(|v| v.as_array()) .and_then(|arr| arr.first()) @@ -68,6 +48,8 @@ impl DiagnosticProvider for MergeFailureDiagnostic { .and_then(|v| v.as_u64()) .unwrap_or(0); + let mut issues = Vec::new(); + if failed_merges > 0 { let severity = if failed_merges > 10 { Severity::Error @@ -94,7 +76,7 @@ impl DiagnosticProvider for MergeFailureDiagnostic { "SELECT * FROM system.metrics WHERE metric LIKE '%Merge%'".to_string(), format!( "SELECT * FROM system.merges WHERE database = '{}' AND table = '{}'", - config.db_name, component.name + db_name, component.name ), ], }); @@ -103,3 +85,48 @@ impl DiagnosticProvider for MergeFailureDiagnostic { Ok(issues) } } + +#[async_trait::async_trait] +impl DiagnosticProvider for MergeFailureDiagnostic { + fn name(&self) -> &str { + "merge_failures" + } + + fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { + // Merge failures from system.metrics are system-wide, not component-specific + // This should be run separately outside the component loop + false + } + + fn is_system_wide(&self) -> bool { + true + } + + async fn diagnose( + &self, + component: &Component, + _engine: Option<&ClickhouseEngine>, + config: &ClickHouseConfig, + _since: Option<&str>, + ) -> Result, DiagnosticError> { + let client = ClickHouseClient::new(config) + .map_err(|e| DiagnosticError::ConnectionFailed(format!("{}", e)))?; + + // Check system.metrics for background merge failures + // Note: This is a system-wide metric, not per-table, but we report it per-table for context + let metrics_query = + "SELECT value FROM system.metrics WHERE metric = 'FailedBackgroundMerges' FORMAT JSON"; + + debug!("Executing merge failure metrics query: {}", metrics_query); + + let result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(metrics_query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? 
+ .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + Self::parse_response(&result, component, &config.db_name) + } +} diff --git a/apps/framework-cli/src/mcp/tools/infra_issues/merges.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/merges.rs similarity index 68% rename from apps/framework-cli/src/mcp/tools/infra_issues/merges.rs rename to apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/merges.rs index 1d2176c3df..62aec66941 100644 --- a/apps/framework-cli/src/mcp/tools/infra_issues/merges.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/merges.rs @@ -3,7 +3,7 @@ use log::debug; use serde_json::{json, Map, Value}; -use super::{Component, DiagnoseError, DiagnosticProvider, Issue, Severity}; +use super::{Component, DiagnosticError, DiagnosticProvider, Issue, Severity}; use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; @@ -12,62 +12,39 @@ use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; const DIAGNOSTIC_QUERY_TIMEOUT_SECS: u64 = 30; /// Diagnostic provider for checking stuck background merges -pub struct MergeDiagnostic; - -#[async_trait::async_trait] -impl DiagnosticProvider for MergeDiagnostic { - fn name(&self) -> &str { - "MergeDiagnostic" +/// +/// Use `MergeDiagnostic::new()` or `Default::default()` to construct. +#[derive(Default)] +pub struct MergeDiagnostic(()); + +impl MergeDiagnostic { + /// Create a new MergeDiagnostic provider + pub const fn new() -> Self { + Self(()) } - fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { - true - } - - async fn diagnose( - &self, + /// Parse the ClickHouse JSON response and extract merge issues + /// + /// # Arguments + /// * `json_response` - The raw JSON string from ClickHouse + /// * `component` - The component being diagnosed + /// * `db_name` - Database name for generating related queries + /// + /// # Returns + /// Vector of issues found in the response + pub fn parse_response( + json_response: &str, component: &Component, - _engine: Option<&ClickhouseEngine>, - config: &ClickHouseConfig, - _since: Option<&str>, - ) -> Result, DiagnoseError> { - let client = ClickHouseClient::new(config) - .map_err(|e| DiagnoseError::ClickHouseConnection(format!("{}", e)))?; + db_name: &str, + ) -> Result, DiagnosticError> { + let json_value: Value = serde_json::from_str(json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; - // Check for long-running merges - let query = format!( - "SELECT - elapsed, - progress, - num_parts, - result_part_name, - total_size_bytes_compressed - FROM system.merges - WHERE database = '{}' AND table = '{}' - AND elapsed > 300 - ORDER BY elapsed DESC - FORMAT JSON", - config.db_name, component.name - ); - - debug!("Executing merges query: {}", query); - - let result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(&query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? 
- .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; - - let json_response: Value = serde_json::from_str(&result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; - - let data = json_response + let data = json_value .get("data") .and_then(|v| v.as_array()) .ok_or_else(|| { - DiagnoseError::ParseError("Missing 'data' field in response".to_string()) + DiagnosticError::ParseError("Missing 'data' field in response".to_string()) })?; let mut issues = Vec::new(); @@ -102,16 +79,17 @@ impl DiagnosticProvider for MergeDiagnostic { error_type: "slow_merge".to_string(), message: format!( "Background merge running for {:.1} seconds ({:.1}% complete)", - elapsed, progress * 100.0 + elapsed, + progress * 100.0 ), details, suggested_action: "Monitor merge progress. If stuck, check server resources (CPU, disk I/O, memory). Consider stopping merge with SYSTEM STOP MERGES if necessary.".to_string(), related_queries: vec![ format!( "SELECT * FROM system.merges WHERE database = '{}' AND table = '{}'", - config.db_name, component.name + db_name, component.name ), - format!("SYSTEM STOP MERGES {}.{}", config.db_name, component.name), + format!("SYSTEM STOP MERGES {}.{}", db_name, component.name), ], }); } @@ -119,3 +97,53 @@ impl DiagnosticProvider for MergeDiagnostic { Ok(issues) } } + +#[async_trait::async_trait] +impl DiagnosticProvider for MergeDiagnostic { + fn name(&self) -> &str { + "MergeDiagnostic" + } + + fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { + true + } + + async fn diagnose( + &self, + component: &Component, + _engine: Option<&ClickhouseEngine>, + config: &ClickHouseConfig, + _since: Option<&str>, + ) -> Result, DiagnosticError> { + let client = ClickHouseClient::new(config) + .map_err(|e| DiagnosticError::ConnectionFailed(format!("{}", e)))?; + + // Check for long-running merges + let query = format!( + "SELECT + elapsed, + progress, + num_parts, + result_part_name, + total_size_bytes_compressed + FROM system.merges + WHERE database = '{}' AND table = '{}' + AND elapsed > 300 + ORDER BY elapsed DESC + FORMAT JSON", + config.db_name, component.name + ); + + debug!("Executing merges query: {}", query); + + let result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(&query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? + .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + Self::parse_response(&result, component, &config.db_name) + } +} diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mod.rs new file mode 100644 index 0000000000..87252bb29b --- /dev/null +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mod.rs @@ -0,0 +1,938 @@ +//! # ClickHouse Diagnostics Module +//! +//! This module provides reusable diagnostic capabilities for ClickHouse infrastructure. +//! It defines a provider-based architecture where each diagnostic check is implemented +//! as a separate provider that can be run independently or orchestrated together. +//! +//! ## Architecture +//! +//! Three-layer design: +//! 1. **Provider Layer** - Individual diagnostics with testable parsing logic +//! 2. **Orchestration Layer** - Running diagnostics with common request/filter structs +//! 3. **Consumer Layer** - Tools (MCP, CLI) that translate inputs to DiagnosticRequest +//! +//! ## Diagnostic Providers +//! +//! 
### 1. MutationDiagnostic +//! Detects stuck or failing mutations (ALTER operations). +//! - **Source**: `system.mutations` +//! - **Thresholds**: Error (has failure reason), Warning (not done) +//! +//! ### 2. PartsDiagnostic +//! Identifies excessive data parts per partition. +//! - **Source**: `system.parts` +//! - **Thresholds**: Error (>300 parts), Warning (>100 parts) +//! +//! ### 3. MergeDiagnostic +//! Monitors long-running background merges. +//! - **Source**: `system.merges` +//! - **Thresholds**: Error (>1800s), Warning (>300s) +//! +//! ### 4. ErrorStatsDiagnostic +//! Aggregates errors from ClickHouse system.errors. +//! - **Source**: `system.errors` +//! - **Thresholds**: Error (>100), Warning (>10), Info (>0) +//! +//! ### 5. S3QueueDiagnostic (S3Queue tables only) +//! Detects S3Queue ingestion failures. +//! - **Source**: `system.s3queue_log` +//! - **Thresholds**: Error (any failed entries) +//! +//! ### 6. ReplicationDiagnostic (Replicated* tables only) +//! Monitors replication health and queue backlogs. +//! - **Sources**: `system.replication_queue`, `system.replicas` +//! - **Thresholds**: Error (queue>50, tries>10), Warning (queue>10, tries>3) +//! +//! ### 7. MergeFailureDiagnostic +//! Detects system-wide background merge failures. +//! - **Source**: `system.metrics` +//! - **Thresholds**: Error (>10 failures), Warning (>0 failures) +//! +//! ### 8. StoppedOperationsDiagnostic +//! Identifies manually stopped operations. +//! - **Sources**: `system.parts`, `system.merges`, `system.replicas` +//! - **Thresholds**: Error (stopped replication), Warning (stopped merges) + +use serde::{Deserialize, Serialize}; +use serde_json::{Map, Value}; +use std::collections::HashMap; + +use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; +use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; + +// Module declarations for diagnostic providers +mod errors; +mod merge_failures; +mod merges; +mod mutations; +mod parts; +mod replication; +mod s3queue; +mod stopped_operations; + +// Re-export diagnostic providers +pub use errors::ErrorStatsDiagnostic; +pub use merge_failures::MergeFailureDiagnostic; +pub use merges::MergeDiagnostic; +pub use mutations::MutationDiagnostic; +pub use parts::PartsDiagnostic; +pub use replication::ReplicationDiagnostic; +pub use s3queue::S3QueueDiagnostic; +pub use stopped_operations::StoppedOperationsDiagnostic; + +/// Error types for diagnostic operations +#[derive(Debug, thiserror::Error)] +pub enum DiagnosticError { + #[error("Failed to connect to ClickHouse: {0}")] + ConnectionFailed(String), + + #[error("Failed to execute diagnostic query: {0}")] + QueryFailed(String), + + #[error("Query timeout after {0} seconds")] + QueryTimeout(u64), + + #[error("Failed to parse query result: {0}")] + ParseError(String), + + #[error("Invalid parameter: {0}")] + InvalidParameter(String), +} + +/// Severity level for issues +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum Severity { + Error, + Warning, + Info, +} + +impl Severity { + /// Check if this severity should include issues of the given level + pub fn includes(&self, other: &Severity) -> bool { + match self { + Severity::Info => true, // Info includes all severities + Severity::Warning => matches!(other, Severity::Warning | Severity::Error), + Severity::Error => matches!(other, Severity::Error), + } + } +} + +/// Component information for issue context +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct 
Component { + pub component_type: String, + pub name: String, + /// Flexible metadata for component-specific context (e.g., database, namespace, cluster) + #[serde(skip_serializing_if = "HashMap::is_empty")] + pub metadata: HashMap, +} + +/// Detailed information about an infrastructure issue +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Issue { + pub severity: Severity, + pub source: String, + pub component: Component, + pub error_type: String, + pub message: String, + pub details: Map, + pub suggested_action: String, + pub related_queries: Vec, +} + +/// Options for filtering and configuring diagnostic runs +#[derive(Debug, Clone)] +pub struct DiagnosticOptions { + /// Specific diagnostic names to run (empty = run all applicable diagnostics) + pub diagnostic_names: Vec, + /// Minimum severity level to report (filters results) + pub min_severity: Severity, + /// Optional time filter (e.g., "-1h" for last hour) + pub since: Option, +} + +impl Default for DiagnosticOptions { + fn default() -> Self { + Self { + diagnostic_names: Vec::new(), + min_severity: Severity::Info, + since: None, + } + } +} + +/// Request to run diagnostics on components +#[derive(Debug, Clone)] +pub struct DiagnosticRequest { + /// Components to diagnose (tables, views, etc.) + pub components: Vec<(Component, Option)>, + /// Diagnostic options for filtering and configuration + pub options: DiagnosticOptions, +} + +/// Summary statistics for diagnostic results +#[derive(Debug, Serialize, Deserialize)] +pub struct IssueSummary { + pub total_issues: usize, + pub by_severity: HashMap, + pub by_component: HashMap, +} + +/// Infrastructure type for diagnostic context +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum InfrastructureType { + ClickHouse, +} + +/// Complete diagnostic output +#[derive(Debug, Serialize, Deserialize)] +pub struct DiagnosticOutput { + pub infrastructure_type: InfrastructureType, + pub issues: Vec, + pub summary: IssueSummary, +} + +impl DiagnosticOutput { + /// Create a new diagnostic output and compute summary statistics + pub fn new(infrastructure_type: InfrastructureType, issues: Vec) -> Self { + let mut by_severity = HashMap::new(); + let mut by_component = HashMap::new(); + + for issue in &issues { + let severity_key = format!("{:?}", issue.severity).to_lowercase(); + *by_severity.entry(severity_key).or_insert(0) += 1; + + let component_key = issue.component.name.clone(); + *by_component.entry(component_key).or_insert(0) += 1; + } + + let summary = IssueSummary { + total_issues: issues.len(), + by_severity, + by_component, + }; + + Self { + infrastructure_type, + issues, + summary, + } + } +} + +/// Trait for ClickHouse diagnostic providers +/// +/// Each provider implements checks for a specific aspect of ClickHouse infrastructure health. +/// Providers can be system-wide (run once) or component-specific (run per table/component). 
+#[async_trait::async_trait] +pub trait DiagnosticProvider: Send + Sync { + /// Name of this diagnostic provider + fn name(&self) -> &str; + + /// Check if this provider is applicable to the given component + fn applicable_to(&self, component: &Component, engine: Option<&ClickhouseEngine>) -> bool; + + /// Check if this provider is system-wide (not component-specific) + /// System-wide providers are run once, not per-component + fn is_system_wide(&self) -> bool { + false + } + + /// Run diagnostics and return list of issues found + async fn diagnose( + &self, + component: &Component, + engine: Option<&ClickhouseEngine>, + config: &ClickHouseConfig, + since: Option<&str>, + ) -> Result, DiagnosticError>; +} + +/// Create all available diagnostic providers +/// +/// Returns a vector containing instances of all diagnostic providers. +/// These can be filtered by name or applicability before running. +pub fn create_all_providers() -> Vec> { + vec![ + Box::new(MutationDiagnostic::new()), + Box::new(PartsDiagnostic::new()), + Box::new(MergeDiagnostic::new()), + Box::new(ErrorStatsDiagnostic::new()), + Box::new(S3QueueDiagnostic::new()), + Box::new(ReplicationDiagnostic::new()), + Box::new(MergeFailureDiagnostic::new()), + Box::new(StoppedOperationsDiagnostic::new()), + ] +} + +/// Get a specific diagnostic provider by name +/// +/// # Arguments +/// * `name` - The name of the provider to retrieve +/// +/// # Returns +/// Some(provider) if found, None otherwise +pub fn get_provider(name: &str) -> Option> { + create_all_providers() + .into_iter() + .find(|p| p.name() == name) +} + +/// Run diagnostics on the provided components +/// +/// This is the main orchestration function that: +/// 1. Filters providers by diagnostic_names (empty = run all applicable) +/// 2. Separates system-wide vs component-specific providers +/// 3. Runs system-wide providers once +/// 4. Runs component-specific providers for each applicable component +/// 5. Filters results by minimum severity +/// 6. Returns aggregated results +/// +/// # Arguments +/// * `request` - The diagnostic request containing components and options +/// * `config` - ClickHouse configuration for database connection +/// +/// # Returns +/// DiagnosticOutput with all issues found, filtered by severity +pub async fn run_diagnostics( + request: DiagnosticRequest, + config: &ClickHouseConfig, +) -> Result { + use tokio::task::JoinSet; + + let all_providers = create_all_providers(); + + // Filter providers by requested diagnostic names (empty = all) + let providers: Vec> = if request.options.diagnostic_names.is_empty() + { + all_providers + } else { + // Validate that requested diagnostic names exist + let available_names: Vec = + all_providers.iter().map(|p| p.name().to_string()).collect(); + let invalid_names: Vec = request + .options + .diagnostic_names + .iter() + .filter(|name| !available_names.contains(name)) + .cloned() + .collect(); + + if !invalid_names.is_empty() { + return Err(DiagnosticError::InvalidParameter(format!( + "Unknown diagnostic names: {}. 
Available diagnostics: {}", + invalid_names.join(", "), + available_names.join(", ") + ))); + } + + all_providers + .into_iter() + .filter(|p| { + request + .options + .diagnostic_names + .contains(&p.name().to_string()) + }) + .collect() + }; + + // Separate system-wide from component-specific providers + let (system_wide, component_specific): (Vec<_>, Vec<_>) = + providers.into_iter().partition(|p| p.is_system_wide()); + + let mut join_set = JoinSet::new(); + let config = config.clone(); + let since = request.options.since.clone(); + + // Spawn system-wide providers as concurrent tasks (use first component for context) + if let Some((first_component, _)) = request.components.first() { + let first_component = first_component.clone(); + for provider in system_wide { + let config = config.clone(); + let component = first_component.clone(); + let since = since.clone(); + let provider_name = provider.name().to_string(); + + join_set.spawn(async move { + let result = provider + .diagnose(&component, None, &config, since.as_deref()) + .await; + + (provider_name, result) + }); + } + } + + // Spawn component-specific providers as concurrent tasks + // We need to collect (component, provider) pairs to spawn since we can't borrow provider + let mut tasks_to_spawn = Vec::new(); + + for (component, engine) in request.components { + for provider in &component_specific { + // Check if provider is applicable to this component + if !provider.applicable_to(&component, engine.as_ref()) { + continue; + } + + tasks_to_spawn.push(( + component.clone(), + engine.clone(), + provider.name().to_string(), + )); + } + } + + // Now spawn tasks with recreated providers for each task + for (component, engine, provider_name) in tasks_to_spawn { + let config = config.clone(); + let since = since.clone(); + + // Get a fresh provider instance for this task + let provider = get_provider(&provider_name); + + join_set.spawn(async move { + let result = if let Some(provider) = provider { + provider + .diagnose(&component, engine.as_ref(), &config, since.as_deref()) + .await + } else { + // This shouldn't happen since we just got the name from a valid provider + Err(DiagnosticError::InvalidParameter(format!( + "Provider {} not found", + provider_name + ))) + }; + + (provider_name, result) + }); + } + + // Collect results as they complete + let mut all_issues = Vec::new(); + + while let Some(join_result) = join_set.join_next().await { + match join_result { + Ok((provider_name, diagnostic_result)) => match diagnostic_result { + Ok(issues) => all_issues.extend(issues), + Err(e) => { + // Log error but continue with other providers + log::warn!("Provider {} failed: {}", provider_name, e); + } + }, + Err(e) => { + // Task panicked or was cancelled + log::error!("Diagnostic task failed: {}", e); + } + } + } + + // Filter issues by minimum severity + let filtered_issues: Vec = all_issues + .into_iter() + .filter(|issue| request.options.min_severity.includes(&issue.severity)) + .collect(); + + Ok(DiagnosticOutput::new( + InfrastructureType::ClickHouse, + filtered_issues, + )) +} + +#[cfg(test)] +pub mod test_providers { + use super::*; + use serde_json::json; + + /// Mock diagnostic provider that returns predictable issues for testing + /// + /// This provider can be configured to return specific issues without requiring + /// a real ClickHouse connection, making it useful for testing the orchestration + /// layer and MCP integration. 
+ pub struct MockDiagnostic { + pub name: String, + pub system_wide: bool, + pub issues_to_return: Vec, + } + + impl MockDiagnostic { + /// Create a mock that returns specific issues + pub fn with_issues(name: &str, issues: Vec) -> Self { + Self { + name: name.to_string(), + system_wide: false, + issues_to_return: issues, + } + } + + /// Create a mock that returns an error issue + pub fn with_error(component_name: &str) -> Self { + let mut details = Map::new(); + details.insert("test_field".to_string(), json!("test_value")); + details.insert("count".to_string(), json!(42)); + + Self::with_issues( + "mock_diagnostic", + vec![Issue { + severity: Severity::Error, + source: "mock_source".to_string(), + component: Component { + component_type: "table".to_string(), + name: component_name.to_string(), + metadata: HashMap::new(), + }, + error_type: "mock_error".to_string(), + message: format!("Test error for {}", component_name), + details, + suggested_action: "Fix the mock issue".to_string(), + related_queries: vec![ + format!("SELECT * FROM {}", component_name), + "SHOW CREATE TABLE".to_string(), + ], + }], + ) + } + + /// Create a mock that returns a warning issue + pub fn with_warning(component_name: &str) -> Self { + let mut details = Map::new(); + details.insert("threshold".to_string(), json!(100)); + + Self::with_issues( + "mock_warning", + vec![Issue { + severity: Severity::Warning, + source: "mock_source".to_string(), + component: Component { + component_type: "table".to_string(), + name: component_name.to_string(), + metadata: HashMap::new(), + }, + error_type: "mock_warning".to_string(), + message: format!("Test warning for {}", component_name), + details, + suggested_action: "Monitor the situation".to_string(), + related_queries: vec![], + }], + ) + } + + /// Create a mock that always succeeds with no issues + pub fn always_healthy() -> Self { + Self::with_issues("healthy_mock", vec![]) + } + + /// Create a system-wide mock provider + pub fn system_wide(name: &str, issues: Vec) -> Self { + Self { + name: name.to_string(), + system_wide: true, + issues_to_return: issues, + } + } + } + + #[async_trait::async_trait] + impl DiagnosticProvider for MockDiagnostic { + fn name(&self) -> &str { + &self.name + } + + fn applicable_to(&self, _: &Component, _: Option<&ClickhouseEngine>) -> bool { + true + } + + fn is_system_wide(&self) -> bool { + self.system_wide + } + + async fn diagnose( + &self, + _component: &Component, + _engine: Option<&ClickhouseEngine>, + _config: &ClickHouseConfig, + _since: Option<&str>, + ) -> Result, DiagnosticError> { + Ok(self.issues_to_return.clone()) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_mock_diagnostic_with_error() { + let mock = test_providers::MockDiagnostic::with_error("test_table"); + let config = ClickHouseConfig { + host: "localhost".to_string(), + host_port: 8123, + native_port: 9000, + db_name: "test_db".to_string(), + use_ssl: false, + user: "default".to_string(), + password: "".to_string(), + host_data_path: None, + additional_databases: Vec::new(), + clusters: None, + }; + + let component = Component { + component_type: "table".to_string(), + name: "test_table".to_string(), + metadata: HashMap::new(), + }; + + let issues = mock + .diagnose(&component, None, &config, None) + .await + .unwrap(); + + assert_eq!(issues.len(), 1); + assert_eq!(issues[0].severity, Severity::Error); + assert_eq!(issues[0].error_type, "mock_error"); + assert_eq!(issues[0].component.name, "test_table"); + 
assert_eq!(issues[0].related_queries.len(), 2); + } + + #[tokio::test] + async fn test_mock_diagnostic_always_healthy() { + let mock = test_providers::MockDiagnostic::always_healthy(); + let config = ClickHouseConfig { + host: "localhost".to_string(), + host_port: 8123, + native_port: 9000, + db_name: "test_db".to_string(), + use_ssl: false, + user: "default".to_string(), + password: "".to_string(), + host_data_path: None, + additional_databases: Vec::new(), + clusters: None, + }; + + let component = Component { + component_type: "table".to_string(), + name: "test_table".to_string(), + metadata: HashMap::new(), + }; + + let issues = mock + .diagnose(&component, None, &config, None) + .await + .unwrap(); + assert_eq!(issues.len(), 0); + } + + #[test] + fn test_severity_includes() { + // Info includes all severities + assert!(Severity::Info.includes(&Severity::Error)); + assert!(Severity::Info.includes(&Severity::Warning)); + assert!(Severity::Info.includes(&Severity::Info)); + + // Warning includes warning and error + assert!(Severity::Warning.includes(&Severity::Error)); + assert!(Severity::Warning.includes(&Severity::Warning)); + assert!(!Severity::Warning.includes(&Severity::Info)); + + // Error includes only error + assert!(Severity::Error.includes(&Severity::Error)); + assert!(!Severity::Error.includes(&Severity::Warning)); + assert!(!Severity::Error.includes(&Severity::Info)); + } + + #[test] + fn test_severity_filtering() { + let mut details = Map::new(); + details.insert("level".to_string(), serde_json::json!("test")); + + let issues = [ + Issue { + severity: Severity::Error, + component: Component { + component_type: "table".to_string(), + name: "test".to_string(), + metadata: HashMap::new(), + }, + source: "test".to_string(), + error_type: "error_type".to_string(), + message: "Error".to_string(), + details: details.clone(), + suggested_action: "Fix".to_string(), + related_queries: vec![], + }, + Issue { + severity: Severity::Warning, + component: Component { + component_type: "table".to_string(), + name: "test".to_string(), + metadata: HashMap::new(), + }, + source: "test".to_string(), + error_type: "warning_type".to_string(), + message: "Warning".to_string(), + details: details.clone(), + suggested_action: "Check".to_string(), + related_queries: vec![], + }, + Issue { + severity: Severity::Info, + component: Component { + component_type: "table".to_string(), + name: "test".to_string(), + metadata: HashMap::new(), + }, + source: "test".to_string(), + error_type: "info_type".to_string(), + message: "Info".to_string(), + details, + suggested_action: "Note".to_string(), + related_queries: vec![], + }, + ]; + + // Filter for errors only + let filtered: Vec<_> = issues + .iter() + .filter(|i| Severity::Error.includes(&i.severity)) + .collect(); + + assert_eq!(filtered.len(), 1); + assert_eq!(filtered[0].severity, Severity::Error); + + // Filter for warnings and above + let filtered: Vec<_> = issues + .iter() + .filter(|i| Severity::Warning.includes(&i.severity)) + .collect(); + + assert_eq!(filtered.len(), 2); + + // Filter for all (info and above) + let filtered: Vec<_> = issues + .iter() + .filter(|i| Severity::Info.includes(&i.severity)) + .collect(); + + assert_eq!(filtered.len(), 3); + } + + #[test] + fn test_diagnostic_output_summary() { + let issues = vec![ + Issue { + severity: Severity::Error, + source: "mutations".to_string(), + component: Component { + component_type: "table".to_string(), + name: "users".to_string(), + metadata: HashMap::new(), + }, + error_type: 
"stuck_mutation".to_string(), + message: "Mutation stuck".to_string(), + details: Map::new(), + suggested_action: "Fix".to_string(), + related_queries: vec![], + }, + Issue { + severity: Severity::Warning, + source: "parts".to_string(), + component: Component { + component_type: "table".to_string(), + name: "users".to_string(), + metadata: HashMap::new(), + }, + error_type: "too_many_parts".to_string(), + message: "Too many parts".to_string(), + details: Map::new(), + suggested_action: "Wait for merge".to_string(), + related_queries: vec![], + }, + Issue { + severity: Severity::Error, + source: "replication".to_string(), + component: Component { + component_type: "table".to_string(), + name: "events".to_string(), + metadata: HashMap::new(), + }, + error_type: "replication_lag".to_string(), + message: "Replication lagging".to_string(), + details: Map::new(), + suggested_action: "Check network".to_string(), + related_queries: vec![], + }, + ]; + + let output = DiagnosticOutput::new(InfrastructureType::ClickHouse, issues); + + assert_eq!(output.summary.total_issues, 3); + assert_eq!(output.summary.by_severity.get("error"), Some(&2)); + assert_eq!(output.summary.by_severity.get("warning"), Some(&1)); + assert_eq!(output.summary.by_component.get("users"), Some(&2)); + assert_eq!(output.summary.by_component.get("events"), Some(&1)); + } + + #[tokio::test] + async fn test_concurrent_diagnostics_execution() { + use std::sync::atomic::{AtomicU32, Ordering}; + use std::sync::Arc; + use tokio::time::{sleep, Duration}; + + // Mock provider that tracks execution order + struct ConcurrentTestProvider { + name: String, + delay_ms: u64, + execution_counter: Arc, + execution_order: Arc, + } + + #[async_trait::async_trait] + impl DiagnosticProvider for ConcurrentTestProvider { + fn name(&self) -> &str { + &self.name + } + + fn applicable_to(&self, _: &Component, _: Option<&ClickhouseEngine>) -> bool { + true + } + + async fn diagnose( + &self, + _: &Component, + _: Option<&ClickhouseEngine>, + _: &ClickHouseConfig, + _: Option<&str>, + ) -> Result, DiagnosticError> { + // Simulate work with delay + sleep(Duration::from_millis(self.delay_ms)).await; + + // Track when this provider finished (not when it started) + let order = self.execution_counter.fetch_add(1, Ordering::SeqCst); + self.execution_order.store(order, Ordering::SeqCst); + + Ok(vec![]) + } + } + + // Test that fast provider completes before slow provider + // This proves concurrent execution (vs serial which would have slow finish first) + let execution_counter = Arc::new(AtomicU32::new(0)); + let slow_order = Arc::new(AtomicU32::new(0)); + let fast_order = Arc::new(AtomicU32::new(0)); + + let config = ClickHouseConfig { + host: "localhost".to_string(), + host_port: 8123, + native_port: 9000, + db_name: "test_db".to_string(), + use_ssl: false, + user: "default".to_string(), + password: "".to_string(), + host_data_path: None, + additional_databases: Vec::new(), + clusters: None, + }; + + // Note: This test demonstrates the concurrent execution pattern, + // but can't actually test it without modifying run_diagnostics to accept custom providers. 
+ // The actual concurrency is tested via observing real-world behavior (fast diagnostics return quickly) + + // For now, just verify the mock providers work + let slow = ConcurrentTestProvider { + name: "slow".to_string(), + delay_ms: 100, + execution_counter: execution_counter.clone(), + execution_order: slow_order.clone(), + }; + + let fast = ConcurrentTestProvider { + name: "fast".to_string(), + delay_ms: 10, + execution_counter: execution_counter.clone(), + execution_order: fast_order.clone(), + }; + + let component = Component { + component_type: "table".to_string(), + name: "test".to_string(), + metadata: HashMap::new(), + }; + + // Run them serially to establish baseline + let _ = slow.diagnose(&component, None, &config, None).await; + let _ = fast.diagnose(&component, None, &config, None).await; + + // In serial execution: slow finishes first (order=0), fast second (order=1) + assert_eq!(slow_order.load(Ordering::SeqCst), 0); + assert_eq!(fast_order.load(Ordering::SeqCst), 1); + } + + #[tokio::test] + async fn test_invalid_diagnostic_names_return_error() { + let config = ClickHouseConfig { + db_name: "test".to_string(), + host: "localhost".to_string(), + host_port: 8123, + native_port: 9000, + ..Default::default() + }; + + let component = Component { + component_type: "table".to_string(), + name: "test_table".to_string(), + metadata: HashMap::new(), + }; + + // Test with invalid diagnostic name + let request = DiagnosticRequest { + components: vec![(component.clone(), None)], + options: DiagnosticOptions { + diagnostic_names: vec!["invalid_diagnostic".to_string()], + min_severity: Severity::Info, + since: None, + }, + }; + + let result = run_diagnostics(request, &config).await; + assert!(result.is_err()); + + if let Err(DiagnosticError::InvalidParameter(msg)) = result { + assert!(msg.contains("invalid_diagnostic")); + assert!(msg.contains("Available diagnostics:")); + } else { + panic!("Expected InvalidParameter error"); + } + + // Test with mix of valid and invalid names + let request = DiagnosticRequest { + components: vec![(component.clone(), None)], + options: DiagnosticOptions { + diagnostic_names: vec![ + "MutationDiagnostic".to_string(), // Valid name + "invalid_one".to_string(), + "invalid_two".to_string(), + ], + min_severity: Severity::Info, + since: None, + }, + }; + + let result = run_diagnostics(request, &config).await; + assert!(result.is_err()); + + if let Err(DiagnosticError::InvalidParameter(msg)) = result { + assert!(msg.contains("invalid_one")); + assert!(msg.contains("invalid_two")); + assert!(msg.contains("Unknown diagnostic names:")); + assert!(!msg.contains("MutationDiagnostic, invalid")); // Valid name not listed as invalid + } else { + panic!("Expected InvalidParameter error"); + } + } +} diff --git a/apps/framework-cli/src/mcp/tools/infra_issues/mutations.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mutations.rs similarity index 75% rename from apps/framework-cli/src/mcp/tools/infra_issues/mutations.rs rename to apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mutations.rs index b7e34f14fe..bce0079981 100644 --- a/apps/framework-cli/src/mcp/tools/infra_issues/mutations.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mutations.rs @@ -3,7 +3,7 @@ use log::debug; use serde_json::{json, Map, Value}; -use super::{Component, DiagnoseError, DiagnosticProvider, Issue, Severity}; +use super::{Component, DiagnosticError, DiagnosticProvider, Issue, Severity}; use 
crate::infrastructure::olap::clickhouse::client::ClickHouseClient; use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; @@ -12,65 +12,39 @@ use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; const DIAGNOSTIC_QUERY_TIMEOUT_SECS: u64 = 30; /// Diagnostic provider for checking stuck or failed mutations -pub struct MutationDiagnostic; - -#[async_trait::async_trait] -impl DiagnosticProvider for MutationDiagnostic { - fn name(&self) -> &str { - "MutationDiagnostic" - } - - fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { - // Mutations can occur on any table - true +/// +/// Use `MutationDiagnostic::new()` or `Default::default()` to construct. +#[derive(Default)] +pub struct MutationDiagnostic(()); + +impl MutationDiagnostic { + /// Create a new MutationDiagnostic provider + pub const fn new() -> Self { + Self(()) } - async fn diagnose( - &self, + /// Parse the ClickHouse JSON response and extract mutation issues + /// + /// # Arguments + /// * `json_response` - The raw JSON string from ClickHouse + /// * `component` - The component being diagnosed + /// * `db_name` - Database name for generating related queries + /// + /// # Returns + /// Vector of issues found in the response + pub fn parse_response( + json_response: &str, component: &Component, - _engine: Option<&ClickhouseEngine>, - config: &ClickHouseConfig, - _since: Option<&str>, - ) -> Result, DiagnoseError> { - let client = ClickHouseClient::new(config) - .map_err(|e| DiagnoseError::ClickHouseConnection(format!("{}", e)))?; + db_name: &str, + ) -> Result, DiagnosticError> { + let json_value: Value = serde_json::from_str(json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; - let query = format!( - "SELECT - mutation_id, - command, - create_time, - is_done, - latest_failed_part, - latest_fail_time, - latest_fail_reason - FROM system.mutations - WHERE database = '{}' AND table = '{}' - AND (is_done = 0 OR latest_fail_reason != '') - ORDER BY create_time DESC - FORMAT JSON", - config.db_name, component.name - ); - - debug!("Executing mutations query: {}", query); - - let result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(&query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? 
- .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; - - // Parse ClickHouse JSON response - let json_response: Value = serde_json::from_str(&result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; - - let data = json_response + let data = json_value .get("data") .and_then(|v| v.as_array()) .ok_or_else(|| { - DiagnoseError::ParseError("Missing 'data' field in response".to_string()) + DiagnosticError::ParseError("Missing 'data' field in response".to_string()) })?; let mut issues = Vec::new(); @@ -130,7 +104,7 @@ impl DiagnosticProvider for MutationDiagnostic { let related_queries = vec![ format!( "SELECT * FROM system.mutations WHERE database = '{}' AND table = '{}' AND mutation_id = '{}'", - config.db_name, component.name, mutation_id + db_name, component.name, mutation_id ), format!("KILL MUTATION WHERE mutation_id = '{}'", mutation_id), ]; @@ -150,3 +124,55 @@ impl DiagnosticProvider for MutationDiagnostic { Ok(issues) } } + +#[async_trait::async_trait] +impl DiagnosticProvider for MutationDiagnostic { + fn name(&self) -> &str { + "MutationDiagnostic" + } + + fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { + // Mutations can occur on any table + true + } + + async fn diagnose( + &self, + component: &Component, + _engine: Option<&ClickhouseEngine>, + config: &ClickHouseConfig, + _since: Option<&str>, + ) -> Result, DiagnosticError> { + let client = ClickHouseClient::new(config) + .map_err(|e| DiagnosticError::ConnectionFailed(format!("{}", e)))?; + + let query = format!( + "SELECT + mutation_id, + command, + create_time, + is_done, + latest_failed_part, + latest_fail_time, + latest_fail_reason + FROM system.mutations + WHERE database = '{}' AND table = '{}' + AND (is_done = 0 OR latest_fail_reason != '') + ORDER BY create_time DESC + FORMAT JSON", + config.db_name, component.name + ); + + debug!("Executing mutations query: {}", query); + + let result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(&query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? 
+ .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + Self::parse_response(&result, component, &config.db_name) + } +} diff --git a/apps/framework-cli/src/mcp/tools/infra_issues/parts.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/parts.rs similarity index 70% rename from apps/framework-cli/src/mcp/tools/infra_issues/parts.rs rename to apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/parts.rs index 8e88e63c9a..926209e881 100644 --- a/apps/framework-cli/src/mcp/tools/infra_issues/parts.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/parts.rs @@ -3,7 +3,7 @@ use log::debug; use serde_json::{json, Map, Value}; -use super::{Component, DiagnoseError, DiagnosticProvider, Issue, Severity}; +use super::{Component, DiagnosticError, DiagnosticProvider, Issue, Severity}; use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; @@ -12,63 +12,39 @@ use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; const DIAGNOSTIC_QUERY_TIMEOUT_SECS: u64 = 30; /// Diagnostic provider for checking data parts issues -pub struct PartsDiagnostic; - -#[async_trait::async_trait] -impl DiagnosticProvider for PartsDiagnostic { - fn name(&self) -> &str { - "PartsDiagnostic" +/// +/// Use `PartsDiagnostic::new()` or `Default::default()` to construct. +#[derive(Default)] +pub struct PartsDiagnostic(()); + +impl PartsDiagnostic { + /// Create a new PartsDiagnostic provider + pub const fn new() -> Self { + Self(()) } - fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { - // Parts are relevant for all MergeTree tables - true - } - - async fn diagnose( - &self, + /// Parse the ClickHouse JSON response and extract parts issues + /// + /// # Arguments + /// * `json_response` - The raw JSON string from ClickHouse + /// * `component` - The component being diagnosed + /// * `db_name` - Database name for generating related queries + /// + /// # Returns + /// Vector of issues found in the response + pub fn parse_response( + json_response: &str, component: &Component, - _engine: Option<&ClickhouseEngine>, - config: &ClickHouseConfig, - _since: Option<&str>, - ) -> Result, DiagnoseError> { - let client = ClickHouseClient::new(config) - .map_err(|e| DiagnoseError::ClickHouseConnection(format!("{}", e)))?; - - // Check for excessive parts count per partition - let query = format!( - "SELECT - partition, - count() as part_count, - sum(rows) as total_rows, - sum(bytes_on_disk) as total_bytes - FROM system.parts - WHERE database = '{}' AND table = '{}' AND active = 1 - GROUP BY partition - HAVING part_count > 100 - ORDER BY part_count DESC - FORMAT JSON", - config.db_name, component.name - ); - - debug!("Executing parts query: {}", query); + db_name: &str, + ) -> Result, DiagnosticError> { + let json_value: Value = serde_json::from_str(json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; - let result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(&query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? 
- .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; - - let json_response: Value = serde_json::from_str(&result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; - - let data = json_response + let data = json_value .get("data") .and_then(|v| v.as_array()) .ok_or_else(|| { - DiagnoseError::ParseError("Missing 'data' field in response".to_string()) + DiagnosticError::ParseError("Missing 'data' field in response".to_string()) })?; let mut issues = Vec::new(); @@ -110,16 +86,16 @@ impl DiagnosticProvider for PartsDiagnostic { details, suggested_action: format!( "Run OPTIMIZE TABLE to merge parts: OPTIMIZE TABLE {}.{} PARTITION '{}'", - config.db_name, component.name, partition + db_name, component.name, partition ), related_queries: vec![ format!( "SELECT * FROM system.parts WHERE database = '{}' AND table = '{}' AND partition = '{}' AND active = 1", - config.db_name, component.name, partition + db_name, component.name, partition ), format!( "OPTIMIZE TABLE {}.{} PARTITION '{}'", - config.db_name, component.name, partition + db_name, component.name, partition ), ], }); @@ -128,3 +104,54 @@ impl DiagnosticProvider for PartsDiagnostic { Ok(issues) } } + +#[async_trait::async_trait] +impl DiagnosticProvider for PartsDiagnostic { + fn name(&self) -> &str { + "PartsDiagnostic" + } + + fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { + // Parts are relevant for all MergeTree tables + true + } + + async fn diagnose( + &self, + component: &Component, + _engine: Option<&ClickhouseEngine>, + config: &ClickHouseConfig, + _since: Option<&str>, + ) -> Result, DiagnosticError> { + let client = ClickHouseClient::new(config) + .map_err(|e| DiagnosticError::ConnectionFailed(format!("{}", e)))?; + + // Check for excessive parts count per partition + let query = format!( + "SELECT + partition, + count() as part_count, + sum(rows) as total_rows, + sum(bytes_on_disk) as total_bytes + FROM system.parts + WHERE database = '{}' AND table = '{}' AND active = 1 + GROUP BY partition + HAVING part_count > 100 + ORDER BY part_count DESC + FORMAT JSON", + config.db_name, component.name + ); + + debug!("Executing parts query: {}", query); + + let result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(&query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? 
+ .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + Self::parse_response(&result, component, &config.db_name) + } +} diff --git a/apps/framework-cli/src/mcp/tools/infra_issues/replication.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/replication.rs similarity index 77% rename from apps/framework-cli/src/mcp/tools/infra_issues/replication.rs rename to apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/replication.rs index 7b6a0e203b..79020926d0 100644 --- a/apps/framework-cli/src/mcp/tools/infra_issues/replication.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/replication.rs @@ -3,7 +3,7 @@ use log::debug; use serde_json::{json, Map, Value}; -use super::{Component, DiagnoseError, DiagnosticProvider, Issue, Severity}; +use super::{Component, DiagnosticError, DiagnosticProvider, Issue, Severity}; use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; @@ -12,63 +12,27 @@ use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; const DIAGNOSTIC_QUERY_TIMEOUT_SECS: u64 = 30; /// Diagnostic provider for checking replication health -pub struct ReplicationDiagnostic; - -#[async_trait::async_trait] -impl DiagnosticProvider for ReplicationDiagnostic { - fn name(&self) -> &str { - "ReplicationDiagnostic" - } - - fn applicable_to(&self, _component: &Component, engine: Option<&ClickhouseEngine>) -> bool { - // Only applicable to Replicated* tables - matches!( - engine, - Some(ClickhouseEngine::ReplicatedMergeTree { .. }) - | Some(ClickhouseEngine::ReplicatedReplacingMergeTree { .. }) - | Some(ClickhouseEngine::ReplicatedAggregatingMergeTree { .. }) - | Some(ClickhouseEngine::ReplicatedSummingMergeTree { .. }) - ) +/// +/// Use `ReplicationDiagnostic::new()` or `Default::default()` to construct. +#[derive(Default)] +pub struct ReplicationDiagnostic(()); + +impl ReplicationDiagnostic { + /// Create a new ReplicationDiagnostic provider + pub const fn new() -> Self { + Self(()) } - async fn diagnose( - &self, + /// Parse queue size response and extract backlog issues + pub fn parse_queue_size_response( + json_response: &str, component: &Component, - _engine: Option<&ClickhouseEngine>, - config: &ClickHouseConfig, - _since: Option<&str>, - ) -> Result, DiagnoseError> { - let client = ClickHouseClient::new(config) - .map_err(|e| DiagnoseError::ClickHouseConnection(format!("{}", e)))?; - - let mut issues = Vec::new(); - - // First check for large queue backlogs (indicates stopped or slow replication) - let queue_size_query = format!( - "SELECT count() as queue_size - FROM system.replication_queue - WHERE database = '{}' AND table = '{}' - FORMAT JSON", - config.db_name, component.name - ); - - debug!( - "Executing replication queue size query: {}", - queue_size_query - ); - - let queue_size_result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(&queue_size_query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? 
- .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; - - let queue_size_json: Value = serde_json::from_str(&queue_size_result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; + db_name: &str, + ) -> Result, DiagnosticError> { + let json_value: Value = serde_json::from_str(json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; - let queue_size = queue_size_json + let queue_size = json_value .get("data") .and_then(|v| v.as_array()) .and_then(|arr| arr.first()) @@ -76,6 +40,8 @@ impl DiagnosticProvider for ReplicationDiagnostic { .and_then(|v| v.as_u64()) .unwrap_or(0); + let mut issues = Vec::new(); + // Report large queue backlogs (potential stopped replication) if queue_size > 10 { let severity = if queue_size > 50 { @@ -101,54 +67,38 @@ impl DiagnosticProvider for ReplicationDiagnostic { related_queries: vec![ format!( "SELECT * FROM system.replication_queue WHERE database = '{}' AND table = '{}'", - config.db_name, component.name + db_name, component.name ), format!( "SELECT * FROM system.replicas WHERE database = '{}' AND table = '{}'", - config.db_name, component.name + db_name, component.name ), - format!("SYSTEM START REPLICATION QUEUES {}.{}", config.db_name, component.name), + format!("SYSTEM START REPLICATION QUEUES {}.{}", db_name, component.name), ], }); } - // Check replication queue for stuck entries (retries or exceptions) - let queue_query = format!( - "SELECT - type, - source_replica, - create_time, - num_tries, - last_exception - FROM system.replication_queue - WHERE database = '{}' AND table = '{}' - AND (num_tries > 3 OR last_exception != '') - ORDER BY create_time ASC - LIMIT 20 - FORMAT JSON", - config.db_name, component.name - ); - - debug!("Executing replication queue query: {}", queue_query); - - let result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(&queue_query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? 
- .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; + Ok(issues) + } - let json_response: Value = serde_json::from_str(&result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; + /// Parse replication queue entries and extract stuck entry issues + pub fn parse_queue_entries_response( + json_response: &str, + component: &Component, + db_name: &str, + ) -> Result, DiagnosticError> { + let json_value: Value = serde_json::from_str(json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; - let data = json_response + let data = json_value .get("data") .and_then(|v| v.as_array()) .ok_or_else(|| { - DiagnoseError::ParseError("Missing 'data' field in response".to_string()) + DiagnosticError::ParseError("Missing 'data' field in response".to_string()) })?; + let mut issues = Vec::new(); + for row in data { let entry_type = row .get("type") @@ -201,47 +151,31 @@ impl DiagnosticProvider for ReplicationDiagnostic { related_queries: vec![ format!( "SELECT * FROM system.replication_queue WHERE database = '{}' AND table = '{}'", - config.db_name, component.name + db_name, component.name ), format!( "SELECT * FROM system.replicas WHERE database = '{}' AND table = '{}'", - config.db_name, component.name + db_name, component.name ), ], }); } - // Also check replica health status - let replica_query = format!( - "SELECT - is_readonly, - is_session_expired, - future_parts, - parts_to_check, - queue_size, - inserts_in_queue, - merges_in_queue, - absolute_delay - FROM system.replicas - WHERE database = '{}' AND table = '{}' - FORMAT JSON", - config.db_name, component.name - ); - - debug!("Executing replicas query: {}", replica_query); + Ok(issues) + } - let replica_result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(&replica_query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? 
- .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; + /// Parse replica health status and extract health issues + pub fn parse_replica_health_response( + json_response: &str, + component: &Component, + db_name: &str, + ) -> Result, DiagnosticError> { + let json_value: Value = serde_json::from_str(json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; - let replica_json: Value = serde_json::from_str(&replica_result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; + let mut issues = Vec::new(); - if let Some(replica_data) = replica_json.get("data").and_then(|v| v.as_array()) { + if let Some(replica_data) = json_value.get("data").and_then(|v| v.as_array()) { for row in replica_data { let is_readonly = row.get("is_readonly").and_then(|v| v.as_u64()).unwrap_or(0); let is_session_expired = row @@ -310,9 +244,9 @@ impl DiagnosticProvider for ReplicationDiagnostic { related_queries: vec![ format!( "SELECT * FROM system.replicas WHERE database = '{}' AND table = '{}'", - config.db_name, component.name + db_name, component.name ), - format!("SYSTEM RESTART REPLICA {}.{}", config.db_name, component.name), + format!("SYSTEM RESTART REPLICA {}.{}", db_name, component.name), ], }); } @@ -322,3 +256,130 @@ impl DiagnosticProvider for ReplicationDiagnostic { Ok(issues) } } + +#[async_trait::async_trait] +impl DiagnosticProvider for ReplicationDiagnostic { + fn name(&self) -> &str { + "ReplicationDiagnostic" + } + + fn applicable_to(&self, _component: &Component, engine: Option<&ClickhouseEngine>) -> bool { + // Only applicable to Replicated* tables + matches!( + engine, + Some(ClickhouseEngine::ReplicatedMergeTree { .. }) + | Some(ClickhouseEngine::ReplicatedReplacingMergeTree { .. }) + | Some(ClickhouseEngine::ReplicatedAggregatingMergeTree { .. }) + | Some(ClickhouseEngine::ReplicatedSummingMergeTree { .. }) + ) + } + + async fn diagnose( + &self, + component: &Component, + _engine: Option<&ClickhouseEngine>, + config: &ClickHouseConfig, + _since: Option<&str>, + ) -> Result, DiagnosticError> { + let client = ClickHouseClient::new(config) + .map_err(|e| DiagnosticError::ConnectionFailed(format!("{}", e)))?; + + let mut issues = Vec::new(); + + // First check for large queue backlogs (indicates stopped or slow replication) + let queue_size_query = format!( + "SELECT count() as queue_size + FROM system.replication_queue + WHERE database = '{}' AND table = '{}' + FORMAT JSON", + config.db_name, component.name + ); + + debug!( + "Executing replication queue size query: {}", + queue_size_query + ); + + let queue_size_result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(&queue_size_query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? 
+ .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + issues.extend(Self::parse_queue_size_response( + &queue_size_result, + component, + &config.db_name, + )?); + + // Check replication queue for stuck entries (retries or exceptions) + let queue_query = format!( + "SELECT + type, + source_replica, + create_time, + num_tries, + last_exception + FROM system.replication_queue + WHERE database = '{}' AND table = '{}' + AND (num_tries > 3 OR last_exception != '') + ORDER BY create_time ASC + LIMIT 20 + FORMAT JSON", + config.db_name, component.name + ); + + debug!("Executing replication queue query: {}", queue_query); + + let result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(&queue_query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? + .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + issues.extend(Self::parse_queue_entries_response( + &result, + component, + &config.db_name, + )?); + + // Also check replica health status + let replica_query = format!( + "SELECT + is_readonly, + is_session_expired, + future_parts, + parts_to_check, + queue_size, + inserts_in_queue, + merges_in_queue, + absolute_delay + FROM system.replicas + WHERE database = '{}' AND table = '{}' + FORMAT JSON", + config.db_name, component.name + ); + + debug!("Executing replicas query: {}", replica_query); + + let replica_result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(&replica_query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? + .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + issues.extend(Self::parse_replica_health_response( + &replica_result, + component, + &config.db_name, + )?); + + Ok(issues) + } +} diff --git a/apps/framework-cli/src/mcp/tools/infra_issues/s3queue.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/s3queue.rs similarity index 71% rename from apps/framework-cli/src/mcp/tools/infra_issues/s3queue.rs rename to apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/s3queue.rs index ea2d5ea0f9..37ad704857 100644 --- a/apps/framework-cli/src/mcp/tools/infra_issues/s3queue.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/s3queue.rs @@ -3,7 +3,7 @@ use log::debug; use serde_json::{json, Map, Value}; -use super::{Component, DiagnoseError, DiagnosticProvider, Issue, Severity}; +use super::{Component, DiagnosticError, DiagnosticProvider, Issue, Severity}; use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; @@ -12,64 +12,39 @@ use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; const DIAGNOSTIC_QUERY_TIMEOUT_SECS: u64 = 30; /// Diagnostic provider for checking S3Queue ingestion -pub struct S3QueueDiagnostic; - -#[async_trait::async_trait] -impl DiagnosticProvider for S3QueueDiagnostic { - fn name(&self) -> &str { - "S3QueueDiagnostic" - } - - fn applicable_to(&self, _component: &Component, engine: Option<&ClickhouseEngine>) -> bool { - // Only applicable to S3Queue tables - matches!(engine, Some(ClickhouseEngine::S3Queue { .. })) +/// +/// Use `S3QueueDiagnostic::new()` or `Default::default()` to construct. 
+#[derive(Default)] +pub struct S3QueueDiagnostic(()); + +impl S3QueueDiagnostic { + /// Create a new S3QueueDiagnostic provider + pub const fn new() -> Self { + Self(()) } - async fn diagnose( - &self, + /// Parse the ClickHouse JSON response and extract S3Queue ingestion issues + /// + /// # Arguments + /// * `json_response` - The raw JSON string from ClickHouse + /// * `component` - The component being diagnosed + /// * `db_name` - Database name for generating related queries + /// + /// # Returns + /// Vector of issues found in the response + pub fn parse_response( + json_response: &str, component: &Component, - _engine: Option<&ClickhouseEngine>, - config: &ClickHouseConfig, - _since: Option<&str>, - ) -> Result, DiagnoseError> { - let client = ClickHouseClient::new(config) - .map_err(|e| DiagnoseError::ClickHouseConnection(format!("{}", e)))?; + db_name: &str, + ) -> Result, DiagnosticError> { + let json_value: Value = serde_json::from_str(json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; - // Check for S3Queue ingestion errors - let query = format!( - "SELECT - file_name, - status, - processing_start_time, - processing_end_time, - exception - FROM system.s3queue_log - WHERE database = '{}' AND table = '{}' - AND status IN ('Failed', 'ProcessingFailed') - ORDER BY processing_start_time DESC - LIMIT 20 - FORMAT JSON", - config.db_name, component.name - ); - - debug!("Executing S3Queue query: {}", query); - - let result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(&query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? - .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; - - let json_response: Value = serde_json::from_str(&result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; - - let data = json_response + let data = json_value .get("data") .and_then(|v| v.as_array()) .ok_or_else(|| { - DiagnoseError::ParseError("Missing 'data' field in response".to_string()) + DiagnosticError::ParseError("Missing 'data' field in response".to_string()) })?; let mut issues = Vec::new(); @@ -109,11 +84,11 @@ impl DiagnosticProvider for S3QueueDiagnostic { related_queries: vec![ format!( "SELECT * FROM system.s3queue_log WHERE database = '{}' AND table = '{}' ORDER BY processing_start_time DESC LIMIT 50", - config.db_name, component.name + db_name, component.name ), format!( "SELECT * FROM system.s3queue WHERE database = '{}' AND table = '{}'", - config.db_name, component.name + db_name, component.name ), ], }); @@ -122,3 +97,55 @@ impl DiagnosticProvider for S3QueueDiagnostic { Ok(issues) } } + +#[async_trait::async_trait] +impl DiagnosticProvider for S3QueueDiagnostic { + fn name(&self) -> &str { + "S3QueueDiagnostic" + } + + fn applicable_to(&self, _component: &Component, engine: Option<&ClickhouseEngine>) -> bool { + // Only applicable to S3Queue tables + matches!(engine, Some(ClickhouseEngine::S3Queue { .. 
})) + } + + async fn diagnose( + &self, + component: &Component, + _engine: Option<&ClickhouseEngine>, + config: &ClickHouseConfig, + _since: Option<&str>, + ) -> Result, DiagnosticError> { + let client = ClickHouseClient::new(config) + .map_err(|e| DiagnosticError::ConnectionFailed(format!("{}", e)))?; + + // Check for S3Queue ingestion errors + let query = format!( + "SELECT + file_name, + status, + processing_start_time, + processing_end_time, + exception + FROM system.s3queue_log + WHERE database = '{}' AND table = '{}' + AND status IN ('Failed', 'ProcessingFailed') + ORDER BY processing_start_time DESC + LIMIT 20 + FORMAT JSON", + config.db_name, component.name + ); + + debug!("Executing S3Queue query: {}", query); + + let result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(&query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? + .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + Self::parse_response(&result, component, &config.db_name) + } +} diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/stopped_operations.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/stopped_operations.rs new file mode 100644 index 0000000000..397e44b907 --- /dev/null +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/stopped_operations.rs @@ -0,0 +1,273 @@ +//! Diagnostic provider for checking stopped operations (merges, replication) + +use log::debug; +use serde_json::{json, Map, Value}; + +use super::{Component, DiagnosticError, DiagnosticProvider, Issue, Severity}; +use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; +use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; +use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; + +/// Query timeout for diagnostic checks (30 seconds) +const DIAGNOSTIC_QUERY_TIMEOUT_SECS: u64 = 30; + +/// Diagnostic provider for checking stopped operations (merges, replication) +/// +/// Use `StoppedOperationsDiagnostic::new()` or `Default::default()` to construct. 
+#[derive(Default)] +pub struct StoppedOperationsDiagnostic(()); + +impl StoppedOperationsDiagnostic { + /// Create a new StoppedOperationsDiagnostic provider + pub const fn new() -> Self { + Self(()) + } + + /// Parse parts count and merge count to detect stopped merges + /// + /// # Arguments + /// * `parts_json_response` - JSON response from parts count query + /// * `merges_json_response` - JSON response from merges count query + /// * `component` - The component being diagnosed + /// * `db_name` - Database name for generating related queries + /// + /// # Returns + /// Vector of issues if merges appear to be stopped + pub fn parse_stopped_merges_response( + parts_json_response: &str, + merges_json_response: &str, + component: &Component, + db_name: &str, + ) -> Result, DiagnosticError> { + let parts_json: Value = serde_json::from_str(parts_json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; + + let parts_count = parts_json + .get("data") + .and_then(|v| v.as_array()) + .and_then(|arr| arr.first()) + .and_then(|row| row.get("part_count")) + .and_then(|v| v.as_u64()) + .unwrap_or(0); + + let mut issues = Vec::new(); + + // If we have many parts, check if merges are running + if parts_count > 100 { + let merges_json: Value = serde_json::from_str(merges_json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; + + let merge_count = merges_json + .get("data") + .and_then(|v| v.as_array()) + .and_then(|arr| arr.first()) + .and_then(|row| row.get("merge_count")) + .and_then(|v| v.as_u64()) + .unwrap_or(0); + + // If we have excessive parts but no merges running, merges might be stopped + if merge_count == 0 { + let mut details = Map::new(); + details.insert("part_count".to_string(), json!(parts_count)); + details.insert("active_merges".to_string(), json!(0)); + + issues.push(Issue { + severity: Severity::Warning, + source: "system.parts,system.merges".to_string(), + component: component.clone(), + error_type: "merges_possibly_stopped".to_string(), + message: format!( + "Table has {} active parts but no running merges. Merges may be stopped or throttled.", + parts_count + ), + details, + suggested_action: format!( + "Check if merges were manually stopped with 'SELECT * FROM system.settings WHERE name LIKE \"%merge%\"'. 
Start merges if needed: 'SYSTEM START MERGES {}.{}'", + db_name, component.name + ), + related_queries: vec![ + format!( + "SELECT * FROM system.parts WHERE database = '{}' AND table = '{}' AND active = 1 ORDER BY modification_time DESC LIMIT 20", + db_name, component.name + ), + format!( + "SYSTEM START MERGES {}.{}", + db_name, component.name + ), + ], + }); + } + } + + Ok(issues) + } + + /// Parse replica status to detect stopped replication + /// + /// # Arguments + /// * `json_response` - JSON response from replicas query + /// * `component` - The component being diagnosed + /// * `db_name` - Database name for generating related queries + /// + /// # Returns + /// Vector of issues if replication appears to be stopped + pub fn parse_stopped_replication_response( + json_response: &str, + component: &Component, + db_name: &str, + ) -> Result, DiagnosticError> { + let replicas_json: Value = serde_json::from_str(json_response) + .map_err(|e| DiagnosticError::ParseError(format!("{}", e)))?; + + let mut issues = Vec::new(); + + if let Some(replica_data) = replicas_json.get("data").and_then(|v| v.as_array()) { + for row in replica_data { + let is_readonly = row.get("is_readonly").and_then(|v| v.as_u64()).unwrap_or(0); + let queue_size = row.get("queue_size").and_then(|v| v.as_u64()).unwrap_or(0); + + // If replica is readonly with items in queue, replication might be stopped + if is_readonly == 1 && queue_size > 0 { + let mut details = Map::new(); + details.insert("is_readonly".to_string(), json!(true)); + details.insert("queue_size".to_string(), json!(queue_size)); + + issues.push(Issue { + severity: Severity::Error, + source: "system.replicas".to_string(), + component: component.clone(), + error_type: "replication_stopped".to_string(), + message: format!( + "Replica is in read-only mode with {} items in queue. Replication may be stopped.", + queue_size + ), + details, + suggested_action: format!( + "Investigate why replica is read-only. 
Try restarting replication: 'SYSTEM START REPLICATION QUEUES {}.{}'", + db_name, component.name + ), + related_queries: vec![ + format!( + "SELECT * FROM system.replicas WHERE database = '{}' AND table = '{}'", + db_name, component.name + ), + format!( + "SYSTEM START REPLICATION QUEUES {}.{}", + db_name, component.name + ), + ], + }); + } + } + } + + Ok(issues) + } +} + +#[async_trait::async_trait] +impl DiagnosticProvider for StoppedOperationsDiagnostic { + fn name(&self) -> &str { + "stopped_operations" + } + + fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { + // Applicable to all tables - we check both merges and replication + true + } + + async fn diagnose( + &self, + component: &Component, + engine: Option<&ClickhouseEngine>, + config: &ClickHouseConfig, + _since: Option<&str>, + ) -> Result, DiagnosticError> { + let client = ClickHouseClient::new(config) + .map_err(|e| DiagnosticError::ConnectionFailed(format!("{}", e)))?; + + let mut issues = Vec::new(); + + // Check if merges are stopped for this table + // We can detect this by checking if there are no running merges but many parts + let parts_count_query = format!( + "SELECT count() as part_count + FROM system.parts + WHERE database = '{}' AND table = '{}' AND active = 1 + FORMAT JSON", + config.db_name, component.name + ); + + debug!("Executing parts count query: {}", parts_count_query); + + let parts_result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(&parts_count_query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? + .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + let merges_query = format!( + "SELECT count() as merge_count + FROM system.merges + WHERE database = '{}' AND table = '{}' + FORMAT JSON", + config.db_name, component.name + ); + + debug!("Executing merges query: {}", merges_query); + + let merges_result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(&merges_query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? + .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + issues.extend(Self::parse_stopped_merges_response( + &parts_result, + &merges_result, + component, + &config.db_name, + )?); + + // For replicated tables, check if replication queues are stopped + let is_replicated = matches!( + engine, + Some(ClickhouseEngine::ReplicatedMergeTree { .. }) + | Some(ClickhouseEngine::ReplicatedReplacingMergeTree { .. }) + | Some(ClickhouseEngine::ReplicatedAggregatingMergeTree { .. }) + | Some(ClickhouseEngine::ReplicatedSummingMergeTree { .. }) + ); + + if is_replicated { + let replicas_query = format!( + "SELECT is_readonly, queue_size + FROM system.replicas + WHERE database = '{}' AND table = '{}' + FORMAT JSON", + config.db_name, component.name + ); + + debug!("Executing replicas query: {}", replicas_query); + + let replicas_result = tokio::time::timeout( + std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), + client.execute_sql(&replicas_query), + ) + .await + .map_err(|_| DiagnosticError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? 
+ .map_err(|e| DiagnosticError::QueryFailed(format!("{}", e)))?; + + issues.extend(Self::parse_stopped_replication_response( + &replicas_result, + component, + &config.db_name, + )?); + } + + Ok(issues) + } +} diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs index 3be21d87de..19c07ad5cb 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs @@ -66,6 +66,7 @@ use crate::project::Project; pub mod client; pub mod config; +pub mod diagnostics; pub mod diff_strategy; pub mod errors; pub mod inserter; diff --git a/apps/framework-cli/src/mcp/tools/infra_issues/mod.rs b/apps/framework-cli/src/mcp/tools/infra_issues/mod.rs index c7bf079cc6..49fdbcabc6 100644 --- a/apps/framework-cli/src/mcp/tools/infra_issues/mod.rs +++ b/apps/framework-cli/src/mcp/tools/infra_issues/mod.rs @@ -7,120 +7,12 @@ //! Initial implementation focuses on ClickHouse diagnostics with extensible architecture //! for future infrastructure types. //! -//! ## ClickHouse Diagnostic Providers -//! -//! The tool automatically runs multiple diagnostic providers based on table engine types: -//! -//! ### 1. MutationDiagnostic -//! Detects stuck or failing mutations (ALTER operations) that can block table maintenance. -//! - **Source**: `system.mutations` -//! - **Detection**: Mutations not done (is_done = 0) or with non-empty failure reasons -//! - **Thresholds**: -//! - Error: Mutation has a failure reason (latest_fail_reason not empty) -//! - Warning: Mutation in progress but not completed (is_done = 0) -//! - **Suggested Action**: Cancel stuck mutations with KILL MUTATION -//! -//! ### 2. PartsDiagnostic -//! Identifies excessive data parts per partition that impact query performance. -//! - **Source**: `system.parts` -//! - **Detection**: Active parts count per partition > 100 -//! - **Thresholds**: -//! - Error: part_count > 300 -//! - Warning: 100 < part_count ≤ 300 -//! - **Suggested Action**: Run OPTIMIZE TABLE to merge parts -//! -//! ### 3. MergeDiagnostic -//! Monitors long-running background merges. -//! - **Source**: `system.merges` -//! - **Detection**: Merges running > 300 seconds -//! - **Thresholds**: -//! - Error: elapsed_time > 1800s (30 minutes) -//! - Warning: 300s < elapsed_time ≤ 1800s -//! - **Note**: Progress is tracked and reported but not used in severity determination -//! - **Suggested Action**: Monitor merge progress and check server resources (CPU, disk I/O, memory) -//! -//! ### 4. ErrorStatsDiagnostic -//! Aggregates errors from ClickHouse system.errors to surface recurring issues. -//! - **Source**: `system.errors` -//! - **Detection**: All errors with count > 0 (reports top 10 by occurrence) -//! - **Thresholds**: -//! - Error: error_count > 100 -//! - Warning: error_count > 10 -//! - Info: 0 < error_count ≤ 10 -//! - **Suggested Action**: Review error messages and recent system changes -//! -//! ### 5. S3QueueDiagnostic (S3Queue tables only) -//! Detects S3Queue ingestion failures and processing issues. -//! - **Source**: `system.s3queue_log` -//! - **Detection**: Failed or ProcessingFailed status entries in S3Queue log -//! - **Threshold**: All failed entries trigger Error severity -//! - **Suggested Action**: Check S3 credentials, permissions, and file formats -//! -//! ### 6. ReplicationDiagnostic (Replicated* tables only) -//! Monitors replication health, queue backlogs, and stuck replication entries. -//! 
- **Sources**: `system.replication_queue`, `system.replicas` -//! - **Detection**: -//! - Large queue backlogs (queue_size > 10 or > 100 for replicas health) -//! - Stuck entries (num_tries > 3 or has exceptions) -//! - Replica health issues (readonly, session_expired, high delay > 300s) -//! - **Thresholds**: -//! - Error: queue_size > 50, num_tries > 10, session_expired, delay > 600s -//! - Warning: queue_size > 10, 3 < num_tries ≤ 10, readonly, 300s < delay ≤ 600s -//! - **Suggested Action**: Check ZooKeeper connectivity, restart replication queues -//! -//! ### 7. MergeFailureDiagnostic -//! Detects system-wide background merge failures that may affect multiple tables. -//! - **Source**: `system.metrics` -//! - **Detection**: FailedBackgroundMerges metric > 0 -//! - **Thresholds**: -//! - Error: failed_merges > 10 -//! - Warning: failed_merges > 0 -//! - **Suggested Action**: Check system.errors for merge failure details, review disk space -//! -//! ### 8. StoppedOperationsDiagnostic -//! Identifies manually stopped or stalled merge/replication operations. -//! - **Sources**: `system.parts`, `system.merges`, `system.replicas` -//! - **Detection**: -//! - Many parts (>100) but no active merges -//! - Replica readonly with pending queue items -//! - **Thresholds**: -//! - Error: Replica readonly with queue items (replication stopped) -//! - Warning: Excessive parts with no merges (merges possibly stopped) -//! - **Suggested Action**: Run SYSTEM START MERGES or SYSTEM START REPLICATION QUEUES -//! -//! ## Query Timeout -//! All diagnostic queries have a 30-second timeout to prevent blocking on slow queries. -//! -//! ## Filtering Options -//! - **Component Filter**: Regex pattern to target specific tables/components -//! - **Severity Filter**: Filter by error, warning, or info (default: info shows all) -//! - **Time Filter**: Filter issues by time range (e.g., "-1h" for last hour) -//! -//! ## Output Format -//! Returns structured JSON with: -//! - \`severity\`: error, warning, or info -//! - \`source\`: System table(s) queried -//! - \`component\`: Affected table/component -//! - \`error_type\`: Category of issue -//! - \`message\`: Human-readable description -//! - \`details\`: Additional context (counts, values) -//! - \`suggested_action\`: Remediation steps -//! - \`related_queries\`: Diagnostic and fix queries - -// Diagnostic provider modules -mod errors; -mod merge_failures; -mod merges; -mod mutations; -mod parts; -mod replication; -mod s3queue; -mod stopped_operations; +//! See the shared `crate::infrastructure::olap::clickhouse::diagnostics` module for +//! detailed documentation on each diagnostic provider. 
use log::{debug, info}; use regex::Regex; use rmcp::model::{CallToolResult, Tool}; -use serde::{Deserialize, Serialize}; use serde_json::{json, Map, Value}; use std::collections::HashMap; use std::sync::Arc; @@ -128,36 +20,19 @@ use std::sync::Arc; use super::{create_error_result, create_success_result}; use crate::framework::core::infrastructure_map::InfrastructureMap; use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; -use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; +use crate::infrastructure::olap::clickhouse::diagnostics::{ + Component, DiagnosticOptions, DiagnosticOutput, DiagnosticRequest, InfrastructureType, Severity, +}; use crate::infrastructure::redis::redis_client::RedisClient; -// Re-export diagnostic providers -pub use errors::ErrorStatsDiagnostic; -pub use merge_failures::MergeFailureDiagnostic; -pub use merges::MergeDiagnostic; -pub use mutations::MutationDiagnostic; -pub use parts::PartsDiagnostic; -pub use replication::ReplicationDiagnostic; -pub use s3queue::S3QueueDiagnostic; -pub use stopped_operations::StoppedOperationsDiagnostic; - -/// Error types for infrastructure diagnostic operations +/// Error types for MCP infrastructure diagnostic operations #[derive(Debug, thiserror::Error)] pub enum DiagnoseError { #[error("Failed to load infrastructure map: {0}")] InfraMapLoad(#[from] anyhow::Error), - #[error("Failed to connect to ClickHouse: {0}")] - ClickHouseConnection(String), - - #[error("Failed to execute diagnostic query: {0}")] - QueryFailed(String), - - #[error("Query timeout after {0} seconds")] - QueryTimeout(u64), - - #[error("Failed to parse query result: {0}")] - ParseError(String), + #[error("Failed to execute diagnostics: {0}")] + DiagnosticFailed(String), #[error("Invalid parameter: {0}")] InvalidParameter(String), @@ -173,25 +48,6 @@ pub enum DiagnoseError { UnsupportedInfrastructureType(String), } -/// Infrastructure type enum -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum InfrastructureType { - ClickHouse, - // Future support: - // Kafka, - // Temporal, -} - -impl InfrastructureType { - fn from_str(s: &str) -> Result { - match s.to_lowercase().as_str() { - "clickhouse" => Ok(InfrastructureType::ClickHouse), - _ => Err(DiagnoseError::UnsupportedInfrastructureType(s.to_string())), - } - } -} - /// Component filter for targeting specific infrastructure components #[derive(Debug, Clone)] pub struct ComponentFilter { @@ -201,13 +57,17 @@ pub struct ComponentFilter { pub component_name: Option, } -/// Severity level for issues -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum Severity { - Error, - Warning, - Info, +/// Parameters for the diagnose_infrastructure MCP tool +#[derive(Debug)] +pub struct DiagnoseInfraParams { + /// Which infrastructure type to diagnose + pub infrastructure_type: InfrastructureType, + /// Optional filter for specific components + pub component_filter: Option, + /// Minimum severity level to report + pub severity: Severity, + /// Optional time filter (e.g., "-1h" for last hour) + pub since: Option, } impl Severity { @@ -223,126 +83,17 @@ impl Severity { ))), } } - - /// Check if this severity should include issues of the given level - fn includes(&self, other: &Severity) -> bool { - match self { - Severity::Info => true, // Info includes all severities - Severity::Warning => matches!(other, Severity::Warning | Severity::Error), - Severity::Error => matches!(other, 
Severity::Error), - } - } -} - -/// Parameters for the diagnose_infrastructure tool -#[derive(Debug)] -pub struct DiagnoseInfraParams { - /// Which infrastructure type to diagnose - pub infrastructure_type: InfrastructureType, - /// Optional filter for specific components - pub component_filter: Option, - /// Minimum severity level to report - pub severity: Severity, - /// Optional time filter (e.g., "-1h" for last hour) - pub since: Option, -} - -/// Component information for issue context -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Component { - pub component_type: String, - pub name: String, - /// Flexible metadata for component-specific context (e.g., database, namespace, cluster) - #[serde(skip_serializing_if = "HashMap::is_empty")] - pub metadata: HashMap, -} - -/// Detailed information about an infrastructure issue -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Issue { - pub severity: Severity, - pub source: String, - pub component: Component, - pub error_type: String, - pub message: String, - pub details: Map, - pub suggested_action: String, - pub related_queries: Vec, -} - -/// Summary statistics for diagnostic results -#[derive(Debug, Serialize, Deserialize)] -pub struct IssueSummary { - pub total_issues: usize, - pub by_severity: HashMap, - pub by_component: HashMap, } -/// Complete diagnostic output -#[derive(Debug, Serialize, Deserialize)] -pub struct DiagnosticOutput { - pub infrastructure_type: InfrastructureType, - pub issues: Vec, - pub summary: IssueSummary, -} - -impl DiagnosticOutput { - /// Create a new diagnostic output and compute summary statistics - pub fn new(infrastructure_type: InfrastructureType, issues: Vec) -> Self { - let mut by_severity = HashMap::new(); - let mut by_component = HashMap::new(); - - for issue in &issues { - let severity_key = format!("{:?}", issue.severity).to_lowercase(); - *by_severity.entry(severity_key).or_insert(0) += 1; - - let component_key = issue.component.name.clone(); - *by_component.entry(component_key).or_insert(0) += 1; - } - - let summary = IssueSummary { - total_issues: issues.len(), - by_severity, - by_component, - }; - - Self { - infrastructure_type, - issues, - summary, +impl InfrastructureType { + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "clickhouse" => Ok(InfrastructureType::ClickHouse), + _ => Err(DiagnoseError::UnsupportedInfrastructureType(s.to_string())), } } } -/// Trait for ClickHouse diagnostic providers -/// Each provider implements checks for a specific aspect of ClickHouse infrastructure health -/// -/// Note: Currently ClickHouse-specific. Will need refactoring to support other -/// infrastructure types (Kafka, Temporal, etc.) in the future. 
-#[async_trait::async_trait] -pub trait DiagnosticProvider: Send + Sync { - /// Name of this diagnostic provider - fn name(&self) -> &str; - - /// Check if this provider is applicable to the given component - fn applicable_to(&self, component: &Component, engine: Option<&ClickhouseEngine>) -> bool; - - /// Check if this provider is system-wide (not component-specific) - /// System-wide providers are run once, not per-component - fn is_system_wide(&self) -> bool { - false - } - - /// Run diagnostics and return list of issues found - async fn diagnose( - &self, - component: &Component, - engine: Option<&ClickhouseEngine>, - config: &ClickHouseConfig, - since: Option<&str>, - ) -> Result, DiagnoseError>; -} - /// Returns the tool definition for the MCP server pub fn tool_definition() -> Tool { let schema = json!({ @@ -387,7 +138,7 @@ pub fn tool_definition() -> Tool { Tool { name: "diagnose_infrastructure".into(), description: Some( - "Proactively diagnose infrastructure issues by intelligently checking relevant diagnostic sources based on infrastructure type. For ClickHouse, automatically checks: stuck mutations, S3Queue ingestion errors (for S3Queue tables), replication health (for replicated tables), data parts issues, background merge problems, system errors, and Docker container logs. Returns structured, actionable information about errors and warnings with suggested remediation steps.".into() + "Proactively diagnose infrastructure issues by intelligently checking relevant diagnostic sources based on infrastructure type. For ClickHouse, automatically checks: stuck mutations, S3Queue ingestion errors (for S3Queue tables), replication health (for replicated tables), data parts issues, background merge problems, system errors, and stopped operations. 
Returns structured, actionable information about errors and warnings with suggested remediation steps.".into() ), input_schema: Arc::new(schema.as_object().unwrap().clone()), annotations: None, @@ -463,7 +214,7 @@ fn parse_params( }) } -/// Handle the tool call with the given arguments +/// Handle the MCP tool call with the given arguments pub async fn handle_call( arguments: Option<&Map>, redis_client: Arc, @@ -504,7 +255,7 @@ async fn execute_diagnose_infrastructure( } } -/// Diagnose ClickHouse infrastructure +/// Diagnose ClickHouse infrastructure using the shared diagnostics module async fn diagnose_clickhouse( params: DiagnoseInfraParams, redis_client: Arc, @@ -547,124 +298,46 @@ async fn diagnose_clickhouse( debug!("Checking {} tables for issues", tables_to_check.len()); - // Create diagnostic providers - let providers = create_clickhouse_providers(); - - // Separate component-specific and system-wide providers - let component_providers: Vec<_> = providers.iter().filter(|p| !p.is_system_wide()).collect(); - let system_wide_providers: Vec<_> = providers.iter().filter(|p| p.is_system_wide()).collect(); - - // Run diagnostics for each table - let mut all_issues = Vec::new(); - - for (_map_key, table) in tables_to_check { - let mut metadata = HashMap::new(); - metadata.insert("database".to_string(), clickhouse_config.db_name.clone()); + // Build DiagnosticRequest with components from infrastructure map + let components: Vec<_> = tables_to_check + .iter() + .map(|(_map_key, table)| { + let mut metadata = HashMap::new(); + metadata.insert("database".to_string(), clickhouse_config.db_name.clone()); - let component = Component { - component_type: "table".to_string(), - name: table.name.clone(), // Use the actual table name, not the infra map key - metadata, - }; + let component = Component { + component_type: "table".to_string(), + name: table.name.clone(), // Use the actual table name + metadata, + }; - let engine = table.engine.as_ref(); - - // Run each applicable component-specific provider - for provider in &component_providers { - if provider.applicable_to(&component, engine) { - debug!( - "Running {} diagnostic for table {}", - provider.name(), - table.name - ); - - match provider - .diagnose( - &component, - engine, - clickhouse_config, - params.since.as_deref(), - ) - .await - { - Ok(mut issues) => { - // Filter by severity - issues.retain(|issue| params.severity.includes(&issue.severity)); - all_issues.extend(issues); - } - Err(e) => { - debug!( - "Provider {} failed for table {}: {}", - provider.name(), - table.name, - e - ); - // Continue with other providers even if one fails - } - } - } - } - } - - // Run system-wide diagnostics once - let mut system_metadata = HashMap::new(); - system_metadata.insert("database".to_string(), clickhouse_config.db_name.clone()); + (component, table.engine.clone()) + }) + .collect(); - let system_component = Component { - component_type: "system".to_string(), - name: "clickhouse".to_string(), - metadata: system_metadata, + let request = DiagnosticRequest { + components, + options: DiagnosticOptions { + diagnostic_names: Vec::new(), // Run all diagnostics + min_severity: params.severity, + since: params.since, + }, }; - for provider in system_wide_providers { - debug!("Running system-wide {} diagnostic", provider.name()); - - match provider - .diagnose( - &system_component, - None, - clickhouse_config, - params.since.as_deref(), - ) - .await - { - Ok(mut issues) => { - // Filter by severity - issues.retain(|issue| 
params.severity.includes(&issue.severity)); - all_issues.extend(issues); - } - Err(e) => { - debug!("System-wide provider {} failed: {}", provider.name(), e); - // Continue with other providers even if one fails - } - } - } - - // TODO: Add Docker logs diagnostic (not component-specific) + // Use the shared run_diagnostics function + let output = crate::infrastructure::olap::clickhouse::diagnostics::run_diagnostics( + request, + clickhouse_config, + ) + .await + .map_err(|e| DiagnoseError::DiagnosticFailed(format!("{}", e)))?; info!( "Infrastructure diagnostics complete. Found {} issues.", - all_issues.len() + output.issues.len() ); - Ok(DiagnosticOutput::new( - InfrastructureType::ClickHouse, - all_issues, - )) -} - -/// Create all ClickHouse diagnostic providers -fn create_clickhouse_providers() -> Vec> { - vec![ - Box::new(MutationDiagnostic), - Box::new(PartsDiagnostic), - Box::new(MergeDiagnostic), - Box::new(ErrorStatsDiagnostic), - Box::new(S3QueueDiagnostic), - Box::new(ReplicationDiagnostic), - Box::new(MergeFailureDiagnostic), - Box::new(StoppedOperationsDiagnostic), - ] + Ok(output) } #[cfg(test)] @@ -699,24 +372,20 @@ mod tests { #[test] fn test_severity_includes() { - let error = Severity::Error; - let warning = Severity::Warning; - let info = Severity::Info; - - // Error only includes error - assert!(error.includes(&Severity::Error)); - assert!(!error.includes(&Severity::Warning)); - assert!(!error.includes(&Severity::Info)); + // Info includes all + assert!(Severity::Info.includes(&Severity::Error)); + assert!(Severity::Info.includes(&Severity::Warning)); + assert!(Severity::Info.includes(&Severity::Info)); // Warning includes warning and error - assert!(warning.includes(&Severity::Error)); - assert!(warning.includes(&Severity::Warning)); - assert!(!warning.includes(&Severity::Info)); - - // Info includes everything - assert!(info.includes(&Severity::Error)); - assert!(info.includes(&Severity::Warning)); - assert!(info.includes(&Severity::Info)); + assert!(Severity::Warning.includes(&Severity::Error)); + assert!(Severity::Warning.includes(&Severity::Warning)); + assert!(!Severity::Warning.includes(&Severity::Info)); + + // Error includes only error + assert!(Severity::Error.includes(&Severity::Error)); + assert!(!Severity::Error.includes(&Severity::Warning)); + assert!(!Severity::Error.includes(&Severity::Info)); } #[test] @@ -724,18 +393,20 @@ mod tests { let args = json!({ "infrastructure_type": "clickhouse" }); - let map = args.as_object().unwrap(); - let result = parse_params(Some(map)); - assert!(result.is_ok()); - let params = result.unwrap(); - assert_eq!(params.infrastructure_type, InfrastructureType::ClickHouse); + + let params = parse_params(args.as_object()).unwrap(); + + assert!(matches!( + params.infrastructure_type, + InfrastructureType::ClickHouse + )); assert!(params.component_filter.is_none()); - assert_eq!(params.severity, Severity::Info); + assert!(matches!(params.severity, Severity::Info)); // Default assert!(params.since.is_none()); } #[test] - fn test_parse_params_with_filter() { + fn test_parse_params_full() { let args = json!({ "infrastructure_type": "clickhouse", "component_filter": { @@ -745,114 +416,149 @@ mod tests { "severity": "error", "since": "-1h" }); - let map = args.as_object().unwrap(); - let result = parse_params(Some(map)); - assert!(result.is_ok()); - let params = result.unwrap(); - assert!(params.component_filter.is_some()); + + let params = parse_params(args.as_object()).unwrap(); + + assert!(matches!( + params.infrastructure_type, + 
InfrastructureType::ClickHouse + )); + assert!(matches!(params.severity, Severity::Error)); + assert_eq!(params.since, Some("-1h".to_string())); + let filter = params.component_filter.unwrap(); assert_eq!(filter.component_type, Some("table".to_string())); assert!(filter.component_name.is_some()); - assert_eq!(params.severity, Severity::Error); - assert_eq!(params.since, Some("-1h".to_string())); + + let regex = filter.component_name.unwrap(); + assert!(regex.is_match("user_events")); + assert!(regex.is_match("user_profiles")); + assert!(!regex.is_match("events")); } #[test] - fn test_parse_params_invalid() { - // Missing required parameter - let args = json!({}); - let map = args.as_object().unwrap(); - assert!(parse_params(Some(map)).is_err()); - - // Invalid infrastructure type - let args = json!({"infrastructure_type": "invalid"}); - let map = args.as_object().unwrap(); - assert!(parse_params(Some(map)).is_err()); - - // Invalid severity + fn test_parse_params_component_filter_type_only() { let args = json!({ "infrastructure_type": "clickhouse", - "severity": "invalid" + "component_filter": { + "component_type": "view" + } }); - let map = args.as_object().unwrap(); - assert!(parse_params(Some(map)).is_err()); - // Invalid regex pattern + let params = parse_params(args.as_object()).unwrap(); + + let filter = params.component_filter.unwrap(); + assert_eq!(filter.component_type, Some("view".to_string())); + assert!(filter.component_name.is_none()); + } + + #[test] + fn test_parse_params_component_filter_name_only() { let args = json!({ "infrastructure_type": "clickhouse", "component_filter": { - "component_name": "[invalid" + "component_name": "events" } }); - let map = args.as_object().unwrap(); - assert!(parse_params(Some(map)).is_err()); + + let params = parse_params(args.as_object()).unwrap(); + + let filter = params.component_filter.unwrap(); + assert!(filter.component_type.is_none()); + assert!(filter.component_name.is_some()); } #[test] - fn test_diagnostic_output_summary() { - let issues = vec![ - Issue { - severity: Severity::Error, - source: "system.mutations".to_string(), - component: Component { - component_type: "table".to_string(), - name: "users".to_string(), - metadata: { - let mut m = HashMap::new(); - m.insert("database".to_string(), "test_db".to_string()); - m - }, - }, - error_type: "stuck_mutation".to_string(), - message: "Mutation stuck".to_string(), - details: Map::new(), - suggested_action: "Kill mutation".to_string(), - related_queries: vec![], - }, - Issue { - severity: Severity::Warning, - source: "system.parts".to_string(), - component: Component { - component_type: "table".to_string(), - name: "users".to_string(), - metadata: { - let mut m = HashMap::new(); - m.insert("database".to_string(), "test_db".to_string()); - m - }, - }, - error_type: "excessive_parts".to_string(), - message: "Too many parts".to_string(), - details: Map::new(), - suggested_action: "Optimize table".to_string(), - related_queries: vec![], - }, - Issue { - severity: Severity::Error, - source: "system.mutations".to_string(), - component: Component { - component_type: "table".to_string(), - name: "orders".to_string(), - metadata: { - let mut m = HashMap::new(); - m.insert("database".to_string(), "test_db".to_string()); - m - }, - }, - error_type: "failed_mutation".to_string(), - message: "Mutation failed".to_string(), - details: Map::new(), - suggested_action: "Check logs".to_string(), - related_queries: vec![], - }, - ]; + fn test_parse_params_invalid_regex() { + let args = json!({ + 
"infrastructure_type": "clickhouse", + "component_filter": { + "component_name": "[invalid(regex" + } + }); + + let result = parse_params(args.as_object()); + assert!(matches!(result, Err(DiagnoseError::InvalidRegex { .. }))); + + if let Err(DiagnoseError::InvalidRegex { pattern, .. }) = result { + assert_eq!(pattern, "[invalid(regex"); + } + } + + #[test] + fn test_parse_params_invalid_infrastructure_type() { + let args = json!({ + "infrastructure_type": "kafka" + }); + + let result = parse_params(args.as_object()); + assert!(matches!( + result, + Err(DiagnoseError::UnsupportedInfrastructureType(_)) + )); + } + + #[test] + fn test_parse_params_invalid_severity() { + let args = json!({ + "infrastructure_type": "clickhouse", + "severity": "critical" + }); + + let result = parse_params(args.as_object()); + assert!(matches!(result, Err(DiagnoseError::InvalidParameter(_)))); + } - let output = DiagnosticOutput::new(InfrastructureType::ClickHouse, issues); + #[test] + fn test_parse_params_missing_infrastructure_type() { + let args = json!({ + "severity": "error" + }); + + let result = parse_params(args.as_object()); + assert!(matches!(result, Err(DiagnoseError::InvalidParameter(_)))); + } + + #[test] + fn test_parse_params_no_arguments() { + let result = parse_params(None); + assert!(matches!(result, Err(DiagnoseError::InvalidParameter(_)))); + } - assert_eq!(output.summary.total_issues, 3); - assert_eq!(output.summary.by_severity.get("error"), Some(&2)); - assert_eq!(output.summary.by_severity.get("warning"), Some(&1)); - assert_eq!(output.summary.by_component.get("users"), Some(&2)); - assert_eq!(output.summary.by_component.get("orders"), Some(&1)); + #[test] + fn test_parse_params_all_severity_variants() { + for (severity_str, expected) in [ + ("error", Severity::Error), + ("warning", Severity::Warning), + ("info", Severity::Info), + ("all", Severity::Info), // "all" maps to Info + ] { + let args = json!({ + "infrastructure_type": "clickhouse", + "severity": severity_str + }); + + let params = parse_params(args.as_object()).unwrap(); + assert_eq!( + params.severity, expected, + "Failed for severity: {}", + severity_str + ); + } + } + + #[test] + fn test_parse_params_case_insensitive() { + let args = json!({ + "infrastructure_type": "CLICKHOUSE", + "severity": "ERROR" + }); + + let params = parse_params(args.as_object()).unwrap(); + assert!(matches!( + params.infrastructure_type, + InfrastructureType::ClickHouse + )); + assert!(matches!(params.severity, Severity::Error)); } } diff --git a/apps/framework-cli/src/mcp/tools/infra_issues/stopped_operations.rs b/apps/framework-cli/src/mcp/tools/infra_issues/stopped_operations.rs deleted file mode 100644 index 62b6cb7d69..0000000000 --- a/apps/framework-cli/src/mcp/tools/infra_issues/stopped_operations.rs +++ /dev/null @@ -1,210 +0,0 @@ -//! 
Diagnostic provider for checking stopped operations (merges, replication) - -use log::debug; -use serde_json::{json, Map, Value}; - -use super::{Component, DiagnoseError, DiagnosticProvider, Issue, Severity}; -use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; -use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; -use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; - -/// Query timeout for diagnostic checks (30 seconds) -const DIAGNOSTIC_QUERY_TIMEOUT_SECS: u64 = 30; - -/// Diagnostic provider for checking stopped operations (merges, replication) -pub struct StoppedOperationsDiagnostic; - -#[async_trait::async_trait] -impl DiagnosticProvider for StoppedOperationsDiagnostic { - fn name(&self) -> &str { - "stopped_operations" - } - - fn applicable_to(&self, _component: &Component, _engine: Option<&ClickhouseEngine>) -> bool { - // Applicable to all tables - we check both merges and replication - true - } - - async fn diagnose( - &self, - component: &Component, - engine: Option<&ClickhouseEngine>, - config: &ClickHouseConfig, - _since: Option<&str>, - ) -> Result, DiagnoseError> { - let client = ClickHouseClient::new(config) - .map_err(|e| DiagnoseError::ClickHouseConnection(format!("{}", e)))?; - - let mut issues = Vec::new(); - - // Check if merges are stopped for this table - // We can detect this by checking if there are no running merges but many parts - let parts_count_query = format!( - "SELECT count() as part_count - FROM system.parts - WHERE database = '{}' AND table = '{}' AND active = 1 - FORMAT JSON", - config.db_name, component.name - ); - - debug!("Executing parts count query: {}", parts_count_query); - - let parts_result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(&parts_count_query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? - .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; - - let parts_json: Value = serde_json::from_str(&parts_result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; - - let parts_count = parts_json - .get("data") - .and_then(|v| v.as_array()) - .and_then(|arr| arr.first()) - .and_then(|row| row.get("part_count")) - .and_then(|v| v.as_u64()) - .unwrap_or(0); - - // If we have many parts, check if merges are running - if parts_count > 100 { - let merges_query = format!( - "SELECT count() as merge_count - FROM system.merges - WHERE database = '{}' AND table = '{}' - FORMAT JSON", - config.db_name, component.name - ); - - debug!("Executing merges query: {}", merges_query); - - let merges_result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(&merges_query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? 
- .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; - - let merges_json: Value = serde_json::from_str(&merges_result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; - - let merge_count = merges_json - .get("data") - .and_then(|v| v.as_array()) - .and_then(|arr| arr.first()) - .and_then(|row| row.get("merge_count")) - .and_then(|v| v.as_u64()) - .unwrap_or(0); - - // If we have excessive parts but no merges running, merges might be stopped - if merge_count == 0 { - let mut details = Map::new(); - details.insert("part_count".to_string(), json!(parts_count)); - details.insert("active_merges".to_string(), json!(0)); - - issues.push(Issue { - severity: Severity::Warning, - source: "system.parts,system.merges".to_string(), - component: component.clone(), - error_type: "merges_possibly_stopped".to_string(), - message: format!( - "Table has {} active parts but no running merges. Merges may be stopped or throttled.", - parts_count - ), - details, - suggested_action: format!( - "Check if merges were manually stopped with 'SELECT * FROM system.settings WHERE name LIKE \"%merge%\"'. Start merges if needed: 'SYSTEM START MERGES {}.{}'", - config.db_name, component.name - ), - related_queries: vec![ - format!( - "SELECT * FROM system.parts WHERE database = '{}' AND table = '{}' AND active = 1 ORDER BY modification_time DESC LIMIT 20", - config.db_name, component.name - ), - format!( - "SYSTEM START MERGES {}.{}", - config.db_name, component.name - ), - ], - }); - } - } - - // For replicated tables, check if replication queues are stopped - let is_replicated = matches!( - engine, - Some(ClickhouseEngine::ReplicatedMergeTree { .. }) - | Some(ClickhouseEngine::ReplicatedReplacingMergeTree { .. }) - | Some(ClickhouseEngine::ReplicatedAggregatingMergeTree { .. }) - | Some(ClickhouseEngine::ReplicatedSummingMergeTree { .. }) - ); - - if is_replicated { - let replicas_query = format!( - "SELECT is_readonly, queue_size - FROM system.replicas - WHERE database = '{}' AND table = '{}' - FORMAT JSON", - config.db_name, component.name - ); - - debug!("Executing replicas query: {}", replicas_query); - - let replicas_result = tokio::time::timeout( - std::time::Duration::from_secs(DIAGNOSTIC_QUERY_TIMEOUT_SECS), - client.execute_sql(&replicas_query), - ) - .await - .map_err(|_| DiagnoseError::QueryTimeout(DIAGNOSTIC_QUERY_TIMEOUT_SECS))? - .map_err(|e| DiagnoseError::QueryFailed(format!("{}", e)))?; - - let replicas_json: Value = serde_json::from_str(&replicas_result) - .map_err(|e| DiagnoseError::ParseError(format!("{}", e)))?; - - if let Some(replica_data) = replicas_json.get("data").and_then(|v| v.as_array()) { - for row in replica_data { - let is_readonly = row.get("is_readonly").and_then(|v| v.as_u64()).unwrap_or(0); - let queue_size = row.get("queue_size").and_then(|v| v.as_u64()).unwrap_or(0); - - // If replica is readonly with items in queue, replication might be stopped - if is_readonly == 1 && queue_size > 0 { - let mut details = Map::new(); - details.insert("is_readonly".to_string(), json!(true)); - details.insert("queue_size".to_string(), json!(queue_size)); - - issues.push(Issue { - severity: Severity::Error, - source: "system.replicas".to_string(), - component: component.clone(), - error_type: "replication_stopped".to_string(), - message: format!( - "Replica is in read-only mode with {} items in queue. Replication may be stopped.", - queue_size - ), - details, - suggested_action: format!( - "Investigate why replica is read-only. 
Try restarting replication: 'SYSTEM START REPLICATION QUEUES {}.{}'",
-                            config.db_name, component.name
-                        ),
-                        related_queries: vec![
-                            format!(
-                                "SELECT * FROM system.replicas WHERE database = '{}' AND table = '{}'",
-                                config.db_name, component.name
-                            ),
-                            format!(
-                                "SYSTEM START REPLICATION QUEUES {}.{}",
-                                config.db_name, component.name
-                            ),
-                        ],
-                    });
-                }
-            }
-        }
-    }
-
-        Ok(issues)
-    }
-}

From 46eed84511a65eea586b1d0637cd9eef0b08a33c Mon Sep 17 00:00:00 2001
From: George Anderson
Date: Mon, 17 Nov 2025 11:35:43 -0800
Subject: [PATCH 23/59] Moose Plan backwards compatibility (#2982)

Old versions of Moose might have a null engine stored in Redis, in which case
we should use the default engine.

Keys stored in old versions did not have the database information for the
tables.

---

> [!NOTE]
> Makes `Table.engine` non-optional with default `MergeTree`, adds infra map normalization (e.g., order_by from primary keys, arrays required), and updates diff/diagnostics/generators to ensure stable backward-compatible plans.
>
> - **Core Infrastructure**:
>   - Make `Table.engine` non-optional (`ClickhouseEngine`, default `MergeTree`); always display/serialize engine and default on decode.
>   - Update `compute_non_alterable_params_hash`, `order_by_equals`, and proto conversions to reflect mandatory engine.
> - **Infrastructure Map & Diff**:
>   - Add `InfrastructureMap::normalize()` (fill `order_by` from primary keys for MergeTree, force array columns `required=true`).
>   - Normalize remote/local maps before diff; compare using HashMap keys; adjust engine/order_by comparisons to non-optional engine.
>   - Resolve S3 credentials and recalc `engine_params_hash` accordingly.
> - **ClickHouse Integration**:
>   - Parse engine from `CREATE TABLE`, default to `MergeTree`; mapper uses engine directly.
>   - Diff strategy: engine change detection simplified; S3Queue column changes force drop+create.
>   - Diagnostics API now carries a concrete engine (no `Option`).
> - **Code Generation & Models**:
>   - TS/Python generators emit concrete engine configs; data model maps deduplicate to `ReplacingMergeTree` else `MergeTree`.
> - **Tests/E2E**:
>   - Update tests/fixtures for mandatory engine; minor e2e log noise reduction.
>
> Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 92bcb265ea3ac3c1d002d91c9378614cd84183b5. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).
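
A minimal, illustrative sketch of the fallback behavior described above (not the
actual implementation; the real logic lives in `InfrastructureMap::normalize()` and
the partial infrastructure map loader, and the types below are simplified
stand-ins):

```rust
// Sketch only: a stored `null` engine decodes to MergeTree, and an empty
// ORDER BY falls back to the primary-key columns so that plans produced by
// old and new CLI versions compare equal instead of showing spurious diffs.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq)]
enum Engine {
    MergeTree,
    ReplacingMergeTree,
}

struct Column {
    name: String,
    primary_key: bool,
}

struct Table {
    engine: Option<Engine>, // old payloads may have persisted `null` here
    order_by: Vec<String>,  // old payloads omitted this when implied by the primary key
    columns: Vec<Column>,
}

fn normalize(mut table: Table) -> Table {
    // Default a missing engine to MergeTree, matching ClickHouse's own default.
    let engine = table.engine.take().unwrap_or(Engine::MergeTree);

    // Fill an empty ORDER BY from the primary-key columns (restricted to the
    // MergeTree family in the real code; simplified here to plain MergeTree).
    if table.order_by.is_empty() && engine == Engine::MergeTree {
        table.order_by = table
            .columns
            .iter()
            .filter(|c| c.primary_key)
            .map(|c| c.name.clone())
            .collect();
    }

    table.engine = Some(engine);
    table
}

fn main() {
    // An "old format" table: null engine, empty order_by, primary key on `id`.
    let old = Table {
        engine: None,
        order_by: vec![],
        columns: vec![Column {
            name: "id".to_string(),
            primary_key: true,
        }],
    };

    let normalized = normalize(old);
    assert_eq!(normalized.engine, Some(Engine::MergeTree));
    assert_eq!(normalized.order_by, vec!["id".to_string()]);
}
```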
--- .../test/backward-compatibility.test.ts | 17 +- .../src/cli/display/infrastructure.rs | 9 +- apps/framework-cli/src/cli/local_webserver.rs | 3 +- .../framework-cli/src/cli/routines/migrate.rs | 3 +- apps/framework-cli/src/cli/routines/mod.rs | 12 + .../framework/core/infra_reality_checker.rs | 14 +- .../framework/core/infrastructure/table.rs | 42 +- .../src/framework/core/infrastructure_map.rs | 187 +++++--- .../core/partial_infrastructure_map.rs | 120 ++++-- apps/framework-cli/src/framework/core/plan.rs | 3 +- .../src/framework/core/plan_validator.rs | 13 +- .../src/framework/data_model/model.rs | 16 +- .../src/framework/python/generate.rs | 400 +++++++++--------- .../src/framework/typescript/generate.rs | 352 ++++++++------- .../olap/clickhouse/diagnostics/mod.rs | 10 +- .../olap/clickhouse/diff_strategy.rs | 54 ++- .../infrastructure/olap/clickhouse/mapper.rs | 6 +- .../src/infrastructure/olap/clickhouse/mod.rs | 12 +- .../infrastructure/olap/clickhouse/queries.rs | 2 + .../src/infrastructure/olap/ddl_ordering.rs | 48 +-- 20 files changed, 691 insertions(+), 632 deletions(-) diff --git a/apps/framework-cli-e2e/test/backward-compatibility.test.ts b/apps/framework-cli-e2e/test/backward-compatibility.test.ts index 28bb5daaf5..8e85887b5f 100644 --- a/apps/framework-cli-e2e/test/backward-compatibility.test.ts +++ b/apps/framework-cli-e2e/test/backward-compatibility.test.ts @@ -15,7 +15,7 @@ * 5. Asserts that no (or minimal expected) changes are detected * * This is critical for catching breaking changes in infrastructure map format, - * particularly changes like table ID prefixes with database names. + * particularly changes like table ID prefixes with database names and upgrades */ import { spawn, ChildProcess } from "child_process"; @@ -352,19 +352,13 @@ describe("Backward Compatibility Tests", function () { it("should show no changes when running moose plan with new CLI", async function () { this.timeout(TIMEOUTS.TEST_SETUP_MS); - console.log( - `\nRunning 'moose plan' with NEW CLI (${CLI_PATH}) on project initialized with latest published CLI...`, - ); - console.log( - "Querying running dev server (started with old CLI) to get infrastructure map", - ); - // Run moose plan with NEW CLI (querying the running server) // Use the same admin token that was configured for the old dev server try { const TEST_ADMIN_TOKEN = "deadbeefdeadbeefdeadbeefdeadbeef.0123456789abcdef0123456789abcdef"; - const { stdout, stderr } = await execAsync( + + const { stdout } = await execAsync( `"${CLI_PATH}" plan --url "http://localhost:4000" --token "${TEST_ADMIN_TOKEN}"`, { cwd: TEST_PROJECT_DIR, @@ -385,11 +379,6 @@ describe("Backward Compatibility Tests", function () { }, ); - console.log("moose plan stdout:", stdout); - if (stderr) { - console.log("moose plan stderr:", stderr); - } - // Strip ANSI color codes from the output for reliable parsing const stripAnsi = (str: string) => str.replace(/\x1b\[[0-9;]*m/g, ""); const cleanOutput = stripAnsi(stdout); diff --git a/apps/framework-cli/src/cli/display/infrastructure.rs b/apps/framework-cli/src/cli/display/infrastructure.rs index 64769ea83f..88e561b16b 100644 --- a/apps/framework-cli/src/cli/display/infrastructure.rs +++ b/apps/framework-cli/src/cli/display/infrastructure.rs @@ -241,10 +241,11 @@ fn format_table_display( details.push(format!("Cluster: {}", cluster)); } - // Engine section (if present) - if let Some(ref engine) = table.engine { - details.push(format!("Engine: {}", Into::::into(engine.clone()))); - } + // Engine section + details.push(format!( 
+ "Engine: {}", + Into::::into(table.engine.clone()) + )); (title, details) } diff --git a/apps/framework-cli/src/cli/local_webserver.rs b/apps/framework-cli/src/cli/local_webserver.rs index 87e3a8ed90..5df1c173f8 100644 --- a/apps/framework-cli/src/cli/local_webserver.rs +++ b/apps/framework-cli/src/cli/local_webserver.rs @@ -3525,6 +3525,7 @@ mod tests { use crate::framework::core::partial_infrastructure_map::LifeCycle; use crate::framework::versions::Version; use crate::infrastructure::olap::clickhouse::config::DEFAULT_DATABASE_NAME; + use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; fn create_test_table(name: &str) -> Table { Table { @@ -3543,7 +3544,7 @@ mod tests { order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: Some(Version::from_string("1.0.0".to_string())), source_primitive: PrimitiveSignature { name: "test".to_string(), diff --git a/apps/framework-cli/src/cli/routines/migrate.rs b/apps/framework-cli/src/cli/routines/migrate.rs index 8eb2e87bfd..0e9b4ae33a 100644 --- a/apps/framework-cli/src/cli/routines/migrate.rs +++ b/apps/framework-cli/src/cli/routines/migrate.rs @@ -737,6 +737,7 @@ mod tests { use crate::framework::core::infrastructure::table::{Column, ColumnType, OrderBy}; use crate::framework::core::infrastructure_map::PrimitiveSignature; use crate::framework::core::partial_infrastructure_map::LifeCycle; + use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; /// Helper to create a minimal test table fn create_test_table(name: &str) -> Table { @@ -764,7 +765,7 @@ mod tests { primitive_type: crate::framework::core::infrastructure_map::PrimitiveTypes::DataModel, }, - engine: None, + engine: ClickhouseEngine::MergeTree, metadata: None, life_cycle: LifeCycle::FullyManaged, engine_params_hash: None, diff --git a/apps/framework-cli/src/cli/routines/mod.rs b/apps/framework-cli/src/cli/routines/mod.rs index 38a49280b8..3473c65f30 100644 --- a/apps/framework-cli/src/cli/routines/mod.rs +++ b/apps/framework-cli/src/cli/routines/mod.rs @@ -1063,6 +1063,12 @@ pub async fn remote_plan( } }; + // Normalize both infra maps for backward compatibility + // This ensures consistent comparison between old and new CLI versions + // by applying the same normalization logic (e.g., filling order_by from primary key) + let remote_infra_map = remote_infra_map.normalize(); + let local_infra_map = local_infra_map.normalize(); + // Calculate and display changes let changes = calculate_plan_diff_local( &remote_infra_map, @@ -1167,6 +1173,12 @@ pub async fn remote_gen_migration( } }; + // Normalize both infra maps for backward compatibility + // This ensures consistent comparison between old and new CLI versions + // by applying the same normalization logic (e.g., filling order_by from primary key) + let remote_infra_map = remote_infra_map.normalize(); + let local_infra_map = local_infra_map.normalize(); + let changes = calculate_plan_diff_local( &remote_infra_map, &local_infra_map, diff --git a/apps/framework-cli/src/framework/core/infra_reality_checker.rs b/apps/framework-cli/src/framework/core/infra_reality_checker.rs index 80b1f6458b..742d481385 100644 --- a/apps/framework-cli/src/framework/core/infra_reality_checker.rs +++ b/apps/framework-cli/src/framework/core/infra_reality_checker.rs @@ -407,7 +407,7 @@ mod tests { order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: None, + engine: 
ClickhouseEngine::MergeTree, version: Some(Version::from_string("1.0.0".to_string())), source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -634,11 +634,11 @@ mod tests { let mut infra_table = create_base_table("test_table"); // Set different engine values - actual_table.engine = Some(ClickhouseEngine::ReplacingMergeTree { + actual_table.engine = ClickhouseEngine::ReplacingMergeTree { ver: None, is_deleted: None, - }); - infra_table.engine = None; + }; + infra_table.engine = ClickhouseEngine::MergeTree; let mock_client = MockOlapClient { tables: vec![Table { @@ -681,10 +681,10 @@ mod tests { match &discrepancies.mismatched_tables[0] { OlapChange::Table(TableChange::Updated { before, after, .. }) => { assert!(matches!( - before.engine.as_ref(), - Some(ClickhouseEngine::ReplacingMergeTree { .. }) + &before.engine, + ClickhouseEngine::ReplacingMergeTree { .. } )); - assert_eq!(after.engine.as_ref(), None); + assert!(matches!(&after.engine, ClickhouseEngine::MergeTree)); } _ => panic!("Expected TableChange::Updated variant"), } diff --git a/apps/framework-cli/src/framework/core/infrastructure/table.rs b/apps/framework-cli/src/framework/core/infrastructure/table.rs index 51b5d28982..200251a56b 100644 --- a/apps/framework-cli/src/framework/core/infrastructure/table.rs +++ b/apps/framework-cli/src/framework/core/infrastructure/table.rs @@ -275,7 +275,7 @@ pub struct Table { #[serde(skip_serializing_if = "Option::is_none", default)] pub sample_by: Option, #[serde(default)] - pub engine: Option, + pub engine: ClickhouseEngine, pub version: Option, pub source_primitive: PrimitiveSignature, pub metadata: Option, @@ -335,21 +335,16 @@ impl Table { use sha2::{Digest, Sha256}; // Combine engine hash and database into a single hash - let engine_hash: Option = - self.engine.as_ref().map(|e| e.non_alterable_params_hash()); + let engine_hash = self.engine.non_alterable_params_hash(); - // If we have neither engine hash nor database, return None - if engine_hash.is_none() && self.database.is_none() { - return None; - } + // If we have no database, return None (engine always exists now) + self.database.as_ref()?; // Create a combined hash that includes both engine params and database let mut hasher = Sha256::new(); - // Include engine params hash if it exists - if let Some(ref hash) = engine_hash { - hasher.update(hash.as_str().as_bytes()); - } + // Include engine params hash + hasher.update(engine_hash.as_bytes()); // Include database field if let Some(ref db) = self.database { @@ -378,11 +373,7 @@ impl Table { .map(|c| format!("{}: {}", c.name, c.data_type)) .collect::>() .join(", "); - let engine_str = self - .engine - .as_ref() - .map(|e| format!(" - engine: {}", Into::::into(e.clone()))) - .unwrap_or_default(); + let engine_str = format!(" - engine: {}", Into::::into(self.engine.clone())); format!( "Table: {} Version {:?} - {} - {}{}", self.name, self.version, columns_str, self.order_by, engine_str @@ -417,9 +408,8 @@ impl Table { // but the implicit order_by from primary keys can be the same // ONLY for engines that support ORDER BY (MergeTree family and S3) // Buffer, S3Queue, and Distributed don't support ORDER BY - // When engine is None, ClickHouse defaults to MergeTree || (target.order_by.is_empty() - && target.engine.as_ref().is_none_or(|e| e.supports_order_by()) + && target.engine.supports_order_by() && matches!( &self.order_by, OrderBy::Fields(v) if v.iter().map(String::as_str).collect::>() == target.primary_key_columns() @@ -460,14 +450,11 @@ impl Table { sample_by_expression: 
self.sample_by.clone(), version: self.version.as_ref().map(|v| v.to_string()), source_primitive: MessageField::some(self.source_primitive.to_proto()), - deduplicate: self - .engine - .as_ref() - .is_some_and(|e| matches!(e, ClickhouseEngine::ReplacingMergeTree { .. })), - engine: MessageField::from_option(self.engine.as_ref().map(|engine| StringValue { - value: engine.clone().to_proto_string(), + deduplicate: matches!(self.engine, ClickhouseEngine::ReplacingMergeTree { .. }), + engine: MessageField::some(StringValue { + value: self.engine.clone().to_proto_string(), special_fields: Default::default(), - })), + }), order_by2: MessageField::some(proto_order_by2), // Store the hash for change detection, including database field engine_params_hash: self @@ -514,7 +501,8 @@ impl Table { ver: None, is_deleted: None, }) - }); + }) + .unwrap_or(ClickhouseEngine::MergeTree); // Engine settings are now handled via table_settings field @@ -1636,7 +1624,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "Users".to_string(), diff --git a/apps/framework-cli/src/framework/core/infrastructure_map.rs b/apps/framework-cli/src/framework/core/infrastructure_map.rs index 622b20de46..3ccdf3e597 100644 --- a/apps/framework-cli/src/framework/core/infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/infrastructure_map.rs @@ -1760,7 +1760,8 @@ impl InfrastructureMap { let mut table_additions = 0; // Use normalized tables for comparison, but original tables for changes - for normalized_table in normalized_self.values() { + // Iterate over key-value pairs to preserve the HashMap key for lookups + for (key, normalized_table) in normalized_self.iter() { // self_tables can be from remote where the keys are IDs with another database prefix // but they are then the default database, // the `database` field is None and we build the ID ourselves @@ -1768,13 +1769,14 @@ impl InfrastructureMap { normalized_target.get(&normalized_table.id(default_database)) { if !tables_equal_ignore_metadata(normalized_table, normalized_target) { - // Get original tables for use in changes + // Get original tables for use in changes using the HashMap key + // not the computed ID, since remote keys may differ from computed IDs let table = self_tables - .get(&normalized_table.id(default_database)) - .unwrap(); + .get(key) + .expect("normalized_self and self_tables should have same keys"); let target_table = target_tables .get(&normalized_target.id(default_database)) - .unwrap(); + .expect("normalized_target exists, so target_table should too"); // Respect lifecycle: ExternallyManaged tables are never modified if target_table.life_cycle == LifeCycle::ExternallyManaged && respect_life_cycle { @@ -1842,9 +1844,8 @@ impl InfrastructureMap { // but the implicit order_by from primary keys can be the same // ONLY for engines that support ORDER BY (MergeTree family and S3) // Buffer, S3Queue, and Distributed don't support ORDER BY - // When engine is None, ClickHouse defaults to MergeTree && !(target_table.order_by.is_empty() - && target_table.engine.as_ref().is_none_or(|e| e.supports_order_by()) + && target_table.engine.supports_order_by() && matches!( &table.order_by, OrderBy::Fields(v) @@ -1925,10 +1926,10 @@ impl InfrastructureMap { } } } else { - // Get original table for removal + // Get original table for removal using the HashMap key let table = self_tables - 
.get(&normalized_table.id(default_database)) - .unwrap(); + .get(key) + .expect("normalized_self and self_tables should have same keys"); // Respect lifecycle: DeletionProtected and ExternallyManaged tables are never removed match (table.life_cycle, respect_life_cycle) { (LifeCycle::FullyManaged, _) | (_, false) => { @@ -2144,9 +2145,8 @@ impl InfrastructureMap { // but the implicit order_by from primary keys can be the same // ONLY for engines that support ORDER BY (MergeTree family and S3) // Buffer, S3Queue, and Distributed don't support ORDER BY - // When engine is None, ClickHouse defaults to MergeTree && !(target_table.order_by.is_empty() - && target_table.engine.as_ref().is_none_or(|e| e.supports_order_by()) + && target_table.engine.supports_order_by() && matches!( &table.order_by, crate::framework::core::infrastructure::table::OrderBy::Fields(v) @@ -2243,15 +2243,14 @@ impl InfrastructureMap { for table in self.tables.values_mut() { let mut should_recalc_hash = false; - if let Some(engine) = &mut table.engine { - match engine { - ClickhouseEngine::S3Queue { - aws_access_key_id, - aws_secret_access_key, - .. - } => { - // Resolve environment variable markers for AWS credentials - let resolved_access_key = resolve_optional_runtime_env(aws_access_key_id) + match &mut table.engine { + ClickhouseEngine::S3Queue { + aws_access_key_id, + aws_secret_access_key, + .. + } => { + // Resolve environment variable markers for AWS credentials + let resolved_access_key = resolve_optional_runtime_env(aws_access_key_id) .map_err(|e| { format!( "Failed to resolve runtime environment variable for table '{}' field 'awsAccessKeyId': {}", @@ -2259,7 +2258,7 @@ impl InfrastructureMap { ) })?; - let resolved_secret_key = + let resolved_secret_key = resolve_optional_runtime_env(aws_secret_access_key).map_err(|e| { format!( "Failed to resolve runtime environment variable for table '{}' field 'awsSecretAccessKey': {}", @@ -2267,56 +2266,54 @@ impl InfrastructureMap { ) })?; - *aws_access_key_id = resolved_access_key; - *aws_secret_access_key = resolved_secret_key; - should_recalc_hash = true; + *aws_access_key_id = resolved_access_key; + *aws_secret_access_key = resolved_secret_key; + should_recalc_hash = true; - log::debug!( - "Resolved S3Queue credentials for table '{}' at runtime", - table.name - ); - } - ClickhouseEngine::S3 { - aws_access_key_id, - aws_secret_access_key, - .. - } => { - // Resolve environment variable markers for AWS credentials - let resolved_access_key = resolve_optional_runtime_env(aws_access_key_id) - .map_err(|e| { + log::debug!( + "Resolved S3Queue credentials for table '{}' at runtime", + table.name + ); + } + ClickhouseEngine::S3 { + aws_access_key_id, + aws_secret_access_key, + .. 
+ } => { + // Resolve environment variable markers for AWS credentials + let resolved_access_key = resolve_optional_runtime_env(aws_access_key_id) + .map_err(|e| { + format!( + "Failed to resolve runtime environment variable for table '{}' field 'awsAccessKeyId': {}", + table.name, e + ) + })?; + + let resolved_secret_key = + resolve_optional_runtime_env(aws_secret_access_key).map_err(|e| { format!( - "Failed to resolve runtime environment variable for table '{}' field 'awsAccessKeyId': {}", + "Failed to resolve runtime environment variable for table '{}' field 'awsSecretAccessKey': {}", table.name, e ) })?; - let resolved_secret_key = - resolve_optional_runtime_env(aws_secret_access_key).map_err(|e| { - format!( - "Failed to resolve runtime environment variable for table '{}' field 'awsSecretAccessKey': {}", - table.name, e - ) - })?; - - *aws_access_key_id = resolved_access_key; - *aws_secret_access_key = resolved_secret_key; - should_recalc_hash = true; + *aws_access_key_id = resolved_access_key; + *aws_secret_access_key = resolved_secret_key; + should_recalc_hash = true; - log::debug!( - "Resolved S3 credentials for table '{}' at runtime", - table.name - ); - } - _ => { - // No credentials to resolve for other engine types - } + log::debug!( + "Resolved S3 credentials for table '{}' at runtime", + table.name + ); + } + _ => { + // No credentials to resolve for other engine types } } // Recalculate engine_params_hash after resolving credentials if should_recalc_hash { - table.engine_params_hash = - table.engine.as_ref().map(|e| e.non_alterable_params_hash()); + table.engine_params_hash = Some(table.engine.non_alterable_params_hash()); log::debug!( "Recalculated engine_params_hash for table '{}' after credential resolution", table.name @@ -2565,6 +2562,57 @@ impl InfrastructureMap { self.tables.values().find(|table| table.name == name) } + /// Normalizes the infrastructure map for backward compatibility + /// + /// This applies the same normalization logic as partial_infrastructure_map.rs + /// to ensure consistent comparison between old and new infrastructure maps. + /// + /// Specifically: + /// - Falls back to primary key columns for order_by when it's empty (for MergeTree tables) + /// - Ensures arrays are always required=true (ClickHouse doesn't support Nullable(Array)) + /// + /// This is needed because older CLI versions didn't persist order_by when it was + /// derived from primary key columns. + pub fn normalize(mut self) -> Self { + use crate::framework::core::infrastructure::table::{ColumnType, OrderBy}; + + self.tables = self + .tables + .into_iter() + .map(|(id, mut table)| { + // Fall back to primary key columns if order_by is empty for MergeTree engines + // This ensures backward compatibility when order_by isn't explicitly set + // We only do this for MergeTree family to avoid breaking S3 tables + if table.order_by.is_empty() && table.engine.is_merge_tree_family() { + let primary_key_columns: Vec = table + .columns + .iter() + .filter_map(|c| { + if c.primary_key { + Some(c.name.clone()) + } else { + None + } + }) + .collect(); + table.order_by = OrderBy::Fields(primary_key_columns); + } + + // Normalize columns: ClickHouse doesn't support Nullable(Array(...)) + // Arrays must always be NOT NULL (required=true) + for col in &mut table.columns { + if matches!(col.data_type, ColumnType::Array { .. 
}) { + col.required = true; + } + } + + (id, table) + }) + .collect(); + + self + } + /// Adds a topic to the infrastructure map /// /// # Arguments @@ -2967,12 +3015,13 @@ mod tests { }; use crate::framework::versions::Version; use crate::infrastructure::olap::clickhouse::config::DEFAULT_DATABASE_NAME; + use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; #[test] fn test_compute_table_diff() { let before = Table { name: "test_table".to_string(), - engine: None, + engine: ClickhouseEngine::MergeTree, columns: vec![ Column { name: "id".to_string(), @@ -3028,7 +3077,7 @@ mod tests { let after = Table { name: "test_table".to_string(), - engine: None, + engine: ClickhouseEngine::MergeTree, columns: vec![ Column { name: "id".to_string(), @@ -3238,7 +3287,7 @@ mod diff_tests { pub fn create_test_table(name: &str, version: &str) -> Table { Table { name: name.to_string(), - engine: None, + engine: ClickhouseEngine::MergeTree, columns: vec![], order_by: OrderBy::Fields(vec![]), partition_by: None, @@ -3537,11 +3586,11 @@ mod diff_tests { let mut before = create_test_table("test", "1.0"); let mut after = create_test_table("test", "1.0"); - before.engine = Some(ClickhouseEngine::MergeTree); - after.engine = Some(ClickhouseEngine::ReplacingMergeTree { + before.engine = ClickhouseEngine::MergeTree; + after.engine = ClickhouseEngine::ReplacingMergeTree { ver: None, is_deleted: None, - }); + }; // Set database field for both tables before.database = Some(DEFAULT_DATABASE_NAME.to_string()); @@ -3566,10 +3615,10 @@ mod diff_tests { after: a, .. }) => { - assert_eq!(b.engine.as_ref(), Some(&ClickhouseEngine::MergeTree)); + assert!(matches!(&b.engine, ClickhouseEngine::MergeTree)); assert!(matches!( - a.engine.as_ref(), - Some(ClickhouseEngine::ReplacingMergeTree { .. }) + &a.engine, + ClickhouseEngine::ReplacingMergeTree { .. 
} )); } _ => panic!("Expected Updated change with engine modification"), diff --git a/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs b/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs index 0359633d6b..69519b2a2c 100644 --- a/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs @@ -51,7 +51,7 @@ use super::{ olap_process::OlapProcess, orchestration_worker::OrchestrationWorker, sql_resource::SqlResource, - table::{Column, Metadata, Table, TableIndex}, + table::{Column, ColumnType, Metadata, Table, TableIndex}, topic::{KafkaSchema, Topic, DEFAULT_MAX_MESSAGE_BYTES}, topic_sync_process::{TopicToTableSyncProcess, TopicToTopicSyncProcess}, view::View, @@ -617,7 +617,7 @@ impl PartialInfrastructureMap { .map(|v_str| Version::from_string(v_str.clone())); let engine = self.parse_engine(partial_table)?; - let engine_params_hash = engine.as_ref().map(|e| e.non_alterable_params_hash()); + let engine_params_hash = Some(engine.non_alterable_params_hash()); // S3Queue settings should come directly from table_settings in the user code let mut table_settings = partial_table.table_settings.clone().unwrap_or_default(); @@ -625,11 +625,7 @@ impl PartialInfrastructureMap { // Apply ClickHouse default settings for MergeTree family engines // This ensures our internal representation matches what ClickHouse actually has // and prevents unnecessary diffs - // Note: When engine is None, ClickHouse defaults to MergeTree, so we apply defaults in that case too - let should_apply_mergetree_defaults = match &engine { - None => true, // No engine specified defaults to MergeTree - Some(eng) => eng.is_merge_tree_family(), - }; + let should_apply_mergetree_defaults = engine.is_merge_tree_family(); if should_apply_mergetree_defaults { // Apply MergeTree defaults if not explicitly set by user @@ -656,25 +652,55 @@ impl PartialInfrastructureMap { } // Buffer, S3Queue, Distributed, and other non-MergeTree engines don't support PRIMARY KEY - // When engine is None, ClickHouse defaults to MergeTree which does support it - let supports_primary_key = engine.as_ref().is_none_or(|e| e.supports_order_by()); - // Clear primary_key flag from columns if engine doesn't support it - let columns = if supports_primary_key { - partial_table.columns.clone() - } else { - partial_table - .columns - .iter() - .map(|col| Column { - primary_key: false, - ..col.clone() - }) - .collect() - }; + let supports_primary_key = engine.supports_order_by(); + + // Normalize columns: + // 1. Clear primary_key flag if engine doesn't support it + // 2. Force arrays to be required=true (ClickHouse doesn't support nullable arrays) + let columns: Vec = partial_table + .columns + .iter() + .map(|col| { + let mut normalized_col = col.clone(); + + // Clear primary_key if engine doesn't support it + if !supports_primary_key { + normalized_col.primary_key = false; + } + + // ClickHouse doesn't support Nullable(Array(...)) + // Arrays must always be NOT NULL (required=true) + if matches!(col.data_type, ColumnType::Array { .. 
}) { + normalized_col.required = true; + } + + normalized_col + }) + .collect(); // Extract table-level TTL from partial table let table_ttl_setting = partial_table.ttl.clone(); + // Fall back to primary key columns if order_by is empty for MergeTree engines + // This ensures backward compatibility when order_by isn't explicitly set + // We only do this for MergeTree family to avoid breaking S3 tables + let order_by = if partial_table.order_by.is_empty() && engine.is_merge_tree_family() + { + let primary_key_columns: Vec = columns + .iter() + .filter_map(|c| { + if c.primary_key { + Some(c.name.clone()) + } else { + None + } + }) + .collect(); + OrderBy::Fields(primary_key_columns) + } else { + partial_table.order_by.clone() + }; + let table = Table { name: version .as_ref() @@ -682,7 +708,7 @@ impl PartialInfrastructureMap { format!("{}_{}", partial_table.name, version.as_suffix()) }), columns, - order_by: partial_table.order_by.clone(), + order_by, partition_by: partial_table.partition_by.clone(), sample_by: partial_table.sample_by.clone(), engine, @@ -718,81 +744,81 @@ impl PartialInfrastructureMap { fn parse_engine( &self, partial_table: &PartialTable, - ) -> Result, DmV2LoadingError> { + ) -> Result { match &partial_table.engine_config { - Some(EngineConfig::MergeTree {}) => Ok(Some(ClickhouseEngine::MergeTree)), + Some(EngineConfig::MergeTree {}) => Ok(ClickhouseEngine::MergeTree), Some(EngineConfig::ReplacingMergeTree { ver, is_deleted }) => { - Ok(Some(ClickhouseEngine::ReplacingMergeTree { + Ok(ClickhouseEngine::ReplacingMergeTree { ver: ver.clone(), is_deleted: is_deleted.clone(), - })) + }) } Some(EngineConfig::AggregatingMergeTree {}) => { - Ok(Some(ClickhouseEngine::AggregatingMergeTree)) + Ok(ClickhouseEngine::AggregatingMergeTree) } Some(EngineConfig::SummingMergeTree { columns }) => { - Ok(Some(ClickhouseEngine::SummingMergeTree { + Ok(ClickhouseEngine::SummingMergeTree { columns: columns.clone(), - })) + }) } Some(EngineConfig::ReplicatedMergeTree { keeper_path, replica_name, - }) => Ok(Some(ClickhouseEngine::ReplicatedMergeTree { + }) => Ok(ClickhouseEngine::ReplicatedMergeTree { keeper_path: keeper_path.clone(), replica_name: replica_name.clone(), - })), + }), Some(EngineConfig::ReplicatedReplacingMergeTree { keeper_path, replica_name, ver, is_deleted, - }) => Ok(Some(ClickhouseEngine::ReplicatedReplacingMergeTree { + }) => Ok(ClickhouseEngine::ReplicatedReplacingMergeTree { keeper_path: keeper_path.clone(), replica_name: replica_name.clone(), ver: ver.clone(), is_deleted: is_deleted.clone(), - })), + }), Some(EngineConfig::ReplicatedAggregatingMergeTree { keeper_path, replica_name, - }) => Ok(Some(ClickhouseEngine::ReplicatedAggregatingMergeTree { + }) => Ok(ClickhouseEngine::ReplicatedAggregatingMergeTree { keeper_path: keeper_path.clone(), replica_name: replica_name.clone(), - })), + }), Some(EngineConfig::ReplicatedSummingMergeTree { keeper_path, replica_name, columns, - }) => Ok(Some(ClickhouseEngine::ReplicatedSummingMergeTree { + }) => Ok(ClickhouseEngine::ReplicatedSummingMergeTree { keeper_path: keeper_path.clone(), replica_name: replica_name.clone(), columns: columns.clone(), - })), + }), Some(EngineConfig::S3Queue(config)) => { // Keep environment variable markers as-is - credentials will be resolved at runtime // S3Queue settings are handled in table_settings, not in the engine - Ok(Some(ClickhouseEngine::S3Queue { + Ok(ClickhouseEngine::S3Queue { s3_path: config.s3_path.clone(), format: config.format.clone(), compression: config.compression.clone(), headers: 
config.headers.clone(), aws_access_key_id: config.aws_access_key_id.clone(), aws_secret_access_key: config.aws_secret_access_key.clone(), - })) + }) } Some(EngineConfig::S3(config)) => { // Keep environment variable markers as-is - credentials will be resolved at runtime - Ok(Some(ClickhouseEngine::S3 { + Ok(ClickhouseEngine::S3 { path: config.path.clone(), format: config.format.clone(), aws_access_key_id: config.aws_access_key_id.clone(), @@ -800,10 +826,10 @@ impl PartialInfrastructureMap { compression: config.compression.clone(), partition_strategy: config.partition_strategy.clone(), partition_columns_in_data_file: config.partition_columns_in_data_file.clone(), - })) + }) } - Some(EngineConfig::Buffer(config)) => Ok(Some(ClickhouseEngine::Buffer { + Some(EngineConfig::Buffer(config)) => Ok(ClickhouseEngine::Buffer { target_database: config.target_database.clone(), target_table: config.target_table.clone(), num_layers: config.num_layers, @@ -816,17 +842,17 @@ impl PartialInfrastructureMap { flush_time: config.flush_time, flush_rows: config.flush_rows, flush_bytes: config.flush_bytes, - })), + }), - Some(EngineConfig::Distributed(config)) => Ok(Some(ClickhouseEngine::Distributed { + Some(EngineConfig::Distributed(config)) => Ok(ClickhouseEngine::Distributed { cluster: config.cluster.clone(), target_database: config.target_database.clone(), target_table: config.target_table.clone(), sharding_key: config.sharding_key.clone(), policy_name: config.policy_name.clone(), - })), + }), - None => Ok(None), + None => Ok(ClickhouseEngine::MergeTree), } } diff --git a/apps/framework-cli/src/framework/core/plan.rs b/apps/framework-cli/src/framework/core/plan.rs index 76b8b3e7c8..573941f10b 100644 --- a/apps/framework-cli/src/framework/core/plan.rs +++ b/apps/framework-cli/src/framework/core/plan.rs @@ -409,6 +409,7 @@ mod tests { use crate::framework::core::infrastructure_map::{PrimitiveSignature, PrimitiveTypes}; use crate::framework::core::partial_infrastructure_map::LifeCycle; use crate::framework::versions::Version; + use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; use crate::infrastructure::olap::clickhouse::TableWithUnsupportedType; use crate::infrastructure::olap::OlapChangesError; use crate::infrastructure::olap::OlapOperations; @@ -449,7 +450,7 @@ mod tests { order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: Some(Version::from_string("1.0.0".to_string())), source_primitive: PrimitiveSignature { name: "test".to_string(), diff --git a/apps/framework-cli/src/framework/core/plan_validator.rs b/apps/framework-cli/src/framework/core/plan_validator.rs index 7863ec27d6..99f9dda6a5 100644 --- a/apps/framework-cli/src/framework/core/plan_validator.rs +++ b/apps/framework-cli/src/framework/core/plan_validator.rs @@ -92,7 +92,10 @@ mod tests { use crate::framework::core::partial_infrastructure_map::LifeCycle; use crate::framework::core::plan::InfraPlan; use crate::framework::versions::Version; - use crate::infrastructure::olap::clickhouse::config::{ClickHouseConfig, ClusterConfig}; + use crate::infrastructure::olap::clickhouse::{ + config::{ClickHouseConfig, ClusterConfig}, + queries::ClickhouseEngine, + }; use crate::project::{Project, ProjectFeatures}; use std::collections::HashMap; use std::path::PathBuf; @@ -150,7 +153,7 @@ mod tests { order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::default(), 
version: Some(Version::from_string("1.0.0".to_string())), source_primitive: PrimitiveSignature { name: name.to_string(), @@ -307,7 +310,7 @@ mod tests { fn create_table_with_engine( name: &str, cluster_name: Option, - engine: Option, + engine: crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine, ) -> Table { Table { name: name.to_string(), @@ -344,10 +347,8 @@ mod tests { #[test] fn test_non_replicated_engine_without_cluster_succeeds() { - use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; - let project = create_test_project(None); - let table = create_table_with_engine("test_table", None, Some(ClickhouseEngine::MergeTree)); + let table = create_table_with_engine("test_table", None, ClickhouseEngine::MergeTree); let plan = create_test_plan(vec![table]); let result = validate(&project, &plan); diff --git a/apps/framework-cli/src/framework/data_model/model.rs b/apps/framework-cli/src/framework/data_model/model.rs index c4b2ac1435..6591277682 100644 --- a/apps/framework-cli/src/framework/data_model/model.rs +++ b/apps/framework-cli/src/framework/data_model/model.rs @@ -36,14 +36,14 @@ impl DataModel { OrderBy::Fields(self.primary_key_columns()) }; - let engine = - self.config - .storage - .deduplicate - .then_some(ClickhouseEngine::ReplacingMergeTree { - ver: None, - is_deleted: None, - }); + let engine = if self.config.storage.deduplicate { + ClickhouseEngine::ReplacingMergeTree { + ver: None, + is_deleted: None, + } + } else { + ClickhouseEngine::MergeTree + }; // Create the table first, then compute the combined hash that includes database let mut table = Table { diff --git a/apps/framework-cli/src/framework/python/generate.rs b/apps/framework-cli/src/framework/python/generate.rs index 4cdc3f653f..7448fe1536 100644 --- a/apps/framework-cli/src/framework/python/generate.rs +++ b/apps/framework-cli/src/framework/python/generate.rs @@ -744,209 +744,207 @@ pub fn tables_to_python(tables: &[Table], life_cycle: Option) -> Stri if let Some(ttl_expr) = &table.table_ttl_setting { writeln!(output, " ttl={:?},", ttl_expr).unwrap(); } - if let Some(engine) = &table.engine { - match engine { - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::S3Queue { - s3_path, - format, - compression, - headers, - aws_access_key_id, - aws_secret_access_key, - } => { - // Generate S3Queue configuration object - writeln!(output, " engine=S3QueueEngine(").unwrap(); - writeln!(output, " s3_path={:?},", s3_path).unwrap(); - writeln!(output, " format={:?},", format).unwrap(); - if let Some(compression) = compression { - writeln!(output, " compression={:?},", compression).unwrap(); - } - if let Some(key_id) = aws_access_key_id { - writeln!(output, " aws_access_key_id={:?},", key_id).unwrap(); - } - if let Some(secret) = aws_secret_access_key { - writeln!(output, " aws_secret_access_key={:?},", secret).unwrap(); - } - if let Some(headers) = headers { - write!(output, " headers={{").unwrap(); - for (i, (key, value)) in headers.iter().enumerate() { - if i > 0 { write!(output, ",").unwrap(); } - write!(output, " {:?}: {:?}", key, value).unwrap(); - } - writeln!(output, " }},").unwrap(); - } - writeln!(output, " ),").unwrap(); + match &table.engine { + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::S3Queue { + s3_path, + format, + compression, + headers, + aws_access_key_id, + aws_secret_access_key, + } => { + // Generate S3Queue configuration object + writeln!(output, " engine=S3QueueEngine(").unwrap(); + writeln!(output, " s3_path={:?},", 
s3_path).unwrap(); + writeln!(output, " format={:?},", format).unwrap(); + if let Some(compression) = compression { + writeln!(output, " compression={:?},", compression).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::MergeTree => { - writeln!(output, " engine=MergeTreeEngine(),").unwrap(); + if let Some(key_id) = aws_access_key_id { + writeln!(output, " aws_access_key_id={:?},", key_id).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplacingMergeTree { ver, is_deleted } => { - // Emit ReplacingMergeTreeEngine with parameters if present - write!(output, " engine=ReplacingMergeTreeEngine(").unwrap(); - if let Some(ver_col) = ver { - write!(output, "ver=\"{}\"", ver_col).unwrap(); - if is_deleted.is_some() { - write!(output, ", ").unwrap(); - } + if let Some(secret) = aws_secret_access_key { + writeln!(output, " aws_secret_access_key={:?},", secret).unwrap(); + } + if let Some(headers) = headers { + write!(output, " headers={{").unwrap(); + for (i, (key, value)) in headers.iter().enumerate() { + if i > 0 { write!(output, ",").unwrap(); } + write!(output, " {:?}: {:?}", key, value).unwrap(); } - if let Some(is_deleted_col) = is_deleted { - write!(output, "is_deleted=\"{}\"", is_deleted_col).unwrap(); + writeln!(output, " }},").unwrap(); + } + writeln!(output, " ),").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::MergeTree => { + writeln!(output, " engine=MergeTreeEngine(),").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplacingMergeTree { ver, is_deleted } => { + // Emit ReplacingMergeTreeEngine with parameters if present + write!(output, " engine=ReplacingMergeTreeEngine(").unwrap(); + if let Some(ver_col) = ver { + write!(output, "ver=\"{}\"", ver_col).unwrap(); + if is_deleted.is_some() { + write!(output, ", ").unwrap(); } - writeln!(output, "),").unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::AggregatingMergeTree => { - writeln!(output, " engine=AggregatingMergeTreeEngine(),").unwrap(); + if let Some(is_deleted_col) = is_deleted { + write!(output, "is_deleted=\"{}\"", is_deleted_col).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::SummingMergeTree { columns } => { - write!(output, " engine=SummingMergeTreeEngine(").unwrap(); - if let Some(cols) = columns { - if !cols.is_empty() { - write!(output, "columns={:?}", cols).unwrap(); - } + writeln!(output, "),").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::AggregatingMergeTree => { + writeln!(output, " engine=AggregatingMergeTreeEngine(),").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::SummingMergeTree { columns } => { + write!(output, " engine=SummingMergeTreeEngine(").unwrap(); + if let Some(cols) = columns { + if !cols.is_empty() { + write!(output, "columns={:?}", cols).unwrap(); } - writeln!(output, "),").unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedMergeTree { - keeper_path, - replica_name, - } => { - write!(output, " engine=ReplicatedMergeTreeEngine(").unwrap(); - if let (Some(path), Some(name)) = (keeper_path, replica_name) { - write!(output, "keeper_path={:?}, replica_name={:?}", path, name).unwrap(); - } - writeln!(output, "),").unwrap(); + writeln!(output, "),").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedMergeTree { + keeper_path, + replica_name, + } 
=> { + write!(output, " engine=ReplicatedMergeTreeEngine(").unwrap(); + if let (Some(path), Some(name)) = (keeper_path, replica_name) { + write!(output, "keeper_path={:?}, replica_name={:?}", path, name).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedReplacingMergeTree { - keeper_path, - replica_name, - ver, - is_deleted, - } => { - write!(output, " engine=ReplicatedReplacingMergeTreeEngine(").unwrap(); - let mut params = vec![]; - if let (Some(path), Some(name)) = (keeper_path, replica_name) { - params.push(format!("keeper_path={:?}, replica_name={:?}", path, name)); - } - if let Some(v) = ver { - params.push(format!("ver={:?}", v)); - } - if let Some(d) = is_deleted { - params.push(format!("is_deleted={:?}", d)); - } - write!(output, "{}", params.join(", ")).unwrap(); - writeln!(output, "),").unwrap(); + writeln!(output, "),").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedReplacingMergeTree { + keeper_path, + replica_name, + ver, + is_deleted, + } => { + write!(output, " engine=ReplicatedReplacingMergeTreeEngine(").unwrap(); + let mut params = vec![]; + if let (Some(path), Some(name)) = (keeper_path, replica_name) { + params.push(format!("keeper_path={:?}, replica_name={:?}", path, name)); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedAggregatingMergeTree { - keeper_path, - replica_name, - } => { - write!(output, " engine=ReplicatedAggregatingMergeTreeEngine(").unwrap(); - if let (Some(path), Some(name)) = (keeper_path, replica_name) { - write!(output, "keeper_path={:?}, replica_name={:?}", path, name).unwrap(); - } - writeln!(output, "),").unwrap(); + if let Some(v) = ver { + params.push(format!("ver={:?}", v)); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedSummingMergeTree { - keeper_path, - replica_name, - columns, - } => { - write!(output, " engine=ReplicatedSummingMergeTreeEngine(").unwrap(); - let mut params = vec![]; - if let (Some(path), Some(name)) = (keeper_path, replica_name) { - params.push(format!("keeper_path={:?}, replica_name={:?}", path, name)); - } - if let Some(cols) = columns { - if !cols.is_empty() { - params.push(format!("columns={:?}", cols)); - } - } - write!(output, "{}", params.join(", ")).unwrap(); - writeln!(output, "),").unwrap(); + if let Some(d) = is_deleted { + params.push(format!("is_deleted={:?}", d)); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::S3 { - path, - format, - aws_access_key_id, - aws_secret_access_key, - compression, - partition_strategy, - partition_columns_in_data_file, - } => { - writeln!(output, " engine=S3Engine(").unwrap(); - writeln!(output, " path={:?},", path).unwrap(); - writeln!(output, " format={:?},", format).unwrap(); - if let Some(key_id) = aws_access_key_id { - writeln!(output, " aws_access_key_id={:?},", key_id).unwrap(); - } - if let Some(secret) = aws_secret_access_key { - writeln!(output, " aws_secret_access_key={:?},", secret).unwrap(); - } - if let Some(comp) = compression { - writeln!(output, " compression={:?},", comp).unwrap(); - } - if let Some(ps) = partition_strategy { - writeln!(output, " partition_strategy={:?},", ps).unwrap(); - } - if let Some(pc) = partition_columns_in_data_file { - writeln!(output, " partition_columns_in_data_file={:?},", pc).unwrap(); - } - writeln!(output, " ),").unwrap(); + write!(output, "{}", params.join(", ")).unwrap(); + writeln!(output, "),").unwrap(); + } + 
crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedAggregatingMergeTree { + keeper_path, + replica_name, + } => { + write!(output, " engine=ReplicatedAggregatingMergeTreeEngine(").unwrap(); + if let (Some(path), Some(name)) = (keeper_path, replica_name) { + write!(output, "keeper_path={:?}, replica_name={:?}", path, name).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::Buffer { - target_database, - target_table, - num_layers, - min_time, - max_time, - min_rows, - max_rows, - min_bytes, - max_bytes, - flush_time, - flush_rows, - flush_bytes, - } => { - writeln!(output, " engine=BufferEngine(").unwrap(); - writeln!(output, " target_database={:?},", target_database).unwrap(); - writeln!(output, " target_table={:?},", target_table).unwrap(); - writeln!(output, " num_layers={},", num_layers).unwrap(); - writeln!(output, " min_time={},", min_time).unwrap(); - writeln!(output, " max_time={},", max_time).unwrap(); - writeln!(output, " min_rows={},", min_rows).unwrap(); - writeln!(output, " max_rows={},", max_rows).unwrap(); - writeln!(output, " min_bytes={},", min_bytes).unwrap(); - writeln!(output, " max_bytes={},", max_bytes).unwrap(); - if let Some(ft) = flush_time { - writeln!(output, " flush_time={},", ft).unwrap(); - } - if let Some(fr) = flush_rows { - writeln!(output, " flush_rows={},", fr).unwrap(); - } - if let Some(fb) = flush_bytes { - writeln!(output, " flush_bytes={},", fb).unwrap(); - } - writeln!(output, " ),").unwrap(); + writeln!(output, "),").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedSummingMergeTree { + keeper_path, + replica_name, + columns, + } => { + write!(output, " engine=ReplicatedSummingMergeTreeEngine(").unwrap(); + let mut params = vec![]; + if let (Some(path), Some(name)) = (keeper_path, replica_name) { + params.push(format!("keeper_path={:?}, replica_name={:?}", path, name)); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::Distributed { - cluster, - target_database, - target_table, - sharding_key, - policy_name, - } => { - writeln!(output, " engine=DistributedEngine(").unwrap(); - writeln!(output, " cluster={:?},", cluster).unwrap(); - writeln!(output, " target_database={:?},", target_database).unwrap(); - writeln!(output, " target_table={:?},", target_table).unwrap(); - if let Some(key) = sharding_key { - writeln!(output, " sharding_key={:?},", key).unwrap(); - } - if let Some(policy) = policy_name { - writeln!(output, " policy_name={:?},", policy).unwrap(); + if let Some(cols) = columns { + if !cols.is_empty() { + params.push(format!("columns={:?}", cols)); } - writeln!(output, " ),").unwrap(); } + write!(output, "{}", params.join(", ")).unwrap(); + writeln!(output, "),").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::S3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + partition_strategy, + partition_columns_in_data_file, + } => { + writeln!(output, " engine=S3Engine(").unwrap(); + writeln!(output, " path={:?},", path).unwrap(); + writeln!(output, " format={:?},", format).unwrap(); + if let Some(key_id) = aws_access_key_id { + writeln!(output, " aws_access_key_id={:?},", key_id).unwrap(); + } + if let Some(secret) = aws_secret_access_key { + writeln!(output, " aws_secret_access_key={:?},", secret).unwrap(); + } + if let Some(comp) = compression { + writeln!(output, " compression={:?},", comp).unwrap(); + } + if let Some(ps) = partition_strategy { + 
writeln!(output, " partition_strategy={:?},", ps).unwrap(); + } + if let Some(pc) = partition_columns_in_data_file { + writeln!(output, " partition_columns_in_data_file={:?},", pc).unwrap(); + } + writeln!(output, " ),").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::Buffer { + target_database, + target_table, + num_layers, + min_time, + max_time, + min_rows, + max_rows, + min_bytes, + max_bytes, + flush_time, + flush_rows, + flush_bytes, + } => { + writeln!(output, " engine=BufferEngine(").unwrap(); + writeln!(output, " target_database={:?},", target_database).unwrap(); + writeln!(output, " target_table={:?},", target_table).unwrap(); + writeln!(output, " num_layers={},", num_layers).unwrap(); + writeln!(output, " min_time={},", min_time).unwrap(); + writeln!(output, " max_time={},", max_time).unwrap(); + writeln!(output, " min_rows={},", min_rows).unwrap(); + writeln!(output, " max_rows={},", max_rows).unwrap(); + writeln!(output, " min_bytes={},", min_bytes).unwrap(); + writeln!(output, " max_bytes={},", max_bytes).unwrap(); + if let Some(ft) = flush_time { + writeln!(output, " flush_time={},", ft).unwrap(); + } + if let Some(fr) = flush_rows { + writeln!(output, " flush_rows={},", fr).unwrap(); + } + if let Some(fb) = flush_bytes { + writeln!(output, " flush_bytes={},", fb).unwrap(); + } + writeln!(output, " ),").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::Distributed { + cluster, + target_database, + target_table, + sharding_key, + policy_name, + } => { + writeln!(output, " engine=DistributedEngine(").unwrap(); + writeln!(output, " cluster={:?},", cluster).unwrap(); + writeln!(output, " target_database={:?},", target_database).unwrap(); + writeln!(output, " target_table={:?},", target_table).unwrap(); + if let Some(key) = sharding_key { + writeln!(output, " sharding_key={:?},", key).unwrap(); + } + if let Some(policy) = policy_name { + writeln!(output, " policy_name={:?},", policy).unwrap(); + } + writeln!(output, " ),").unwrap(); } } if let Some(version) = &table.version { @@ -1052,7 +1050,7 @@ mod tests { order_by: OrderBy::Fields(vec!["primary_key".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "Foo".to_string(), @@ -1146,7 +1144,7 @@ foo_table = OlapTable[Foo]("Foo", OlapConfig( order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "NestedArray".to_string(), @@ -1265,7 +1263,7 @@ nested_array_table = OlapTable[NestedArray]("NestedArray", OlapConfig( order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "User".to_string(), @@ -1333,14 +1331,14 @@ user_table = OlapTable[User]("User", OlapConfig( order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::S3Queue { + engine: ClickhouseEngine::S3Queue { s3_path: "s3://bucket/path".to_string(), format: "JSONEachRow".to_string(), compression: Some("gzip".to_string()), headers: None, aws_access_key_id: None, aws_secret_access_key: None, - }), + }, version: None, source_primitive: PrimitiveSignature { name: 
"Events".to_string(), @@ -1389,10 +1387,10 @@ user_table = OlapTable[User]("User", OlapConfig( order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::ReplacingMergeTree { + engine: ClickhouseEngine::ReplacingMergeTree { ver: None, is_deleted: None, - }), + }, version: None, source_primitive: PrimitiveSignature { name: "UserData".to_string(), @@ -1468,10 +1466,10 @@ user_table = OlapTable[User]("User", OlapConfig( order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::ReplacingMergeTree { + engine: ClickhouseEngine::ReplacingMergeTree { ver: Some("version".to_string()), is_deleted: Some("is_deleted".to_string()), - }), + }, version: None, source_primitive: PrimitiveSignature { name: "UserData".to_string(), @@ -1543,7 +1541,7 @@ user_table = OlapTable[User]("User", OlapConfig( order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "Location".to_string(), @@ -1626,7 +1624,7 @@ user_table = OlapTable[User]("User", OlapConfig( order_by: OrderBy::Fields(vec!["id".to_string(), "timestamp".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "Events".to_string(), @@ -1671,7 +1669,7 @@ user_table = OlapTable[User]("User", OlapConfig( order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "IndexPy".to_string(), @@ -1757,7 +1755,7 @@ user_table = OlapTable[User]("User", OlapConfig( order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "JsonTest".to_string(), @@ -1810,7 +1808,7 @@ user_table = OlapTable[User]("User", OlapConfig( order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "ExternalData".to_string(), diff --git a/apps/framework-cli/src/framework/typescript/generate.rs b/apps/framework-cli/src/framework/typescript/generate.rs index 3a89d3645f..ac3edb8baa 100644 --- a/apps/framework-cli/src/framework/typescript/generate.rs +++ b/apps/framework-cli/src/framework/typescript/generate.rs @@ -653,184 +653,182 @@ pub fn tables_to_typescript(tables: &[Table], life_cycle: Option) -> if let Some(database) = &table.database { writeln!(output, " database: {:?},", database).unwrap(); } - if let Some(engine) = &table.engine { - match engine { - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::S3Queue { - s3_path, - format, - compression, - headers, - aws_access_key_id, - aws_secret_access_key, - } => { - // For S3Queue, properties are at the same level as orderByFields - writeln!(output, " engine: ClickHouseEngines.S3Queue,").unwrap(); - writeln!(output, " s3Path: {:?},", s3_path).unwrap(); - writeln!(output, " format: {:?},", format).unwrap(); - if let Some(compression) = compression { - writeln!(output, " compression: 
{:?},", compression).unwrap(); - } - if let Some(key_id) = aws_access_key_id { - writeln!(output, " awsAccessKeyId: {:?},", key_id).unwrap(); - } - if let Some(secret) = aws_secret_access_key { - writeln!(output, " awsSecretAccessKey: {:?},", secret).unwrap(); - } - if let Some(headers) = headers { - write!(output, " headers: {{").unwrap(); - for (i, (key, value)) in headers.iter().enumerate() { - if i > 0 { write!(output, ",").unwrap(); } - write!(output, " {:?}: {:?}", key, value).unwrap(); - } - writeln!(output, " }},").unwrap(); - } + match &table.engine { + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::S3Queue { + s3_path, + format, + compression, + headers, + aws_access_key_id, + aws_secret_access_key, + } => { + // For S3Queue, properties are at the same level as orderByFields + writeln!(output, " engine: ClickHouseEngines.S3Queue,").unwrap(); + writeln!(output, " s3Path: {:?},", s3_path).unwrap(); + writeln!(output, " format: {:?},", format).unwrap(); + if let Some(compression) = compression { + writeln!(output, " compression: {:?},", compression).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::MergeTree => { - writeln!(output, " engine: ClickHouseEngines.MergeTree,").unwrap(); + if let Some(key_id) = aws_access_key_id { + writeln!(output, " awsAccessKeyId: {:?},", key_id).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplacingMergeTree { ver, is_deleted } => { - // Emit ReplacingMergeTree engine configuration - writeln!(output, " engine: ClickHouseEngines.ReplacingMergeTree,").unwrap(); - if let Some(ver_col) = ver { - writeln!(output, " ver: \"{}\",", ver_col).unwrap(); - } - if let Some(is_deleted_col) = is_deleted { - writeln!(output, " isDeleted: \"{}\",", is_deleted_col).unwrap(); + if let Some(secret) = aws_secret_access_key { + writeln!(output, " awsSecretAccessKey: {:?},", secret).unwrap(); + } + if let Some(headers) = headers { + write!(output, " headers: {{").unwrap(); + for (i, (key, value)) in headers.iter().enumerate() { + if i > 0 { write!(output, ",").unwrap(); } + write!(output, " {:?}: {:?}", key, value).unwrap(); } + writeln!(output, " }},").unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::AggregatingMergeTree => { - writeln!(output, " engine: ClickHouseEngines.AggregatingMergeTree,").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::MergeTree => { + writeln!(output, " engine: ClickHouseEngines.MergeTree,").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplacingMergeTree { ver, is_deleted } => { + // Emit ReplacingMergeTree engine configuration + writeln!(output, " engine: ClickHouseEngines.ReplacingMergeTree,").unwrap(); + if let Some(ver_col) = ver { + writeln!(output, " ver: \"{}\",", ver_col).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::SummingMergeTree { columns } => { - writeln!(output, " engine: ClickHouseEngines.SummingMergeTree,").unwrap(); - if let Some(cols) = columns { - if !cols.is_empty() { - let col_list = cols.iter().map(|c| format!("{:?}", c)).collect::>().join(", "); - writeln!(output, " columns: [{}],", col_list).unwrap(); - } - } + if let Some(is_deleted_col) = is_deleted { + writeln!(output, " isDeleted: \"{}\",", is_deleted_col).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedMergeTree { keeper_path, replica_name } => { - writeln!(output, " engine: 
ClickHouseEngines.ReplicatedMergeTree,").unwrap(); - if let (Some(path), Some(name)) = (keeper_path, replica_name) { - writeln!(output, " keeperPath: {:?},", path).unwrap(); - writeln!(output, " replicaName: {:?},", name).unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::AggregatingMergeTree => { + writeln!(output, " engine: ClickHouseEngines.AggregatingMergeTree,").unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::SummingMergeTree { columns } => { + writeln!(output, " engine: ClickHouseEngines.SummingMergeTree,").unwrap(); + if let Some(cols) = columns { + if !cols.is_empty() { + let col_list = cols.iter().map(|c| format!("{:?}", c)).collect::>().join(", "); + writeln!(output, " columns: [{}],", col_list).unwrap(); } } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedReplacingMergeTree { keeper_path, replica_name, ver, is_deleted } => { - writeln!(output, " engine: ClickHouseEngines.ReplicatedReplacingMergeTree,").unwrap(); - if let (Some(path), Some(name)) = (keeper_path, replica_name) { - writeln!(output, " keeperPath: {:?},", path).unwrap(); - writeln!(output, " replicaName: {:?},", name).unwrap(); - } - if let Some(ver_col) = ver { - writeln!(output, " ver: {:?},", ver_col).unwrap(); - } - if let Some(is_deleted_col) = is_deleted { - writeln!(output, " isDeleted: {:?},", is_deleted_col).unwrap(); - } + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedMergeTree { keeper_path, replica_name } => { + writeln!(output, " engine: ClickHouseEngines.ReplicatedMergeTree,").unwrap(); + if let (Some(path), Some(name)) = (keeper_path, replica_name) { + writeln!(output, " keeperPath: {:?},", path).unwrap(); + writeln!(output, " replicaName: {:?},", name).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedAggregatingMergeTree { keeper_path, replica_name } => { - writeln!(output, " engine: ClickHouseEngines.ReplicatedAggregatingMergeTree,").unwrap(); - if let (Some(path), Some(name)) = (keeper_path, replica_name) { - writeln!(output, " keeperPath: {:?},", path).unwrap(); - writeln!(output, " replicaName: {:?},", name).unwrap(); - } + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedReplacingMergeTree { keeper_path, replica_name, ver, is_deleted } => { + writeln!(output, " engine: ClickHouseEngines.ReplicatedReplacingMergeTree,").unwrap(); + if let (Some(path), Some(name)) = (keeper_path, replica_name) { + writeln!(output, " keeperPath: {:?},", path).unwrap(); + writeln!(output, " replicaName: {:?},", name).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedSummingMergeTree { keeper_path, replica_name, columns } => { - writeln!(output, " engine: ClickHouseEngines.ReplicatedSummingMergeTree,").unwrap(); - if let (Some(path), Some(name)) = (keeper_path, replica_name) { - writeln!(output, " keeperPath: {:?},", path).unwrap(); - writeln!(output, " replicaName: {:?},", name).unwrap(); - } - if let Some(cols) = columns { - if !cols.is_empty() { - let col_list = cols.iter().map(|c| format!("{:?}", c)).collect::>().join(", "); - writeln!(output, " columns: [{}],", col_list).unwrap(); - } - } + if let Some(ver_col) = ver { + writeln!(output, " ver: {:?},", ver_col).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::S3 { - path, - format, - aws_access_key_id, - aws_secret_access_key, - compression, - partition_strategy, - 
partition_columns_in_data_file, - } => { - writeln!(output, " engine: ClickHouseEngines.S3,").unwrap(); - writeln!(output, " path: {:?},", path).unwrap(); - writeln!(output, " format: {:?},", format).unwrap(); - if let Some(key_id) = aws_access_key_id { - writeln!(output, " awsAccessKeyId: {:?},", key_id).unwrap(); - } - if let Some(secret) = aws_secret_access_key { - writeln!(output, " awsSecretAccessKey: {:?},", secret).unwrap(); - } - if let Some(comp) = compression { - writeln!(output, " compression: {:?},", comp).unwrap(); - } - if let Some(ps) = partition_strategy { - writeln!(output, " partitionStrategy: {:?},", ps).unwrap(); - } - if let Some(pc) = partition_columns_in_data_file { - writeln!(output, " partitionColumnsInDataFile: {:?},", pc).unwrap(); - } + if let Some(is_deleted_col) = is_deleted { + writeln!(output, " isDeleted: {:?},", is_deleted_col).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::Buffer { - target_database, - target_table, - num_layers, - min_time, - max_time, - min_rows, - max_rows, - min_bytes, - max_bytes, - flush_time, - flush_rows, - flush_bytes, - } => { - writeln!(output, " engine: ClickHouseEngines.Buffer,").unwrap(); - writeln!(output, " targetDatabase: {:?},", target_database).unwrap(); - writeln!(output, " targetTable: {:?},", target_table).unwrap(); - writeln!(output, " numLayers: {},", num_layers).unwrap(); - writeln!(output, " minTime: {},", min_time).unwrap(); - writeln!(output, " maxTime: {},", max_time).unwrap(); - writeln!(output, " minRows: {},", min_rows).unwrap(); - writeln!(output, " maxRows: {},", max_rows).unwrap(); - writeln!(output, " minBytes: {},", min_bytes).unwrap(); - writeln!(output, " maxBytes: {},", max_bytes).unwrap(); - if let Some(ft) = flush_time { - writeln!(output, " flushTime: {},", ft).unwrap(); - } - if let Some(fr) = flush_rows { - writeln!(output, " flushRows: {},", fr).unwrap(); - } - if let Some(fb) = flush_bytes { - writeln!(output, " flushBytes: {},", fb).unwrap(); - } + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedAggregatingMergeTree { keeper_path, replica_name } => { + writeln!(output, " engine: ClickHouseEngines.ReplicatedAggregatingMergeTree,").unwrap(); + if let (Some(path), Some(name)) = (keeper_path, replica_name) { + writeln!(output, " keeperPath: {:?},", path).unwrap(); + writeln!(output, " replicaName: {:?},", name).unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::Distributed { - cluster, - target_database, - target_table, - sharding_key, - policy_name, - } => { - writeln!(output, " engine: ClickHouseEngines.Distributed,").unwrap(); - writeln!(output, " cluster: {:?},", cluster).unwrap(); - writeln!(output, " targetDatabase: {:?},", target_database).unwrap(); - writeln!(output, " targetTable: {:?},", target_table).unwrap(); - if let Some(key) = sharding_key { - writeln!(output, " shardingKey: {:?},", key).unwrap(); - } - if let Some(policy) = policy_name { - writeln!(output, " policyName: {:?},", policy).unwrap(); + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::ReplicatedSummingMergeTree { keeper_path, replica_name, columns } => { + writeln!(output, " engine: ClickHouseEngines.ReplicatedSummingMergeTree,").unwrap(); + if let (Some(path), Some(name)) = (keeper_path, replica_name) { + writeln!(output, " keeperPath: {:?},", path).unwrap(); + writeln!(output, " replicaName: {:?},", name).unwrap(); + } + if let Some(cols) = columns { + if !cols.is_empty() { + let col_list = 
cols.iter().map(|c| format!("{:?}", c)).collect::>().join(", "); + writeln!(output, " columns: [{}],", col_list).unwrap(); } } } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::S3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + partition_strategy, + partition_columns_in_data_file, + } => { + writeln!(output, " engine: ClickHouseEngines.S3,").unwrap(); + writeln!(output, " path: {:?},", path).unwrap(); + writeln!(output, " format: {:?},", format).unwrap(); + if let Some(key_id) = aws_access_key_id { + writeln!(output, " awsAccessKeyId: {:?},", key_id).unwrap(); + } + if let Some(secret) = aws_secret_access_key { + writeln!(output, " awsSecretAccessKey: {:?},", secret).unwrap(); + } + if let Some(comp) = compression { + writeln!(output, " compression: {:?},", comp).unwrap(); + } + if let Some(ps) = partition_strategy { + writeln!(output, " partitionStrategy: {:?},", ps).unwrap(); + } + if let Some(pc) = partition_columns_in_data_file { + writeln!(output, " partitionColumnsInDataFile: {:?},", pc).unwrap(); + } + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::Buffer { + target_database, + target_table, + num_layers, + min_time, + max_time, + min_rows, + max_rows, + min_bytes, + max_bytes, + flush_time, + flush_rows, + flush_bytes, + } => { + writeln!(output, " engine: ClickHouseEngines.Buffer,").unwrap(); + writeln!(output, " targetDatabase: {:?},", target_database).unwrap(); + writeln!(output, " targetTable: {:?},", target_table).unwrap(); + writeln!(output, " numLayers: {},", num_layers).unwrap(); + writeln!(output, " minTime: {},", min_time).unwrap(); + writeln!(output, " maxTime: {},", max_time).unwrap(); + writeln!(output, " minRows: {},", min_rows).unwrap(); + writeln!(output, " maxRows: {},", max_rows).unwrap(); + writeln!(output, " minBytes: {},", min_bytes).unwrap(); + writeln!(output, " maxBytes: {},", max_bytes).unwrap(); + if let Some(ft) = flush_time { + writeln!(output, " flushTime: {},", ft).unwrap(); + } + if let Some(fr) = flush_rows { + writeln!(output, " flushRows: {},", fr).unwrap(); + } + if let Some(fb) = flush_bytes { + writeln!(output, " flushBytes: {},", fb).unwrap(); + } + } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::Distributed { + cluster, + target_database, + target_table, + sharding_key, + policy_name, + } => { + writeln!(output, " engine: ClickHouseEngines.Distributed,").unwrap(); + writeln!(output, " cluster: {:?},", cluster).unwrap(); + writeln!(output, " targetDatabase: {:?},", target_database).unwrap(); + writeln!(output, " targetTable: {:?},", target_table).unwrap(); + if let Some(key) = sharding_key { + writeln!(output, " shardingKey: {:?},", key).unwrap(); + } + if let Some(policy) = policy_name { + writeln!(output, " policyName: {:?},", policy).unwrap(); + } + } } if let Some(version) = &table.version { writeln!(output, " version: {:?},", version).unwrap(); @@ -989,7 +987,7 @@ mod tests { order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "User".to_string(), @@ -1060,14 +1058,14 @@ export const UserTable = new OlapTable("User", { order_by: OrderBy::Fields(vec!["id".to_string()]), sample_by: None, partition_by: None, - engine: Some(ClickhouseEngine::S3Queue { + engine: ClickhouseEngine::S3Queue { s3_path: "s3://bucket/path".to_string(), format: "JSONEachRow".to_string(), 
compression: Some("gzip".to_string()), headers: None, aws_access_key_id: None, aws_secret_access_key: None, - }), + }, version: None, source_primitive: PrimitiveSignature { name: "Events".to_string(), @@ -1115,7 +1113,7 @@ export const UserTable = new OlapTable("User", { order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "UserData".to_string(), @@ -1189,10 +1187,10 @@ export const UserTable = new OlapTable("User", { order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::ReplacingMergeTree { + engine: ClickhouseEngine::ReplacingMergeTree { ver: Some("version".to_string()), is_deleted: Some("is_deleted".to_string()), - }), + }, version: None, source_primitive: PrimitiveSignature { name: "UserData".to_string(), @@ -1234,10 +1232,10 @@ export const UserTable = new OlapTable("User", { sample_by: None, order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, - engine: Some(ClickhouseEngine::ReplicatedMergeTree { + engine: ClickhouseEngine::ReplicatedMergeTree { keeper_path: Some("/clickhouse/tables/{shard}/user_data".to_string()), replica_name: Some("{replica}".to_string()), - }), + }, version: None, source_primitive: PrimitiveSignature { name: "UserData".to_string(), @@ -1309,12 +1307,12 @@ export const UserTable = new OlapTable("User", { sample_by: None, order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, - engine: Some(ClickhouseEngine::ReplicatedReplacingMergeTree { + engine: ClickhouseEngine::ReplicatedReplacingMergeTree { keeper_path: Some("/clickhouse/tables/{shard}/user_data".to_string()), replica_name: Some("{replica}".to_string()), ver: Some("version".to_string()), is_deleted: Some("is_deleted".to_string()), - }), + }, version: None, source_primitive: PrimitiveSignature { name: "UserData".to_string(), @@ -1362,7 +1360,7 @@ export const UserTable = new OlapTable("User", { order_by: OrderBy::Fields(vec!["u64".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "IndexTest".to_string(), @@ -1448,7 +1446,7 @@ export const UserTable = new OlapTable("User", { order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "Task".to_string(), @@ -1526,7 +1524,7 @@ export const TaskTable = new OlapTable("Task", { order_by: OrderBy::Fields(vec!["id".to_string(), "timestamp".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "Events".to_string(), @@ -1594,7 +1592,7 @@ export const TaskTable = new OlapTable("Task", { order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "JsonTest".to_string(), @@ -1641,7 +1639,7 @@ export const TaskTable = new OlapTable("Task", { order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + 
engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "ExternalData".to_string(), diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mod.rs index 87252bb29b..66ea7561a6 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mod.rs @@ -167,7 +167,7 @@ impl Default for DiagnosticOptions { #[derive(Debug, Clone)] pub struct DiagnosticRequest { /// Components to diagnose (tables, views, etc.) - pub components: Vec<(Component, Option)>, + pub components: Vec<(Component, ClickhouseEngine)>, /// Diagnostic options for filtering and configuration pub options: DiagnosticOptions, } @@ -374,7 +374,7 @@ pub async fn run_diagnostics( for (component, engine) in request.components { for provider in &component_specific { // Check if provider is applicable to this component - if !provider.applicable_to(&component, engine.as_ref()) { + if !provider.applicable_to(&component, Some(&engine)) { continue; } @@ -397,7 +397,7 @@ pub async fn run_diagnostics( join_set.spawn(async move { let result = if let Some(provider) = provider { provider - .diagnose(&component, engine.as_ref(), &config, since.as_deref()) + .diagnose(&component, Some(&engine), &config, since.as_deref()) .await } else { // This shouldn't happen since we just got the name from a valid provider @@ -891,7 +891,7 @@ mod tests { // Test with invalid diagnostic name let request = DiagnosticRequest { - components: vec![(component.clone(), None)], + components: vec![(component.clone(), ClickhouseEngine::default())], options: DiagnosticOptions { diagnostic_names: vec!["invalid_diagnostic".to_string()], min_severity: Severity::Info, @@ -911,7 +911,7 @@ mod tests { // Test with mix of valid and invalid names let request = DiagnosticRequest { - components: vec![(component.clone(), None)], + components: vec![(component.clone(), ClickhouseEngine::default())], options: DiagnosticOptions { diagnostic_names: vec![ "MutationDiagnostic".to_string(), // Valid name diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs index 4e27e2e2b3..ea133f775d 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs @@ -365,7 +365,7 @@ fn is_only_required_change_for_special_column_type(before: &Column, after: &Colu impl ClickHouseTableDiffStrategy { /// Check if a table uses the S3Queue engine pub fn is_s3queue_table(table: &Table) -> bool { - matches!(&table.engine, Some(ClickhouseEngine::S3Queue { .. })) + matches!(&table.engine, ClickhouseEngine::S3Queue { .. 
}) } /// Check if a SQL resource is a materialized view that needs population @@ -498,7 +498,7 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { let after_primary_keys = after.primary_key_columns(); if before_primary_keys != after_primary_keys // S3 allows specifying PK, but that information is not in system.columns - && after.engine.as_ref().is_none_or(|e| e.is_merge_tree_family()) + && after.engine.is_merge_tree_family() { log::warn!( "ClickHouse: Primary key structure changed for table '{}', requiring drop+create", @@ -519,13 +519,10 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { before_hash != after_hash } else { // Fallback to direct engine comparison if hashes are not available - let before_engine = before.engine.as_ref(); - match after.engine.as_ref() { - // after.engine is unset -> before engine should be same as default - None => before_engine.is_some_and(|e| *e != ClickhouseEngine::MergeTree), - // force recreate only if engines are different - Some(e) => Some(e) != before_engine, - } + // Note: Tables are already normalized at this point (None -> Some(MergeTree)) + // via normalize_inframap_engines() in the remote plan flow, so we can + // safely use direct comparison + before.engine != after.engine }; // Check if engine has changed (using hash comparison when available) @@ -592,19 +589,16 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { // Check if this is an S3Queue table with column changes // S3Queue only supports MODIFY/RESET SETTING, not column operations - if !column_changes.is_empty() { - if let Some(engine) = &before.engine { - if matches!(engine, ClickhouseEngine::S3Queue { .. }) { - log::warn!( - "ClickHouse: S3Queue table '{}' has column changes, requiring drop+create (S3Queue doesn't support ALTER TABLE for columns)", - before.name - ); - return vec![ - OlapChange::Table(TableChange::Removed(before.clone())), - OlapChange::Table(TableChange::Added(after.clone())), - ]; - } - } + if !column_changes.is_empty() && matches!(&before.engine, ClickhouseEngine::S3Queue { .. 
}) + { + log::warn!( + "ClickHouse: S3Queue table '{}' has column changes, requiring drop+create (S3Queue doesn't support ALTER TABLE for columns)", + before.name + ); + return vec![ + OlapChange::Table(TableChange::Removed(before.clone())), + OlapChange::Table(TableChange::Added(after.clone())), + ]; } // Filter out no-op changes for ClickHouse semantics: @@ -677,10 +671,14 @@ mod tests { order_by: OrderBy::Fields(order_by), partition_by: None, sample_by: None, - engine: deduplicate.then_some(ClickhouseEngine::ReplacingMergeTree { - ver: None, - is_deleted: None, - }), + engine: if deduplicate { + ClickhouseEngine::ReplacingMergeTree { + ver: None, + is_deleted: None, + } + } else { + ClickhouseEngine::MergeTree + }, version: Some(Version::from_string("1.0.0".to_string())), source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1480,14 +1478,14 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::S3Queue { + engine: ClickhouseEngine::S3Queue { s3_path: "s3://bucket/path".to_string(), format: "JSONEachRow".to_string(), compression: None, headers: None, aws_access_key_id: None, aws_secret_access_key: None, - }), + }, version: None, source_primitive: PrimitiveSignature { name: "test_s3".to_string(), diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs index a22012beb3..2eecff31d0 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs @@ -10,7 +10,6 @@ use crate::infrastructure::olap::clickhouse::model::{ }; use super::errors::ClickhouseError; -use super::queries::ClickhouseEngine; /// Generates a column comment, preserving any existing user comment and adding/updating metadata for enums fn generate_column_comment(column: &Column) -> Result, ClickhouseError> { @@ -328,10 +327,7 @@ pub fn std_columns_to_clickhouse_columns( pub fn std_table_to_clickhouse_table(table: &Table) -> Result { let columns = std_columns_to_clickhouse_columns(&table.columns)?; - let clickhouse_engine = match &table.engine { - Some(engine) => engine.clone(), - None => ClickhouseEngine::MergeTree, - }; + let clickhouse_engine = table.engine.clone(); Ok(ClickHouseTable { name: table.name.clone(), diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs index 19c07ad5cb..0931fe3fdd 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs @@ -1886,11 +1886,9 @@ impl OlapOperations for ConfiguredDBClient { // Fallback to the simple engine name from system.tables debug!("Could not extract engine from CREATE TABLE query, falling back to system.tables engine column"); engine.as_str().try_into().ok() - }; - - let engine_params_hash = engine_parsed - .as_ref() - .map(|e: &ClickhouseEngine| e.non_alterable_params_hash()); + } + .unwrap_or(ClickhouseEngine::MergeTree); + let engine_params_hash = Some(engine_parsed.non_alterable_params_hash()); // Extract table settings from CREATE TABLE query let table_settings = extract_table_settings_from_create_table(&create_query); @@ -2910,7 +2908,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: Some("toYYYYMM(created_at)".to_string()), sample_by: None, - engine: None, + 
engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "Test".to_string(), @@ -2976,7 +2974,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: Some("toYYYYMM(created_at)".to_string()), sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "Test".to_string(), diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs index ab4fceaae6..8c4fa0da06 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs @@ -137,7 +137,9 @@ SETTINGS {{settings}}{{/if}}"#; #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[allow(clippy::large_enum_variant)] // S3Queue has many fields, but this is acceptable for our use case +#[derive(Default)] pub enum ClickhouseEngine { + #[default] MergeTree, ReplacingMergeTree { // Optional version column for deduplication diff --git a/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs b/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs index f5237e175a..d3f5644820 100644 --- a/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs +++ b/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs @@ -1308,7 +1308,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1383,7 +1383,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1406,7 +1406,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1501,7 +1501,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1524,7 +1524,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1639,7 +1639,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1796,7 +1796,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1818,7 +1818,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1840,7 +1840,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: 
None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1931,7 +1931,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1953,7 +1953,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1975,7 +1975,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -1997,7 +1997,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -2019,7 +2019,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -2173,7 +2173,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -2196,7 +2196,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -2318,7 +2318,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -2341,7 +2341,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -2468,7 +2468,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -2490,7 +2490,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -2697,7 +2697,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -2807,7 +2807,7 @@ mod tests { order_by: OrderBy::Fields(vec![]), partition_by: None, sample_by: None, - engine: None, + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -2927,7 +2927,7 @@ mod tests { order_by: OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), @@ -2972,7 +2972,7 @@ mod tests { order_by: 
OrderBy::Fields(vec!["id".to_string()]), partition_by: None, sample_by: None, - engine: Some(ClickhouseEngine::MergeTree), + engine: ClickhouseEngine::MergeTree, version: None, source_primitive: PrimitiveSignature { name: "test".to_string(), From 7fef10074bdd1703efe7995887e8e1fa26b6b6af Mon Sep 17 00:00:00 2001 From: Dave Seleno <958603+onelesd@users.noreply.github.com> Date: Mon, 17 Nov 2025 12:11:22 -0800 Subject: [PATCH 24/59] add moose query cli command (#2996) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit adds `moose query` Showing how it mimics `moose peek`: Screenshot 2025-11-14 at 3 39 48 PM --- > [!NOTE] > Introduces `moose query` to run SQL from arg/file/stdin, stream NDJSON with row limits, plus telemetry, infra checks, tests, and docs. > > - **CLI**: > - **New command** `moose query`: execute SQL against ClickHouse from `query` arg, `-f/--file`, or stdin; streams newline-delimited JSON; `--limit` caps rows. > - Integrated into `top_command_handler` and command parsing (`Commands::Query`). > - Validates Redis/infrastructure state; requires `moose dev` running. > - **Routines**: > - Added `routines/query.rs` implementing query execution, JSON streaming, and limit enforcement. > - Exposes `row_to_json` in `clickhouse_alt_client` for reuse. > - **Telemetry**: > - Added `ActivityType::QueryCommand` and usage capture in query flow. > - **Tests (E2E)**: > - `cli-query.test.ts`: covers argument, file, stdin, multi-row, `--limit`, and error handling. > - **Docs**: > - Updated CLI references to document `moose query` usage, options, requirements, and output in `apps/framework-docs*`. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit c33bbd52d446300f10c8fbd33960659263f54817. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- apps/framework-cli-e2e/test/cli-query.test.ts | 205 ++++++++++++++++++ apps/framework-cli/src/cli.rs | 25 +++ apps/framework-cli/src/cli/commands.rs | 13 ++ apps/framework-cli/src/cli/routines/mod.rs | 1 + apps/framework-cli/src/cli/routines/query.rs | 183 ++++++++++++++++ .../olap/clickhouse_alt_client.rs | 2 +- apps/framework-cli/src/utilities/capture.rs | 2 + .../content/moosestack/moose-cli.mdx | 28 +++ .../src/pages/moose/moose-cli.mdx | 28 +++ 9 files changed, 486 insertions(+), 1 deletion(-) create mode 100644 apps/framework-cli-e2e/test/cli-query.test.ts create mode 100644 apps/framework-cli/src/cli/routines/query.rs diff --git a/apps/framework-cli-e2e/test/cli-query.test.ts b/apps/framework-cli-e2e/test/cli-query.test.ts new file mode 100644 index 0000000000..f28be5b1d7 --- /dev/null +++ b/apps/framework-cli-e2e/test/cli-query.test.ts @@ -0,0 +1,205 @@ +/// +/// +/// +/** + * E2E tests for moose query command (ENG-1226) + * + * Tests the query command functionality: + * 1. Execute SQL from command line argument + * 2. Execute SQL from file + * 3. Execute SQL from stdin + * 4. Respect limit parameter + * 5. 
Handle errors gracefully + */ + +import { spawn, ChildProcess } from "child_process"; +import { expect } from "chai"; +import * as fs from "fs"; +import * as path from "path"; +import { promisify } from "util"; + +import { TIMEOUTS } from "./constants"; +import { + waitForServerStart, + createTempTestDirectory, + cleanupTestSuite, + setupTypeScriptProject, +} from "./utils"; + +const execAsync = promisify(require("child_process").exec); + +const CLI_PATH = path.resolve(__dirname, "../../../target/debug/moose-cli"); +const MOOSE_TS_LIB_PATH = path.resolve( + __dirname, + "../../../packages/ts-moose-lib", +); + +describe("moose query command", () => { + let devProcess: ChildProcess; + let testProjectDir: string; + + before(async function () { + this.timeout(TIMEOUTS.TEST_SETUP_MS); + + console.log("\n=== Starting Query Command Test ==="); + + // Create temp test directory + testProjectDir = createTempTestDirectory("query-cmd-test"); + console.log("Test project dir:", testProjectDir); + + // Setup TypeScript project + await setupTypeScriptProject( + testProjectDir, + "typescript-empty", + CLI_PATH, + MOOSE_TS_LIB_PATH, + "test-query-cmd", + "npm", + ); + + // Start moose dev + console.log("\nStarting moose dev..."); + devProcess = spawn(CLI_PATH, ["dev"], { + stdio: "pipe", + cwd: testProjectDir, + }); + + await waitForServerStart( + devProcess, + TIMEOUTS.SERVER_STARTUP_MS, + "development server started", + "http://localhost:4000", + ); + + console.log("✓ Infrastructure ready"); + }); + + after(async function () { + this.timeout(TIMEOUTS.CLEANUP_MS); + console.log("\n=== Cleaning up Query Command Test ==="); + + await cleanupTestSuite(devProcess, testProjectDir, "query-cmd-test", { + logPrefix: "Query Command Test", + }); + }); + + it("should execute simple SELECT query from argument", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing query from argument ---"); + + const { stdout } = await execAsync( + `"${CLI_PATH}" query "SELECT 1 as num"`, + { + cwd: testProjectDir, + }, + ); + + console.log("Query output:", stdout); + + expect(stdout).to.include('{"num":1}'); + expect(stdout).to.include("1 rows"); + + console.log("✓ Query from argument works"); + }); + + it("should execute query with multiple rows", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing query with multiple rows ---"); + + const { stdout } = await execAsync( + `"${CLI_PATH}" query "SELECT number FROM system.numbers LIMIT 5"`, + { cwd: testProjectDir }, + ); + + const lines = stdout + .trim() + .split("\n") + .filter((l: string) => l.startsWith("{")); + expect(lines.length).to.equal(5); + + // Verify JSON format + lines.forEach((line: string, idx: number) => { + const parsed = JSON.parse(line); + expect(parsed.number).to.equal(idx); + }); + + console.log("✓ Multiple rows returned correctly"); + }); + + it("should execute query from file", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing query from file ---"); + + const queryFile = path.join(testProjectDir, "test-query.sql"); + fs.writeFileSync(queryFile, "SELECT 'hello' as greeting, 42 as answer"); + + const { stdout } = await execAsync( + `"${CLI_PATH}" query -f test-query.sql`, + { cwd: testProjectDir }, + ); + + console.log("Query output:", stdout); + + expect(stdout).to.include('"greeting":"hello"'); + expect(stdout).to.include('"answer":42'); + + console.log("✓ Query from file works"); + }); + + it("should execute query from stdin", async function () { + 
this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing query from stdin ---"); + + const { stdout } = await execAsync( + `echo "SELECT 'stdin' as source" | "${CLI_PATH}" query`, + { cwd: testProjectDir, shell: "/bin/bash" }, + ); + + console.log("Query output:", stdout); + + expect(stdout).to.include('"source":"stdin"'); + + console.log("✓ Query from stdin works"); + }); + + it("should respect limit parameter", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing limit parameter ---"); + + const { stdout } = await execAsync( + `"${CLI_PATH}" query "SELECT number FROM system.numbers" --limit 3`, + { cwd: testProjectDir }, + ); + + const lines = stdout + .trim() + .split("\n") + .filter((l: string) => l.startsWith("{")); + expect(lines.length).to.equal(3); + expect(stdout).to.include("3 rows"); + + console.log("✓ Limit parameter works"); + }); + + it("should handle query errors gracefully", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing error handling ---"); + + try { + await execAsync( + `"${CLI_PATH}" query "SELECT * FROM nonexistent_table_xyz"`, + { cwd: testProjectDir }, + ); + expect.fail("Should have thrown an error"); + } catch (error: any) { + expect(error.message).to.include("ClickHouse query error"); + console.log("✓ Query errors handled gracefully"); + } + }); +}); diff --git a/apps/framework-cli/src/cli.rs b/apps/framework-cli/src/cli.rs index 87c7e78a0d..4d3e306caf 100644 --- a/apps/framework-cli/src/cli.rs +++ b/apps/framework-cli/src/cli.rs @@ -28,6 +28,7 @@ use routines::kafka_pull::write_external_topics; use routines::metrics_console::run_console; use routines::peek::peek; use routines::ps::show_processes; +use routines::query::query; use routines::scripts::{ cancel_workflow, get_workflow_status, list_workflows_history, pause_workflow, run_workflow, terminate_workflow, unpause_workflow, @@ -1363,6 +1364,30 @@ pub async fn top_command_handler( ))) } }, + Commands::Query { + query: sql, + file, + limit, + } => { + info!("Running query command"); + + let project = load_project(commands)?; + let project_arc = Arc::new(project); + + let capture_handle = crate::utilities::capture::capture_usage( + ActivityType::QueryCommand, + Some(project_arc.name()), + &settings, + machine_id.clone(), + HashMap::new(), + ); + + let result = query(project_arc, sql.clone(), file.clone(), *limit).await; + + wait_for_usage_capture(capture_handle).await; + + result + } } } diff --git a/apps/framework-cli/src/cli/commands.rs b/apps/framework-cli/src/cli/commands.rs index 88c7523ac3..5a76bce88c 100644 --- a/apps/framework-cli/src/cli/commands.rs +++ b/apps/framework-cli/src/cli/commands.rs @@ -193,6 +193,19 @@ pub enum Commands { }, /// Manage Kafka-related operations Kafka(KafkaArgs), + /// Execute SQL queries against ClickHouse + Query { + /// SQL query to execute + query: Option, + + /// Read query from file + #[arg(short = 'f', long = "file", conflicts_with = "query")] + file: Option, + + /// Maximum number of rows to return (applied via ClickHouse settings) + #[arg(short, long, default_value = "10000")] + limit: u64, + }, } #[derive(Debug, Args)] diff --git a/apps/framework-cli/src/cli/routines/mod.rs b/apps/framework-cli/src/cli/routines/mod.rs index 3473c65f30..e508cf6219 100644 --- a/apps/framework-cli/src/cli/routines/mod.rs +++ b/apps/framework-cli/src/cli/routines/mod.rs @@ -169,6 +169,7 @@ pub mod migrate; pub mod openapi; pub mod peek; pub mod ps; +pub mod query; pub mod scripts; pub mod seed_data; 
pub mod templates; diff --git a/apps/framework-cli/src/cli/routines/query.rs b/apps/framework-cli/src/cli/routines/query.rs new file mode 100644 index 0000000000..f6b5d59284 --- /dev/null +++ b/apps/framework-cli/src/cli/routines/query.rs @@ -0,0 +1,183 @@ +//! Module for executing arbitrary SQL queries against ClickHouse. +//! +//! This module provides functionality to execute raw SQL queries and return +//! results as JSON for debugging and exploration purposes. + +use crate::cli::display::Message; +use crate::cli::routines::{setup_redis_client, RoutineFailure, RoutineSuccess}; +use crate::framework::core::infrastructure_map::InfrastructureMap; +use crate::infrastructure::olap::clickhouse_alt_client::{get_pool, row_to_json}; +use crate::project::Project; + +use futures::StreamExt; +use log::info; +use std::io::Read; +use std::path::PathBuf; +use std::sync::Arc; + +/// Reads SQL query from argument, file, or stdin. +/// +/// # Arguments +/// +/// * `sql` - Optional SQL query string from command line +/// * `file` - Optional file path containing SQL query +/// +/// # Returns +/// +/// * `Result` - SQL query string or error +fn get_sql_input(sql: Option, file: Option) -> Result { + if let Some(query_str) = sql { + // SQL provided as argument + Ok(query_str) + } else if let Some(file_path) = file { + // Read SQL from file + std::fs::read_to_string(&file_path).map_err(|e| { + RoutineFailure::new( + Message::new( + "Query".to_string(), + format!("Failed to read file: {}", file_path.display()), + ), + e, + ) + }) + } else { + // Read SQL from stdin + let mut buffer = String::new(); + std::io::stdin().read_to_string(&mut buffer).map_err(|e| { + RoutineFailure::new( + Message::new("Query".to_string(), "Failed to read from stdin".to_string()), + e, + ) + })?; + + if buffer.trim().is_empty() { + return Err(RoutineFailure::error(Message::new( + "Query".to_string(), + "No SQL query provided (use argument, --file, or stdin)".to_string(), + ))); + } + + Ok(buffer) + } +} + +/// Executes a SQL query against ClickHouse and displays results as JSON. +/// +/// Allows users to run arbitrary SQL queries against the ClickHouse database +/// for exploration and debugging. Results are streamed as JSON to stdout. +/// +/// # Arguments +/// +/// * `project` - The project configuration to use +/// * `sql` - Optional SQL query string +/// * `file` - Optional file path containing SQL query +/// * `limit` - Maximum number of rows to return (via ClickHouse settings) +/// +/// # Returns +/// +/// * `Result` - Success or failure of the operation +pub async fn query( + project: Arc, + sql: Option, + file: Option, + limit: u64, +) -> Result { + let sql_query = get_sql_input(sql, file)?; + info!("Executing SQL: {}", sql_query); + + // Get ClickHouse connection pool + let pool = get_pool(&project.clickhouse_config); + + let mut client = pool.get_handle().await.map_err(|_| { + RoutineFailure::error(Message::new( + "Failed".to_string(), + "Error connecting to storage".to_string(), + )) + })?; + + let redis_client = setup_redis_client(project.clone()).await.map_err(|e| { + RoutineFailure::error(Message { + action: "Query".to_string(), + details: format!("Failed to setup redis client: {e:?}"), + }) + })?; + + // Validate that infrastructure state exists and is accessible. + // The value is not used further, but we fail early if it cannot be loaded. 
+ let _infra = InfrastructureMap::load_from_redis(&redis_client) + .await + .map_err(|_| { + RoutineFailure::error(Message::new( + "Failed".to_string(), + "Error retrieving current state".to_string(), + )) + })? + .ok_or_else(|| { + RoutineFailure::error(Message::new( + "Failed".to_string(), + "No infrastructure state found. Is 'moose dev' running?".to_string(), + )) + })?; + + // Execute query and stream results + let mut stream = client.query(&sql_query).stream(); + + let mut success_count = 0; + let mut enum_mappings: Vec>> = Vec::new(); + + while let Some(row_result) = stream.next().await { + let row = match row_result { + Ok(row) => row, + Err(e) => { + return Err(RoutineFailure::new( + Message::new("Query".to_string(), "ClickHouse query error".to_string()), + e, + )); + } + }; + + // Create enum mappings on first row (one None entry per column) + if enum_mappings.is_empty() { + enum_mappings = vec![None; row.len()]; + } + + // Reuse peek's row_to_json with enum mappings + let value = row_to_json(&row, &enum_mappings).map_err(|e| { + RoutineFailure::new( + Message::new( + "Query".to_string(), + "Failed to convert row to JSON".to_string(), + ), + e, + ) + })?; + + let json = serde_json::to_string(&value).map_err(|e| { + RoutineFailure::new( + Message::new( + "Query".to_string(), + "Failed to serialize result".to_string(), + ), + e, + ) + })?; + + println!("{}", json); + info!("{}", json); + success_count += 1; + + // Check limit to avoid unbounded queries + if success_count >= limit { + info!("Reached limit of {} rows", limit); + break; + } + } + + // Add newline for output cleanliness (like peek does) + println!(); + + Ok(RoutineSuccess::success(Message::new( + "Query".to_string(), + format!("{} rows", success_count), + ))) +} diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse_alt_client.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse_alt_client.rs index 5267529d33..6924750754 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse_alt_client.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse_alt_client.rs @@ -201,7 +201,7 @@ where /// /// # Returns /// * `Result` - JSON object or error -fn row_to_json( +pub fn row_to_json( row: &Row<'_, C>, enum_mappings: &[Option>], ) -> Result diff --git a/apps/framework-cli/src/utilities/capture.rs b/apps/framework-cli/src/utilities/capture.rs index 5d7f455d1b..106f3a16de 100644 --- a/apps/framework-cli/src/utilities/capture.rs +++ b/apps/framework-cli/src/utilities/capture.rs @@ -65,6 +65,8 @@ pub enum ActivityType { GenerateSDKCommand, #[serde(rename = "peekCommand")] PeekCommand, + #[serde(rename = "queryCommand")] + QueryCommand, #[serde(rename = "workflowCommand")] WorkflowCommand, #[serde(rename = "workflowInitCommand")] diff --git a/apps/framework-docs-v2/content/moosestack/moose-cli.mdx b/apps/framework-docs-v2/content/moosestack/moose-cli.mdx index 6f03230427..a07bad239a 100644 --- a/apps/framework-docs-v2/content/moosestack/moose-cli.mdx +++ b/apps/framework-docs-v2/content/moosestack/moose-cli.mdx @@ -163,6 +163,34 @@ moose peek [--limit ] [--file ] [-t|--table] [-s|--stream] - `-t, --table`: View data from a table (default if neither flag specified) - `-s, --stream`: View data from a stream/topic +### Query +Execute arbitrary SQL queries against your ClickHouse database during development. 
+```bash +# Direct query +moose query "SELECT count(*) FROM users" + +# From file +moose query -f queries/analysis.sql + +# From stdin +cat query.sql | moose query + +# With limit +moose query "SELECT * FROM events" --limit 100 +``` +- ``: SQL query string to execute (optional if using --file or stdin) +- `-f, --file `: Read query from file +- `-l, --limit `: Maximum rows to return (default: 10000) + +**Requirements:** +- Requires `moose dev` to be running +- Executes queries against your development ClickHouse instance + +**Output:** +- Returns results as newline-delimited JSON +- One JSON object per row +- Row count summary at end + ## Generation Commands ### Generate Hash Token diff --git a/apps/framework-docs/src/pages/moose/moose-cli.mdx b/apps/framework-docs/src/pages/moose/moose-cli.mdx index eac9f5eec8..cdcf28a8e2 100644 --- a/apps/framework-docs/src/pages/moose/moose-cli.mdx +++ b/apps/framework-docs/src/pages/moose/moose-cli.mdx @@ -162,6 +162,34 @@ moose peek [--limit ] [--file ] [-t|--table] [-s|--stream] - `-t, --table`: View data from a table (default if neither flag specified) - `-s, --stream`: View data from a stream/topic +### Query +Execute arbitrary SQL queries against your ClickHouse database during development. +```bash +# Direct query +moose query "SELECT count(*) FROM users" + +# From file +moose query -f queries/analysis.sql + +# From stdin +cat query.sql | moose query + +# With limit +moose query "SELECT * FROM events" --limit 100 +``` +- ``: SQL query string to execute (optional if using --file or stdin) +- `-f, --file `: Read query from file +- `-l, --limit `: Maximum rows to return (default: 10000) + +**Requirements:** +- Requires `moose dev` to be running +- Executes queries against your development ClickHouse instance + +**Output:** +- Returns results as newline-delimited JSON +- One JSON object per row +- Row count summary at end + ## Generation Commands ### Generate Hash Token From 4effb5320c64d45b65699cd4d56019ad0d3e12d6 Mon Sep 17 00:00:00 2001 From: George Leung Date: Mon, 17 Nov 2025 12:54:09 -0800 Subject: [PATCH 25/59] fixes for 20251114 release notes (#3000) > [!NOTE] > Updates the Nov 14, 2025 release notes with a new resource access functions section (TS/Py APIs + example), added PR/docs links, and relocates Enum16 under Other Features alongside expanded improvements and bug fixes. > > - **Release notes (`apps/framework-docs/src/pages/release-notes/2025-11-14.mdx`)** > - **New section:** *Functions for programmatic resource access* with TypeScript example and API parity lists for TS/Python; adds PR/docs links. > - **Highlights:** replace "Registry functions" with "Resource access functions". > - **MCP template:** add PR/docs references; clarify template context and setup. > - **Enum16:** remove dedicated section; add concise bullet under **Other Features**. > - **Other Features & Bug Fixes:** expand bullets and append PR links (docs search, MCP template setup, DB context detection, logging prefix, health checks, CORS; SQL table qualification, JWT env vars, serverless migrations, migration generation, schema compatibility). > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 5f3027ae3aec50e1832dd002693ba89024c4f48e. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). 
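The CLI reference added above notes that `moose query` streams results as newline-delimited JSON, one object per row, followed by a row-count summary. Below is a minimal sketch of consuming that output from a Node script; it is not part of this patch, and it assumes `moose` is on the PATH and `moose dev` is already running, mirroring how the e2e tests above keep only stdout lines that begin with `{`.

```typescript
// Sketch only (not from this patch): run `moose query` and parse its NDJSON output.
// Assumes `moose` is on PATH and `moose dev` is running; the trailing "N rows"
// summary line and blank line are skipped because they do not start with "{".
import { execFile } from "node:child_process";
import { promisify } from "node:util";

const execFileAsync = promisify(execFile);

async function runMooseQuery(
  sql: string,
  limit = 100,
): Promise<Record<string, unknown>[]> {
  const { stdout } = await execFileAsync("moose", [
    "query",
    sql,
    "--limit",
    String(limit),
  ]);

  return stdout
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => line.startsWith("{"))
    .map((line) => JSON.parse(line) as Record<string, unknown>);
}

// Example usage against a hypothetical `users` table:
runMooseQuery("SELECT count(*) AS total FROM users", 10)
  .then((rows) => console.log(rows))
  .catch((err) => console.error("query failed:", err));
```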
--- .../src/pages/release-notes/2025-11-14.mdx | 65 ++++++------------- 1 file changed, 19 insertions(+), 46 deletions(-) diff --git a/apps/framework-docs/src/pages/release-notes/2025-11-14.mdx b/apps/framework-docs/src/pages/release-notes/2025-11-14.mdx index 1ba271eff2..9fb4a6f194 100644 --- a/apps/framework-docs/src/pages/release-notes/2025-11-14.mdx +++ b/apps/framework-docs/src/pages/release-notes/2025-11-14.mdx @@ -9,8 +9,7 @@ import { Callout } from "@/components"; - **New:** TypeScript MCP template with AI chat integration and data catalog discovery -- **New:** [Enum16 data type](#enum16-data-type-support) -- **New:** [Registry functions](#registry-functions-for-programmatic-resource-access) to inspect and access Moose resources at runtime +- **New:** [Resource access functions](#functions-for-programmatic-resource-access) to inspect and access Moose resources at runtime ## TypeScript MCP template with AI chat integration @@ -41,40 +40,11 @@ pnpm dev The MCP server automatically detects database context and provides readonly access to ClickHouse data with query limits up to 1000 rows. -## Enum16 data type support +PR: [#2970](https://github.com/514-labs/moosestack/pull/2970), [#2991](https://github.com/514-labs/moosestack/pull/2991) | Docs: [MCP template](/moose/templates-examples#typescript-mcp) -Support for ClickHouse Enum16 data type with values from -32,768 to 32,767 (compared to Enum8's -128 to 127). -Use Enum16 for HTTP status codes, business identifiers, or any enum values exceeding Enum8's range. +## Functions for programmatic resource access -```typescript filename="datamodels/ApiRequestLog.ts" copy -import { Key } from "@514labs/moose-lib"; - -// Define HTTP status codes with Enum16 to support values like 404, 500 -export enum HttpStatusCode { - "OK" = 200, - "Created" = 201, - "BadRequest" = 400, - "Unauthorized" = 401, - "NotFound" = 404, - "InternalServerError" = 500, - "BadGateway" = 502, - "ServiceUnavailable" = 503 -} - -export interface ApiRequestLog { - id: Key; - endpoint: string; - method: string; - status_code: HttpStatusCode; // Uses Enum16 - response_time_ms: number; - timestamp: Date; -} -``` - -## Registry functions for programmatic resource access - -New registry functions allow programmatic access to all registered Moose resources at runtime. -Dynamically route stream messages to different tables based on runtime conditions, or build custom tooling that inspects your Moose application structure. +Access your Moose resources (tables, streams, APIs, workflows) programmatically at runtime. Build dynamic data routing logic that adapts to incoming events, create admin tools that inspect your data infrastructure, or write custom debugging utilities that explore your application's resources without hardcoding names. ```typescript filename="app/streams/eventRouter.ts" copy import { getTable, getStream, DeadLetterQueue } from "@514labs/moose-lib"; @@ -116,19 +86,22 @@ incomingStream?.addConsumer(async (event) => { }); ``` -Available registry functions: `getTables()`, `getTable()`, `getApis()`, `getApi()`, `getStreams()`, `getStream()`, `getWorkflows()`, `getWorkflow()` in both TypeScript and Python. +TypeScript now includes resource access functions for parity with Python: `getTables()`, `getTable()`, `getApis()`, `getApi()`, `getStreams()`, `getStream()`, `getWorkflows()`, `getWorkflow()`. 
Python already includes `get_tables()`, `get_table()`, `get_streams()`, `get_stream()`, `get_ingest_apis()`, `get_ingest_api()`, `get_apis()`, `get_api()`, `get_sql_resources()`, `get_sql_resource()`, `get_workflows()`, `get_workflow()`, `get_web_apps()`, `get_web_app()`. + +PR: [#2961](https://github.com/514-labs/moosestack/pull/2961) | Docs: [TypeScript](/moose/reference/ts-moose-lib), [Python](/moose/reference/py-moose-lib) ## Other Features and Improvements -- **Documentation search** – Command palette-style search for guides and API references -- **MCP template setup** – Monorepo structure, changing started instructions to use `pnpm` and `moose dev` -- **Automatic database context detection** – MCP server uses ClickHouse's `currentDatabase()` for simpler setup -- **Better logging** – `[CompilerPlugin]` prefix in dev terminal messages for clearer debugging -- **Deep health monitoring** – Concurrent health checks for Redis, ClickHouse, Redpanda, and Consumption API with `/_moose_internal/health` endpoint -- **CORS headers** – Applied consistently across all API endpoints including error responses +- **Enum16 data type** – Support for ClickHouse Enum16 with values from -32,768 to 32,767. PR [#2972](https://github.com/514-labs/moosestack/pull/2972) | Docs: [Supported types](/moose/olap/supported-types#enum-types) +- **Documentation search** – Command palette-style search for guides and API references. PR [#2964](https://github.com/514-labs/moosestack/pull/2964) +- **MCP template setup** – Monorepo structure, changing started instructions to use `pnpm` and `moose dev`. PR [#2990](https://github.com/514-labs/moosestack/pull/2990) +- **Automatic database context detection** – MCP server uses ClickHouse's `currentDatabase()` for simpler setup. PR [#2979](https://github.com/514-labs/moosestack/pull/2979) +- **Better logging** – `[CompilerPlugin]` prefix in dev terminal messages for clearer debugging. PR [#2971](https://github.com/514-labs/moosestack/pull/2971) +- **Deep health monitoring** – Concurrent health checks for Redis, ClickHouse, Redpanda, and Consumption API with `/_moose_internal/health` endpoint. PR [#2995](https://github.com/514-labs/moosestack/pull/2995) +- **CORS headers** – Applied consistently across all API endpoints including error responses. PR [#2975](https://github.com/514-labs/moosestack/pull/2975) ## Bug Fixes -- **Database-qualified table handling** – Support for database prefixes in SQL queries across TypeScript and Python -- **JWT environment variables** – Fixed auth docs to use correct names (`MOOSE_JWT__SECRET`, `MOOSE_JWT__ISSUER`, `MOOSE_JWT__AUDIENCE`) -- **Serverless migrations** – Fixed table reconciliation with remote ClickHouse databases -- **Migration generation** – Fixed `moose generate migration --url` to work with Moose servers -- **Schema compatibility** – Fixed incorrect breaking change detection for JSON, nested structures, and FixedString columns +- **Database-qualified table handling** – Support for database prefixes in SQL queries across TypeScript and Python. PR [#2992](https://github.com/514-labs/moosestack/pull/2992), [#2994](https://github.com/514-labs/moosestack/pull/2994) +- **JWT environment variables** – Fixed auth docs to use correct names (`MOOSE_JWT__SECRET`, `MOOSE_JWT__ISSUER`, `MOOSE_JWT__AUDIENCE`). PR [#2987](https://github.com/514-labs/moosestack/pull/2987) | Docs: [Authentication](/moose/app-api-auth) +- **Serverless migrations** – Fixed table reconciliation with remote ClickHouse databases. 
PR [#2981](https://github.com/514-labs/moosestack/pull/2981) +- **Migration generation** – Fixed `moose generate migration --url` to work with Moose servers. PR [#2984](https://github.com/514-labs/moosestack/pull/2984) +- **Schema compatibility** – Fixed incorrect breaking change detection for JSON, nested structures, and FixedString columns. PR [#2960](https://github.com/514-labs/moosestack/pull/2960) From 1f2fd5b415c5bdbe8de8fc048dbafc4a99b6346e Mon Sep 17 00:00:00 2001 From: Johanan Ottensooser Date: Mon, 17 Nov 2025 15:54:16 -0800 Subject: [PATCH 26/59] Update README.md (#3004) > [!NOTE] > Add project initialization step and clarify dev scripts in the TypeScript MCP template README. > > - **Documentation** (`templates/typescript-mcp/README.md`): > - Add project initialization instructions using `moose init typescript-mcp`. > - Simplify "start services individually" section with concise commands: `pnpm dev:moose` and `pnpm dev:web` with inline comments. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 9decd4a10927e4bd3ea8357068277de8b96198f1. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). --- templates/typescript-mcp/README.md | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/templates/typescript-mcp/README.md b/templates/typescript-mcp/README.md index 028be9ad14..0742650e3f 100644 --- a/templates/typescript-mcp/README.md +++ b/templates/typescript-mcp/README.md @@ -27,6 +27,12 @@ A Next.js web application with a pre-configured AI chat interface. This applicat ## Getting Started +Initiate your project: + +```bash +moose init typescript-mcp +``` + Install dependencies for both applications: ```bash @@ -42,11 +48,8 @@ pnpm dev Or start services individually: ```bash -# Start only the MooseStack service -pnpm dev:moose - -# Start only the Next.js web app -pnpm dev:web +pnpm dev:moose # Start MooseStack service only +pnpm dev:web # Start web app only ``` ## MCP Tools Available From 79a7c049b57843141fe1baf9b8285ba23a871793 Mon Sep 17 00:00:00 2001 From: Dave Seleno <958603+onelesd@users.noreply.github.com> Date: Mon, 17 Nov 2025 16:43:38 -0800 Subject: [PATCH 27/59] set anthropic api key in typescript-mcp template (#3006) > [!NOTE] > Add Anthropic API key setup instructions to the TypeScript MCP template README and post-install message. > > - **Docs (typescript-mcp template)**: > - Add setup step to set `ANTHROPIC_API_KEY` in `packages/web-app/.env.local`. > - Update instructions in `templates/typescript-mcp/README.md` and `templates/typescript-mcp/template.config.toml` post-install message. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 41aca356c9f6c095ae0bf345f376810b40077966. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). 
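The note above adds a setup step that writes `ANTHROPIC_API_KEY` into `packages/web-app/.env.local`. Below is a small sketch of how the web app side might fail fast when the key is missing; it is not part of this patch, `getAnthropicApiKey` is an illustrative name rather than a template API, and it assumes the Next.js app exposes `.env.local` values through `process.env` as usual.

```typescript
// Sketch only (not from this patch): guard against a missing or placeholder Anthropic key.
// `getAnthropicApiKey` is a hypothetical helper; the placeholder string matches the
// value suggested by the template's setup instructions.
export function getAnthropicApiKey(): string {
  const key = process.env.ANTHROPIC_API_KEY;
  if (!key || key === "your_api_key_here") {
    throw new Error(
      "ANTHROPIC_API_KEY is not set. Add it to packages/web-app/.env.local before running pnpm dev.",
    );
  }
  return key;
}
```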
--- templates/typescript-mcp/README.md | 7 +++++++ templates/typescript-mcp/template.config.toml | 3 +++ 2 files changed, 10 insertions(+) diff --git a/templates/typescript-mcp/README.md b/templates/typescript-mcp/README.md index 0742650e3f..8477f4ffa0 100644 --- a/templates/typescript-mcp/README.md +++ b/templates/typescript-mcp/README.md @@ -33,6 +33,13 @@ Initiate your project: moose init typescript-mcp ``` +Set the ANTHROPIC_API_KEY environment variable: + +```bash +cd +echo "ANTHROPIC_API_KEY=your_api_key_here" >> packages/web-app/.env.local +``` + Install dependencies for both applications: ```bash diff --git a/templates/typescript-mcp/template.config.toml b/templates/typescript-mcp/template.config.toml index 9d14acbfc8..61e7e1fcfa 100644 --- a/templates/typescript-mcp/template.config.toml +++ b/templates/typescript-mcp/template.config.toml @@ -13,6 +13,9 @@ https://boreal.cloud 📂 Go to your project directory: $ cd {project_dir} +📦 Set Anthropic API Key + $ echo "ANTHROPIC_API_KEY=your_api_key_here" >> packages/web-app/.env.local + 📦 Install all dependencies: $ pnpm install From c75955d85f5de5e4e6184f16f94bfdaea1ad8d5e Mon Sep 17 00:00:00 2001 From: Dave Seleno <958603+onelesd@users.noreply.github.com> Date: Mon, 17 Nov 2025 17:13:28 -0800 Subject: [PATCH 28/59] update and condense agents.md (#3002) > [!NOTE] > Condenses and restructures `AGENTS.md`, adding concise build/test commands, language-specific style guides, repo/testing guidance, and critical E2E/testing notes. > > - **Docs (`AGENTS.md`)**: > - **Restructure & Condense**: Rewrites into a concise guide with clear sections. > - **Build/Test Commands**: Adds unified commands plus language-specific (Rust/TS/Python) test and format instructions; includes running single tests. > - **E2E Testing**: Documents how to run full and filtered E2E tests; notes pretest steps. > - **Code Style Guidelines**: Adds detailed conventions for TypeScript, Rust, and Python. > - **Repo Structure & Testing Philosophy**: Clarifies `apps/`, `packages/`, `templates/`; distinguishes unit tests vs templates. > - **Key Technologies**: Summarizes stack components. > - **Critical Notes**: Emphasizes mandatory E2E runs for functional changes, template updates for user-facing changes, docs audit, log location, and formatting requirements. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 7914fe1d5eb1a06d5259da7d754183d7420e48b3. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). --------- Co-authored-by: Lucio Franco --- AGENTS.md | 186 +++++++++++++++++++++--------------------------------- 1 file changed, 71 insertions(+), 115 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index cdbf32d22f..142f137fbd 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,117 +1,73 @@ # AGENTS.md -When you are changing MooseStack functionality (either in the language specific libraries or in the Rust core) ALWAYS run the -end-to-end tests to make sure you did not break anything. - -When you change user facing functionality for moose, always add end-to-end tests for the `python-tests` and `typescript-tests` -templates and ALWAYS audit for the documentation for update needs. Those projects are Moose project that should be using Moose code. -The checks if the moose code works as expected should be inside `apps/framework-cli-e2e`. 
- -You can find the logs for moose if you need to troublehoot into `~/.moose/*-cli.log` - -## Commands - -### Build and Development -- **Build all packages**: `pnpm build` (uses Turbo Repo) -- **Development mode**: `pnpm dev` (starts development servers) -- **Linting**: `pnpm lint` -- **Formatting**: `pnpm format` (Prettier on TypeScript/JavaScript files) -- **Clean build artifacts**: `pnpm clean` - -### Rust Components -- **Build Rust**: `cargo build` -- **Run Rust tests**: `cargo test` -- **Lint Rust code**: `cargo clippy --all-targets -- -D warnings` (no warnings allowed) -- **Format Rust code**: `rustfmt --edition 2021 ` - -### Testing - -#### Unit Tests (Library Testing) -- **Rust tests**: `cargo test` -- **TypeScript library tests**: Navigate to `./packages/ts-moose-lib` and run `pnpm test` -- **Python library tests**: Navigate to `./packages/py-moose-lib` and run `pytest` - -Unit tests should be colocated with the library code they test: -- TypeScript library tests: `packages/ts-moose-lib/tests/` -- Python library tests: `packages/py-moose-lib/tests/` -- Rust tests: Inline with code using `#[cfg(test)]` modules - -#### End-to-End Tests (Template Integration Testing) -- **End-to-end tests**: Navigate to `./apps/framework-cli-e2e` and run `pnpm test` - -End-to-end tests verify that complete MooseStack applications work correctly: -- Tests use the templates in `templates/` as working Moose applications -- Templates like `python-tests` and `typescript-tests` are complete MooseStack projects -- E2E tests verify infrastructure creation, data ingestion, API responses, etc. - -#### Testing Distinction: Templates vs Libraries -**IMPORTANT**: Templates are NOT for library unit tests. They are working MooseStack applications. - -- **Templates** (`templates/python-tests`, `templates/typescript-tests`): - - Complete, runnable Moose applications - - Used by E2E tests to verify end-to-end functionality - - Should demonstrate features and serve as examples - - Tested by `apps/framework-cli-e2e` - -- **Library Tests** (`packages/*/tests/`): - - Unit tests for library functionality - - Test individual functions, classes, and modules - - Should be colocated with the library code - - Run independently of the CLI or infrastructure - -## Repository Architecture - -### Monorepo Structure -This is a multi-language monorepo using: -- **PNPM workspaces** with **Turbo Repo** for JavaScript/TypeScript packages -- **Cargo workspace** for Rust components -- **Cross-language integration** between Rust CLI and TypeScript/Python libraries - -### Key Directories -- `apps/`: End-to-end tests, CLI application, docs, and distribution packages - - `framework-cli/`: Main Rust CLI application - - `framework-docs/`: Documentation site - - `framework-cli-e2e/`: End-to-end test suite -- `packages/`: Shared libraries and common dependencies - - `ts-moose-lib/`: TypeScript library for MooseStack - - `py-moose-lib/`: Python library for MooseStack - - `protobuf/`: Protocol buffer definitions -- `templates/`: Standalone Moose project templates - -### Core Technologies -- **Rust**: CLI application, performance-critical components -- **TypeScript**: Developer libraries, web interfaces -- **Python**: Alternative developer library -- **ClickHouse**: OLAP database -- **Redpanda/Kafka**: Streaming platform -- **Temporal**: Workflow orchestration -- **Redis**: Internal state management - -### Architecture Patterns -- **Code-first infrastructure**: Declare tables, streams, APIs in code -- **Type-safe development**: Strong typing across 
TypeScript and Rust -- **Modular design**: Independent modules (OLAP, Streaming, Workflows, APIs) -- **Local-first development**: Full production mirror via `moose dev` - -## Development Guidelines - -### Pre-commit Requirements -- **TypeScript/JavaScript**: Must pass linting and code formating checks (`npx lint-staged`) -- **Rust**: Must pass `cargo clippy --all-targets -- -D warnings` (no warnings permitted) -- **All components**: Tests must pass before PR submission - -### Error Handling (Rust) -- Define error types near their unit of fallibility (no global `Error` type) -- Use `thiserror` for error definitions with `#[derive(thiserror::Error)]` -- Structure errors in layers with context and specific variants -- Never use `anyhow::Result` - refactor to use `thiserror` - -### Code Standards -- **Constants**: Use `const` in Rust, place in `constants.rs` at appropriate module level -- **Newtypes**: Use tuple structs with validation constructors -- **Documentation**: All public APIs must be documented -- **Linting**: Always run `cargo clippy --all-targets -- -D warnings` for Rust code -- Follow existing patterns and conventions in each language - -### Templates -Templates in the `templates/` directory must be able to run in isolation. When modifying templates, verify they can still function as standalone projects. \ No newline at end of file +Multi-language monorepo (Rust CLI + TypeScript/Python libraries) using PNPM workspaces, Turbo Repo, and Cargo workspace. + +**CRITICAL**: When changing MooseStack functionality, ALWAYS run end-to-end tests. When changing user-facing features, add E2E tests to `python-tests`/`typescript-tests` templates AND audit documentation. Logs: `~/.moose/*-cli.log`. Always format the code. + +## Build & Development Commands + +### All Languages +- **Build all**: `pnpm build` (Turbo orchestrates builds) +- **Dev mode**: `pnpm dev` (starts dev servers) +- **Clean**: `pnpm clean` +- **Lint all**: `pnpm lint` +- **Format**: `pnpm format` (Prettier for TS/JS) + +### Rust +- **Build**: `cargo build` +- **Test all**: `cargo test` +- **Test single**: `cargo test ` or `cargo test --package --test ` +- **Lint**: `cargo clippy --all-targets -- -D warnings` (REQUIRED pre-commit, no warnings allowed) +- **Format**: `cargo fmt` + +### TypeScript +- **Test lib**: `cd packages/ts-moose-lib && pnpm test` (runs mocha tests) +- **Test single**: `cd packages/ts-moose-lib && pnpm test --grep "test name pattern"` +- **Typecheck**: `cd packages/ts-moose-lib && pnpm typecheck` + +### Python +- **Test lib**: `cd packages/py-moose-lib && pytest` +- **Test single**: `cd packages/py-moose-lib && pytest tests/test_file.py::test_function_name` +- **Test pattern**: `cd packages/py-moose-lib && pytest -k "test_pattern"` + +### End-to-End Tests +- **Run E2E**: `cd apps/framework-cli-e2e && pnpm test` (includes pretest: cargo build, pnpm build, package templates) +- **Single E2E test**: `cd apps/framework-cli-e2e && pnpm test --grep "test name"` + +## Code Style Guidelines + +### TypeScript/JavaScript +- **Imports**: Group by external deps, internal modules, types; use named exports from barrel files (`index.ts`) +- **Naming**: camelCase for vars/functions, PascalCase for types/classes/components, UPPER_SNAKE_CASE for constants +- **Types**: Prefer interfaces for objects, types for unions/intersections; explicit return types on public APIs +- **Unused vars**: Prefix with `_` (e.g., `_unusedParam`) to bypass linting errors +- **Formatting**: Prettier with `experimentalTernaries: true`; auto-formats 
on commit (Husky + lint-staged) +- **ESLint**: Extends Next.js, Turbo, TypeScript recommended; `@typescript-eslint/no-explicit-any` disabled + +### Rust +- **Error handling**: Use `thiserror` with `#[derive(thiserror::Error)]`; define errors near fallibility unit (NO global `Error` type); NEVER use `anyhow::Result` +- **Naming**: snake_case for functions/vars, PascalCase for types/traits, SCREAMING_SNAKE_CASE for constants +- **Constants**: Place in `constants.rs` at appropriate module level +- **Newtypes**: Use tuple structs with validation constructors (e.g., `struct UserId(String)`) +- **Tests**: Inline with `#[cfg(test)]` modules +- **Documentation**: Required for all public APIs + +### Python +- **Style**: Follow PEP 8; snake_case for functions/vars, PascalCase for classes, UPPER_SNAKE_CASE for constants +- **Types**: Use type hints for function signatures and public APIs +- **Tests**: Use pytest with fixtures and parametrize decorators + +## Repository Structure + +- **`apps/`**: CLI (`framework-cli/`), docs (`framework-docs/`), E2E tests (`framework-cli-e2e/`) +- **`packages/`**: Libraries (`ts-moose-lib/`, `py-moose-lib/`), shared deps, protobuf definitions +- **`templates/`**: Standalone Moose apps used by E2E tests (NOT for unit tests) + +## Testing Philosophy + +- **Library tests** (`packages/*/tests/`): Unit tests colocated with library code +- **Templates** (`templates/python-tests`, `templates/typescript-tests`): Complete Moose apps for E2E testing; must run in isolation + +## Key Technologies + +Rust (CLI), TypeScript (libs/web), Python (lib), ClickHouse (OLAP), Redpanda/Kafka (streaming), Temporal (workflows), Redis (state) \ No newline at end of file From 0c65138542ce3527530ae184e4169a559f4ee134 Mon Sep 17 00:00:00 2001 From: George Leung Date: Mon, 17 Nov 2025 17:59:15 -0800 Subject: [PATCH 29/59] fix repeated LowCardinality annotation (#3009) > [!NOTE] > Prevents adding `LowCardinality` multiple times to a column's annotations during model field processing. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 442cb6277b6c3c98bd49c30e5c1b1bb21dd89eb0. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). --- packages/py-moose-lib/moose_lib/data_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/py-moose-lib/moose_lib/data_models.py b/packages/py-moose-lib/moose_lib/data_models.py index af8ec989b7..6e0687857b 100644 --- a/packages/py-moose-lib/moose_lib/data_models.py +++ b/packages/py-moose-lib/moose_lib/data_models.py @@ -516,7 +516,7 @@ def _to_columns(model: type[BaseModel]) -> list[Column]: annotations.append( ("simpleAggregationFunction", md.to_dict()) ) - if md == "LowCardinality": + if md == "LowCardinality" and all(key != "LowCardinality" for (key, _) in annotations): annotations.append( ("LowCardinality", True) ) From 29b7c610982470334de1026531ed821e01a4db02 Mon Sep 17 00:00:00 2001 From: George Leung Date: Tue, 18 Nov 2025 11:39:05 -0800 Subject: [PATCH 30/59] fix Buffer and Distributed engine parsing (#3003) > [!NOTE] > Refactors ClickHouse engine handling to wrap Buffer in BufferEngine, adds robust parse/serialize for Buffer/Distributed with nested optional rules and warnings, updates TS/Python generators and infra mapping, and adds comprehensive tests. 
> > - **ClickHouse engine core**: > - **BufferEngine struct**: Introduces `BufferEngine` and changes `ClickhouseEngine::Buffer` to `Buffer(BufferEngine)`; centralizes building strings and nested optional flush params. > - **Parsing/Serialization**: Implements `Buffer(...)` and `Distributed(...)` parse/serialize with nested optional validation and warnings; updates `to_proto_string`, DDL generation, and `non_alterable_params_hash`. > - **Engine extraction**: Improves engine parsing from `CREATE TABLE`; logs warnings and records unsupported engines instead of defaulting silently. > - **Logging**: Adds `warn` usage for invalid param combinations. > - **Tests**: Adds round-trip and edge-case tests for Buffer/Distributed and invalid combinations. > - **Codegen**: > - **Python/TypeScript**: Update generators to handle `ClickhouseEngine::Buffer(BufferEngine { ... })` and emit corresponding fields; import `BufferEngine`. > - **Infra mapping**: > - Maps `EngineConfig::Buffer` to `ClickhouseEngine::Buffer(BufferEngine { ... })` during table conversion. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit b349798b3e7a4d8ac340d6aca13594d7c7f9327a. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). --- .../core/partial_infrastructure_map.rs | 5 +- .../src/framework/python/generate.rs | 5 +- .../src/framework/typescript/generate.rs | 5 +- .../src/infrastructure/olap/clickhouse/mod.rs | 30 +- .../infrastructure/olap/clickhouse/queries.rs | 935 ++++++++++++++---- 5 files changed, 764 insertions(+), 216 deletions(-) diff --git a/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs b/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs index 69519b2a2c..c2937c5400 100644 --- a/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs @@ -59,6 +59,7 @@ use super::{ infrastructure_map::{InfrastructureMap, PrimitiveSignature, PrimitiveTypes}, }; use crate::framework::core::infrastructure::table::OrderBy; +use crate::infrastructure::olap::clickhouse::queries::BufferEngine; use crate::{ framework::{ consumption::model::ConsumptionQueryParam, languages::SupportedLanguages, @@ -829,7 +830,7 @@ impl PartialInfrastructureMap { }) } - Some(EngineConfig::Buffer(config)) => Ok(ClickhouseEngine::Buffer { + Some(EngineConfig::Buffer(config)) => Ok(ClickhouseEngine::Buffer(BufferEngine { target_database: config.target_database.clone(), target_table: config.target_table.clone(), num_layers: config.num_layers, @@ -842,7 +843,7 @@ impl PartialInfrastructureMap { flush_time: config.flush_time, flush_rows: config.flush_rows, flush_bytes: config.flush_bytes, - }), + })), Some(EngineConfig::Distributed(config)) => Ok(ClickhouseEngine::Distributed { cluster: config.cluster.clone(), diff --git a/apps/framework-cli/src/framework/python/generate.rs b/apps/framework-cli/src/framework/python/generate.rs index 7448fe1536..312cd88f86 100644 --- a/apps/framework-cli/src/framework/python/generate.rs +++ b/apps/framework-cli/src/framework/python/generate.rs @@ -13,6 +13,7 @@ use std::fmt::Write; use std::sync::LazyLock; use crate::infrastructure::olap::clickhouse::extract_version_from_table_name; +use crate::infrastructure::olap::clickhouse::queries::BufferEngine; /// Language-agnostic sanitization: replace common separators with spaces to create word boundaries. 
pub use ident::sanitize_identifier; @@ -892,7 +893,7 @@ pub fn tables_to_python(tables: &[Table], life_cycle: Option) -> Stri } writeln!(output, " ),").unwrap(); } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::Buffer { + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::Buffer(BufferEngine { target_database, target_table, num_layers, @@ -905,7 +906,7 @@ pub fn tables_to_python(tables: &[Table], life_cycle: Option) -> Stri flush_time, flush_rows, flush_bytes, - } => { + }) => { writeln!(output, " engine=BufferEngine(").unwrap(); writeln!(output, " target_database={:?},", target_database).unwrap(); writeln!(output, " target_table={:?},", target_table).unwrap(); diff --git a/apps/framework-cli/src/framework/typescript/generate.rs b/apps/framework-cli/src/framework/typescript/generate.rs index ac3edb8baa..ba37846f47 100644 --- a/apps/framework-cli/src/framework/typescript/generate.rs +++ b/apps/framework-cli/src/framework/typescript/generate.rs @@ -12,6 +12,7 @@ use std::fmt::Write; // Use shared, language-agnostic sanitization (underscores) from utilities use crate::infrastructure::olap::clickhouse::extract_version_from_table_name; +use crate::infrastructure::olap::clickhouse::queries::BufferEngine; pub use ident::sanitize_identifier; /// Map a string to a valid TypeScript PascalCase identifier (for types/classes/consts). @@ -777,7 +778,7 @@ pub fn tables_to_typescript(tables: &[Table], life_cycle: Option) -> writeln!(output, " partitionColumnsInDataFile: {:?},", pc).unwrap(); } } - crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::Buffer { + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::Buffer(BufferEngine { target_database, target_table, num_layers, @@ -790,7 +791,7 @@ pub fn tables_to_typescript(tables: &[Table], life_cycle: Option) -> flush_time, flush_rows, flush_bytes, - } => { + }) => { writeln!(output, " engine: ClickHouseEngines.Buffer,").unwrap(); writeln!(output, " targetDatabase: {:?},", target_database).unwrap(); writeln!(output, " targetTable: {:?},", target_table).unwrap(); diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs index 0931fe3fdd..936f39e168 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs @@ -35,7 +35,7 @@ use clickhouse::Client; use clickhouse_rs::ClientHandle; use errors::ClickhouseError; use itertools::Itertools; -use log::{debug, info}; +use log::{debug, info, warn}; use mapper::{std_column_to_clickhouse_column, std_table_to_clickhouse_table}; use model::ClickHouseColumn; use queries::ClickhouseEngine; @@ -1877,17 +1877,33 @@ impl OlapOperations for ConfiguredDBClient { // This is more reliable than using the system.tables engine column which // only contains the engine name without parameters (e.g., "S3Queue" instead of // "S3Queue('path', 'format', ...)") - let engine_parsed = if let Some(engine_def) = + let engine_str_to_parse = if let Some(engine_def) = extract_engine_from_create_table(&create_query) { - // Try to parse the extracted engine definition - engine_def.as_str().try_into().ok() + engine_def } else { // Fallback to the simple engine name from system.tables debug!("Could not extract engine from CREATE TABLE query, falling back to system.tables engine column"); - engine.as_str().try_into().ok() - } - .unwrap_or(ClickhouseEngine::MergeTree); + engine.clone() + }; + + // Try to parse the engine 
string + let engine_parsed: ClickhouseEngine = match engine_str_to_parse.as_str().try_into() { + Ok(engine) => engine, + Err(failed_str) => { + warn!( + "Failed to parse engine for table '{}': '{}'. This may indicate an unsupported engine type.", + table_name, failed_str + ); + unsupported_tables.push(TableWithUnsupportedType { + database: database.clone(), + name: table_name.clone(), + col_name: "__engine".to_string(), + col_type: String::from(failed_str), + }); + continue 'table_loop; + } + }; let engine_params_hash = Some(engine_parsed.non_alterable_params_hash()); // Extract table settings from CREATE TABLE query diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs index 8c4fa0da06..19bcd760c7 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs @@ -135,6 +135,99 @@ ORDER BY ({{order_by_string}}){{/if}}{{#if ttl_clause}} TTL {{ttl_clause}}{{/if}}{{#if settings}} SETTINGS {{settings}}{{/if}}"#; +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)] +pub struct BufferEngine { + // Target database name + pub target_database: String, + // Target table name + pub target_table: String, + // Number of buffer layers (typically 16) + pub num_layers: u32, + // Minimum time in seconds before flushing + pub min_time: u32, + // Maximum time in seconds before flushing + pub max_time: u32, + // Minimum number of rows before flushing + pub min_rows: u64, + // Maximum number of rows before flushing + pub max_rows: u64, + // Minimum bytes before flushing + pub min_bytes: u64, + // Maximum bytes before flushing + pub max_bytes: u64, + // Optional flush time + pub flush_time: Option, + // Optional flush rows + pub flush_rows: Option, + // Optional flush bytes + pub flush_bytes: Option, +} + +impl BufferEngine { + /// Helper function to append nested optional flush parameters for Buffer engine + /// Returns comma-separated string of flush parameters that are present + /// Validates nested optional constraint: flush_rows requires flush_time, flush_bytes requires both + fn append_buffer_flush_params( + flush_time: &Option, + flush_rows: &Option, + flush_bytes: &Option, + ) -> String { + // Warn about invalid combinations (but serialize what we can) + if flush_rows.is_some() && flush_time.is_none() { + log::warn!( + "Buffer engine has flush_rows but no flush_time - flush_rows will be ignored. \ + This violates ClickHouse nested optional constraint." + ); + } + if flush_bytes.is_some() && (flush_time.is_none() || flush_rows.is_none()) { + log::warn!( + "Buffer engine has flush_bytes but missing flush_time or flush_rows - flush_bytes will be ignored. \ + This violates ClickHouse nested optional constraint." 
+ ); + } + + let mut params = String::new(); + if let Some(ft) = flush_time { + params.push_str(&format!(", {}", ft)); + + if let Some(fr) = flush_rows { + params.push_str(&format!(", {}", fr)); + + if let Some(fb) = flush_bytes { + params.push_str(&format!(", {}", fb)); + } + } + } + params + } + + /// Serialize Buffer engine to string format for proto storage + /// Format: Buffer('database', 'table', num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes[, flush_time[, flush_rows[, flush_bytes]]]) + /// Note: flush parameters are nested optionals - you cannot skip earlier parameters + fn build_string(&self) -> String { + let mut result = format!( + "Buffer('{}', '{}', {}, {}, {}, {}, {}, {}, {}", + self.target_database, + self.target_table, + self.num_layers, + self.min_time, + self.max_time, + self.min_rows, + self.max_rows, + self.min_bytes, + self.max_bytes + ); + + result.push_str(&Self::append_buffer_flush_params( + &self.flush_time, + &self.flush_rows, + &self.flush_bytes, + )); + result.push(')'); + result + } +} + #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[allow(clippy::large_enum_variant)] // S3Queue has many fields, but this is acceptable for our use case #[derive(Default)] @@ -216,32 +309,7 @@ pub enum ClickhouseEngine { // Partition columns in data file partition_columns_in_data_file: Option, }, - Buffer { - // Target database name - target_database: String, - // Target table name - target_table: String, - // Number of buffer layers (typically 16) - num_layers: u32, - // Minimum time in seconds before flushing - min_time: u32, - // Maximum time in seconds before flushing - max_time: u32, - // Minimum number of rows before flushing - min_rows: u64, - // Maximum number of rows before flushing - max_rows: u64, - // Minimum bytes before flushing - min_bytes: u64, - // Maximum bytes before flushing - max_bytes: u64, - // Optional flush time - flush_time: Option, - // Optional flush rows - flush_rows: Option, - // Optional flush bytes - flush_bytes: Option, - }, + Buffer(BufferEngine), Distributed { // Cluster name from ClickHouse configuration cluster: String, @@ -329,33 +397,7 @@ impl Into for ClickhouseEngine { &partition_strategy, &partition_columns_in_data_file, ), - ClickhouseEngine::Buffer { - target_database, - target_table, - num_layers, - min_time, - max_time, - min_rows, - max_rows, - min_bytes, - max_bytes, - flush_time, - flush_rows, - flush_bytes, - } => Self::serialize_buffer( - &target_database, - &target_table, - num_layers, - min_time, - max_time, - min_rows, - max_rows, - min_bytes, - max_bytes, - &flush_time, - &flush_rows, - &flush_bytes, - ), + ClickhouseEngine::Buffer(buffer_engine) => buffer_engine.build_string(), ClickhouseEngine::Distributed { cluster, target_database, @@ -369,6 +411,9 @@ impl Into for ClickhouseEngine { &sharding_key, &policy_name, ), + // this might sound obvious, but when you edit this function + // please check if you have changed the parsing side (try_from) as well + // especially if you're an LLM } } } @@ -745,6 +790,8 @@ impl ClickhouseEngine { } s if s.starts_with("S3Queue(") => Self::parse_regular_s3queue(s, value), s if s.starts_with("S3(") => Self::parse_regular_s3(s, value), + s if s.starts_with("Buffer(") => Self::parse_regular_buffer(s, value), + s if s.starts_with("Distributed(") => Self::parse_regular_distributed(s, value), _ => Err(value), } } @@ -805,6 +852,119 @@ impl ClickhouseEngine { Err(original_value) } } + + /// Parse regular Buffer with parameters + /// Format: Buffer('db', 
'table', num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes[, flush_time][, flush_rows][, flush_bytes]) + fn parse_regular_buffer<'a>( + engine_name: &str, + original_value: &'a str, + ) -> Result { + if let Some(content) = engine_name + .strip_prefix("Buffer(") + .and_then(|s| s.strip_suffix(")")) + { + let params = parse_quoted_csv(content); + + // Need at least 9 parameters (database, table, and 7 numeric values) + if params.len() < 9 { + return Err(original_value); + } + + // Parse required parameters + let target_database = params[0].clone(); + let target_table = params[1].clone(); + let num_layers = params[2].parse::().map_err(|_| original_value)?; + let min_time = params[3].parse::().map_err(|_| original_value)?; + let max_time = params[4].parse::().map_err(|_| original_value)?; + let min_rows = params[5].parse::().map_err(|_| original_value)?; + let max_rows = params[6].parse::().map_err(|_| original_value)?; + let min_bytes = params[7].parse::().map_err(|_| original_value)?; + let max_bytes = params[8].parse::().map_err(|_| original_value)?; + + // Parse optional parameters (flush_time, flush_rows, flush_bytes) + let flush_time = if params.len() > 9 { + Some(params[9].parse::().map_err(|_| original_value)?) + } else { + None + }; + + let flush_rows = if params.len() > 10 { + Some(params[10].parse::().map_err(|_| original_value)?) + } else { + None + }; + + let flush_bytes = if params.len() > 11 { + Some(params[11].parse::().map_err(|_| original_value)?) + } else { + None + }; + + Ok(ClickhouseEngine::Buffer(BufferEngine { + target_database, + target_table, + num_layers, + min_time, + max_time, + min_rows, + max_rows, + min_bytes, + max_bytes, + flush_time, + flush_rows, + flush_bytes, + })) + } else { + Err(original_value) + } + } + + /// Parse regular Distributed with parameters + /// Format: Distributed('cluster', 'database', 'table'[, sharding_key][, 'policy']) + fn parse_regular_distributed<'a>( + engine_name: &str, + original_value: &'a str, + ) -> Result { + if let Some(content) = engine_name + .strip_prefix("Distributed(") + .and_then(|s| s.strip_suffix(")")) + { + let params = parse_quoted_csv(content); + + // Need at least 3 parameters (cluster, database, table) + if params.len() < 3 { + return Err(original_value); + } + + let cluster = params[0].clone(); + let target_database = params[1].clone(); + let target_table = params[2].clone(); + + // Parse optional sharding_key (4th parameter, not quoted - it's an expression) + let sharding_key = if params.len() > 3 { + Some(params[3].clone()) + } else { + None + }; + + // Parse optional policy_name (5th parameter, quoted) + let policy_name = if params.len() > 4 { + Some(params[4].clone()) + } else { + None + }; + + Ok(ClickhouseEngine::Distributed { + cluster, + target_database, + target_table, + sharding_key, + policy_name, + }) + } else { + Err(original_value) + } + } } /// Parse comma-separated values from a string @@ -943,33 +1103,7 @@ impl ClickhouseEngine { partition_strategy, partition_columns_in_data_file, ), - ClickhouseEngine::Buffer { - target_database, - target_table, - num_layers, - min_time, - max_time, - min_rows, - max_rows, - min_bytes, - max_bytes, - flush_time, - flush_rows, - flush_bytes, - } => Self::serialize_buffer_proto( - target_database, - target_table, - *num_layers, - *min_time, - *max_time, - *min_rows, - *max_rows, - *min_bytes, - *max_bytes, - flush_time, - flush_rows, - flush_bytes, - ), + ClickhouseEngine::Buffer(buffer_engine) => buffer_engine.build_string(), 
ClickhouseEngine::Distributed { cluster, target_database, @@ -1122,49 +1256,35 @@ impl ClickhouseEngine { result } - /// Serialize Buffer engine to string format for proto storage - /// Format: Buffer('database', 'table', num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes[, flush_time, flush_rows, flush_bytes]) - #[allow(clippy::too_many_arguments)] - fn serialize_buffer_proto( - target_database: &str, - target_table: &str, - num_layers: u32, - min_time: u32, - max_time: u32, - min_rows: u64, - max_rows: u64, - min_bytes: u64, - max_bytes: u64, - flush_time: &Option, - flush_rows: &Option, - flush_bytes: &Option, + /// Helper function to append nested optional parameters for Distributed engine + /// Returns comma-separated string of parameters that are present + /// Validates nested optional constraint: policy_name requires sharding_key + fn append_distributed_optional_params( + sharding_key: &Option, + policy_name: &Option, + quote_policy: bool, ) -> String { - let mut result = format!( - "Buffer('{}', '{}', {}, {}, {}, {}, {}, {}, {}", - target_database, - target_table, - num_layers, - min_time, - max_time, - min_rows, - max_rows, - min_bytes, - max_bytes - ); - - // Add optional flush parameters - if let Some(ft) = flush_time { - result.push_str(&format!(", {}", ft)); - } - if let Some(fr) = flush_rows { - result.push_str(&format!(", {}", fr)); - } - if let Some(fb) = flush_bytes { - result.push_str(&format!(", {}", fb)); + // Warn about invalid combination + if policy_name.is_some() && sharding_key.is_none() { + log::warn!( + "Distributed engine has policy_name but no sharding_key - policy_name will be ignored. \ + This violates ClickHouse nested optional constraint." + ); } - result.push(')'); - result + let mut params = String::new(); + if let Some(key) = sharding_key { + params.push_str(&format!(", {}", key)); // Expression, not quoted + + if let Some(policy) = policy_name { + if quote_policy { + params.push_str(&format!(", '{}'", policy)); + } else { + params.push_str(&format!(", {}", policy)); + } + } + } + params } /// Serialize Distributed engine to string format for proto storage @@ -1181,16 +1301,11 @@ impl ClickhouseEngine { cluster, target_database, target_table ); - // Add sharding key if present - if let Some(key) = sharding_key { - result.push_str(&format!(", {}", key)); - } - - // Add policy name if present - if let Some(policy) = policy_name { - result.push_str(&format!(", '{}'", policy)); - } - + result.push_str(&Self::append_distributed_optional_params( + sharding_key, + policy_name, + true, + )); result.push(')'); result } @@ -1278,55 +1393,8 @@ impl ClickhouseEngine { result } - /// Serialize Buffer engine to string format - /// Format: Buffer('database', 'table', num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes[, flush_time, flush_rows, flush_bytes]) - #[allow(clippy::too_many_arguments)] - fn serialize_buffer( - target_database: &str, - target_table: &str, - num_layers: u32, - min_time: u32, - max_time: u32, - min_rows: u64, - max_rows: u64, - min_bytes: u64, - max_bytes: u64, - flush_time: &Option, - flush_rows: &Option, - flush_bytes: &Option, - ) -> String { - let mut result = format!( - "Buffer('{}', '{}', {}, {}, {}, {}, {}, {}, {}", - target_database, - target_table, - num_layers, - min_time, - max_time, - min_rows, - max_rows, - min_bytes, - max_bytes - ); - - // Add optional flush parameters if any are present - if flush_time.is_some() || flush_rows.is_some() || flush_bytes.is_some() { - if let Some(ft) = 
flush_time { - result.push_str(&format!(", {}", ft)); - } - if let Some(fr) = flush_rows { - result.push_str(&format!(", {}", fr)); - } - if let Some(fb) = flush_bytes { - result.push_str(&format!(", {}", fb)); - } - } - - result.push(')'); - result - } - /// Serialize Distributed engine to string format - /// Format: Distributed('cluster', 'database', 'table'[, 'sharding_key'][, 'policy_name']) + /// Format: Distributed('cluster', 'database', 'table'[, sharding_key][, 'policy_name']) fn serialize_distributed( cluster: &str, target_database: &str, @@ -1339,16 +1407,11 @@ impl ClickhouseEngine { cluster, target_database, target_table ); - // Add sharding key if present - if let Some(key) = sharding_key { - result.push_str(&format!(", {}", key)); // Don't quote - it's an expression - } - - // Add policy name if present - if let Some(policy) = policy_name { - result.push_str(&format!(", '{}'", policy)); - } - + result.push_str(&Self::append_distributed_optional_params( + sharding_key, + policy_name, + true, + )); result.push(')'); result } @@ -1890,7 +1953,7 @@ impl ClickhouseEngine { hasher.update("null".as_bytes()); } } - ClickhouseEngine::Buffer { + ClickhouseEngine::Buffer(BufferEngine { target_database, target_table, num_layers, @@ -1903,7 +1966,7 @@ impl ClickhouseEngine { flush_time, flush_rows, flush_bytes, - } => { + }) => { hasher.update("Buffer".as_bytes()); hasher.update(target_database.as_bytes()); hasher.update(target_table.as_bytes()); @@ -2300,7 +2363,7 @@ pub fn create_table_query( format!("S3({})", engine_parts.join(", ")) } - ClickhouseEngine::Buffer { + ClickhouseEngine::Buffer(BufferEngine { target_database, target_table, num_layers, @@ -2313,7 +2376,21 @@ pub fn create_table_query( flush_time, flush_rows, flush_bytes, - } => { + }) => { + // Warn about invalid combinations + if flush_rows.is_some() && flush_time.is_none() { + log::warn!( + "Buffer engine has flush_rows but no flush_time - flush_rows will be ignored. \ + This violates ClickHouse nested optional constraint." + ); + } + if flush_bytes.is_some() && (flush_time.is_none() || flush_rows.is_none()) { + log::warn!( + "Buffer engine has flush_bytes but missing flush_time or flush_rows - flush_bytes will be ignored. \ + This violates ClickHouse nested optional constraint." + ); + } + let mut engine_parts = vec![ format!("'{}'", target_database), format!("'{}'", target_table), @@ -2326,15 +2403,17 @@ pub fn create_table_query( max_bytes.to_string(), ]; - // Add optional flush parameters + // Add optional flush parameters following nested optional constraint if let Some(ft) = flush_time { engine_parts.push(ft.to_string()); - } - if let Some(fr) = flush_rows { - engine_parts.push(fr.to_string()); - } - if let Some(fb) = flush_bytes { - engine_parts.push(fb.to_string()); + + if let Some(fr) = flush_rows { + engine_parts.push(fr.to_string()); + + if let Some(fb) = flush_bytes { + engine_parts.push(fb.to_string()); + } + } } format!("Buffer({})", engine_parts.join(", ")) @@ -2346,18 +2425,27 @@ pub fn create_table_query( sharding_key, policy_name, } => { + // Warn about invalid combination + if policy_name.is_some() && sharding_key.is_none() { + log::warn!( + "Distributed engine has policy_name but no sharding_key - policy_name will be ignored. \ + This violates ClickHouse nested optional constraint." 
+ ); + } + let mut engine_parts = vec![ format!("'{}'", cluster), format!("'{}'", target_database), format!("'{}'", target_table), ]; - // Add optional parameters + // Add optional parameters following nested optional constraint if let Some(key) = sharding_key { engine_parts.push(key.clone()); // Don't quote - it's an expression - } - if let Some(policy) = policy_name { - engine_parts.push(format!("'{}'", policy)); + + if let Some(policy) = policy_name { + engine_parts.push(format!("'{}'", policy)); + } } format!("Distributed({})", engine_parts.join(", ")) @@ -4838,4 +4926,445 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; _ => panic!("Expected InvalidParameters error"), } } + + #[test] + fn test_buffer_engine_round_trip() { + // Test Buffer engine with all parameters + let engine = ClickhouseEngine::Buffer(BufferEngine { + target_database: "db".to_string(), + target_table: "table".to_string(), + num_layers: 16, + min_time: 10, + max_time: 100, + min_rows: 10000, + max_rows: 100000, + min_bytes: 10000000, + max_bytes: 100000000, + flush_time: Some(5), + flush_rows: Some(50000), + flush_bytes: Some(50000000), + }); + + let serialized: String = engine.clone().into(); + assert_eq!( + serialized, + "Buffer('db', 'table', 16, 10, 100, 10000, 100000, 10000000, 100000000, 5, 50000, 50000000)" + ); + + let parsed = ClickhouseEngine::try_from(serialized.as_str()).unwrap(); + match parsed { + ClickhouseEngine::Buffer(BufferEngine { + target_database, + target_table, + num_layers, + min_time, + max_time, + min_rows, + max_rows, + min_bytes, + max_bytes, + flush_time, + flush_rows, + flush_bytes, + }) => { + assert_eq!(target_database, "db"); + assert_eq!(target_table, "table"); + assert_eq!(num_layers, 16); + assert_eq!(min_time, 10); + assert_eq!(max_time, 100); + assert_eq!(min_rows, 10000); + assert_eq!(max_rows, 100000); + assert_eq!(min_bytes, 10000000); + assert_eq!(max_bytes, 100000000); + assert_eq!(flush_time, Some(5)); + assert_eq!(flush_rows, Some(50000)); + assert_eq!(flush_bytes, Some(50000000)); + } + _ => panic!("Expected Buffer engine"), + } + + // Test Buffer engine without optional parameters + let engine2 = ClickhouseEngine::Buffer(BufferEngine { + target_database: "mydb".to_string(), + target_table: "mytable".to_string(), + num_layers: 8, + min_time: 5, + max_time: 50, + min_rows: 5000, + max_rows: 50000, + min_bytes: 5000000, + max_bytes: 50000000, + flush_time: None, + flush_rows: None, + flush_bytes: None, + }); + + let serialized2: String = engine2.clone().into(); + assert_eq!( + serialized2, + "Buffer('mydb', 'mytable', 8, 5, 50, 5000, 50000, 5000000, 50000000)" + ); + + let parsed2 = ClickhouseEngine::try_from(serialized2.as_str()).unwrap(); + match parsed2 { + ClickhouseEngine::Buffer(BufferEngine { + target_database, + target_table, + num_layers, + min_time, + max_time, + min_rows, + max_rows, + min_bytes, + max_bytes, + flush_time, + flush_rows, + flush_bytes, + }) => { + assert_eq!(target_database, "mydb"); + assert_eq!(target_table, "mytable"); + assert_eq!(num_layers, 8); + assert_eq!(min_time, 5); + assert_eq!(max_time, 50); + assert_eq!(min_rows, 5000); + assert_eq!(max_rows, 50000); + assert_eq!(min_bytes, 5000000); + assert_eq!(max_bytes, 50000000); + assert_eq!(flush_time, None); + assert_eq!(flush_rows, None); + assert_eq!(flush_bytes, None); + } + _ => panic!("Expected Buffer engine"), + } + + // Test Buffer engine with only flush_time (nested optional - level 1) + let engine3 = ClickhouseEngine::Buffer(BufferEngine { + target_database: 
"db3".to_string(), + target_table: "table3".to_string(), + num_layers: 4, + min_time: 1, + max_time: 10, + min_rows: 1000, + max_rows: 10000, + min_bytes: 1000000, + max_bytes: 10000000, + flush_time: Some(3), + flush_rows: None, + flush_bytes: None, + }); + + let serialized3: String = engine3.clone().into(); + assert_eq!( + serialized3, + "Buffer('db3', 'table3', 4, 1, 10, 1000, 10000, 1000000, 10000000, 3)" + ); + + let parsed3 = ClickhouseEngine::try_from(serialized3.as_str()).unwrap(); + match parsed3 { + ClickhouseEngine::Buffer(BufferEngine { + target_database, + target_table, + num_layers, + min_time, + max_time, + min_rows, + max_rows, + min_bytes, + max_bytes, + flush_time, + flush_rows, + flush_bytes, + }) => { + assert_eq!(target_database, "db3"); + assert_eq!(target_table, "table3"); + assert_eq!(num_layers, 4); + assert_eq!(min_time, 1); + assert_eq!(max_time, 10); + assert_eq!(min_rows, 1000); + assert_eq!(max_rows, 10000); + assert_eq!(min_bytes, 1000000); + assert_eq!(max_bytes, 10000000); + assert_eq!(flush_time, Some(3)); + assert_eq!(flush_rows, None); + assert_eq!(flush_bytes, None); + } + _ => panic!("Expected Buffer engine"), + } + + // Test Buffer engine with flush_time and flush_rows (nested optional - level 2) + let engine4 = ClickhouseEngine::Buffer(BufferEngine { + target_database: "db4".to_string(), + target_table: "table4".to_string(), + num_layers: 2, + min_time: 2, + max_time: 20, + min_rows: 2000, + max_rows: 20000, + min_bytes: 2000000, + max_bytes: 20000000, + flush_time: Some(7), + flush_rows: Some(15000), + flush_bytes: None, + }); + + let serialized4: String = engine4.clone().into(); + assert_eq!( + serialized4, + "Buffer('db4', 'table4', 2, 2, 20, 2000, 20000, 2000000, 20000000, 7, 15000)" + ); + + let parsed4 = ClickhouseEngine::try_from(serialized4.as_str()).unwrap(); + match parsed4 { + ClickhouseEngine::Buffer(BufferEngine { + target_database, + target_table, + num_layers, + min_time, + max_time, + min_rows, + max_rows, + min_bytes, + max_bytes, + flush_time, + flush_rows, + flush_bytes, + }) => { + assert_eq!(target_database, "db4"); + assert_eq!(target_table, "table4"); + assert_eq!(num_layers, 2); + assert_eq!(min_time, 2); + assert_eq!(max_time, 20); + assert_eq!(min_rows, 2000); + assert_eq!(max_rows, 20000); + assert_eq!(min_bytes, 2000000); + assert_eq!(max_bytes, 20000000); + assert_eq!(flush_time, Some(7)); + assert_eq!(flush_rows, Some(15000)); + assert_eq!(flush_bytes, None); + } + _ => panic!("Expected Buffer engine"), + } + } + + #[test] + fn test_distributed_engine_round_trip() { + // Test Distributed engine with all parameters + let engine = ClickhouseEngine::Distributed { + cluster: "my_cluster".to_string(), + target_database: "db".to_string(), + target_table: "table".to_string(), + sharding_key: Some("cityHash64(user_id)".to_string()), + policy_name: Some("my_policy".to_string()), + }; + + let serialized: String = engine.clone().into(); + assert_eq!( + serialized, + "Distributed('my_cluster', 'db', 'table', cityHash64(user_id), 'my_policy')" + ); + + let parsed = ClickhouseEngine::try_from(serialized.as_str()).unwrap(); + match parsed { + ClickhouseEngine::Distributed { + cluster, + target_database, + target_table, + sharding_key, + policy_name, + } => { + assert_eq!(cluster, "my_cluster"); + assert_eq!(target_database, "db"); + assert_eq!(target_table, "table"); + assert_eq!(sharding_key, Some("cityHash64(user_id)".to_string())); + assert_eq!(policy_name, Some("my_policy".to_string())); + } + _ => panic!("Expected Distributed 
engine"), + } + + // Test Distributed engine with only required parameters + let engine2 = ClickhouseEngine::Distributed { + cluster: "prod_cluster".to_string(), + target_database: "mydb".to_string(), + target_table: "mytable".to_string(), + sharding_key: None, + policy_name: None, + }; + + let serialized2: String = engine2.clone().into(); + assert_eq!( + serialized2, + "Distributed('prod_cluster', 'mydb', 'mytable')" + ); + + let parsed2 = ClickhouseEngine::try_from(serialized2.as_str()).unwrap(); + match parsed2 { + ClickhouseEngine::Distributed { + cluster, + target_database, + target_table, + sharding_key, + policy_name, + } => { + assert_eq!(cluster, "prod_cluster"); + assert_eq!(target_database, "mydb"); + assert_eq!(target_table, "mytable"); + assert_eq!(sharding_key, None); + assert_eq!(policy_name, None); + } + _ => panic!("Expected Distributed engine"), + } + + // Test Distributed engine with sharding key but no policy + let engine3 = ClickhouseEngine::Distributed { + cluster: "test_cluster".to_string(), + target_database: "testdb".to_string(), + target_table: "testtable".to_string(), + sharding_key: Some("rand()".to_string()), + policy_name: None, + }; + + let serialized3: String = engine3.clone().into(); + assert_eq!( + serialized3, + "Distributed('test_cluster', 'testdb', 'testtable', rand())" + ); + + let parsed3 = ClickhouseEngine::try_from(serialized3.as_str()).unwrap(); + match parsed3 { + ClickhouseEngine::Distributed { + cluster, + target_database, + target_table, + sharding_key, + policy_name, + } => { + assert_eq!(cluster, "test_cluster"); + assert_eq!(target_database, "testdb"); + assert_eq!(target_table, "testtable"); + assert_eq!(sharding_key, Some("rand()".to_string())); + assert_eq!(policy_name, None); + } + _ => panic!("Expected Distributed engine"), + } + + // Test edge case: policy_name without sharding_key should be silently dropped + // This matches ClickHouse specification where policy_name requires sharding_key + let engine4 = ClickhouseEngine::Distributed { + cluster: "edge_cluster".to_string(), + target_database: "edgedb".to_string(), + target_table: "edgetable".to_string(), + sharding_key: None, + policy_name: Some("orphan_policy".to_string()), // This should be dropped + }; + + let serialized4: String = engine4.clone().into(); + // policy_name should NOT appear since sharding_key is None + assert_eq!( + serialized4, + "Distributed('edge_cluster', 'edgedb', 'edgetable')" + ); + + // Round-trip should work correctly + let parsed4 = ClickhouseEngine::try_from(serialized4.as_str()).unwrap(); + match parsed4 { + ClickhouseEngine::Distributed { + cluster, + target_database, + target_table, + sharding_key, + policy_name, + } => { + assert_eq!(cluster, "edge_cluster"); + assert_eq!(target_database, "edgedb"); + assert_eq!(target_table, "edgetable"); + assert_eq!(sharding_key, None); + assert_eq!(policy_name, None); // Both should be None after round-trip + } + _ => panic!("Expected Distributed engine"), + } + } + + #[test] + fn test_buffer_invalid_flush_combinations_logged() { + // Test: flush_rows without flush_time - should warn and ignore flush_rows + let engine = ClickhouseEngine::Buffer(BufferEngine { + target_database: "db".to_string(), + target_table: "table".to_string(), + num_layers: 16, + min_time: 10, + max_time: 100, + min_rows: 10000, + max_rows: 100000, + min_bytes: 10000000, + max_bytes: 100000000, + flush_time: None, + flush_rows: Some(50000), // Invalid: no flush_time + flush_bytes: None, + }); + + let serialized: String = 
engine.clone().into(); + // flush_rows should be ignored, so only required params present + assert_eq!( + serialized, + "Buffer('db', 'table', 16, 10, 100, 10000, 100000, 10000000, 100000000)" + ); + + // Test: flush_bytes without flush_time or flush_rows - should warn and ignore flush_bytes + let engine2 = ClickhouseEngine::Buffer(BufferEngine { + target_database: "db2".to_string(), + target_table: "table2".to_string(), + num_layers: 8, + min_time: 5, + max_time: 50, + min_rows: 5000, + max_rows: 50000, + min_bytes: 5000000, + max_bytes: 50000000, + flush_time: Some(3), + flush_rows: None, + flush_bytes: Some(25000000), // Invalid: no flush_rows + }); + + let serialized2: String = engine2.clone().into(); + // flush_bytes should be ignored, only flush_time present + assert_eq!( + serialized2, + "Buffer('db2', 'table2', 8, 5, 50, 5000, 50000, 5000000, 50000000, 3)" + ); + } + + #[test] + fn test_distributed_invalid_policy_without_sharding_logged() { + // Test: policy_name without sharding_key - should warn and ignore policy_name + let engine = ClickhouseEngine::Distributed { + cluster: "my_cluster".to_string(), + target_database: "db".to_string(), + target_table: "table".to_string(), + sharding_key: None, + policy_name: Some("orphan_policy".to_string()), // Invalid: no sharding_key + }; + + let serialized: String = engine.clone().into(); + // policy_name should be ignored + assert_eq!(serialized, "Distributed('my_cluster', 'db', 'table')"); + + // Verify round-trip works correctly + let parsed = ClickhouseEngine::try_from(serialized.as_str()).unwrap(); + match parsed { + ClickhouseEngine::Distributed { + cluster, + target_database, + target_table, + sharding_key, + policy_name, + } => { + assert_eq!(cluster, "my_cluster"); + assert_eq!(target_database, "db"); + assert_eq!(target_table, "table"); + assert_eq!(sharding_key, None); + assert_eq!(policy_name, None); // Both should be None + } + _ => panic!("Expected Distributed engine"), + } + } } From 7315b1f9fc76d92ca0aec8271c71279d372e74ed Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Tue, 18 Nov 2025 15:44:46 -0700 Subject: [PATCH 31/59] fix sample by parsing (#3011) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit > [!NOTE] > Corrects SAMPLE BY extraction to terminate at TTL and avoids false matches in identifiers; adds targeted tests. > > - **SQL parsing (ClickHouse)**: > - Update `extract_sample_by_from_create_table` to terminate at `TTL` (with leading space) when extracting `SAMPLE BY`, preventing capture of TTL expressions and avoiding substring matches in identifiers. > - **Tests**: > - Add `test_extract_sample_by_with_ttl_single_line` to validate stopping at `TTL`. > - Add `test_extract_sample_by_with_identifier_containing_ttl` to ensure identifiers like `cattle_count` aren’t misinterpreted. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 021ac8845fff1998d3d46a4785cfab0a7c9b37ea. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). 
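
A minimal, self-contained sketch of the terminator scan described above (a hypothetical helper, not the actual `extract_sample_by_from_create_table` implementation); the `" TTL"` entry keeps its leading space so identifiers such as `cattle_count` are not mistaken for the keyword:

```rust
/// Hypothetical illustration of the terminator scan described in this patch.
/// `after_upper` is the uppercased SQL text that follows the SAMPLE BY expression.
fn sample_by_end(after_upper: &str) -> usize {
    // " TTL" is matched with a leading space so identifiers containing "TTL"
    // (e.g. CATTLE_COUNT) do not terminate the expression early.
    let terminators = ["ORDER BY", "SETTINGS", "PRIMARY KEY", " TTL"];
    terminators
        .iter()
        .filter_map(|t| after_upper.find(t))
        .min()
        .unwrap_or(after_upper.len())
}
```

With this scan, `SAMPLE BY sample_hash TTL toDateTime(...)` stops at the space-prefixed `TTL` and yields `sample_hash`, while `SAMPLE BY cattle_count SETTINGS ...` still yields the full `cattle_count` identifier.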
--- .../olap/clickhouse/sql_parser.rs | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/sql_parser.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/sql_parser.rs index f074294c18..873008a0b6 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/sql_parser.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/sql_parser.rs @@ -247,6 +247,11 @@ pub fn extract_sample_by_from_create_table(sql: &str) -> Option { if let Some(i) = after_upper.find("PRIMARY KEY") { end = end.min(i); } + // Note: Match " TTL" with leading space to avoid matching substrings + // within identifiers (e.g., "cattle" contains "ttl") + if let Some(i) = after_upper.find(" TTL") { + end = end.min(i); + } let expr = after[..end].trim(); if expr.is_empty() { @@ -1420,6 +1425,35 @@ pub mod tests { ); } + #[test] + fn test_extract_sample_by_with_ttl_single_line() { + // When parsing CREATE TABLE with both SAMPLE BY and TTL, + // the parser needs to stop at TTL keyword to avoid capturing the TTL expression. + // + // Bug: Parser only checked for ORDER BY, SETTINGS, and PRIMARY KEY as terminators, + // so it extracted "sample_hash TTL toDateTime(...)" instead of just "sample_hash". + // + // This primarily affected tables created outside Moose (not in state storage). + // For Moose-managed tables, the correct value from state storage was used instead. + // Customer reported this when migrating external tables. + let sql = "CREATE TABLE t (id UInt64, ts DateTime) ENGINE = MergeTree ORDER BY (hour_stamp, sample_hash, ts) SAMPLE BY sample_hash TTL toDateTime(ts / 1000) + toIntervalDay(30) SETTINGS index_granularity = 8192"; + assert_eq!( + extract_sample_by_from_create_table(sql), + Some("sample_hash".to_string()) + ); + } + + #[test] + fn test_extract_sample_by_with_identifier_containing_ttl() { + // Edge case: Ensure identifiers containing "ttl" substring don't cause false matches + // "cattle" contains "ttl" when uppercased, but shouldn't be treated as TTL keyword + let sql = "CREATE TABLE t (id UInt64, cattle_count UInt64) ENGINE = MergeTree ORDER BY id SAMPLE BY cattle_count SETTINGS index_granularity = 8192"; + assert_eq!( + extract_sample_by_from_create_table(sql), + Some("cattle_count".to_string()) + ); + } + #[test] fn test_extract_indexes_from_create_table_multiple() { let sql = "CREATE TABLE local.table_name (`u64` UInt64, `i32` Int32, `s` String, \ From fa5a5ad9f46551d6ce2522cb24fa78d9e339fb6d Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Tue, 18 Nov 2025 16:11:31 -0700 Subject: [PATCH 32/59] fix ordering in migration files (#3007) > [!NOTE] > Ensure stable JSON/YAML output by adding sorted-key custom serializers for `InfrastructureMap` and `MigrationPlan`, backed by new JSON utilities. > > - **Core**: > - **`InfrastructureMap`**: Replace derived `Serialize` with custom serializer that sorts all JSON keys via `crate::utilities::json::sort_json_keys` for deterministic output. > - **`MigrationPlan`**: Add custom `Serialize` to sort keys; `to_yaml()` now benefits from deterministic ordering. > - **Utilities**: > - **New module** `utilities/json.rs`: `sort_json_keys` and `to_string_pretty_sorted` helpers with tests; exported in `utilities.rs`. > - **Result**: > - Deterministic ordering in version-controlled migration/state files (`remote_state.json`, `local_infra_map.json`, `plan.yaml`). > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit ca2d51996ba99eba59d1eedf6155266dea35cb85. 
This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). --- .../src/framework/core/infrastructure_map.rs | 59 +++++- .../src/framework/core/migration_plan.rs | 39 +++- apps/framework-cli/src/utilities.rs | 1 + apps/framework-cli/src/utilities/json.rs | 179 ++++++++++++++++++ 4 files changed, 274 insertions(+), 4 deletions(-) create mode 100644 apps/framework-cli/src/utilities/json.rs diff --git a/apps/framework-cli/src/framework/core/infrastructure_map.rs b/apps/framework-cli/src/framework/core/infrastructure_map.rs index 3ccdf3e597..6a96220c6a 100644 --- a/apps/framework-cli/src/framework/core/infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/infrastructure_map.rs @@ -494,7 +494,10 @@ fn default_database_name() -> String { /// /// The relationship between the components is maintained by reference rather than by value. /// Helper methods facilitate navigating the map and finding related components. -#[derive(Debug, Clone, Serialize, Deserialize)] +/// +/// Note: This type has a custom `Serialize` implementation that sorts all JSON keys +/// alphabetically for deterministic output in version-controlled migration files. +#[derive(Debug, Clone, Deserialize)] pub struct InfrastructureMap { #[serde(default = "default_database_name")] pub default_database: String, @@ -2999,6 +3002,60 @@ impl Default for InfrastructureMap { } } +impl serde::Serialize for InfrastructureMap { + /// Custom serialization with sorted keys for deterministic output. + /// + /// Migration files are version-controlled, so we need consistent output. + /// Without sorted keys, HashMap serialization order is random, causing noisy diffs. + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + // We need to temporarily derive Serialize on a shadow type to avoid infinite recursion + // Create a JSON value using the derived Serialize, then sort keys + #[derive(serde::Serialize)] + struct InfrastructureMapForSerialization<'a> { + #[serde(skip_serializing_if = "Option::is_none")] + default_database: Option<&'a String>, + topics: &'a HashMap, + api_endpoints: &'a HashMap, + tables: &'a HashMap, + views: &'a HashMap, + topic_to_table_sync_processes: &'a HashMap, + topic_to_topic_sync_processes: &'a HashMap, + function_processes: &'a HashMap, + block_db_processes: &'a OlapProcess, + consumption_api_web_server: &'a ConsumptionApiWebServer, + orchestration_workers: &'a HashMap, + sql_resources: &'a HashMap, + workflows: &'a HashMap, + web_apps: &'a HashMap, + } + + let shadow_map = InfrastructureMapForSerialization { + default_database: Some(&self.default_database), + topics: &self.topics, + api_endpoints: &self.api_endpoints, + tables: &self.tables, + views: &self.views, + topic_to_table_sync_processes: &self.topic_to_table_sync_processes, + topic_to_topic_sync_processes: &self.topic_to_topic_sync_processes, + function_processes: &self.function_processes, + block_db_processes: &self.block_db_processes, + consumption_api_web_server: &self.consumption_api_web_server, + orchestration_workers: &self.orchestration_workers, + sql_resources: &self.sql_resources, + workflows: &self.workflows, + web_apps: &self.web_apps, + }; + + // Serialize to JSON value, sort keys, then serialize that + let json_value = serde_json::to_value(&shadow_map).map_err(serde::ser::Error::custom)?; + let sorted_value = crate::utilities::json::sort_json_keys(json_value); + sorted_value.serialize(serializer) + } +} + #[cfg(test)] mod tests { use 
crate::framework::core::infrastructure::table::IntType; diff --git a/apps/framework-cli/src/framework/core/migration_plan.rs b/apps/framework-cli/src/framework/core/migration_plan.rs index 2da1aaab94..2df5ecac10 100644 --- a/apps/framework-cli/src/framework/core/migration_plan.rs +++ b/apps/framework-cli/src/framework/core/migration_plan.rs @@ -1,11 +1,15 @@ use crate::framework::core::infrastructure_map::{InfraChanges, InfrastructureMap}; use crate::infrastructure::olap::clickhouse::SerializableOlapOperation; use crate::infrastructure::olap::ddl_ordering::{order_olap_changes, PlanOrderingError}; +use crate::utilities::json; use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; +use serde::Deserialize; /// A comprehensive migration plan that can be reviewed, approved, and executed -#[derive(Debug, Clone, Serialize, Deserialize)] +/// +/// Note: This type has a custom `Serialize` implementation that sorts all JSON keys +/// alphabetically for deterministic output in version-controlled migration files. +#[derive(Debug, Clone, Deserialize)] pub struct MigrationPlan { /// Timestamp when this plan was generated pub created_at: DateTime, @@ -51,13 +55,42 @@ impl MigrationPlan { } pub fn to_yaml(&self) -> anyhow::Result { - let plan_json = serde_json::to_value(self)?; // going through JSON before YAML because tooling does not support `!tag` + // Sorted keys are handled by the custom Serialize implementation + let plan_json = serde_json::to_value(self)?; let plan_yaml = serde_yaml::to_string(&plan_json)?; Ok(plan_yaml) } } +impl serde::Serialize for MigrationPlan { + /// Custom serialization with sorted keys for deterministic output. + /// + /// Migration files are version-controlled, so we need consistent output. + /// Without sorted keys, HashMap serialization order is random, causing noisy diffs. + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + // Shadow type to avoid infinite recursion + #[derive(serde::Serialize)] + struct MigrationPlanForSerialization<'a> { + created_at: &'a DateTime, + operations: &'a Vec, + } + + let shadow = MigrationPlanForSerialization { + created_at: &self.created_at, + operations: &self.operations, + }; + + // Serialize to JSON value, sort keys, then serialize that + let json_value = serde_json::to_value(&shadow).map_err(serde::ser::Error::custom)?; + let sorted_value = json::sort_json_keys(json_value); + sorted_value.serialize(serializer) + } +} + pub struct MigrationPlanWithBeforeAfter { pub remote_state: InfrastructureMap, pub local_infra_map: InfrastructureMap, diff --git a/apps/framework-cli/src/utilities.rs b/apps/framework-cli/src/utilities.rs index 0895d5e687..922c8310bd 100644 --- a/apps/framework-cli/src/utilities.rs +++ b/apps/framework-cli/src/utilities.rs @@ -8,6 +8,7 @@ pub mod docker; pub mod dotenv; pub mod git; pub mod identifiers; +pub mod json; pub mod keyring; pub mod machine_id; pub mod nodejs_version; diff --git a/apps/framework-cli/src/utilities/json.rs b/apps/framework-cli/src/utilities/json.rs new file mode 100644 index 0000000000..f32c938fdc --- /dev/null +++ b/apps/framework-cli/src/utilities/json.rs @@ -0,0 +1,179 @@ +//! JSON serialization utilities +//! +//! Provides sorted-key JSON serialization for deterministic output. +//! +//! ## Why sorted keys? +//! +//! Migration files (`remote_state.json`, `local_infra_map.json`, `plan.yaml`) are +//! committed to version control. Without sorted keys, HashMaps serialize in random +//! 
order, causing noisy diffs even when nothing changed semantically. +//! +//! Rust's `serde_json` doesn't provide native sorted serialization, so we implement +//! it here rather than adding a dependency for this single use case. + +use serde::Serialize; +use serde_json::{Map, Value}; + +/// Recursively sorts all object keys in a JSON value +/// +/// This function traverses a JSON value tree and converts all objects +/// (maps) to use sorted keys. Arrays and primitive values are preserved as-is. +/// +/// # Arguments +/// * `value` - The JSON value to sort +/// +/// # Returns +/// A new JSON value with all object keys sorted alphabetically +pub fn sort_json_keys(value: Value) -> Value { + match value { + Value::Object(map) => { + let mut sorted_map = Map::new(); + // Collect keys and sort them + let mut keys: Vec = map.keys().cloned().collect(); + keys.sort(); + + // Insert values in sorted key order, recursively sorting nested values + for key in keys { + if let Some(val) = map.get(&key) { + sorted_map.insert(key, sort_json_keys(val.clone())); + } + } + Value::Object(sorted_map) + } + Value::Array(arr) => { + // Recursively sort keys in array elements, but don't sort the array itself + Value::Array(arr.into_iter().map(sort_json_keys).collect()) + } + // Primitive values pass through unchanged + other => other, + } +} + +/// Serializes a value to a pretty-printed JSON string with sorted keys +/// +/// This function is a drop-in replacement for `serde_json::to_string_pretty` +/// that ensures all object keys are sorted alphabetically for consistent output. +/// +/// # Arguments +/// * `value` - Any serializable value +/// +/// # Returns +/// A Result containing the pretty-printed JSON string with sorted keys, +/// or a serialization error +/// +/// # Examples +/// ```ignore +/// use crate::utilities::json::to_string_pretty_sorted; +/// +/// let data = MyStruct { ... 
}; +/// let json = to_string_pretty_sorted(&data)?; +/// std::fs::write("output.json", json)?; +/// ``` +pub fn to_string_pretty_sorted(value: &T) -> serde_json::Result { + // First serialize to a JSON value + let json_value = serde_json::to_value(value)?; + + // Sort all keys recursively + let sorted_value = sort_json_keys(json_value); + + // Serialize to pretty string + serde_json::to_string_pretty(&sorted_value) +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn test_sort_simple_object() { + let input = json!({ + "zebra": 1, + "apple": 2, + "mango": 3 + }); + + let sorted = sort_json_keys(input); + let output = serde_json::to_string(&sorted).unwrap(); + + // Keys should be in alphabetical order + assert!(output.find("apple").unwrap() < output.find("mango").unwrap()); + assert!(output.find("mango").unwrap() < output.find("zebra").unwrap()); + } + + #[test] + fn test_sort_nested_objects() { + let input = json!({ + "outer_z": { + "inner_z": 1, + "inner_a": 2 + }, + "outer_a": { + "inner_z": 3, + "inner_a": 4 + } + }); + + let sorted = sort_json_keys(input); + let output = serde_json::to_string(&sorted).unwrap(); + + // Outer keys should be sorted + assert!(output.find("outer_a").unwrap() < output.find("outer_z").unwrap()); + } + + #[test] + fn test_arrays_preserve_order() { + let input = json!({ + "items": [ + {"name": "zebra", "id": 1}, + {"name": "apple", "id": 2} + ] + }); + + let sorted = sort_json_keys(input); + + // Array order should be preserved + if let Value::Object(map) = &sorted { + if let Some(Value::Array(items)) = map.get("items") { + assert_eq!(items.len(), 2); + assert_eq!(items[0]["id"], 1); // zebra still first + assert_eq!(items[1]["id"], 2); // apple still second + } else { + panic!("Expected array"); + } + } else { + panic!("Expected object"); + } + } + + #[test] + fn test_to_string_pretty_sorted() { + use serde::Serialize; + use std::collections::HashMap; + + #[derive(Serialize)] + struct TestStruct { + zebra: String, + apple: String, + mango: HashMap, + } + + let mut map = HashMap::new(); + map.insert("z_key".to_string(), 1); + map.insert("a_key".to_string(), 2); + + let test_data = TestStruct { + zebra: "last".to_string(), + apple: "first".to_string(), + mango: map, + }; + + let output = to_string_pretty_sorted(&test_data).unwrap(); + + // apple should appear before zebra in the output + assert!(output.find("apple").unwrap() < output.find("zebra").unwrap()); + + // Nested keys should also be sorted + assert!(output.find("a_key").unwrap() < output.find("z_key").unwrap()); + } +} From 3592dd9d543909f85fb20ab6891d9ea3cc1f2b96 Mon Sep 17 00:00:00 2001 From: George Leung Date: Tue, 18 Nov 2025 18:31:57 -0800 Subject: [PATCH 33/59] fix `table_settings` with Some(default_value) and None (#3012) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit > [!NOTE] > Adjust ClickHouse diffing to compare table_settings with defaults (handling None vs Some(default)) and return aggregated changes; add a clarifying TODO in infra map. > > - **ClickHouse diff strategy (`infrastructure/olap/clickhouse/diff_strategy.rs`)**: > - Compare `table_settings` using readonly defaults to handle `None` vs `Some(default)` equivalence. > - Accumulate results in a `changes` vector, allowing `SettingsChanged` and/or `Updated` to be emitted together instead of early returns. > - Keep existing drop+create paths for engine/ORDER BY/PARTITION BY; unchanged behavior otherwise. 
> - **Infra map (`framework/core/infrastructure_map.rs`)**: > - Add TODO noting `table_settings` aren’t checked in the outer change-condition (handled by ClickHouse strategy). > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit b067ac5bb7e0f7fc03ed580adf58951dc8cc3cba. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). --- .../src/framework/core/infrastructure_map.rs | 1 + .../olap/clickhouse/diff_strategy.rs | 42 ++++++++++--------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/apps/framework-cli/src/framework/core/infrastructure_map.rs b/apps/framework-cli/src/framework/core/infrastructure_map.rs index 6a96220c6a..6cb1945c1b 100644 --- a/apps/framework-cli/src/framework/core/infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/infrastructure_map.rs @@ -1904,6 +1904,7 @@ impl InfrastructureMap { // Only process changes if there are actual differences to report // Note: cluster_name changes are intentionally excluded - they don't trigger operations + // TODO: table_settings is not checked in the if condition, but checked by ClickHouseTableDiffStrategy if !column_changes.is_empty() || order_by_changed || partition_by_changed diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs index ea133f775d..cee8dd2fb7 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs @@ -536,18 +536,18 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { OlapChange::Table(TableChange::Added(after.clone())), ]; } - + let mut changes = Vec::new(); // Check if only table settings have changed if before.table_settings != after.table_settings { // List of readonly settings that cannot be modified after table creation // Source: ClickHouse/src/Storages/MergeTree/MergeTreeSettings.cpp::isReadonlySetting - const READONLY_SETTINGS: &[&str] = &[ - "index_granularity", - "index_granularity_bytes", - "enable_mixed_granularity_parts", - "add_minmax_index_for_numeric_columns", - "add_minmax_index_for_string_columns", - "table_disk", + const READONLY_SETTINGS: &[(&str, &str)] = &[ + ("index_granularity", "8192"), + ("index_granularity_bytes", "10485760"), + ("enable_mixed_granularity_parts", "1"), + ("add_minmax_index_for_numeric_columns", "0"), + ("add_minmax_index_for_string_columns", "0"), + ("table_disk", "0"), ]; // Check if any readonly settings have changed @@ -555,9 +555,13 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { let before_settings = before.table_settings.as_ref().unwrap_or(&empty_settings); let after_settings = after.table_settings.as_ref().unwrap_or(&empty_settings); - for readonly_setting in READONLY_SETTINGS { - let before_value = before_settings.get(*readonly_setting); - let after_value = after_settings.get(*readonly_setting); + for (readonly_setting, default) in READONLY_SETTINGS { + let before_value = before_settings + .get(*readonly_setting) + .map_or(*default, |v| v); + let after_value = after_settings + .get(*readonly_setting) + .map_or(*default, |v| v); if before_value != after_value { log::warn!( @@ -579,12 +583,12 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { before.name ); // Return the explicit SettingsChanged variant for clarity - return vec![OlapChange::Table(TableChange::SettingsChanged { + 
changes.push(OlapChange::Table(TableChange::SettingsChanged { name: before.name.clone(), before_settings: before.table_settings.clone(), after_settings: after.table_settings.clone(), table: after.clone(), - })]; + })); } // Check if this is an S3Queue table with column changes @@ -617,18 +621,18 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { // For other changes, ClickHouse can handle them via ALTER TABLE. // If there are no column/index/sample_by changes, return an empty vector. let sample_by_changed = before.sample_by != after.sample_by; - if column_changes.is_empty() && before.indexes == after.indexes && !sample_by_changed { - vec![] - } else { - vec![OlapChange::Table(TableChange::Updated { + if !column_changes.is_empty() || before.indexes != after.indexes || sample_by_changed { + changes.push(OlapChange::Table(TableChange::Updated { name: before.name.clone(), column_changes, order_by_change, partition_by_change, before: before.clone(), after: after.clone(), - })] - } + })) + }; + + changes } } From 4afdf1dd0ca2f61bb5d0a12b0c8441536baef32a Mon Sep 17 00:00:00 2001 From: George Leung Date: Wed, 19 Nov 2025 11:49:10 -0800 Subject: [PATCH 34/59] compare engine type before comparing engine hash (#3008) this would've discovered the bug fixed in #3003 the backwards compatibility e2e test will pass after that fix is released --- > [!NOTE] > Update ClickHouse engine change detection to compare engine variant first, then hashes/values, ensuring correct drop+create when engine type changes. > > - **ClickHouse table diff**: > - Engine change detection now compares engine type (discriminant) before using `engine_params_hash` or full engine comparison, ensuring proper drop+create when the engine variant changes. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 86818bc7e991296e21cbd6e542b353dc4505a70f. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). --- .../olap/clickhouse/diff_strategy.rs | 31 ++++++++++--------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs index cee8dd2fb7..46168bcc69 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs @@ -13,6 +13,7 @@ use crate::framework::core::infrastructure_map::{ }; use crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; use std::collections::HashMap; +use std::mem::discriminant; /// Generates a formatted error message for database field changes. 
/// @@ -510,20 +511,22 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { ]; } - // First check if we can use hash comparison for engine changes - let engine_changed = if let (Some(before_hash), Some(after_hash)) = - (&before.engine_params_hash, &after.engine_params_hash) - { - // If both tables have hashes, compare them for change detection - // This includes credentials and other non-alterable parameters - before_hash != after_hash - } else { - // Fallback to direct engine comparison if hashes are not available - // Note: Tables are already normalized at this point (None -> Some(MergeTree)) - // via normalize_inframap_engines() in the remote plan flow, so we can - // safely use direct comparison - before.engine != after.engine - }; + // First make sure the engine type is the kind + // then check if we can use hash comparison for engine changes + let engine_changed = discriminant(&before.engine) != discriminant(&after.engine) + || if let (Some(before_hash), Some(after_hash)) = + (&before.engine_params_hash, &after.engine_params_hash) + { + // If both tables have hashes, compare them for change detection + // This includes credentials and other non-alterable parameters + before_hash != after_hash + } else { + // Fallback to direct engine comparison if hashes are not available + // Note: Tables are already normalized at this point (None -> Some(MergeTree)) + // via normalize_inframap_engines() in the remote plan flow, so we can + // safely use direct comparison + before.engine != after.engine + }; // Check if engine has changed (using hash comparison when available) if engine_changed { From 87c0fecf2abfc5cd94dbb70b58a0b5a33fed08b3 Mon Sep 17 00:00:00 2001 From: Dave Seleno <958603+onelesd@users.noreply.github.com> Date: Wed, 19 Nov 2025 13:01:31 -0800 Subject: [PATCH 35/59] add query syntax checking, formatting for code, and prettifying (#2998) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit this is a stacked PR - review & merge #2996 first Screenshot 2025-11-14 at 9 54 25 PM --- > [!NOTE] > Enhances `moose query` with SQL syntax validation and code formatting (python/ts) with optional prettify, plus tests/docs updates and `sqlparser` bump. > > - **CLI (`apps/framework-cli`)**: > - `query` command: adds `-c/--format-query ` (with `py`/`ts` aliases) to output code literals instead of executing, and `-p/--prettify` (requires `--format-query`). > - **Routines**: > - New `routines/format_query.rs`: SQL validation (ClickHouse dialect), prettify via `sqlparser`, and formatting to Python raw strings or TypeScript template literals. > - `routines/query.rs`: validates SQL; supports format-only flow; unchanged path executes and streams JSON. > - **Tests** (`apps/framework-cli-e2e/test/cli-query.test.ts`): > - E2E coverage for python/ts formatting, file input, aliases, invalid language, multiline/regex handling, prettify behavior, and flag requirement. > - **Docs** (`framework-docs` and `framework-docs-v2`): > - Document new `query` formatting flags, examples, and usage notes. > - **Dependencies**: > - Bump `sqlparser` to `0.59` (manifest and lockfile). > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 3887907f8d6019e6edbcd26cf270529d45ecf3ab. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). 
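
For context, a minimal standalone sketch (assuming it is run outside the CLI with `sqlparser = "0.59"` as a direct dependency) of the two sqlparser steps the new flags lean on, parse-to-validate and pretty-print:

```rust
use sqlparser::dialect::ClickHouseDialect;
use sqlparser::parser::Parser;

fn main() {
    let sql = "SELECT id, name FROM users WHERE active = 1 ORDER BY name";
    // Parsing doubles as syntax validation; a parse error here is what the
    // query command surfaces before formatting or executing anything.
    let statements =
        Parser::parse_sql(&ClickHouseDialect {}, sql).expect("invalid SQL is rejected");
    // `{:#}` is sqlparser's pretty Display, which produces the line breaks shown
    // in the `--prettify` examples; the plain `{}` form keeps the query on one line.
    println!("{:#}", statements[0]);
}
```

The prettified (or raw) SQL is then wrapped as a Python raw triple-quoted string or a TypeScript template literal, which is why the regex backslashes exercised in the tests survive without manual escaping.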
--------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: Claude --- Cargo.lock | 4 +- apps/framework-cli-e2e/test/cli-query.test.ts | 269 +++++++++++++++ apps/framework-cli/Cargo.toml | 2 +- apps/framework-cli/src/cli.rs | 12 +- apps/framework-cli/src/cli/commands.rs | 8 + .../src/cli/routines/format_query.rs | 318 ++++++++++++++++++ apps/framework-cli/src/cli/routines/mod.rs | 1 + apps/framework-cli/src/cli/routines/query.rs | 27 ++ .../content/moosestack/moose-cli.mdx | 56 +++ .../src/pages/moose/moose-cli.mdx | 56 +++ 10 files changed, 749 insertions(+), 4 deletions(-) create mode 100644 apps/framework-cli/src/cli/routines/format_query.rs diff --git a/Cargo.lock b/Cargo.lock index c0ac1d1df9..af9abd18f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5183,9 +5183,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.58.0" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec4b661c54b1e4b603b37873a18c59920e4c51ea8ea2cf527d925424dbd4437c" +checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" dependencies = [ "log", "recursive", diff --git a/apps/framework-cli-e2e/test/cli-query.test.ts b/apps/framework-cli-e2e/test/cli-query.test.ts index f28be5b1d7..5748d4b204 100644 --- a/apps/framework-cli-e2e/test/cli-query.test.ts +++ b/apps/framework-cli-e2e/test/cli-query.test.ts @@ -202,4 +202,273 @@ describe("moose query command", () => { console.log("✓ Query errors handled gracefully"); } }); + + describe("format query flag", () => { + it("should format query as Python code", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing Python formatting ---"); + + const { stdout } = await execAsync( + `"${CLI_PATH}" query -c python "SELECT * FROM users WHERE email REGEXP '[a-z]+'"`, + { cwd: testProjectDir }, + ); + + console.log("Format output:", stdout); + + expect(stdout).to.include('r"""'); + expect(stdout).to.include( + "SELECT * FROM users WHERE email REGEXP '[a-z]+'", + ); + expect(stdout).to.include('"""'); + expect(stdout).not.to.include("{"); // Should not have JSON output + + console.log("✓ Python formatting works"); + }); + + it("should format query as TypeScript code", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing TypeScript formatting ---"); + + const { stdout } = await execAsync( + `"${CLI_PATH}" query -c typescript "SELECT * FROM users"`, + { cwd: testProjectDir }, + ); + + console.log("Format output:", stdout); + + expect(stdout).to.include("`"); + expect(stdout).to.include("SELECT * FROM users"); + expect(stdout).not.to.include("{"); // Should not have JSON output + + console.log("✓ TypeScript formatting works"); + }); + + it("should format query from file", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing format from file ---"); + + const queryFile = path.join(testProjectDir, "format-test.sql"); + fs.writeFileSync(queryFile, "SELECT count(*) as total FROM events"); + + const { stdout } = await execAsync( + `"${CLI_PATH}" query -c python -f format-test.sql`, + { cwd: testProjectDir }, + ); + + console.log("Format output:", stdout); + + expect(stdout).to.include('r"""'); + expect(stdout).to.include("SELECT count(*) as total FROM events"); + + console.log("✓ Format from file works"); + }); + + it("should reject invalid language", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing invalid language ---"); + + try 
{ + await execAsync(`"${CLI_PATH}" query -c java "SELECT 1"`, { + cwd: testProjectDir, + }); + expect.fail("Should have thrown an error"); + } catch (error: any) { + expect(error.message).to.include("Unsupported language"); + console.log("✓ Invalid language rejected"); + } + }); + + it("should accept language aliases", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing language aliases ---"); + + const pyResult = await execAsync(`"${CLI_PATH}" query -c py "SELECT 1"`, { + cwd: testProjectDir, + }); + expect(pyResult.stdout).to.include('r"""'); + + const tsResult = await execAsync(`"${CLI_PATH}" query -c ts "SELECT 1"`, { + cwd: testProjectDir, + }); + expect(tsResult.stdout).to.include("`"); + + console.log("✓ Language aliases work"); + }); + + it("should format multi-line SQL with proper indentation", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing multi-line SQL ---"); + + const queryFile = path.join(testProjectDir, "multiline-query.sql"); + const multilineSQL = `SELECT + user_id, + email, + created_at +FROM users +WHERE status = 'active' +ORDER BY created_at DESC`; + fs.writeFileSync(queryFile, multilineSQL); + + const { stdout } = await execAsync( + `"${CLI_PATH}" query -c python -f multiline-query.sql`, + { cwd: testProjectDir }, + ); + + console.log("Format output:", stdout); + + expect(stdout).to.include('r"""'); + expect(stdout).to.include(" user_id,"); + expect(stdout).to.include("ORDER BY created_at DESC"); + expect(stdout).to.include('"""'); + + console.log("✓ Multi-line SQL preserved correctly"); + }); + + it("should format SQL with complex regex patterns", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing complex regex patterns ---"); + + const complexQuery = `SELECT * FROM logs WHERE message REGEXP '\\\\d{4}-\\\\d{2}-\\\\d{2}\\\\s+\\\\w+'`; + + const { stdout } = await execAsync( + `"${CLI_PATH}" query -c python "${complexQuery}"`, + { cwd: testProjectDir }, + ); + + console.log("Format output:", stdout); + + expect(stdout).to.include('r"""'); + // Raw strings should preserve backslashes + expect(stdout).to.include("\\d{4}"); + expect(stdout).to.include("REGEXP"); + + console.log("✓ Complex regex patterns preserved"); + }); + + it("should format SQL with email regex pattern", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing email regex pattern ---"); + + const emailQuery = `SELECT * FROM users WHERE email REGEXP '^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\\\.[a-zA-Z]{2,}$'`; + + const pyResult = await execAsync( + `"${CLI_PATH}" query -c python "${emailQuery}"`, + { cwd: testProjectDir }, + ); + + expect(pyResult.stdout).to.include('r"""'); + expect(pyResult.stdout).to.include("[a-zA-Z0-9._%+-]+"); + + const tsResult = await execAsync( + `"${CLI_PATH}" query -c typescript "${emailQuery}"`, + { cwd: testProjectDir }, + ); + + expect(tsResult.stdout).to.include("`"); + expect(tsResult.stdout).to.include("[a-zA-Z0-9._%+-]+"); + + console.log("✓ Email regex pattern preserved"); + }); + + it("should handle queries with single quotes and backslashes", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing quotes and backslashes ---"); + + const queryFile = path.join(testProjectDir, "complex-pattern.sql"); + const complexSQL = `SELECT * FROM data WHERE pattern REGEXP '\\\\b(foo|bar)\\\\b' AND name = 'test'`; + fs.writeFileSync(queryFile, complexSQL); + + const { stdout } = await execAsync( + 
`"${CLI_PATH}" query -c python -f complex-pattern.sql`, + { cwd: testProjectDir }, + ); + + console.log("Format output:", stdout); + + expect(stdout).to.include('r"""'); + expect(stdout).to.include("name = 'test'"); + expect(stdout).to.include("\\b(foo|bar)\\b"); + + console.log("✓ Quotes and backslashes preserved"); + }); + + it("should prettify SQL when --prettify flag is used", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing prettify functionality ---"); + + const messyQuery = + "SELECT id, name FROM users WHERE active = 1 ORDER BY name LIMIT 10"; + + const { stdout } = await execAsync( + `"${CLI_PATH}" query -c python -p "${messyQuery}"`, + { cwd: testProjectDir }, + ); + + console.log("Format output:", stdout); + + expect(stdout).to.include('r"""'); + expect(stdout).to.include("SELECT"); + expect(stdout).to.include("FROM"); + expect(stdout).to.include("WHERE"); + expect(stdout).to.include("ORDER BY"); + // Should have line breaks (prettified) + const lines = stdout.split("\n"); + expect(lines.length).to.be.greaterThan(3); + + console.log("✓ Prettify works"); + }); + + it("should prettify complex SQL with TypeScript", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing prettify with TypeScript ---"); + + const complexQuery = + "SELECT u.id, u.name, o.total FROM users u LEFT JOIN orders o ON u.id = o.user_id WHERE u.active = 1 AND o.total > 100 ORDER BY o.total DESC"; + + const { stdout } = await execAsync( + `"${CLI_PATH}" query -c typescript -p "${complexQuery}"`, + { cwd: testProjectDir }, + ); + + console.log("Format output:", stdout); + + expect(stdout).to.include("`"); + expect(stdout).to.include("SELECT"); + expect(stdout).to.include("LEFT JOIN"); + expect(stdout).to.include("WHERE"); + expect(stdout).to.include("ORDER BY"); + + console.log("✓ Prettify with TypeScript works"); + }); + + it("should require format-query flag when using prettify", async function () { + this.timeout(TIMEOUTS.MIGRATION_MS); + + console.log("\n--- Testing prettify requires format-query ---"); + + try { + await execAsync(`"${CLI_PATH}" query -p "SELECT 1"`, { + cwd: testProjectDir, + }); + expect.fail("Should have thrown an error"); + } catch (error: any) { + // clap should enforce this requirement + expect(error.message).to.match( + /requires.*format-query|required argument/i, + ); + console.log("✓ Prettify requires format-query flag"); + } + }); + }); }); diff --git a/apps/framework-cli/Cargo.toml b/apps/framework-cli/Cargo.toml index 99c977e266..065114d98d 100644 --- a/apps/framework-cli/Cargo.toml +++ b/apps/framework-cli/Cargo.toml @@ -12,7 +12,7 @@ homepage = "https://www.fiveonefour.com/moose" [dependencies] posthog514client-rs = { path = "../../packages/posthog514client-rs" } -sqlparser = "0.58.0" +sqlparser = "0.59" itertools = "0.13.0" openssl = { version = "0.10", features = ["vendored"] } clap = { version = "4.3.17", features = ["derive"] } diff --git a/apps/framework-cli/src/cli.rs b/apps/framework-cli/src/cli.rs index 4d3e306caf..a93717a5ab 100644 --- a/apps/framework-cli/src/cli.rs +++ b/apps/framework-cli/src/cli.rs @@ -1368,6 +1368,8 @@ pub async fn top_command_handler( query: sql, file, limit, + format_query, + prettify, } => { info!("Running query command"); @@ -1382,7 +1384,15 @@ pub async fn top_command_handler( HashMap::new(), ); - let result = query(project_arc, sql.clone(), file.clone(), *limit).await; + let result = query( + project_arc, + sql.clone(), + file.clone(), + *limit, + 
format_query.clone(), + *prettify, + ) + .await; wait_for_usage_capture(capture_handle).await; diff --git a/apps/framework-cli/src/cli/commands.rs b/apps/framework-cli/src/cli/commands.rs index 5a76bce88c..5c62990cc2 100644 --- a/apps/framework-cli/src/cli/commands.rs +++ b/apps/framework-cli/src/cli/commands.rs @@ -205,6 +205,14 @@ pub enum Commands { /// Maximum number of rows to return (applied via ClickHouse settings) #[arg(short, long, default_value = "10000")] limit: u64, + + /// Format query as code literal (python|typescript). Skips execution. + #[arg(short = 'c', long = "format-query", value_name = "LANGUAGE")] + format_query: Option, + + /// Prettify SQL before formatting (only with --format-query) + #[arg(short = 'p', long = "prettify", requires = "format_query")] + prettify: bool, }, } diff --git a/apps/framework-cli/src/cli/routines/format_query.rs b/apps/framework-cli/src/cli/routines/format_query.rs new file mode 100644 index 0000000000..f113f7564c --- /dev/null +++ b/apps/framework-cli/src/cli/routines/format_query.rs @@ -0,0 +1,318 @@ +//! Module for formatting SQL queries as code literals. +//! +//! Supports formatting SQL queries as Python raw strings or TypeScript template literals +//! for easy copy-pasting into application code. + +use crate::cli::display::Message; +use crate::cli::routines::RoutineFailure; +use sqlparser::ast::Statement; +use sqlparser::dialect::ClickHouseDialect; +use sqlparser::parser::Parser; + +/// Supported languages for code formatting +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CodeLanguage { + Python, + TypeScript, +} + +impl CodeLanguage { + /// Parse language string into CodeLanguage enum + pub fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "python" | "py" => Ok(CodeLanguage::Python), + "typescript" | "ts" => Ok(CodeLanguage::TypeScript), + _ => Err(RoutineFailure::error(Message::new( + "Format Query".to_string(), + format!( + "Unsupported language: '{}'. Supported: python, typescript", + s + ), + ))), + } + } +} + +/// Parse SQL using ClickHouse dialect +fn parse_sql(sql: &str) -> Result, RoutineFailure> { + let dialect = ClickHouseDialect {}; + Parser::parse_sql(&dialect, sql).map_err(|e| { + RoutineFailure::error(Message::new( + "SQL Parsing".to_string(), + format!("Invalid SQL syntax: {}", e), + )) + }) +} + +/// Validate SQL syntax using sqlparser. +/// +/// Parses the SQL query to ensure it's syntactically valid before formatting or execution. +/// +/// # Arguments +/// +/// * `sql` - The SQL query string to validate +/// +/// # Returns +/// +/// * `Result<(), RoutineFailure>` - Ok if valid, error with helpful message if invalid +pub fn validate_sql(sql: &str) -> Result<(), RoutineFailure> { + parse_sql(sql)?; + Ok(()) +} + +/// Prettify SQL query using sqlparser's pretty printing. +/// +/// Parses the SQL and formats it with proper indentation and line breaks. +/// +/// # Arguments +/// +/// * `sql` - The SQL query string to prettify +/// +/// # Returns +/// +/// * `Result` - Prettified SQL string or error +fn prettify_sql(sql: &str) -> Result { + let statements = parse_sql(sql)?; + + // Format all statements with pretty printing + let formatted: Vec = statements + .iter() + .map(|stmt| format!("{:#}", stmt)) + .collect(); + + Ok(formatted.join(";\n")) +} + +/// Format SQL query as a code literal for the specified language. 
+/// +/// # Arguments +/// +/// * `sql` - The SQL query string to format +/// * `language` - Target language (Python or TypeScript) +/// * `prettify` - Whether to prettify SQL before formatting +/// +/// # Returns +/// +/// * `Result` - Formatted code literal or error +pub fn format_as_code( + sql: &str, + language: CodeLanguage, + prettify: bool, +) -> Result { + let sql_to_format = if prettify { + prettify_sql(sql)? + } else { + sql.to_string() + }; + + let formatted = match language { + CodeLanguage::Python => format_python(&sql_to_format), + CodeLanguage::TypeScript => format_typescript(&sql_to_format), + }; + + Ok(formatted) +} + +/// Format SQL as Python raw triple-quoted string +fn format_python(sql: &str) -> String { + format!("r\"\"\"\n{}\n\"\"\"", sql.trim()) +} + +/// Format SQL as TypeScript template literal +fn format_typescript(sql: &str) -> String { + format!("`\n{}\n`", sql.trim()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_language_from_str() { + assert_eq!( + CodeLanguage::from_str("python").unwrap(), + CodeLanguage::Python + ); + assert_eq!(CodeLanguage::from_str("py").unwrap(), CodeLanguage::Python); + assert_eq!( + CodeLanguage::from_str("typescript").unwrap(), + CodeLanguage::TypeScript + ); + assert_eq!( + CodeLanguage::from_str("ts").unwrap(), + CodeLanguage::TypeScript + ); + assert!(CodeLanguage::from_str("java").is_err()); + } + + #[test] + fn test_format_python() { + let sql = "SELECT * FROM users\nWHERE id = 1"; + let result = format_python(sql); + assert_eq!(result, "r\"\"\"\nSELECT * FROM users\nWHERE id = 1\n\"\"\""); + } + + #[test] + fn test_format_python_with_regex() { + let sql = r"SELECT * FROM users WHERE email REGEXP '[a-z]+'"; + let result = format_python(sql); + assert!(result.starts_with("r\"\"\"")); + assert!(result.contains(r"REGEXP '[a-z]+'")); + } + + #[test] + fn test_format_typescript() { + let sql = "SELECT * FROM users\nWHERE id = 1"; + let result = format_typescript(sql); + assert_eq!(result, "`\nSELECT * FROM users\nWHERE id = 1\n`"); + } + + #[test] + fn test_format_as_code_python() { + let sql = "SELECT 1"; + let result = format_as_code(sql, CodeLanguage::Python, false).unwrap(); + assert_eq!(result, "r\"\"\"\nSELECT 1\n\"\"\""); + } + + #[test] + fn test_format_as_code_typescript() { + let sql = "SELECT 1"; + let result = format_as_code(sql, CodeLanguage::TypeScript, false).unwrap(); + assert_eq!(result, "`\nSELECT 1\n`"); + } + + #[test] + fn test_format_python_multiline_complex() { + let sql = r#"SELECT + user_id, + email, + created_at +FROM users +WHERE email REGEXP '^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$' + AND status = 'active' +ORDER BY created_at DESC"#; + let result = format_python(sql); + assert!(result.starts_with("r\"\"\"")); + assert!(result.ends_with("\"\"\"")); + assert!(result.contains("REGEXP")); + assert!(result.contains("ORDER BY")); + // Verify backslashes are preserved as-is in raw string + assert!(result.contains(r"[a-zA-Z0-9._%+-]+")); + } + + #[test] + fn test_format_python_complex_regex_patterns() { + // Test various regex special characters + let sql = r"SELECT * FROM logs WHERE message REGEXP '\\d{4}-\\d{2}-\\d{2}\\s+\\w+'"; + let result = format_python(sql); + assert!(result.contains(r"\\d{4}-\\d{2}-\\d{2}\\s+\\w+")); + + // Test with character classes and quantifiers + let sql2 = r"SELECT * FROM data WHERE field REGEXP '[A-Z]{3,5}\-\d+'"; + let result2 = format_python(sql2); + assert!(result2.contains(r"[A-Z]{3,5}\-\d+")); + } + + #[test] + fn 
test_format_typescript_multiline_complex() { + let sql = r#"SELECT + order_id, + customer_email, + total_amount +FROM orders +WHERE customer_email REGEXP '[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}' + AND total_amount > 100 +LIMIT 50"#; + let result = format_typescript(sql); + assert!(result.starts_with("`")); + assert!(result.ends_with("`")); + assert!(result.contains("REGEXP")); + assert!(result.contains("LIMIT 50")); + } + + #[test] + fn test_format_preserves_indentation() { + let sql = "SELECT *\n FROM users\n WHERE id = 1"; + let python_result = format_python(sql); + let typescript_result = format_typescript(sql); + + // Both should preserve the indentation + assert!(python_result.contains(" FROM users")); + assert!(python_result.contains(" WHERE id = 1")); + assert!(typescript_result.contains(" FROM users")); + assert!(typescript_result.contains(" WHERE id = 1")); + } + + #[test] + fn test_format_python_with_quotes_and_backslashes() { + // SQL with single quotes and backslashes + let sql = r"SELECT * FROM data WHERE pattern REGEXP '\\b(foo|bar)\\b' AND name = 'test'"; + let result = format_python(sql); + // Raw strings should preserve everything as-is + assert!(result.contains(r"\\b(foo|bar)\\b")); + assert!(result.contains("name = 'test'")); + } + + #[test] + fn test_prettify_sql_basic() { + let sql = "SELECT id, name FROM users WHERE active = 1 ORDER BY name"; + let result = prettify_sql(sql).unwrap(); + + assert!(result.contains("SELECT")); + assert!(result.contains("FROM")); + assert!(result.contains("users")); + assert!(result.contains("WHERE")); + // Should have line breaks with sqlparser formatting + assert!(result.contains('\n')); + } + + #[test] + fn test_prettify_sql_preserves_values() { + let sql = "SELECT * FROM users WHERE email = 'test@example.com'"; + let result = prettify_sql(sql).unwrap(); + + // Should preserve the email value + assert!(result.contains("test@example.com")); + } + + #[test] + fn test_format_as_code_with_prettify() { + let sql = "SELECT id, name FROM users WHERE active = 1"; + + // With prettify + let result = format_as_code(sql, CodeLanguage::Python, true).unwrap(); + assert!(result.starts_with("r\"\"\"")); + assert!(result.contains('\n')); + assert!(result.contains("SELECT")); + + // Without prettify + let result_no_prettify = format_as_code(sql, CodeLanguage::Python, false).unwrap(); + assert!(result_no_prettify.starts_with("r\"\"\"")); + assert!(result_no_prettify.contains("SELECT id, name FROM users")); + } + + #[test] + fn test_prettify_with_complex_query() { + let sql = "SELECT u.id, u.name, o.total FROM users u LEFT JOIN orders o ON u.id = o.user_id WHERE u.active = 1 AND o.total > 100 ORDER BY o.total DESC LIMIT 10"; + let result = prettify_sql(sql).unwrap(); + + assert!(result.contains("SELECT")); + assert!(result.contains("FROM")); + assert!(result.contains("users")); + assert!(result.contains("JOIN")); + assert!(result.contains("WHERE")); + assert!(result.contains("LIMIT")); + } + + #[test] + fn test_validate_sql_valid() { + let sql = "SELECT * FROM users WHERE id = 1"; + assert!(validate_sql(sql).is_ok()); + } + + #[test] + fn test_validate_sql_invalid() { + let sql = "INVALID SQL SYNTAX ;;; NOT VALID"; + assert!(validate_sql(sql).is_err()); + } +} diff --git a/apps/framework-cli/src/cli/routines/mod.rs b/apps/framework-cli/src/cli/routines/mod.rs index e508cf6219..5ff8f8e115 100644 --- a/apps/framework-cli/src/cli/routines/mod.rs +++ b/apps/framework-cli/src/cli/routines/mod.rs @@ -161,6 +161,7 @@ pub mod clean; pub mod code_generation; pub 
mod dev; pub mod docker_packager; +pub mod format_query; pub mod kafka_pull; pub mod logs; pub mod ls; diff --git a/apps/framework-cli/src/cli/routines/query.rs b/apps/framework-cli/src/cli/routines/query.rs index f6b5d59284..06cb651a41 100644 --- a/apps/framework-cli/src/cli/routines/query.rs +++ b/apps/framework-cli/src/cli/routines/query.rs @@ -72,6 +72,8 @@ fn get_sql_input(sql: Option, file: Option) -> Result, file: Option, limit: u64, + format_query: Option, + prettify: bool, ) -> Result { let sql_query = get_sql_input(sql, file)?; info!("Executing SQL: {}", sql_query); + // Validate SQL syntax before any operation + use crate::cli::routines::format_query::validate_sql; + validate_sql(&sql_query)?; + + // If format_query flag is present, format and exit without executing + if let Some(lang_str) = format_query { + use crate::cli::routines::format_query::{format_as_code, CodeLanguage}; + + let language = CodeLanguage::from_str(&lang_str)?; + let formatted = format_as_code(&sql_query, language, prettify)?; + + println!("{}", formatted); + + return Ok(RoutineSuccess::success(Message::new( + "Format Query".to_string(), + format!( + "Formatted as {} code{}", + lang_str, + if prettify { " (prettified)" } else { "" } + ), + ))); + } + // Get ClickHouse connection pool let pool = get_pool(&project.clickhouse_config); diff --git a/apps/framework-docs-v2/content/moosestack/moose-cli.mdx b/apps/framework-docs-v2/content/moosestack/moose-cli.mdx index a07bad239a..908ee0f3ea 100644 --- a/apps/framework-docs-v2/content/moosestack/moose-cli.mdx +++ b/apps/framework-docs-v2/content/moosestack/moose-cli.mdx @@ -191,6 +191,62 @@ moose query "SELECT * FROM events" --limit 100 - One JSON object per row - Row count summary at end +#### Formatting Queries for Code + +Use the `-c/--format-query` flag to format SQL queries as code literals instead of executing them: + +```bash +# Format as Python (raw string) +moose query -c python "SELECT * FROM users WHERE email REGEXP '[a-z]+'" +# Output: +# r""" +# SELECT * FROM users WHERE email REGEXP '[a-z]+' +# """ + +# Format as TypeScript (template literal) +moose query -c typescript "SELECT * FROM events" +# Output: +# ` +# SELECT * FROM events +# ` + +# Works with file input +moose query -c python -f my_query.sql + +# Prettify SQL before formatting (adds line breaks and indentation) +moose query -c python -p "SELECT id, name FROM users WHERE active = 1 ORDER BY name" +# Output: +# r""" +# SELECT id, name +# FROM users +# WHERE active = 1 +# ORDER BY name +# """ + +# Use heredoc for multi-line SQL queries (no need to escape quotes) +moose query -c python -p < '2024-01-01' +GROUP BY b.id, b.name, b.email +HAVING COUNT(o.id) > 5 +ORDER BY total_spent DESC +LIMIT 50 +EOF + +# Supported languages: python (py), typescript (ts) +# Prettify flag: -p, --prettify (only works with --format-query) +``` + +**Use case:** Iterate on SQL queries in the CLI, then format and paste into your application code without manual escaping. Use `--prettify` to clean up messy one-line queries. 
+ ## Generation Commands ### Generate Hash Token diff --git a/apps/framework-docs/src/pages/moose/moose-cli.mdx b/apps/framework-docs/src/pages/moose/moose-cli.mdx index cdcf28a8e2..85ef07b9bc 100644 --- a/apps/framework-docs/src/pages/moose/moose-cli.mdx +++ b/apps/framework-docs/src/pages/moose/moose-cli.mdx @@ -190,6 +190,62 @@ moose query "SELECT * FROM events" --limit 100 - One JSON object per row - Row count summary at end +#### Formatting Queries for Code + +Use the `-c/--format-query` flag to format SQL queries as code literals instead of executing them: + +```bash +# Format as Python (raw string) +moose query -c python "SELECT * FROM users WHERE email REGEXP '[a-z]+'" +# Output: +# r""" +# SELECT * FROM users WHERE email REGEXP '[a-z]+' +# """ + +# Format as TypeScript (template literal) +moose query -c typescript "SELECT * FROM events" +# Output: +# ` +# SELECT * FROM events +# ` + +# Works with file input +moose query -c python -f my_query.sql + +# Prettify SQL before formatting (adds line breaks and indentation) +moose query -c python -p "SELECT id, name FROM users WHERE active = 1 ORDER BY name" +# Output: +# r""" +# SELECT id, name +# FROM users +# WHERE active = 1 +# ORDER BY name +# """ + +# Use heredoc for multi-line SQL queries (no need to escape quotes) +moose query -c python -p < '2024-01-01' +GROUP BY b.id, b.name, b.email +HAVING COUNT(o.id) > 5 +ORDER BY total_spent DESC +LIMIT 50 +EOF + +# Supported languages: python (py), typescript (ts) +# Prettify flag: -p, --prettify (only works with --format-query) +``` + +**Use case:** Iterate on SQL queries in the CLI, then format and paste into your application code without manual escaping. Use `--prettify` to clean up messy one-line queries. + ## Generation Commands ### Generate Hash Token From 1cef3c203cb06698d1d9c41ec40dd0a5c93a13d2 Mon Sep 17 00:00:00 2001 From: George Leung Date: Wed, 19 Nov 2025 13:25:39 -0800 Subject: [PATCH 36/59] fix moose seed (#3010) > [!NOTE] > Enhances ClickHouse seeding to handle per-table databases, validate presence on remote across multiple DBs, and refactors batching/order-by logic with expanded tests. > > - **CLI/Seeding (ClickHouse)** in `apps/framework-cli/src/cli/routines/seed_data.rs`: > - **Multi-DB awareness**: > - `build_remote_tables_query` now queries `system.tables` for `(database, name)` and supports multiple databases via `other_dbs`. > - `parse_remote_tables_response` returns `HashSet<(String, String)>` for `(db, table)`. > - `get_remote_tables` accepts `other_dbs` and returns `(db, table)` set. > - **Table selection & validation**: > - `get_tables_to_seed` returns `Vec<&Table>` and filters internal tables. > - `should_skip_table` considers per-table `database` (fallback to remote default) when checking remote existence. > - **Seeding logic refactor**: > - `seed_single_table` now takes `&Table`, derives DB from table if present, and improves logging/errors. > - `build_order_by_clause` takes `&Table` and enforces ORDER BY when batching. > - End-to-end flow (`seed_clickhouse_tables`, `seed_clickhouse_operation`) updated to use new signatures and validation path. > - **Tests**: Extensive new unit tests for multi-DB queries/parsing, skip logic, ORDER BY handling, query builders, and batching behavior; helper builders for `Table` and `InfrastructureMap` added. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 9f4994b76b2dce02c3ca02d7ed7d78f4997597f7. This will update automatically on new commits. 
Configure [here](https://cursor.com/dashboard?tab=bugbot). --- .../src/cli/routines/seed_data.rs | 328 +++++++++++++----- 1 file changed, 239 insertions(+), 89 deletions(-) diff --git a/apps/framework-cli/src/cli/routines/seed_data.rs b/apps/framework-cli/src/cli/routines/seed_data.rs index 20124a23d6..a222a2ccb6 100644 --- a/apps/framework-cli/src/cli/routines/seed_data.rs +++ b/apps/framework-cli/src/cli/routines/seed_data.rs @@ -13,6 +13,7 @@ use crate::project::Project; use crate::utilities::constants::KEY_REMOTE_CLICKHOUSE_URL; use crate::utilities::keyring::{KeyringSecretRepository, SecretRepository}; +use crate::framework::core::infrastructure::table::Table; use log::{debug, info, warn}; use std::cmp::min; use std::collections::HashSet; @@ -36,26 +37,54 @@ fn build_remote_tables_query( remote_user: &str, remote_password: &str, remote_db: &str, + other_dbs: &[&str], ) -> String { + let mut databases = vec![remote_db]; + databases.extend(other_dbs); + + let db_list = databases + .iter() + .map(|db| format!("'{}'", db)) + .collect::>() + .join(", "); + format!( - "SELECT name FROM remoteSecure('{}', 'system', 'tables', '{}', '{}') WHERE database = '{}'", - remote_host_and_port, remote_user, remote_password, remote_db + "SELECT database, name FROM remoteSecure('{}', 'system', 'tables', '{}', '{}') WHERE database IN ({})", + remote_host_and_port, remote_user, remote_password, db_list ) } -/// Parses the response from remote tables query into a HashSet -fn parse_remote_tables_response(response: &str) -> HashSet { +/// Parses the response from remote tables query into a HashSet of (database, table) tuples +fn parse_remote_tables_response(response: &str) -> HashSet<(String, String)> { response .lines() - .map(|line| line.trim().to_string()) - .filter(|table| !table.is_empty()) + .filter_map(|line| { + let line = line.trim(); + if line.is_empty() { + return None; + } + // Split by tab or whitespace to get database and table + let parts: Vec<&str> = line.split('\t').collect(); + if parts.len() >= 2 { + Some((parts[0].trim().to_string(), parts[1].trim().to_string())) + } else { + None + } + }) .collect() } /// Determines if a table should be skipped during seeding -fn should_skip_table(table_name: &str, remote_tables: &Option>) -> bool { +/// db being None means "use the remote default" +fn should_skip_table( + db: &Option, + table_name: &str, + remote_db: &str, + remote_tables: &Option>, +) -> bool { if let Some(ref remote_table_set) = remote_tables { - !remote_table_set.contains(table_name) + let db_to_check = db.as_deref().unwrap_or(remote_db); + !remote_table_set.contains(&(db_to_check.to_string(), table_name.to_string())) } else { false } @@ -128,20 +157,13 @@ async fn load_infrastructure_map(project: &Project) -> Result, total_rows: usize, batch_size: usize, ) -> Result { match order_by { None => { - let table = infra_map.tables.get(table_name).ok_or_else(|| { - RoutineFailure::error(Message::new( - "Seed".to_string(), - format!("{table_name} not found."), - )) - })?; let clause = match &table.order_by { crate::framework::core::infrastructure::table::OrderBy::Fields(v) => v .iter() @@ -159,7 +181,7 @@ fn build_order_by_clause( } else { Err(RoutineFailure::error(Message::new( "Seed".to_string(), - format!("Table {table_name} without ORDER BY. Supply ordering with --order-by to prevent the same row fetched in multiple batches."), + format!("Table {} without ORDER BY. 
Supply ordering with --order-by to prevent the same row fetched in multiple batches.", table.name), ))) } } @@ -215,23 +237,23 @@ async fn get_remote_table_count( /// Seeds a single table with batched copying async fn seed_single_table( - infra_map: &InfrastructureMap, local_clickhouse: &ClickHouseClient, remote_config: &ClickHouseConfig, - table_name: &str, + table: &Table, limit: Option, order_by: Option<&str>, ) -> Result { let remote_host_and_port = format!("{}:{}", remote_config.host, remote_config.native_port); - let local_db = &local_clickhouse.config().db_name; + let db = table.database.as_deref(); + let local_db = db.unwrap_or(&local_clickhouse.config().db_name); let batch_size: usize = 50_000; // Get total row count let remote_total = get_remote_table_count( local_clickhouse, &remote_host_and_port, - &remote_config.db_name, - table_name, + db.unwrap_or(&remote_config.db_name), + &table.name, &remote_config.user, &remote_config.password, ) @@ -243,7 +265,7 @@ async fn seed_single_table( } else { RoutineFailure::error(Message::new( "SeedSingleTable".to_string(), - format!("Failed to get row count for {table_name}: {e:?}"), + format!("Failed to get row count for {}: {e:?}", table.name), )) } })?; @@ -253,8 +275,7 @@ async fn seed_single_table( Some(l) => min(remote_total, l), }; - let order_by_clause = - build_order_by_clause(table_name, infra_map, order_by, total_rows, batch_size)?; + let order_by_clause = build_order_by_clause(table, order_by, total_rows, batch_size)?; let mut copied_total: usize = 0; let mut i: usize = 0; @@ -268,9 +289,9 @@ async fn seed_single_table( let sql = build_seeding_query(&SeedingQueryParams { local_db, - table_name, + table_name: &table.name, remote_host_and_port: &remote_host_and_port, - remote_db: &remote_config.db_name, + remote_db: db.unwrap_or(&remote_config.db_name), remote_user: &remote_config.user, remote_password: &remote_config.password, order_by_clause: &order_by_clause, @@ -278,43 +299,44 @@ async fn seed_single_table( offset: copied_total, }); - debug!("Executing SQL: table={table_name}, offset={copied_total}, limit={batch_limit}"); + debug!( + "Executing SQL: table={}, offset={copied_total}, limit={batch_limit}", + table.name + ); match local_clickhouse.execute_sql(&sql).await { Ok(_) => { copied_total += batch_limit; - debug!("{table_name}: copied batch {i}"); + debug!("{}: copied batch {i}", table.name); } Err(e) => { return Err(RoutineFailure::error(Message::new( "SeedSingleTable".to_string(), - format!("Failed to copy batch for {table_name}: {e}"), + format!("Failed to copy batch for {}: {e}", table.name), ))); } } } - Ok(format!("✓ {table_name}: copied from remote")) + Ok(format!("✓ {}: copied from remote", table.name)) } /// Gets the list of tables to seed based on parameters -fn get_tables_to_seed(infra_map: &InfrastructureMap, table_name: Option) -> Vec { - if let Some(ref t) = table_name { - info!("Seeding single table: {}", t); - vec![t.clone()] - } else { - let table_list: Vec = infra_map - .tables - .keys() - .filter(|table| !table.starts_with("_MOOSE")) - .cloned() - .collect(); - info!( - "Seeding {} tables (excluding internal Moose tables)", - table_list.len() - ); - table_list - } +fn get_tables_to_seed(infra_map: &InfrastructureMap, table_name: Option) -> Vec<&Table> { + let table_list: Vec<_> = infra_map + .tables + .values() + .filter(|table| match &table_name { + None => !table.name.starts_with("_MOOSE"), + Some(name) => &table.name == name, + }) + .collect(); + info!( + "Seeding {} tables (excluding internal Moose 
tables)", + table_list.len() + ); + + table_list } /// Performs the complete ClickHouse seeding operation including infrastructure loading, @@ -371,10 +393,12 @@ async fn seed_clickhouse_operation( } /// Get list of available tables from remote ClickHouse database +/// Returns a set of (database, table_name) tuples async fn get_remote_tables( local_clickhouse: &ClickHouseClient, remote_config: &ClickHouseConfig, -) -> Result, RoutineFailure> { + other_dbs: &[&str], +) -> Result, RoutineFailure> { let remote_host_and_port = format!("{}:{}", remote_config.host, remote_config.native_port); let sql = build_remote_tables_query( @@ -382,6 +406,7 @@ async fn get_remote_tables( &remote_config.user, &remote_config.password, &remote_config.db_name, + other_dbs, ); debug!("Querying remote tables: {}", sql); @@ -481,13 +506,23 @@ pub async fn seed_clickhouse_tables( // Get the list of tables to seed let tables = get_tables_to_seed(infra_map, table_name.clone()); + let other_dbs: Vec<&str> = tables + .iter() + .filter_map(|t| t.database.as_deref()) + .collect(); // Get available remote tables for validation (unless specific table is requested) - let remote_tables = if table_name.is_some() { + let remote_tables = if let Some(name) = table_name { + if tables.is_empty() { + return Err(RoutineFailure::error(Message::new( + "Table".to_string(), + format!("{name} not found."), + ))); + } // Skip validation if user specified a specific table None } else { - match get_remote_tables(local_clickhouse, remote_config).await { + match get_remote_tables(local_clickhouse, remote_config, &other_dbs).await { Ok(tables) => Some(tables), Err(e) => { warn!("Failed to query remote tables for validation: {:?}", e); @@ -504,28 +539,24 @@ pub async fn seed_clickhouse_tables( }; // Process each table - for table_name in tables { + for table in tables { // Check if table should be skipped due to validation - if should_skip_table(&table_name, &remote_tables) { - debug!( + if should_skip_table( + &table.database, + &table.name, + &remote_config.db_name, + &remote_tables, + ) { + info!( "Table '{}' exists locally but not on remote - skipping", - table_name + table.name ); - summary.push(format!("⚠️ {}: skipped (not found on remote)", table_name)); + summary.push(format!("⚠️ {}: skipped (not found on remote)", table.name)); continue; } // Attempt to seed the single table - match seed_single_table( - infra_map, - local_clickhouse, - remote_config, - &table_name, - limit, - order_by, - ) - .await - { + match seed_single_table(local_clickhouse, remote_config, table, limit, order_by).await { Ok(success_msg) => { summary.push(success_msg); } @@ -534,14 +565,14 @@ pub async fn seed_clickhouse_tables( // Table not found on remote, skip gracefully debug!( "Table '{}' not found on remote database - skipping", - table_name + table.name ); - summary.push(format!("⚠️ {}: skipped (not found on remote)", table_name)); + summary.push(format!("⚠️ {}: skipped (not found on remote)", table.name)); } else { // Other errors should be added as failures summary.push(format!( "✗ {}: failed to copy - {}", - table_name, e.message.details + table.name, e.message.details )); } } @@ -555,6 +586,56 @@ pub async fn seed_clickhouse_tables( #[cfg(test)] mod tests { use super::*; + use crate::framework::core::infrastructure::table::OrderBy; + use crate::framework::core::infrastructure_map::{PrimitiveSignature, PrimitiveTypes}; + use crate::framework::core::partial_infrastructure_map::LifeCycle; + use 
crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine; + use std::collections::HashMap; + + /// Helper function to create a minimal test Table + fn create_test_table(name: &str, database: Option) -> Table { + Table { + name: name.to_string(), + columns: vec![], + order_by: OrderBy::Fields(vec!["id".to_string()]), + partition_by: None, + sample_by: None, + engine: ClickhouseEngine::MergeTree, + version: None, + source_primitive: PrimitiveSignature { + name: "test".to_string(), + primitive_type: PrimitiveTypes::DataModel, + }, + metadata: None, + life_cycle: LifeCycle::default_for_deserialization(), + indexes: vec![], + database, + engine_params_hash: None, + table_settings: None, + table_ttl_setting: None, + cluster_name: None, + } + } + + /// Helper function to create a minimal test InfrastructureMap + fn create_test_infra_map(tables: HashMap) -> InfrastructureMap { + InfrastructureMap { + default_database: "default".to_string(), + topics: HashMap::new(), + api_endpoints: HashMap::new(), + tables, + views: HashMap::new(), + topic_to_table_sync_processes: HashMap::new(), + topic_to_topic_sync_processes: HashMap::new(), + function_processes: HashMap::new(), + block_db_processes: crate::framework::core::infrastructure::olap_process::OlapProcess {}, + consumption_api_web_server: crate::framework::core::infrastructure::consumption_webserver::ConsumptionApiWebServer {}, + orchestration_workers: HashMap::new(), + sql_resources: HashMap::new(), + workflows: HashMap::new(), + web_apps: HashMap::new(), + } + } #[test] fn test_validate_database_name_valid() { @@ -574,19 +655,32 @@ mod tests { #[test] fn test_build_remote_tables_query() { - let query = build_remote_tables_query("host:9440", "user", "pass", "mydb"); - let expected = "SELECT name FROM remoteSecure('host:9440', 'system', 'tables', 'user', 'pass') WHERE database = 'mydb'"; + let query = build_remote_tables_query("host:9440", "user", "pass", "mydb", &[]); + let expected = "SELECT database, name FROM remoteSecure('host:9440', 'system', 'tables', 'user', 'pass') WHERE database IN ('mydb')"; + assert_eq!(query, expected); + } + + #[test] + fn test_build_remote_tables_query_with_other_dbs() { + let query = build_remote_tables_query( + "host:9440", + "user", + "pass", + "mydb", + &["otherdb1", "otherdb2"], + ); + let expected = "SELECT database, name FROM remoteSecure('host:9440', 'system', 'tables', 'user', 'pass') WHERE database IN ('mydb', 'otherdb1', 'otherdb2')"; assert_eq!(query, expected); } #[test] fn test_parse_remote_tables_response_valid() { - let response = "table1\ntable2\n table3 \n\n"; + let response = "db1\ttable1\ndb1\ttable2\ndb2\ttable3\n\n"; let result = parse_remote_tables_response(response); assert_eq!(result.len(), 3); - assert!(result.contains("table1")); - assert!(result.contains("table2")); - assert!(result.contains("table3")); + assert!(result.contains(&("db1".to_string(), "table1".to_string()))); + assert!(result.contains(&("db1".to_string(), "table2".to_string()))); + assert!(result.contains(&("db2".to_string(), "table3".to_string()))); } #[test] @@ -599,16 +693,64 @@ mod tests { #[test] fn test_should_skip_table_when_not_in_remote() { let mut remote_tables = HashSet::new(); - remote_tables.insert("table1".to_string()); - remote_tables.insert("table2".to_string()); - - assert!(!should_skip_table("table1", &Some(remote_tables.clone()))); - assert!(should_skip_table("table3", &Some(remote_tables))); + remote_tables.insert(("mydb".to_string(), "table1".to_string())); + 
remote_tables.insert(("mydb".to_string(), "table2".to_string())); + + // Table exists in remote (using default db) + assert!(!should_skip_table( + &None, + "table1", + "mydb", + &Some(remote_tables.clone()) + )); + // Table exists in remote (with explicit db) + assert!(!should_skip_table( + &Some("mydb".to_string()), + "table1", + "mydb", + &Some(remote_tables.clone()) + )); + // Table doesn't exist in remote + assert!(should_skip_table( + &None, + "table3", + "mydb", + &Some(remote_tables) + )); } #[test] fn test_should_skip_table_when_no_validation() { - assert!(!should_skip_table("any_table", &None)); + assert!(!should_skip_table(&None, "any_table", "mydb", &None)); + } + + #[test] + fn test_should_skip_table_with_other_db() { + let mut remote_tables = HashSet::new(); + remote_tables.insert(("mydb".to_string(), "table1".to_string())); + remote_tables.insert(("otherdb".to_string(), "table2".to_string())); + + // Table exists in default db + assert!(!should_skip_table( + &None, + "table1", + "mydb", + &Some(remote_tables.clone()) + )); + // Table exists in other db + assert!(!should_skip_table( + &Some("otherdb".to_string()), + "table2", + "mydb", + &Some(remote_tables.clone()) + )); + // Table doesn't exist in specified db (even though it exists in default db) + assert!(should_skip_table( + &Some("otherdb".to_string()), + "table1", + "mydb", + &Some(remote_tables) + )); } #[test] @@ -638,34 +780,42 @@ mod tests { #[test] fn test_build_order_by_clause_with_provided_order() { - let infra_map = InfrastructureMap::default(); + let table = create_test_table("my_table", None); - let result = build_order_by_clause("my_table", &infra_map, Some("id ASC"), 1000, 500); + let result = build_order_by_clause(&table, Some("id ASC"), 1000, 500); assert!(result.is_ok()); assert_eq!(result.unwrap(), "ORDER BY id ASC"); } #[test] - fn test_build_order_by_clause_table_not_found() { - let infra_map = InfrastructureMap::default(); + fn test_build_order_by_clause_without_order_by_and_no_provided_order() { + let mut table = create_test_table("my_table", None); + table.order_by = OrderBy::Fields(vec![]); // No ORDER BY fields - let result = build_order_by_clause("nonexistent_table", &infra_map, None, 1000, 500); + let result = build_order_by_clause(&table, None, 1000, 500); assert!(result.is_err()); if let Err(e) = result { assert_eq!(e.message.action, "Seed"); - assert!(e.message.details.contains("not found")); + assert!(e.message.details.contains("without ORDER BY")); } } #[test] fn test_get_tables_to_seed_single_table() { - let infra_map = InfrastructureMap::default(); + let mut tables = HashMap::new(); + tables.insert( + "specific_table".to_string(), + create_test_table("specific_table", None), + ); + + let infra_map = create_test_infra_map(tables); let result = get_tables_to_seed(&infra_map, Some("specific_table".to_string())); assert_eq!(result.len(), 1); - assert_eq!(result[0], "specific_table"); + assert_eq!(result[0].name, "specific_table"); + assert_eq!(result[0].database, None); } #[test] From 0502afa7160817df9b722dec9c0ffb184a3a5f50 Mon Sep 17 00:00:00 2001 From: Dave Seleno <958603+onelesd@users.noreply.github.com> Date: Wed, 19 Nov 2025 14:26:32 -0800 Subject: [PATCH 37/59] fix sequence of log messages (#3016) > [!NOTE] > Reorders the "Executing SQL" log to occur only after format-query handling (i.e., only when actually executing), keeping validation before execution. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 7f61b6b7e08ead05ed9d8836b55d96a42fe03751. 
This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). --- apps/framework-cli/src/cli/routines/query.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/framework-cli/src/cli/routines/query.rs b/apps/framework-cli/src/cli/routines/query.rs index 06cb651a41..bb060310ff 100644 --- a/apps/framework-cli/src/cli/routines/query.rs +++ b/apps/framework-cli/src/cli/routines/query.rs @@ -87,7 +87,6 @@ pub async fn query( prettify: bool, ) -> Result { let sql_query = get_sql_input(sql, file)?; - info!("Executing SQL: {}", sql_query); // Validate SQL syntax before any operation use crate::cli::routines::format_query::validate_sql; @@ -112,6 +111,8 @@ pub async fn query( ))); } + info!("Executing SQL: {}", sql_query); + // Get ClickHouse connection pool let pool = get_pool(&project.clickhouse_config); From 6a15e595925cc3fd0c70bb9a2db044a9354013ce Mon Sep 17 00:00:00 2001 From: George Leung Date: Wed, 19 Nov 2025 17:15:36 -0800 Subject: [PATCH 38/59] fix order by change detection (#3017) old code fails when `target_table.order_by` == `target_table` primary key == actual table primary key, actual table `order_by` == `[]` --- > [!NOTE] > Adds `order_by_with_fallback` and uses it to compare/normalize ORDER BY (fallback to primary key for MergeTree), fixing false diffs; updates diff logic and tests. > > - **Core Infrastructure (`apps/framework-cli/src/framework/core/infrastructure/table.rs`)**: > - Add `Table::order_by_with_fallback()` to derive ORDER BY from primary keys when empty for MergeTree engines. > - Update `Table::order_by_equals()` to compare using fallback. > - **Infra Map (`apps/framework-cli/src/framework/core/infrastructure_map.rs`)**: > - Replace ad-hoc ORDER BY diff logic with `table.order_by_equals(target_table)`. > - `InfrastructureMap::normalize()` now sets `table.order_by` via `order_by_with_fallback()` when empty. > - **Tests**: > - Add test for ORDER BY equality with implicit primary key and for non-MergeTree (S3) behavior. > - Adjust existing diff paths to rely on new equality/normalization. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit eaa16da9aaabce8fc8e4a79c48ca11f9d2f93cb5. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). 
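Conceptually, the fix normalizes both sides of the comparison before diffing ORDER BY. A minimal TypeScript sketch of that rule is below; the interface and function names are illustrative only, and the authoritative implementation is the Rust `order_by_with_fallback` in the diff that follows:

```ts
// Sketch of the fallback rule described above: an empty ORDER BY on a
// MergeTree-family table is treated as "ORDER BY <primary key columns>"
// before two table definitions are compared, so an implicit and an explicit
// primary-key ordering no longer produce a false diff.
interface TableShape {
  orderBy: string[];
  primaryKey: string[];
  isMergeTreeFamily: boolean;
}

function orderByWithFallback(t: TableShape): string[] {
  return t.orderBy.length === 0 && t.isMergeTreeFamily ? t.primaryKey : t.orderBy;
}

function orderByEquals(a: TableShape, b: TableShape): boolean {
  const left = orderByWithFallback(a);
  const right = orderByWithFallback(b);
  return left.length === right.length && left.every((col, i) => col === right[i]);
}
```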
--- .../framework/core/infrastructure/table.rs | 169 ++++++++++++++++-- .../src/framework/core/infrastructure_map.rs | 73 +------- 2 files changed, 164 insertions(+), 78 deletions(-) diff --git a/apps/framework-cli/src/framework/core/infrastructure/table.rs b/apps/framework-cli/src/framework/core/infrastructure/table.rs index 200251a56b..a21336a8e4 100644 --- a/apps/framework-cli/src/framework/core/infrastructure/table.rs +++ b/apps/framework-cli/src/framework/core/infrastructure/table.rs @@ -402,18 +402,27 @@ impl Table { .collect() } + pub fn order_by_with_fallback(&self) -> OrderBy { + // table (in infra map created by older version of moose) may leave order_by unspecified, + // but the implicit order_by from primary keys can be the same + // ONLY for the MergeTree family + // S3 supports ORDER BY but does not auto set ORDER BY from PRIMARY KEY + // Buffer, S3Queue, and Distributed don't support ORDER BY + if self.order_by.is_empty() && self.engine.is_merge_tree_family() { + OrderBy::Fields( + self.primary_key_columns() + .iter() + .map(|c| c.to_string()) + .collect(), + ) + } else { + self.order_by.clone() + } + } + pub fn order_by_equals(&self, target: &Table) -> bool { self.order_by == target.order_by - // target may leave order_by unspecified, - // but the implicit order_by from primary keys can be the same - // ONLY for engines that support ORDER BY (MergeTree family and S3) - // Buffer, S3Queue, and Distributed don't support ORDER BY - || (target.order_by.is_empty() - && target.engine.supports_order_by() - && matches!( - &self.order_by, - OrderBy::Fields(v) if v.iter().map(String::as_str).collect::>() == target.primary_key_columns() - )) + || self.order_by_with_fallback() == target.order_by_with_fallback() } pub fn to_proto(&self) -> ProtoTable { @@ -1712,4 +1721,144 @@ mod tests { }; assert_eq!(table7.id(DEFAULT_DATABASE_NAME), "local_users_1_0"); } + + #[test] + fn test_order_by_equals_with_implicit_primary_key() { + use crate::framework::core::infrastructure_map::PrimitiveTypes; + + // Test case: actual table has empty order_by (implicit primary key), + // target table has explicit order_by that matches the primary key. + // This should be considered equal for MergeTree engines. 
+ + let columns = vec![ + Column { + name: "id".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: true, + default: None, + annotations: vec![], + comment: None, + ttl: None, + }, + Column { + name: "name".to_string(), + data_type: ColumnType::String, + required: true, + unique: false, + primary_key: false, + default: None, + annotations: vec![], + comment: None, + ttl: None, + }, + ]; + + // Actual table from database: empty order_by (implicitly uses primary key) + let actual_table = Table { + name: "test_table".to_string(), + columns: columns.clone(), + order_by: OrderBy::Fields(vec![]), // Empty - will fall back to primary key + partition_by: None, + sample_by: None, + engine: ClickhouseEngine::MergeTree, + version: None, + source_primitive: PrimitiveSignature { + name: "test".to_string(), + primitive_type: PrimitiveTypes::DataModel, + }, + metadata: None, + life_cycle: LifeCycle::FullyManaged, + engine_params_hash: None, + table_settings: None, + indexes: vec![], + database: None, + table_ttl_setting: None, + cluster_name: None, + }; + + // Target table from code: explicit order_by that matches primary key + let target_table = Table { + name: "test_table".to_string(), + columns: columns.clone(), + order_by: OrderBy::Fields(vec!["id".to_string()]), // Explicit order_by + partition_by: None, + sample_by: None, + engine: ClickhouseEngine::MergeTree, + version: None, + source_primitive: PrimitiveSignature { + name: "test".to_string(), + primitive_type: PrimitiveTypes::DataModel, + }, + metadata: None, + life_cycle: LifeCycle::FullyManaged, + engine_params_hash: None, + table_settings: None, + indexes: vec![], + database: None, + table_ttl_setting: None, + cluster_name: None, + }; + + // These should be equal because: + // - actual_table has empty order_by but MergeTree engine + // - actual_table.order_by_with_fallback() returns ["id"] (from primary key) + // - target_table.order_by is ["id"] + // - target_table.order_by_with_fallback() returns ["id"] + // - ["id"] == ["id"] + assert!( + actual_table.order_by_equals(&target_table), + "actual table with empty order_by should equal target with explicit primary key order_by" + ); + + // Reverse direction should also work + assert!( + target_table.order_by_equals(&actual_table), + "target table with explicit primary key order_by should equal actual with empty order_by" + ); + + // Test with different order_by - should NOT be equal + let different_target = Table { + order_by: OrderBy::Fields(vec!["name".to_string()]), + ..target_table.clone() + }; + assert!( + !actual_table.order_by_equals(&different_target), + "tables with different order_by should not be equal" + ); + + // Test with non-MergeTree engine (S3) - empty order_by should stay empty + let actual_s3 = Table { + engine: ClickhouseEngine::S3 { + path: "s3://bucket/path".to_string(), + format: "Parquet".to_string(), + aws_access_key_id: None, + aws_secret_access_key: None, + compression: None, + partition_strategy: None, + partition_columns_in_data_file: None, + }, + ..actual_table.clone() + }; + + let target_s3 = Table { + engine: ClickhouseEngine::S3 { + path: "s3://bucket/path".to_string(), + format: "Parquet".to_string(), + aws_access_key_id: None, + aws_secret_access_key: None, + compression: None, + partition_strategy: None, + partition_columns_in_data_file: None, + }, + ..target_table.clone() + }; + + // For S3 engine, empty order_by doesn't fall back to primary key + assert!( + !actual_s3.order_by_equals(&target_s3), + "S3 engine 
should not infer order_by from primary key" + ); + } } diff --git a/apps/framework-cli/src/framework/core/infrastructure_map.rs b/apps/framework-cli/src/framework/core/infrastructure_map.rs index 6cb1945c1b..d1e5670839 100644 --- a/apps/framework-cli/src/framework/core/infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/infrastructure_map.rs @@ -1826,34 +1826,7 @@ impl InfrastructureMap { before: normalized_table.partition_by.clone(), after: normalized_target.partition_by.clone(), }; - - // Compute ORDER BY changes - fn order_by_from_primary_key(target_table: &Table) -> Vec { - target_table - .columns - .iter() - .filter_map(|c| { - if c.primary_key { - Some(c.name.clone()) - } else { - None - } - }) - .collect() - } - - let order_by_changed = table.order_by != target_table.order_by - // target may leave order_by unspecified, - // but the implicit order_by from primary keys can be the same - // ONLY for engines that support ORDER BY (MergeTree family and S3) - // Buffer, S3Queue, and Distributed don't support ORDER BY - && !(target_table.order_by.is_empty() - && target_table.engine.supports_order_by() - && matches!( - &table.order_by, - OrderBy::Fields(v) - if *v == order_by_from_primary_key(target_table) - )); + let order_by_changed = !table.order_by_equals(target_table); // Detect engine change (e.g., MergeTree -> ReplacingMergeTree) let engine_changed = table.engine != target_table.engine; @@ -2130,32 +2103,7 @@ impl InfrastructureMap { } } - fn order_by_from_primary_key(target_table: &Table) -> Vec { - target_table - .columns - .iter() - .filter_map(|c| { - if c.primary_key { - Some(c.name.clone()) - } else { - None - } - }) - .collect() - } - - let order_by_changed = table.order_by != target_table.order_by - // target may leave order_by unspecified, - // but the implicit order_by from primary keys can be the same - // ONLY for engines that support ORDER BY (MergeTree family and S3) - // Buffer, S3Queue, and Distributed don't support ORDER BY - && !(target_table.order_by.is_empty() - && target_table.engine.supports_order_by() - && matches!( - &table.order_by, - crate::framework::core::infrastructure::table::OrderBy::Fields(v) - if *v == order_by_from_primary_key(target_table) - )); + let order_by_changed = !table.order_by_equals(target_table); let order_by_change = if order_by_changed { OrderByChange { @@ -2578,7 +2526,7 @@ impl InfrastructureMap { /// This is needed because older CLI versions didn't persist order_by when it was /// derived from primary key columns. 
pub fn normalize(mut self) -> Self { - use crate::framework::core::infrastructure::table::{ColumnType, OrderBy}; + use crate::framework::core::infrastructure::table::ColumnType; self.tables = self .tables @@ -2587,19 +2535,8 @@ impl InfrastructureMap { // Fall back to primary key columns if order_by is empty for MergeTree engines // This ensures backward compatibility when order_by isn't explicitly set // We only do this for MergeTree family to avoid breaking S3 tables - if table.order_by.is_empty() && table.engine.is_merge_tree_family() { - let primary_key_columns: Vec = table - .columns - .iter() - .filter_map(|c| { - if c.primary_key { - Some(c.name.clone()) - } else { - None - } - }) - .collect(); - table.order_by = OrderBy::Fields(primary_key_columns); + if table.order_by.is_empty() { + table.order_by = table.order_by_with_fallback(); } // Normalize columns: ClickHouse doesn't support Nullable(Array(...)) From 4c189bd1f7ab85e26e3f3c9edfd698ab4e29e46a Mon Sep 17 00:00:00 2001 From: George Leung Date: Fri, 21 Nov 2025 15:05:15 -0800 Subject: [PATCH 39/59] stop auto parsing to `Date` which drops microseconds (#3018) > [!NOTE] > Introduce schema-driven datetime parsing to avoid JS Date truncation, add string-based DateTime types, update runners for precision and at-least-once processing, and add tests/docs. > > - **TypeScript runtime/lib**: > - Add `DateTimeString` and `DateTime64String
<P>
    ` types and export them. > - Introduce schema-driven JSON parsing (`utilities/json.ts`): build/apply field mutations to parse only true DateTime fields; annotate string-based date fields with `stringDate`. > - Update `typeConvert` to mark `date-time` string fields with `stringDate` and handle precisions. > - Update streaming `runner` to precompute field mutations from source stream columns and apply them per message; stop using global date reviver. > - Enhance `dmv2/internal.getStreamingFunctions` to return source `Column[]` alongside handlers. > - **Python runtime**: > - Kafka consumer sets `enable_auto_commit=False`, flushes producer and commits after processing; add per-message partition/offset logging. > - **Templates/Tests**: > - Add DateTime precision models/transforms for TS and PY; new e2e tests verify microsecond/nanosecond handling and string preservation. > - Minor template updates (geometry/array/DLQ unaffected). > - **Docs**: > - Expand TS reference with Date/DateTime guidance, examples, and comparison table for `DateTime*` types. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 8e8504e3eb092cdb0dbe9a641502f0cee272dca3. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). --- apps/framework-cli-e2e/test/templates.test.ts | 216 +++++++++++++++ .../pages/moose/reference/ts-moose-lib.mdx | 113 ++++++++ .../streaming/streaming_function_runner.py | 10 + .../ts-moose-lib/src/browserCompatible.ts | 2 + .../src/dataModels/typeConvert.ts | 3 + packages/ts-moose-lib/src/dataModels/types.ts | 9 + packages/ts-moose-lib/src/dmv2/internal.ts | 15 +- .../src/streaming-functions/runner.ts | 72 +++-- packages/ts-moose-lib/src/utilities/json.ts | 257 ++++++++++++++++++ templates/python-tests/src/ingest/models.py | 125 +++++++-- .../python-tests/src/ingest/transforms.py | 54 +++- .../typescript-tests/src/ingest/models.ts | 38 +++ .../typescript-tests/src/ingest/transforms.ts | 72 +++++ 13 files changed, 948 insertions(+), 38 deletions(-) diff --git a/apps/framework-cli-e2e/test/templates.test.ts b/apps/framework-cli-e2e/test/templates.test.ts index 20d50826fb..96eaf2f805 100644 --- a/apps/framework-cli-e2e/test/templates.test.ts +++ b/apps/framework-cli-e2e/test/templates.test.ts @@ -1078,6 +1078,107 @@ const createTemplateTestSuite = (config: TemplateTestConfig) => { throw new Error("JSON payload not stored as expected"); } }); + + // DateTime precision test for TypeScript + it("should preserve microsecond precision with DateTime64String types via streaming transform", async function () { + this.timeout(TIMEOUTS.TEST_SETUP_MS); + + const testId = randomUUID(); + const now = new Date(); + // Create ISO string with microseconds: 2024-01-15T10:30:00.123456Z + const timestampWithMicroseconds = now + .toISOString() + .replace(/\.\d{3}Z$/, ".123456Z"); + // Nanoseconds + const timestampWithNanoseconds = now + .toISOString() + .replace(/\.\d{3}Z$/, ".123456789Z"); + + console.log( + `Testing DateTime precision with timestamp: ${timestampWithMicroseconds}`, + ); + + // Ingest to DateTimePrecisionInput (which has a transform to Output) + const response = await fetch( + `${SERVER_CONFIG.url}/ingest/DateTimePrecisionInput`, + { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + id: testId, + createdAt: now.toISOString(), + timestampMs: now.toISOString(), + timestampUsDate: timestampWithMicroseconds, + timestampUsString: timestampWithMicroseconds, + timestampNs: 
timestampWithNanoseconds, + createdAtString: now.toISOString(), + }), + }, + ); + + if (!response.ok) { + const text = await response.text(); + throw new Error( + `Failed to ingest DateTimePrecisionInput: ${response.status}: ${text}`, + ); + } + + // Wait for transform to process and write to output table + await waitForDBWrite( + devProcess!, + "DateTimePrecisionOutput", + 1, + 60_000, + "local", + ); + + // Query the output data and verify precision + const client = createClient(CLICKHOUSE_CONFIG); + const result = await client.query({ + query: ` + SELECT + id, + toString(createdAt) as createdAt, + toString(timestampMs) as timestampMs, + toString(timestampUsDate) as timestampUsDate, + toString(timestampUsString) as timestampUsString, + toString(timestampNs) as timestampNs, + toString(createdAtString) as createdAtString + FROM local.DateTimePrecisionOutput + WHERE id = '${testId}' + `, + format: "JSONEachRow", + }); + + const data: any[] = await result.json(); + + if (data.length === 0) { + throw new Error( + `No data found for DateTimePrecisionOutput with id ${testId}`, + ); + } + + const row = data[0]; + console.log("Retrieved row:", row); + + // Verify that DateTime64String<6> preserves microseconds + if (!row.timestampUsString.includes(".123456")) { + throw new Error( + `Expected timestampUsString to preserve microseconds (.123456), got: ${row.timestampUsString}`, + ); + } + + // Verify that DateTime64String<9> preserves nanoseconds + if (!row.timestampNs.includes(".123456789")) { + throw new Error( + `Expected timestampNs to preserve nanoseconds (.123456789), got: ${row.timestampNs}`, + ); + } + + console.log( + "✅ DateTime precision test passed - microseconds preserved", + ); + }); } } else { it("should successfully ingest data and verify through consumption API", async function () { @@ -1341,6 +1442,121 @@ const createTemplateTestSuite = (config: TemplateTestConfig) => { const apiData = await apiResponse.json(); expect(apiData).to.be.an("array"); }); + + // DateTime precision test for Python + it("should preserve microsecond precision with clickhouse_datetime64 annotations via streaming transform (PY)", async function () { + this.timeout(TIMEOUTS.TEST_SETUP_MS); + + const testId = randomUUID(); + const now = new Date(); + // Create ISO string with microseconds: 2024-01-15T10:30:00.123456Z + const timestampWithMicroseconds = now + .toISOString() + .replace(/\.\d{3}Z$/, ".123456Z"); + // Nanoseconds + const timestampWithNanoseconds = now + .toISOString() + .replace(/\.\d{3}Z$/, ".123456789Z"); + + console.log( + `Testing DateTime precision (Python) with timestamp: ${timestampWithMicroseconds}`, + ); + + const payload = { + id: testId, + created_at: now.toISOString(), + timestamp_ms: timestampWithMicroseconds, + timestamp_us: timestampWithMicroseconds, + timestamp_ns: timestampWithNanoseconds, + }; + console.log("Sending payload:", JSON.stringify(payload, null, 2)); + + // Ingest to DateTimePrecisionInput (which has a transform to Output) + const response = await fetch( + `${SERVER_CONFIG.url}/ingest/datetimeprecisioninput`, + { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(payload), + }, + ); + + if (!response.ok) { + const text = await response.text(); + throw new Error( + `Failed to ingest DateTimePrecisionInput (Python): ${response.status}: ${text}`, + ); + } + + // Wait for transform to process and write to output table + await waitForDBWrite( + devProcess!, + "DateTimePrecisionOutput", + 1, + 60_000, + "local", + ); + + // Query the 
output data and verify precision + const client = createClient(CLICKHOUSE_CONFIG); + const result = await client.query({ + query: ` + SELECT + id, + toString(created_at) as created_at, + toString(timestamp_ms) as timestamp_ms, + toString(timestamp_us) as timestamp_us, + toString(timestamp_ns) as timestamp_ns + FROM local.DateTimePrecisionOutput + WHERE id = '${testId}' + `, + format: "JSONEachRow", + }); + + const data: any[] = await result.json(); + + if (data.length === 0) { + throw new Error( + `No data found for DateTimePrecisionOutput (Python) with id ${testId}`, + ); + } + + const row = data[0]; + console.log("Retrieved row (Python):", JSON.stringify(row, null, 2)); + + // Verify that datetime with clickhouse_datetime64(6) preserves microseconds + if (!row.timestamp_us.includes(".123456")) { + throw new Error( + `Expected timestamp_us to preserve microseconds (.123456), got: ${row.timestamp_us}`, + ); + } + + // Note: Python datetime truncates nanoseconds to microseconds, so we expect .123456 not .123456789 + // Log if nanoseconds were truncated (expected behavior) + if (row.timestamp_ns.includes(".123456789")) { + console.log( + "✅ Nanoseconds preserved in ClickHouse:", + row.timestamp_ns, + ); + } else if (row.timestamp_ns.includes(".123456")) { + console.log( + "⚠️ Nanoseconds truncated to microseconds (expected Python behavior):", + row.timestamp_ns, + ); + } else { + console.log( + "❌ No sub-second precision found in timestamp_ns:", + row.timestamp_ns, + ); + throw new Error( + `Expected timestamp_ns to have at least microseconds (.123456), got: ${row.timestamp_ns}`, + ); + } + + console.log( + "✅ DateTime precision test passed (Python) - microseconds preserved", + ); + }); } } }); diff --git a/apps/framework-docs/src/pages/moose/reference/ts-moose-lib.mdx b/apps/framework-docs/src/pages/moose/reference/ts-moose-lib.mdx index 854b9f05f7..a99aa60063 100644 --- a/apps/framework-docs/src/pages/moose/reference/ts-moose-lib.mdx +++ b/apps/framework-docs/src/pages/moose/reference/ts-moose-lib.mdx @@ -40,6 +40,119 @@ interface ApiUtil { } ``` +## Date and Time Types + +Type aliases that map JavaScript temporal data to ClickHouse DateTime types. +Choose between Date objects or strings based on the precision you need to persist in ClickHouse. + +### `DateTime` +A type alias for JavaScript's native `Date` object that maps to ClickHouse `DateTime` (second precision). + +```ts +interface Event { + // Stored as ClickHouse DateTime (second precision) + // Runtime: JavaScript Date object + createdAt: DateTime; +} +``` + +**Important:** While JavaScript `Date` objects can hold millisecond precision, only the second portion will be persisted in ClickHouse since the database type is `DateTime`. + +**Use when:** +- Second precision is sufficient for your use case +- You need standard JavaScript Date objects in your application +- Working with user-visible timestamps where sub-second precision isn't critical + +### `DateTime64
<P>
    ` +A type for datetime values with configurable precision (0-9 decimal places) that maps to ClickHouse `DateTime64(P)`. +At runtime, JavaScript `Date` objects are used. + +```ts +interface Event { + // ✅ Safe: ClickHouse DateTime64(3), JavaScript Date holds milliseconds + timestamp: DateTime64<3>; + + // ⚠️ Precision loss: ClickHouse stores microseconds, but Date truncates to milliseconds + highPrecisionTimestamp: DateTime64<6>; +} +``` + +**Precision Limits:** +- **P ≤ 3** (milliseconds): JavaScript `Date` can represent this precision - **use this safely** +- **P > 3** (microseconds and beyond): ClickHouse stores the full precision, but `Date` objects truncate to milliseconds during JSON parsing - **precision loss occurs** + +**Use when:** +- You need millisecond precision (P ≤ 3) with Date objects +- You're okay with Date API in your application code + +**Don't use when:** +- You need microsecond or nanosecond precision - use `DateTime64String` instead + +### `DateTimeString` +A string type that maps to ClickHouse `DateTime` (second precision) but stays as a string at runtime instead of being parsed into a `Date` object. + +```ts +interface Event { + // Stored as ClickHouse DateTime (second precision) + // Runtime: ISO 8601 string (e.g., "2024-01-15T10:30:00Z") + createdAt: DateTimeString; +} +``` + +**Use when:** +- You prefer string-based datetime handling in your application code +- You don't need Date object methods + +### `DateTime64String
<P>
    ` +A string type that maps to ClickHouse `DateTime64(P)` and preserves full precision by keeping datetimes as strings at runtime instead of parsing into `Date` objects. + +```ts +interface Event { + // ✅ Full precision preserved: ClickHouse stores microseconds, runtime keeps string + highPrecisionTimestamp: DateTime64String<6>; // e.g., "2024-01-15T10:30:00.123456Z" + + // ✅ Full precision preserved: ClickHouse stores nanoseconds, runtime keeps string + veryHighPrecisionTimestamp: DateTime64String<9>; // e.g., "2024-01-15T10:30:00.123456789Z" +} +``` + +**Use when:** +- **You need microsecond (P≥4) or higher precision** - this is the only type that preserves it +- You're ingesting high-precision timestamps from external systems +- You're okay working with ISO 8601 strings instead of Date objects + +**Why strings?** JavaScript `Date` objects truncate to milliseconds. By keeping timestamps as strings, you preserve the full precision that ClickHouse stores. + +### Comparison Table + +| Type | ClickHouse | Runtime Type | Precision | Use Case | +|------|-----------|--------------|-----------|----------| +| `DateTime` | `DateTime` | `Date` | 1 second | Standard timestamps | +| `DateTime64<3>` | `DateTime64(3)` | `Date` | 1 millisecond | JavaScript-compatible high precision | +| `DateTime64<6>` | `DateTime64(6)` | `Date` | ⚠️ ~1 millisecond* | Not recommended - use DateTime64String | +| `DateTimeString` | `DateTime` | `string` | 1 second | String-based datetime handling | +| `DateTime64String<6>` | `DateTime64(6)` | `string` | 1 microsecond | High-precision requirements | +| `DateTime64String<9>` | `DateTime64(9)` | `string` | 1 nanosecond | Ultra-high-precision requirements | + +*Loses microseconds during JSON parsing + +### Example: Choosing the Right Type +```ts +interface Telemetry { + // User-visible timestamp - Date object is fine + createdAt: DateTime; + + // High-frequency sensor data - need microseconds + sensorTimestamp: DateTime64String<6>; + + // API response timestamp - millisecond precision + processedAt: DateTime64<3>; + + // Compliance log - preserve exact string format + auditTimestamp: DateTimeString<9>; +} +``` + ## Infrastructure Components ### `OlapTable` diff --git a/packages/py-moose-lib/moose_lib/streaming/streaming_function_runner.py b/packages/py-moose-lib/moose_lib/streaming/streaming_function_runner.py index b8bf7e522a..87075aa027 100644 --- a/packages/py-moose-lib/moose_lib/streaming/streaming_function_runner.py +++ b/packages/py-moose-lib/moose_lib/streaming/streaming_function_runner.py @@ -295,6 +295,7 @@ def create_consumer() -> KafkaConsumer: Create a Kafka consumer configured for the source topic. Handles SASL authentication if configured. + Disables auto-commit to ensure at-least-once processing semantics. 
Returns: Configured KafkaConsumer instance @@ -316,6 +317,7 @@ def _sr_json_deserializer(m: bytes): sasl_password=sasl_config.get("password"), sasl_mechanism=sasl_config.get("mechanism"), security_protocol=args.security_protocol, + enable_auto_commit=False, # Disable auto-commit for at-least-once semantics ) consumer = get_kafka_consumer(**kwargs) return consumer @@ -426,6 +428,7 @@ def process_messages(): # Process each partition's messages for partition_messages in messages.values(): for message in partition_messages: + log(f"Message partition={message.partition} offset={message.offset}") if not running.is_set(): return @@ -488,6 +491,13 @@ def process_messages(): with metrics_lock: metrics['bytes_count'] += len(record) metrics['count_out'] += 1 + + # Flush producer to ensure messages are sent before committing + producer.flush() + + # Commit offset only after successful processing and flushing + # This ensures at-least-once delivery semantics + consumer.commit() except Exception as e: cli_log(CliLogData(action="Function", message=str(e), message_type="Error")) diff --git a/packages/ts-moose-lib/src/browserCompatible.ts b/packages/ts-moose-lib/src/browserCompatible.ts index c181e855ab..d998aeda2b 100644 --- a/packages/ts-moose-lib/src/browserCompatible.ts +++ b/packages/ts-moose-lib/src/browserCompatible.ts @@ -65,6 +65,8 @@ export { // Added friendly aliases and numeric helpers DateTime, DateTime64, + DateTimeString, + DateTime64String, FixedString, Float32, Float64, diff --git a/packages/ts-moose-lib/src/dataModels/typeConvert.ts b/packages/ts-moose-lib/src/dataModels/typeConvert.ts index fd57ca36b5..e66da7b6a9 100644 --- a/packages/ts-moose-lib/src/dataModels/typeConvert.ts +++ b/packages/ts-moose-lib/src/dataModels/typeConvert.ts @@ -21,6 +21,7 @@ import { MapType, } from "./dataModelTypes"; import { ClickHouseNamedTuple, DecimalRegex } from "./types"; +import { STRING_DATE_ANNOTATION } from "../utilities/json"; const dateType = (checker: TypeChecker) => checker @@ -466,6 +467,8 @@ const handleStringType = ( precision = precisionType.value; } } + // Mark this as a string-based date field so it won't be parsed to Date at runtime + annotations.push([STRING_DATE_ANNOTATION, true]); return `DateTime(${precision})`; } else if (isStringLiteral(valueTypeLiteral, checker, "date")) { let size = 4; diff --git a/packages/ts-moose-lib/src/dataModels/types.ts b/packages/ts-moose-lib/src/dataModels/types.ts index 31a725d929..25180b5bd0 100644 --- a/packages/ts-moose-lib/src/dataModels/types.ts +++ b/packages/ts-moose-lib/src/dataModels/types.ts @@ -46,6 +46,15 @@ export type LowCardinality = { export type DateTime = Date; export type DateTime64

<P extends number> = Date & ClickHousePrecision<P>; +export type DateTimeString = string & tags.Format<"date-time">; +/** + * JS Date objects cannot hold microsecond precision. + * Use string as the runtime type to avoid losing information. + */ +export type DateTime64String<P extends number> = string & + tags.Format<"date-time"> & + ClickHousePrecision<P>
    ; + // Numeric convenience tags mirroring ClickHouse integer and float families export type Float32 = number & ClickHouseFloat<"float32">; export type Float64 = number & ClickHouseFloat<"float64">; diff --git a/packages/ts-moose-lib/src/dmv2/internal.ts b/packages/ts-moose-lib/src/dmv2/internal.ts index 060adc12fc..75c67c52a4 100644 --- a/packages/ts-moose-lib/src/dmv2/internal.ts +++ b/packages/ts-moose-lib/src/dmv2/internal.ts @@ -963,7 +963,7 @@ const loadIndex = () => { * * @returns A Map where keys are unique identifiers for transformations/consumers * (e.g., "sourceStream_destStream_version", "sourceStream__version") - * and values are the corresponding handler functions. + * and values are tuples containing: [handler function, config, source stream columns] */ export const getStreamingFunctions = async () => { loadIndex(); @@ -971,7 +971,11 @@ export const getStreamingFunctions = async () => { const registry = getMooseInternal(); const transformFunctions = new Map< string, - [(data: unknown) => unknown, TransformConfig | ConsumerConfig] + [ + (data: unknown) => unknown, + TransformConfig | ConsumerConfig, + Column[], + ] >(); registry.streams.forEach((stream) => { @@ -979,7 +983,11 @@ export const getStreamingFunctions = async () => { transforms.forEach(([_, transform, config]) => { const transformFunctionKey = `${stream.name}_${destinationName}${config.version ? `_${config.version}` : ""}`; compilerLog(`getStreamingFunctions: ${transformFunctionKey}`); - transformFunctions.set(transformFunctionKey, [transform, config]); + transformFunctions.set(transformFunctionKey, [ + transform, + config, + stream.columnArray, + ]); }); }); @@ -988,6 +996,7 @@ export const getStreamingFunctions = async () => { transformFunctions.set(consumerFunctionKey, [ consumer.consumer, consumer.config, + stream.columnArray, ]); }); }); diff --git a/packages/ts-moose-lib/src/streaming-functions/runner.ts b/packages/ts-moose-lib/src/streaming-functions/runner.ts index bd492d4586..67e937a743 100755 --- a/packages/ts-moose-lib/src/streaming-functions/runner.ts +++ b/packages/ts-moose-lib/src/streaming-functions/runner.ts @@ -35,7 +35,12 @@ import { import { Cluster } from "../cluster-utils"; import { getStreamingFunctions } from "../dmv2/internal"; import type { ConsumerConfig, TransformConfig, DeadLetterQueue } from "../dmv2"; -import { jsonDateReviver } from "../utilities/json"; +import { + buildFieldMutationsFromColumns, + mutateParsedJson, + type FieldMutations, +} from "../utilities/json"; +import type { Column } from "../dataModels/dataModelTypes"; const HOSTNAME = process.env.HOSTNAME; const AUTO_COMMIT_INTERVAL_MS = 5000; @@ -458,15 +463,17 @@ const stopConsumer = async ( * @param streamingFunctionWithConfigList - functions (with their configs) that transforms input message data * @param message - Kafka message to be processed * @param producer - Kafka producer for sending dead letter + * @param fieldMutations - Pre-built field mutations for data transformations * @returns Promise resolving to array of transformed messages or undefined if processing fails * * The function will: * 1. Check for null/undefined message values - * 2. Parse the message value as JSON with date handling - * 3. Pass parsed data through the streaming function - * 4. Convert transformed data back to string format - * 5. Handle both single and array return values - * 6. Log any processing errors + * 2. Parse the message value as JSON + * 3. Apply field mutations (e.g., date parsing) using pre-built configuration + * 4. 
Pass parsed data through the streaming function + * 5. Convert transformed data back to string format + * 6. Handle both single and array return values + * 7. Log any processing errors */ const handleMessage = async ( logger: Logger, @@ -475,6 +482,7 @@ const handleMessage = async ( streamingFunctionWithConfigList: [StreamingFunction, TransformConfig][], message: KafkaMessage, producer: Producer, + fieldMutations?: FieldMutations, ): Promise => { if (message.value === undefined || message.value === null) { logger.log(`Received message with no value, skipping...`); @@ -491,7 +499,9 @@ const handleMessage = async ( ) { payloadBuffer = payloadBuffer.subarray(5); } - const parsedData = JSON.parse(payloadBuffer.toString(), jsonDateReviver); + // Parse JSON then apply field mutations using pre-built configuration + const parsedData = JSON.parse(payloadBuffer.toString()); + mutateParsedJson(parsedData, fieldMutations); const transformedData = await Promise.all( streamingFunctionWithConfigList.map(async ([fn, config]) => { try { @@ -735,15 +745,18 @@ function loadStreamingFunction(functionFilePath: string) { async function loadStreamingFunctionV2( sourceTopic: TopicConfig, targetTopic?: TopicConfig, -) { +): Promise<{ + functions: [StreamingFunction, TransformConfig | ConsumerConfig][]; + fieldMutations: FieldMutations | undefined; +}> { const transformFunctions = await getStreamingFunctions(); const transformFunctionKey = `${topicNameToStreamName(sourceTopic)}_${targetTopic ? topicNameToStreamName(targetTopic) : ""}`; - const matchingFunctions = Array.from(transformFunctions.entries()) - .filter(([key]) => key.startsWith(transformFunctionKey)) - .map(([_, fn]) => fn); + const matchingEntries = Array.from(transformFunctions.entries()).filter( + ([key]) => key.startsWith(transformFunctionKey), + ); - if (matchingFunctions.length === 0) { + if (matchingEntries.length === 0) { const message = `No functions found for ${transformFunctionKey}`; cliLog({ action: "Function", @@ -753,7 +766,19 @@ async function loadStreamingFunctionV2( throw new Error(message); } - return matchingFunctions; + // Extract functions and configs, and get columns from the first entry + // (all functions for the same source topic will have the same columns) + const functions = matchingEntries.map(([_, [fn, config]]) => [ + fn, + config, + ]) as [StreamingFunction, TransformConfig | ConsumerConfig][]; + const [_key, firstEntry] = matchingEntries[0]; + const sourceColumns = firstEntry[2]; + + // Pre-build field mutations once for all messages + const fieldMutations = buildFieldMutationsFromColumns(sourceColumns); + + return { functions, fieldMutations }; } /** @@ -810,13 +835,23 @@ const startConsumer = async ( // We preload the function to not have to load it for each message // Note: Config types use 'any' as generics because they handle various // data model types determined at runtime, not compile time - const streamingFunctions: [ + let streamingFunctions: [ StreamingFunction, TransformConfig | ConsumerConfig, - ][] = - args.isDmv2 ? 
- await loadStreamingFunctionV2(args.sourceTopic, args.targetTopic) - : [[loadStreamingFunction(args.functionFilePath), {}]]; + ][]; + let fieldMutations: FieldMutations | undefined; + + if (args.isDmv2) { + const result = await loadStreamingFunctionV2( + args.sourceTopic, + args.targetTopic, + ); + streamingFunctions = result.functions; + fieldMutations = result.fieldMutations; + } else { + streamingFunctions = [[loadStreamingFunction(args.functionFilePath), {}]]; + fieldMutations = undefined; + } await consumer.subscribe({ topics: [args.sourceTopic.name], // Use full topic name for Kafka operations @@ -859,6 +894,7 @@ const startConsumer = async ( streamingFunctions, message, producer, + fieldMutations, ); }, { diff --git a/packages/ts-moose-lib/src/utilities/json.ts b/packages/ts-moose-lib/src/utilities/json.ts index 8b0e83661d..7282768876 100644 --- a/packages/ts-moose-lib/src/utilities/json.ts +++ b/packages/ts-moose-lib/src/utilities/json.ts @@ -1,3 +1,52 @@ +import type { + Column, + DataType, + Nested, + ArrayType, +} from "../dataModels/dataModelTypes"; + +/** + * Annotation key used to mark DateTime fields that should remain as strings + * rather than being parsed into Date objects at runtime. + */ +export const STRING_DATE_ANNOTATION = "stringDate"; + +/** + * Type guard to check if a DataType is a nullable wrapper + */ +function isNullableType(dt: DataType): dt is { nullable: DataType } { + return ( + typeof dt === "object" && + dt !== null && + "nullable" in dt && + typeof dt.nullable !== "undefined" + ); +} + +/** + * Type guard to check if a DataType is a Nested type + */ +function isNestedType(dt: DataType): dt is Nested { + return ( + typeof dt === "object" && + dt !== null && + "columns" in dt && + Array.isArray(dt.columns) + ); +} + +/** + * Type guard to check if a DataType is an ArrayType + */ +function isArrayType(dt: DataType): dt is ArrayType { + return ( + typeof dt === "object" && + dt !== null && + "elementType" in dt && + typeof dt.elementType !== "undefined" + ); +} + /** * Revives ISO 8601 date strings into Date objects during JSON parsing * This is useful for automatically converting date strings to Date objects @@ -12,3 +61,211 @@ export function jsonDateReviver(key: string, value: unknown): unknown { return value; } + +/** + * Checks if a DataType represents a datetime column (not just date) + * AND if the column should be parsed from string to Date at runtime + * + * Note: Date and Date16 are date-only types and should remain as strings. + * Only DateTime types are candidates for parsing to JavaScript Date objects. 
+ */ +function isDateType(dataType: DataType, annotations: [string, any][]): boolean { + // Check if this is marked as a string-based date (from typia.tags.Format) + // If so, it should remain as a string, not be parsed to Date + if ( + annotations.some( + ([key, value]) => key === STRING_DATE_ANNOTATION && value === true, + ) + ) { + return false; + } + + if (typeof dataType === "string") { + // Only DateTime types should be parsed to Date objects + // Date and Date16 are date-only and should stay as strings + return dataType === "DateTime" || dataType.startsWith("DateTime("); + } + // Handle nullable wrapper + if (isNullableType(dataType)) { + return isDateType(dataType.nullable, annotations); + } + return false; +} + +/** + * Type of mutation to apply to a field during parsing + */ +export type Mutation = "parseDate"; // | "parseBigInt" - to be added later + +/** + * Recursive tuple array structure representing field mutation operations + * Each entry is [fieldName, mutation]: + * - mutation is Mutation[] for leaf fields that need operations applied + * - mutation is FieldMutations for nested objects/arrays (auto-applies to array elements) + */ +export type FieldMutations = [string, Mutation[] | FieldMutations][]; + +/** + * Recursively builds field mutations from column definitions + * + * @param columns - Array of Column definitions + * @returns Tuple array of field mutations + */ +function buildFieldMutations(columns: Column[]): FieldMutations { + const mutations: FieldMutations = []; + + for (const column of columns) { + const dataType = column.data_type; + + // Check if this is a date field that should be converted + if (isDateType(dataType, column.annotations)) { + mutations.push([column.name, ["parseDate"]]); + continue; + } + + // Handle nested structures + if (typeof dataType === "object" && dataType !== null) { + // Handle nullable wrapper + let unwrappedType: DataType = dataType; + if (isNullableType(dataType)) { + unwrappedType = dataType.nullable; + } + + // Handle nested objects + if (isNestedType(unwrappedType)) { + const nestedMutations = buildFieldMutations(unwrappedType.columns); + if (nestedMutations.length > 0) { + mutations.push([column.name, nestedMutations]); + } + continue; + } + + // Handle arrays with nested columns + // The mutations will be auto-applied to each array element at runtime + if (isArrayType(unwrappedType)) { + const elementType = unwrappedType.elementType; + if (isNestedType(elementType)) { + const nestedMutations = buildFieldMutations(elementType.columns); + if (nestedMutations.length > 0) { + mutations.push([column.name, nestedMutations]); + } + continue; + } + } + } + } + + return mutations; +} + +/** + * Applies a mutation operation to a field value + * + * @param value - The value to handle + * @param mutation - The mutation operation to apply + * @returns The handled value + */ +function applyMutation(value: any, mutation: Mutation): any { + if (mutation === "parseDate") { + if (typeof value === "string") { + try { + const date = new Date(value); + return !isNaN(date.getTime()) ? 
date : value; + } catch { + return value; + } + } + } + return value; +} + +/** + * Recursively mutates an object by applying field mutations + * + * @param obj - The object to mutate + * @param mutations - The field mutations to apply + */ +function applyFieldMutations(obj: any, mutations: FieldMutations): void { + if (!obj || typeof obj !== "object") { + return; + } + + for (const [fieldName, mutation] of mutations) { + if (!(fieldName in obj)) { + continue; + } + + if (Array.isArray(mutation)) { + // Check if it's Mutation[] (leaf) or FieldMutations (nested) + if (mutation.length > 0 && typeof mutation[0] === "string") { + // It's Mutation[] - apply operations to this field + const operations = mutation as Mutation[]; + for (const operation of operations) { + obj[fieldName] = applyMutation(obj[fieldName], operation); + } + } else { + // It's FieldMutations - recurse into nested structure + const nestedMutations = mutation as FieldMutations; + const fieldValue = obj[fieldName]; + + if (Array.isArray(fieldValue)) { + // Auto-apply to each array element + for (const item of fieldValue) { + applyFieldMutations(item, nestedMutations); + } + } else if (fieldValue && typeof fieldValue === "object") { + // Apply to nested object + applyFieldMutations(fieldValue, nestedMutations); + } + } + } + } +} + +/** + * Pre-builds field mutations from column schema for efficient reuse + * + * @param columns - Column definitions from the Stream schema + * @returns Field mutations tuple array, or undefined if no columns + * + * @example + * ```typescript + * const fieldMutations = buildFieldMutationsFromColumns(stream.columnArray); + * // Reuse fieldMutations for every message + * ``` + */ +export function buildFieldMutationsFromColumns( + columns: Column[] | undefined, +): FieldMutations | undefined { + if (!columns || columns.length === 0) { + return undefined; + } + const mutations = buildFieldMutations(columns); + return mutations.length > 0 ? mutations : undefined; +} + +/** + * Applies field mutations to parsed data + * Mutates the object in place for performance + * + * @param data - The parsed JSON object to mutate + * @param fieldMutations - Pre-built field mutations from buildFieldMutationsFromColumns + * + * @example + * ```typescript + * const fieldMutations = buildFieldMutationsFromColumns(stream.columnArray); + * const data = JSON.parse(jsonString); + * mutateParsedJson(data, fieldMutations); + * // data now has transformations applied per the field mutations + * ``` + */ +export function mutateParsedJson( + data: any, + fieldMutations: FieldMutations | undefined, +): void { + if (!fieldMutations || !data) { + return; + } + + applyFieldMutations(data, fieldMutations); +} diff --git a/templates/python-tests/src/ingest/models.py b/templates/python-tests/src/ingest/models.py index 512f8e2518..70124db444 100644 --- a/templates/python-tests/src/ingest/models.py +++ b/templates/python-tests/src/ingest/models.py @@ -1,7 +1,9 @@ # This file was auto-generated by the framework. 
You can add data models or change the existing ones from moose_lib import Point, Ring, LineString, MultiLineString, Polygon, MultiPolygon -from moose_lib import Key, IngestPipeline, IngestPipelineConfig, StringToEnumMixin, clickhouse_default, OlapTable, OlapConfig, MergeTreeEngine, ReplacingMergeTreeEngine, AggregatingMergeTreeEngine, simple_aggregated, ClickhouseSize, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64 +from moose_lib import Key, IngestPipeline, IngestPipelineConfig, StringToEnumMixin, clickhouse_default, OlapTable, \ + OlapConfig, MergeTreeEngine, ReplacingMergeTreeEngine, AggregatingMergeTreeEngine, simple_aggregated, \ + ClickhouseSize, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64, ClickhousePrecision from datetime import datetime, date from typing import Optional, Annotated, Any from pydantic import BaseModel, BeforeValidator, ConfigDict @@ -9,7 +11,6 @@ from enum import IntEnum, auto - class Baz(StringToEnumMixin, IntEnum): QUX = auto() QUUX = auto() @@ -49,6 +50,7 @@ class Bar(BaseModel): from typing import List, Tuple from pydantic import Field + # Test 1: Basic primitive types class BasicTypes(BaseModel): id: Key[str] @@ -59,6 +61,7 @@ class BasicTypes(BaseModel): optional_string: Optional[str] = None nullable_number: Optional[float] = None + # Test 2: Simple arrays of primitives class SimpleArrays(BaseModel): id: Key[str] @@ -69,57 +72,68 @@ class SimpleArrays(BaseModel): optional_string_array: Optional[List[str]] = None mixed_optional_array: Optional[List[str]] = None + # Test 3: Nested objects class Coordinates(BaseModel): lat: float lng: float + class Address(BaseModel): street: str city: str coordinates: Coordinates + class Settings(BaseModel): theme: str notifications: bool + class Config(BaseModel): enabled: bool settings: Settings + class Metadata(BaseModel): tags: List[str] priority: int config: Config + class NestedObjects(BaseModel): id: Key[str] timestamp: datetime address: Address metadata: Metadata + # Test 4: Arrays of objects class User(BaseModel): name: str age: int active: bool + class TransactionMetadata(BaseModel): category: str tags: List[str] + class Transaction(BaseModel): id: str amount: float currency: str metadata: TransactionMetadata + class ArraysOfObjects(BaseModel): id: Key[str] timestamp: datetime users: List[User] transactions: List[Transaction] + # Test 5: Deeply nested arrays (main focus for ENG-875) - ClickHouse compatible class SimplifiedItem(BaseModel): name: str @@ -129,10 +143,12 @@ class SimplifiedItem(BaseModel): metric_names: List[str] metric_values: List[float] + class ComplexNestedCategory(BaseModel): category: str items: List[SimplifiedItem] + class DeeplyNestedArrays(BaseModel): id: Key[str] timestamp: datetime @@ -145,6 +161,7 @@ class DeeplyNestedArrays(BaseModel): # Simplified nested: Reduce nesting depth to avoid ClickHouse issues complex_nested: List[ComplexNestedCategory] + # Test 6: Mixed complex types (ClickHouse-safe) class ClickEvent(BaseModel): type: str = Field(default="click") @@ -152,20 +169,24 @@ class ClickEvent(BaseModel): coordinate_x: float coordinate_y: float + class FlattenedData(BaseModel): required: str optional_data: List[str] # Flattened, no nested optionals tags: List[str] values: List[float] + class Column(BaseModel): col: int values: List[str] + class ComplexMatrixRow(BaseModel): row: int columns: List[Column] + class MixedComplexTypes(BaseModel): id: Key[str] timestamp: datetime @@ -176,34 +197,42 @@ class MixedComplexTypes(BaseModel): # 
Multi-dimensional with objects (safe) complex_matrix: List[ComplexMatrixRow] + # Test 7: Edge cases and boundary conditions (ClickHouse-compatible) class EmptyObject(BaseModel): id: str + class Property(BaseModel): key: str value: str tags: List[str] + class Metric(BaseModel): name: str values: List[float] + class ModerateLevel2(BaseModel): data: List[str] values: List[float] + class ModerateLevel1(BaseModel): level2: List[ModerateLevel2] + class ModerateNesting(BaseModel): level1: List[ModerateLevel1] + class SimplifiedComplexItem(BaseModel): id: str properties: List[Property] metrics: List[Metric] + class EdgeCases(BaseModel): id: Key[str] timestamp: datetime @@ -218,6 +247,7 @@ class EdgeCases(BaseModel): # Simplified complex arrays complex_array: List[SimplifiedComplexItem] + # =======JSON Types Test========= class JsonInner(BaseModel): model_config = ConfigDict(extra='allow') @@ -225,6 +255,7 @@ class JsonInner(BaseModel): name: str count: int + class JsonTest(BaseModel): id: Key[str] timestamp: datetime @@ -238,6 +269,7 @@ class JsonTest(BaseModel): # Test JSON with paths but without configuration payload_basic: Annotated[JsonInner, ClickHouseJson()] + # =======Pipeline Configurations for Test Models========= basic_types_model = IngestPipeline[BasicTypes]("BasicTypes", IngestPipelineConfig( @@ -296,6 +328,7 @@ class JsonTest(BaseModel): dead_letter_queue=True )) + # =======Optional Nested Fields with ClickHouse Defaults Test========= # Test class with optional nested fields and ClickHouse defaults @@ -303,12 +336,14 @@ class TestNested(BaseModel): name: Optional[str] = None age: Optional[float] = None + class OptionalNestedTest(BaseModel): id: Key[str] timestamp: datetime nested: List[TestNested] other: Annotated[str, clickhouse_default("''")] = "" + optional_nested_test_model = IngestPipeline[OptionalNestedTest]("OptionalNestedTest", IngestPipelineConfig( ingest_api=True, stream=True, @@ -316,6 +351,7 @@ class OptionalNestedTest(BaseModel): dead_letter_queue=True )) + # =======Geometry Types========= class GeoTypes(BaseModel): @@ -336,6 +372,7 @@ class GeoTypes(BaseModel): dead_letter_queue=True )) + # =======Versioned OlapTables Test========= # Test versioned OlapTables - same name, different versions # This demonstrates the OlapTable versioning functionality @@ -347,6 +384,7 @@ class UserEventV1(BaseModel): timestamp: float metadata: Optional[str] = None + class UserEventV2(BaseModel): """Version 2.0 of user events - enhanced with session tracking.""" user_id: Key[str] @@ -356,6 +394,7 @@ class UserEventV2(BaseModel): session_id: str user_agent: Optional[str] = None + # Version 1.0 - MergeTree engine user_events_v1 = OlapTable[UserEventV1]( "UserEvents", @@ -376,6 +415,7 @@ class UserEventV2(BaseModel): ) ) + # =======SimpleAggregateFunction Test========= # Test SimpleAggregateFunction support for aggregated metrics # This demonstrates using SimpleAggregateFunction with AggregatingMergeTree @@ -389,6 +429,7 @@ class SimpleAggTest(BaseModel): min_value: simple_aggregated('min', int) last_updated: simple_aggregated('anyLast', datetime) + simple_agg_test_table = OlapTable[SimpleAggTest]( "SimpleAggTest", OlapConfig( @@ -397,12 +438,14 @@ class SimpleAggTest(BaseModel): ) ) + # =======Index Extraction Test Table======= class IndexTest(BaseModel): u64: Key[UInt64] i32: Int32 s: str + index_test_table = OlapTable[IndexTest]( "IndexTest", OlapConfig( @@ -411,15 +454,19 @@ class IndexTest(BaseModel): indexes=[ OlapConfig.TableIndex(name="idx1", expression="u64", type="bloom_filter", 
arguments=[], granularity=3), OlapConfig.TableIndex(name="idx2", expression="u64 * i32", type="minmax", arguments=[], granularity=3), - OlapConfig.TableIndex(name="idx3", expression="u64 * length(s)", type="set", arguments=["1000"], granularity=4), + OlapConfig.TableIndex(name="idx3", expression="u64 * length(s)", type="set", arguments=["1000"], + granularity=4), OlapConfig.TableIndex(name="idx4", expression="(u64, i32)", type="MinMax", arguments=[], granularity=1), OlapConfig.TableIndex(name="idx5", expression="(u64, i32)", type="minmax", arguments=[], granularity=1), - OlapConfig.TableIndex(name="idx6", expression="toString(i32)", type="ngrambf_v1", arguments=["2", "256", "1", "123"], granularity=1), - OlapConfig.TableIndex(name="idx7", expression="s", type="nGraMbf_v1", arguments=["3", "256", "1", "123"], granularity=1), + OlapConfig.TableIndex(name="idx6", expression="toString(i32)", type="ngrambf_v1", + arguments=["2", "256", "1", "123"], granularity=1), + OlapConfig.TableIndex(name="idx7", expression="s", type="nGraMbf_v1", arguments=["3", "256", "1", "123"], + granularity=1), ], ), ) + # =======Numeric Type Aliases Test========= # Demonstrates usage of Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Float32, Float64 @@ -430,21 +477,22 @@ class NumericTypesTest(BaseModel): timestamp: datetime # Unsigned integers - for values that are always positive - user_count: UInt32 # User counts (0 to 4B) - page_views: UInt64 # Large counters - status_code: UInt8 # HTTP status codes (0-255) - port: UInt16 # Network ports (0-65535) + user_count: UInt32 # User counts (0 to 4B) + page_views: UInt64 # Large counters + status_code: UInt8 # HTTP status codes (0-255) + port: UInt16 # Network ports (0-65535) # Signed integers - for values that can be negative - temperature: Int16 # Temperature in celsius (-32K to +32K) - balance: Int64 # Financial balances (can be negative) - offset: Int8 # Small offsets (-128 to +127) - delta: Int32 # Deltas/differences + temperature: Int16 # Temperature in celsius (-32K to +32K) + balance: Int64 # Financial balances (can be negative) + offset: Int8 # Small offsets (-128 to +127) + delta: Int32 # Deltas/differences # Float types - latitude: Float64 # High precision coordinates - confidence_score: Float32 # ML confidence scores (lower precision ok) - price: Float64 # Financial amounts need precision + latitude: Float64 # High precision coordinates + confidence_score: Float32 # ML confidence scores (lower precision ok) + price: Float64 # Financial amounts need precision + numeric_types_test_model = IngestPipeline[NumericTypesTest]("NumericTypesTest", IngestPipelineConfig( ingest_api=True, @@ -453,6 +501,7 @@ class NumericTypesTest(BaseModel): dead_letter_queue=True )) + # =======Real-World Production Patterns (District Cannabis Inspired)========= # Test 8: Complex discount structure with mixed nullability @@ -462,6 +511,7 @@ class DiscountInfo(BaseModel): discount_reason: Optional[str] = None amount: float # Required field + # Test 9: Transaction item with complex nested structure class ProductItem(BaseModel): product_id: Optional[int] = None @@ -471,6 +521,7 @@ class ProductItem(BaseModel): unit_cost: Optional[float] = None package_id: Optional[str] = None + # Test 10: Complex transaction with multiple array types and ReplacingMergeTree class ComplexTransaction(BaseModel): transaction_id: Key[int] # Primary key @@ -492,6 +543,7 @@ class ComplexTransaction(BaseModel): is_void: bool is_tax_inclusive: Optional[bool] = None + # Test 11: Base type pattern with 
extension (common pattern) class BaseProduct(BaseModel): product_id: Optional[int] = None @@ -500,6 +552,7 @@ class BaseProduct(BaseModel): category_id: Optional[int] = None tags: List[str] # Remove optional - arrays cannot be nullable in ClickHouse + class ProductWithLocation(BaseModel): # Simulate Omit pattern by redefining fields product_id: int # Make required (was optional in base) @@ -511,6 +564,7 @@ class ProductWithLocation(BaseModel): location: str inventory_id: Key[int] + # Test 12: Engine and ordering configuration test class EngineTest(BaseModel): id: Key[str] @@ -519,6 +573,7 @@ class EngineTest(BaseModel): category: str value: float + # =======Pipeline Configurations for Production Patterns========= from moose_lib import OlapConfig, ReplacingMergeTreeEngine, MergeTreeEngine @@ -553,6 +608,7 @@ class EngineTest(BaseModel): dead_letter_queue=True )) + # =======Array Transform Test Models========= # Test models for verifying that transforms returning arrays produce multiple Kafka messages @@ -587,7 +643,44 @@ class ArrayOutput(BaseModel): # Create a Stream that writes to the OlapTable from moose_lib import Stream, StreamConfig + array_output_stream = Stream[ArrayOutput]( "ArrayOutput", StreamConfig(destination=array_output_table) ) + + +# =======DateTime Precision Test Models========= +# Test models for verifying DateTime precision handling (microseconds) +# Tests ENG-1453: Ensure microsecond precision is preserved + +class DateTimePrecisionTestData(BaseModel): + """Input model with datetime fields.""" + id: Key[str] + created_at: datetime + timestamp_ms: Annotated[datetime, ClickhousePrecision(3)] + timestamp_us: Annotated[datetime, ClickhousePrecision(6)] + timestamp_ns: Annotated[datetime, ClickhousePrecision(9)] + + +# Input pipeline (no table, just stream) +datetime_precision_input_model = IngestPipeline[DateTimePrecisionTestData]( + "DateTimePrecisionInput", + IngestPipelineConfig( + ingest_api=True, + stream=True, + table=False, + dead_letter_queue=True + )) + +# Output table +datetime_precision_output_table = OlapTable[DateTimePrecisionTestData]( + "DateTimePrecisionOutput", + OlapConfig(order_by_fields=["id"]) +) + +# Output stream +datetime_precision_output_stream = Stream[DateTimePrecisionTestData]( + "DateTimePrecisionOutput", + StreamConfig(destination=datetime_precision_output_table) +) diff --git a/templates/python-tests/src/ingest/transforms.py b/templates/python-tests/src/ingest/transforms.py index c407fa1b77..da00f10610 100644 --- a/templates/python-tests/src/ingest/transforms.py +++ b/templates/python-tests/src/ingest/transforms.py @@ -4,7 +4,6 @@ def foo_to_bar(foo: Foo): - """Transform Foo events to Bar events with error handling and caching. Normal flow: @@ -60,6 +59,7 @@ def print_foo_event(foo): fooModel.get_stream().add_consumer(print_foo_event) + # DLQ consumer for handling failed events (alternate flow) def print_messages(dead_letter: DeadLetterModel[Foo]): print("dead letter:", dead_letter) @@ -71,6 +71,7 @@ def print_messages(dead_letter: DeadLetterModel[Foo]): # Test transform that returns a list - each element should be sent as a separate Kafka message from src.ingest.models import array_input_model, array_output_stream, ArrayInput, ArrayOutput + def array_transform(input_data: ArrayInput) -> list[ArrayOutput]: """Transform that explodes an input array into individual output records. 
@@ -94,3 +95,54 @@ def array_transform(input_data: ArrayInput) -> list[ArrayOutput]: destination=array_output_stream, transformation=array_transform, ) + +# Test transform for DateTime precision - verifies that Python datetime preserves microseconds +from src.ingest.models import ( + datetime_precision_input_model, + datetime_precision_output_stream, + DateTimePrecisionTestData, +) + + +def datetime_precision_transform(input_data: DateTimePrecisionTestData) -> DateTimePrecisionTestData: + """Transform that verifies Python datetime objects preserve microsecond precision. + + Unlike JavaScript, Python's datetime natively supports microsecond precision, + so all datetime fields should be datetime objects with microseconds preserved. + """ + + print("DateTime precision transform (Python) - input types and values:") + print(f" created_at: {type(input_data.created_at)} = {input_data.created_at} (µs: {input_data.created_at.microsecond})") + print(f" timestamp_ms: {type(input_data.timestamp_ms)} = {input_data.timestamp_ms} (µs: {input_data.timestamp_ms.microsecond})") + print(f" timestamp_us: {type(input_data.timestamp_us)} = {input_data.timestamp_us} (µs: {input_data.timestamp_us.microsecond})") + print(f" timestamp_ns: {type(input_data.timestamp_ns)} = {input_data.timestamp_ns} (µs: {input_data.timestamp_ns.microsecond})") + + # Verify all are datetime objects + if not isinstance(input_data.created_at, datetime): + raise TypeError(f"Expected created_at to be datetime, got {type(input_data.created_at)}") + if not isinstance(input_data.timestamp_ms, datetime): + raise TypeError(f"Expected timestamp_ms to be datetime, got {type(input_data.timestamp_ms)}") + if not isinstance(input_data.timestamp_us, datetime): + raise TypeError(f"Expected timestamp_us to be datetime, got {type(input_data.timestamp_us)}") + if not isinstance(input_data.timestamp_ns, datetime): + raise TypeError(f"Expected timestamp_ns to be datetime, got {type(input_data.timestamp_ns)}") + + # Verify microseconds are present + if input_data.timestamp_us.microsecond == 0: + print(f"WARNING: timestamp_us has no microseconds: {input_data.timestamp_us}") + else: + print(f"✓ timestamp_us has microseconds: {input_data.timestamp_us.microsecond}") + + if input_data.timestamp_ns.microsecond == 0: + print(f"WARNING: timestamp_ns has no microseconds: {input_data.timestamp_ns}") + else: + print(f"✓ timestamp_ns has microseconds: {input_data.timestamp_ns.microsecond}") + + # Pass through unchanged + return input_data + + +datetime_precision_input_model.get_stream().add_transform( + destination=datetime_precision_output_stream, + transformation=datetime_precision_transform, +) diff --git a/templates/typescript-tests/src/ingest/models.ts b/templates/typescript-tests/src/ingest/models.ts index fc0e2b3044..7dc1e25332 100644 --- a/templates/typescript-tests/src/ingest/models.ts +++ b/templates/typescript-tests/src/ingest/models.ts @@ -5,6 +5,9 @@ import { OlapTable, DeadLetterModel, DateTime, + DateTime64, + DateTimeString, + DateTime64String, ClickHouseDefault, ClickHousePoint, ClickHouseRing, @@ -646,3 +649,38 @@ export const largeMessageOutputStream = new Stream( destination: LargeMessageOutputTable, }, ); + +/** =======DateTime Precision Test Models========= */ +// Test models for verifying DateTime precision handling (microseconds) +// Tests ENG-1453: Ensure microsecond precision is preserved + +/** Input model with datetime strings */ +export interface DateTimePrecisionTestData { + id: Key; + createdAt: DateTime; + timestampMs: DateTime64<3>; + 
timestampUsDate: DateTime64<6>; + timestampUsString: DateTime64String<6>; + timestampNs: DateTime64String<9>; + createdAtString: DateTimeString; +} + +// Input pipeline (no table, just stream) +export const DateTimePrecisionInputPipeline = + new IngestPipeline("DateTimePrecisionInput", { + table: false, + stream: true, + ingestApi: true, + }); + +// Output table +export const DateTimePrecisionOutputTable = + new OlapTable("DateTimePrecisionOutput", { + orderByFields: ["id"], + }); + +// Output stream +export const dateTimePrecisionOutputStream = + new Stream("DateTimePrecisionOutput", { + destination: DateTimePrecisionOutputTable, + }); diff --git a/templates/typescript-tests/src/ingest/transforms.ts b/templates/typescript-tests/src/ingest/transforms.ts index 15e46b9b66..69ec0eb25f 100644 --- a/templates/typescript-tests/src/ingest/transforms.ts +++ b/templates/typescript-tests/src/ingest/transforms.ts @@ -118,3 +118,75 @@ LargeMessageInputPipeline.stream!.addTransform( deadLetterQueue: LargeMessageInputPipeline.deadLetterQueue, }, ); + +// Test transform for DateTime precision - verifies field mutations work correctly +import { + DateTimePrecisionInputPipeline, + dateTimePrecisionOutputStream, + DateTimePrecisionTestData, +} from "./models"; + +DateTimePrecisionInputPipeline.stream!.addTransform( + dateTimePrecisionOutputStream, + (input: DateTimePrecisionTestData): DateTimePrecisionTestData => { + // This transform verifies that field mutations have correctly parsed datetime strings + // - DateTime and DateTime64<3> should be Date objects (parsed from strings) + // - DateTime64<6> should be Date objects (but loses microseconds in JS) + // - DateTime64String<6> should remain as strings (preserves microseconds) + // - DateTime64String<9> should remain as strings (preserves nanoseconds) + // - DateTimeString should remain as strings + + console.log("DateTime precision transform - input types:"); + console.log(` createdAt: ${typeof input.createdAt} = ${input.createdAt}`); + console.log( + ` timestampMs: ${typeof input.timestampMs} = ${input.timestampMs}`, + ); + console.log( + ` timestampUsDate: ${typeof input.timestampUsDate} = ${input.timestampUsDate}`, + ); + console.log( + ` timestampUsString: ${typeof input.timestampUsString} = ${input.timestampUsString}`, + ); + console.log( + ` timestampNs: ${typeof input.timestampNs} = ${input.timestampNs}`, + ); + console.log( + ` createdAtString: ${typeof input.createdAtString} = ${input.createdAtString}`, + ); + + // Verify types at runtime + if (!(input.createdAt instanceof Date)) { + throw new Error( + `Expected createdAt to be Date, got ${typeof input.createdAt}`, + ); + } + if (!(input.timestampMs instanceof Date)) { + throw new Error( + `Expected timestampMs to be Date, got ${typeof input.timestampMs}`, + ); + } + if (!(input.timestampUsDate instanceof Date)) { + throw new Error( + `Expected timestampUsDate to be Date, got ${typeof input.timestampUsDate}`, + ); + } + if (typeof input.timestampUsString !== "string") { + throw new Error( + `Expected timestampUsString to be string, got ${typeof input.timestampUsString}`, + ); + } + if (typeof input.timestampNs !== "string") { + throw new Error( + `Expected timestampNs to be string, got ${typeof input.timestampNs}`, + ); + } + if (typeof input.createdAtString !== "string") { + throw new Error( + `Expected createdAtString to be string, got ${typeof input.createdAtString}`, + ); + } + + // Pass through unchanged + return input; + }, +); From 8c3713c64d2b28a9764206af6522ebab98eec752 Mon Sep 17 
00:00:00 2001 From: Dave Seleno <958603+onelesd@users.noreply.github.com> Date: Sun, 23 Nov 2025 10:24:37 -0800 Subject: [PATCH 40/59] experimental: add LSP autocomplete to table columns in python f-strings (#3024) much of these changes are formatting if preferred i can undo those auto-applied formatting changes > [!NOTE] > Introduces MooseModel for class-level column access with IDE autocomplete and adds Column formatting for safe SQL interpolation, plus tests and an experimental template demonstrating usage. > > - **Library (dmv2)**: > - **MooseModel**: New BaseModel subclass with metaclass adding class-level `Column` descriptors and `.cols` namespace for LSP autocomplete (`moose_lib/dmv2/moose_model.py`); exported via `dmv2.__init__`. > - **OlapTable**: Still exposes `.cols`; works with both `BaseModel` and `MooseModel` and documents access patterns. > - **Data Modeling**: > - `Column` gains `__str__`/`__format__` to output quoted identifiers for f-strings (e.g., `{col:col}`), enabling safe SQL interpolation (`data_models.py`). > - **Docs**: > - README adds "Column Autocomplete with MooseModel" usage snippet. > - **Tests**: > - Add suites covering `MooseModel` behavior, backward compatibility with `BaseModel`, OLAP integration, and `Column` formatting. > - **Templates**: > - New `python-experimental` template demonstrating MooseModel autocomplete in APIs, ingest models, views, and workflows, with setup files and editor settings. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 33a906393adb60d99e608605067abbe90c79083c. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). --- packages/py-moose-lib/README.md | 17 + .../py-moose-lib/moose_lib/data_models.py | 219 +++++++---- .../py-moose-lib/moose_lib/dmv2/__init__.py | 142 ++++--- .../moose_lib/dmv2/moose_model.py | 165 +++++++++ .../py-moose-lib/moose_lib/dmv2/olap_table.py | 348 ++++++++++-------- .../tests/test_backward_compatibility.py | 85 +++++ .../tests/test_column_formatting.py | 80 ++++ .../py-moose-lib/tests/test_moose_model.py | 153 ++++++++ .../tests/test_olap_table_moosemodel.py | 89 +++++ templates/python-experimental/.gitignore | 57 +++ .../.vscode/extensions.json | 9 + .../python-experimental/.vscode/settings.json | 17 + templates/python-experimental/README.md | 104 ++++++ templates/python-experimental/app/__init__.py | 0 .../python-experimental/app/apis/__init__.py | 0 templates/python-experimental/app/apis/bar.py | 147 ++++++++ .../app/ingest/__init__.py | 0 .../python-experimental/app/ingest/models.py | 50 +++ .../app/ingest/transforms.py | 69 ++++ templates/python-experimental/app/main.py | 5 + .../python-experimental/app/views/__init__.py | 0 .../app/views/bar_aggregated.py | 36 ++ .../app/workflows/__init__.py | 0 .../app/workflows/generator.py | 52 +++ .../python-experimental/moose.config.toml | 57 +++ .../python-experimental/requirements.txt | 7 + templates/python-experimental/setup.py | 13 + .../python-experimental/template.config.toml | 33 ++ 28 files changed, 1659 insertions(+), 295 deletions(-) create mode 100644 packages/py-moose-lib/moose_lib/dmv2/moose_model.py create mode 100644 packages/py-moose-lib/tests/test_backward_compatibility.py create mode 100644 packages/py-moose-lib/tests/test_column_formatting.py create mode 100644 packages/py-moose-lib/tests/test_moose_model.py create mode 100644 packages/py-moose-lib/tests/test_olap_table_moosemodel.py create mode 100644 templates/python-experimental/.gitignore create mode 
100644 templates/python-experimental/.vscode/extensions.json create mode 100644 templates/python-experimental/.vscode/settings.json create mode 100644 templates/python-experimental/README.md create mode 100644 templates/python-experimental/app/__init__.py create mode 100644 templates/python-experimental/app/apis/__init__.py create mode 100644 templates/python-experimental/app/apis/bar.py create mode 100644 templates/python-experimental/app/ingest/__init__.py create mode 100644 templates/python-experimental/app/ingest/models.py create mode 100644 templates/python-experimental/app/ingest/transforms.py create mode 100644 templates/python-experimental/app/main.py create mode 100644 templates/python-experimental/app/views/__init__.py create mode 100644 templates/python-experimental/app/views/bar_aggregated.py create mode 100644 templates/python-experimental/app/workflows/__init__.py create mode 100644 templates/python-experimental/app/workflows/generator.py create mode 100644 templates/python-experimental/moose.config.toml create mode 100644 templates/python-experimental/requirements.txt create mode 100644 templates/python-experimental/setup.py create mode 100644 templates/python-experimental/template.config.toml diff --git a/packages/py-moose-lib/README.md b/packages/py-moose-lib/README.md index 0c7c244290..3567504413 100644 --- a/packages/py-moose-lib/README.md +++ b/packages/py-moose-lib/README.md @@ -1,3 +1,20 @@ # Python Moose Lib Python package which contains moose utils + +## Column Autocomplete with MooseModel + +For LSP autocomplete when working with columns, use `MooseModel` instead of `BaseModel`: + +```python +from moose_lib import MooseModel, OlapTable + +class User(MooseModel): + user_id: int + email: str + +# Autocomplete works when typing User.user_id +query = f"SELECT {User.user_id:col}, {User.email:col} FROM users" +``` + +See [MooseModel Autocomplete Guide](docs/moose-model-autocomplete.md) for details. 
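The README snippet above and the PR note describe the new `MooseModel` class-level column access and the `Column` `__str__`/`__format__` support for f-string SQL interpolation. The following is a minimal usage sketch assuming the behavior documented in this patch (class-level fields resolve to `Column` objects, formatting emits backtick-quoted identifiers, and the `.cols` namespace remains available); the `User` model, the `users` table name, and the assertions are illustrative and are not part of the patch itself.

```python
# Minimal sketch of the MooseModel / Column formatting behavior added in this patch.
# Assumes `moose_lib` exports MooseModel as shown in the README diff above;
# the User model and the "users" table name are illustrative only.
from moose_lib import MooseModel


class User(MooseModel):
    user_id: int
    email: str


# Class-level attribute access returns Column objects, so the LSP can autocomplete
# field names while building SQL. Per the Column.__format__/__str__ docstrings in
# data_models.py, formatting yields a backtick-quoted identifier.
query = f"SELECT {User.user_id:col}, {User.email:col} FROM users"
# query == "SELECT `user_id`, `email` FROM users"

# Backward-compatible access via the .cols namespace (attribute or bracket style),
# provided by ColsNamespace in moose_model.py.
assert str(User.cols.user_id) == "`user_id`"
assert str(User.cols["email"]) == "`email`"

# Instance behavior is unchanged: attributes hold values, not Column objects.
user = User(user_id=123, email="test@example.com")
assert user.user_id == 123
```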
diff --git a/packages/py-moose-lib/moose_lib/data_models.py b/packages/py-moose-lib/moose_lib/data_models.py index 6e0687857b..dcf3c287db 100644 --- a/packages/py-moose-lib/moose_lib/data_models.py +++ b/packages/py-moose-lib/moose_lib/data_models.py @@ -6,8 +6,19 @@ from uuid import UUID from datetime import datetime, date -from typing import Literal, Tuple, Union, Any, get_origin, get_args, TypeAliasType, Annotated, Type, _BaseGenericAlias, \ - GenericAlias +from typing import ( + Literal, + Tuple, + Union, + Any, + get_origin, + get_args, + TypeAliasType, + Annotated, + Type, + _BaseGenericAlias, + GenericAlias, +) from pydantic import BaseModel, Field, PlainSerializer, GetCoreSchemaHandler, ConfigDict from pydantic_core import CoreSchema, core_schema import ipaddress @@ -30,7 +41,9 @@ type Float64 = Annotated[float, "float64"] -@dataclasses.dataclass(frozen=True) # a BaseModel in the annotations will confuse pydantic +@dataclasses.dataclass( + frozen=True +) # a BaseModel in the annotations will confuse pydantic class ClickhousePrecision: precision: int @@ -106,11 +119,14 @@ def FixedString(size: int) -> ClickhouseFixedStringSize: def aggregated[T]( - result_type: Type[T], - agg_func: str, - param_types: list[type | GenericAlias | _BaseGenericAlias] + result_type: Type[T], + agg_func: str, + param_types: list[type | GenericAlias | _BaseGenericAlias], ) -> Type[T]: - return Annotated[result_type, AggregateFunction(agg_func=agg_func, param_types=tuple(param_types))] + return Annotated[ + result_type, + AggregateFunction(agg_func=agg_func, param_types=tuple(param_types)), + ] @dataclasses.dataclass(frozen=True) @@ -123,14 +139,11 @@ def to_dict(self): "functionName": self.agg_func, "argumentTypes": [ py_type_to_column_type(t, [])[2] for t in self.param_types - ] + ], } -def simple_aggregated[T]( - agg_func: str, - arg_type: Type[T] -) -> Type[T]: +def simple_aggregated[T](agg_func: str, arg_type: Type[T]) -> Type[T]: """Helper to create a SimpleAggregateFunction type annotation. 
SimpleAggregateFunction is a ClickHouse type for storing aggregated values directly @@ -152,7 +165,9 @@ def simple_aggregated[T]( last_status: simple_aggregated("anyLast", str) ``` """ - return Annotated[arg_type, SimpleAggregateFunction(agg_func=agg_func, arg_type=arg_type)] + return Annotated[ + arg_type, SimpleAggregateFunction(agg_func=agg_func, arg_type=arg_type) + ] @dataclasses.dataclass(frozen=True) @@ -163,7 +178,7 @@ class SimpleAggregateFunction: def to_dict(self): return { "functionName": self.agg_func, - "argumentType": py_type_to_column_type(self.arg_type, [])[2] + "argumentType": py_type_to_column_type(self.arg_type, [])[2], } @@ -176,7 +191,9 @@ def enum_value_serializer(value: int | str): class EnumValue(BaseModel): name: str - value: Annotated[int | str, PlainSerializer(enum_value_serializer, return_type=dict)] + value: Annotated[ + int | str, PlainSerializer(enum_value_serializer, return_type=dict) + ] class DataEnum(BaseModel): @@ -212,7 +229,9 @@ class JsonOptions(BaseModel): skip_regexps: list[str] = [] -type DataType = str | DataEnum | ArrayType | Nested | NamedTupleType | MapType | JsonOptions +type DataType = ( + str | DataEnum | ArrayType | Nested | NamedTupleType | MapType | JsonOptions +) def handle_jwt(field_type: type) -> Tuple[bool, type]: @@ -256,8 +275,51 @@ class Column(BaseModel): def to_expr(self): # Lazy import to avoid circular dependency at import time from .query_builder import ColumnRef + return ColumnRef(self) + def __str__(self) -> str: + """Return properly quoted identifier for SQL interpolation. + + This enables Column objects to be used directly in f-strings and + string concatenation for SQL query construction. + + Returns: + Backtick-quoted identifier safe for ClickHouse SQL. + + Example: + >>> col = Column(name="user_id", ...) + >>> f"SELECT {col} FROM users" + "SELECT `user_id` FROM users" + """ + from .utilities.sql import quote_identifier + + return quote_identifier(self.name) + + def __format__(self, format_spec: str) -> str: + """Format Column for f-string interpolation with format specifiers. + + Supports format specs: + - 'col', 'c', 'column': Returns quoted identifier + - '' (empty): Returns quoted identifier (default) + + Args: + format_spec: Format specification string + + Returns: + Backtick-quoted identifier + + Example: + >>> col = Column(name="email", ...) 
+ >>> f"SELECT {col:col} FROM users" + "SELECT `email` FROM users" + """ + # All format specs return quoted identifier + # This provides flexibility for user preference + from .utilities.sql import quote_identifier + + return quote_identifier(self.name) + def _is_point_type(t: type) -> bool: origin = get_origin(t) @@ -296,8 +358,8 @@ def _validate_geometry_type(requested: str, t: type) -> None: ) case "MultiPolygon": if not _is_list_of( - lambda x: _is_list_of(lambda y: _is_list_of(_is_point_type, y), x), - t, + lambda x: _is_list_of(lambda y: _is_list_of(_is_point_type, y), x), + t, ): raise ValueError( "MultiPolygon must be typed as list[list[list[tuple[float, float]]]]" @@ -317,8 +379,7 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da if t is str: # Check for FixedString annotation fixed_string_size = next( - (md.size for md in mds if isinstance(md, ClickhouseFixedStringSize)), - None + (md.size for md in mds if isinstance(md, ClickhouseFixedStringSize)), None ) if fixed_string_size: data_type = f"FixedString({fixed_string_size})" @@ -327,8 +388,7 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da elif t is bytes: # Check for FixedString annotation fixed_string_size = next( - (md.size for md in mds if isinstance(md, ClickhouseFixedStringSize)), - None + (md.size for md in mds if isinstance(md, ClickhouseFixedStringSize)), None ) if fixed_string_size: data_type = f"FixedString({fixed_string_size})" @@ -337,7 +397,10 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da data_type = "String" elif t is int: # Check for int size annotations - int_size = next((md for md in mds if isinstance(md, str) and re.match(r'^u?int\d+$', md)), None) + int_size = next( + (md for md in mds if isinstance(md, str) and re.match(r"^u?int\d+$", md)), + None, + ) if int_size: data_type = int_size.replace("u", "U").replace("i", "I") else: @@ -345,7 +408,14 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da elif t is float: size = next((md for md in mds if isinstance(md, ClickhouseSize)), None) if size is None: - bit_size = next((md for md in mds if isinstance(md, str) and re.match(r'^float\d+$', md)), None) + bit_size = next( + ( + md + for md in mds + if isinstance(md, str) and re.match(r"^float\d+$", md) + ), + None, + ) if bit_size: if bit_size == "float32": data_type = "Float32" @@ -363,12 +433,16 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da raise ValueError(f"Unsupported float size {size.size}") elif t is Decimal: precision = next((md.max_digits for md in mds if hasattr(md, "max_digits")), 10) - scale = next((md.decimal_places for md in mds if hasattr(md, "decimal_places")), 0) + scale = next( + (md.decimal_places for md in mds if hasattr(md, "decimal_places")), 0 + ) data_type = f"Decimal({precision}, {scale})" elif t is bool: data_type = "Boolean" elif t is datetime: - precision = next((md for md in mds if isinstance(md, ClickhousePrecision)), None) + precision = next( + (md for md in mds if isinstance(md, ClickhousePrecision)), None + ) if precision is None: data_type = "DateTime" else: @@ -385,22 +459,31 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da data_type = "IPv4" elif t is ipaddress.IPv6Address: data_type = "IPv6" - elif any(md in [ # this check has to happen before t is matched against tuple/list - "Point", - "Ring", - "LineString", - "MultiLineString", - "Polygon", - "MultiPolygon", - ] for 
md in mds): - data_type = next(md for md in mds if md in [ + elif any( + md + in [ # this check has to happen before t is matched against tuple/list "Point", "Ring", "LineString", "MultiLineString", "Polygon", "MultiPolygon", - ]) + ] + for md in mds + ): + data_type = next( + md + for md in mds + if md + in [ + "Point", + "Ring", + "LineString", + "MultiLineString", + "Polygon", + "MultiPolygon", + ] + ) _validate_geometry_type(data_type, t) elif get_origin(t) is list: inner_optional, _, inner_type = py_type_to_column_type(get_args(t)[0], []) @@ -413,7 +496,9 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da # For dict types, we assume keys are required and values match their type data_type = MapType(key_type=key_type, value_type=value_type) else: - raise ValueError(f"Dict type must have exactly 2 type arguments, got {len(args)}") + raise ValueError( + f"Dict type must have exactly 2 type arguments, got {len(args)}" + ) elif t is UUID: data_type = "UUID" elif t is Any: @@ -427,7 +512,7 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da "Default in inner field. Put ClickHouseDefault in top level field." ) # Enforce extra='allow' for JSON-mapped models - if t.model_config.get('extra') != 'allow': + if t.model_config.get("extra") != "allow": raise ValueError( f"Model {t.__name__} with ClickHouseJson must have model_config with extra='allow'. " "Add: model_config = ConfigDict(extra='allow')" @@ -440,11 +525,11 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da typed_paths.append((c.name, c.data_type)) has_any_option = ( - opts.max_dynamic_paths is not None or - opts.max_dynamic_types is not None or - len(typed_paths) > 0 or - len(opts.skip_paths) > 0 or - len(opts.skip_regexps) > 0 + opts.max_dynamic_paths is not None + or opts.max_dynamic_types is not None + or len(typed_paths) > 0 + or len(opts.skip_paths) > 0 + or len(opts.skip_regexps) > 0 ) if not has_any_option: @@ -471,10 +556,7 @@ def py_type_to_column_type(t: type, mds: list[Any]) -> Tuple[bool, list[Any], Da ) if any(md == "ClickHouseNamedTuple" for md in mds): data_type = NamedTupleType( - fields=[( - column.name, - column.data_type - ) for column in columns], + fields=[(column.name, column.data_type) for column in columns], ) else: data_type = Nested( @@ -493,7 +575,7 @@ def _to_columns(model: type[BaseModel]) -> list[Column]: """Convert Pydantic model fields to Column definitions.""" columns = [] # Get raw annotations from the model class to preserve type aliases - raw_annotations = getattr(model, '__annotations__', {}) + raw_annotations = getattr(model, "__annotations__", {}) for field_name, field_info in model.model_fields.items(): # Use raw annotation if available (preserves type aliases and their metadata) @@ -504,22 +586,24 @@ def _to_columns(model: type[BaseModel]) -> list[Column]: primary_key, field_type = handle_key(field_type) is_jwt, field_type = handle_jwt(field_type) - optional, mds, data_type = py_type_to_column_type(field_type, field_info.metadata) + optional, mds, data_type = py_type_to_column_type( + field_type, field_info.metadata + ) annotations = [] for md in mds: - if isinstance(md, AggregateFunction) and all(key != "aggregationFunction" for (key, _) in annotations): - annotations.append( - ("aggregationFunction", md.to_dict()) - ) - if isinstance(md, SimpleAggregateFunction) and all(key != "simpleAggregationFunction" for (key, _) in annotations): - annotations.append( - ("simpleAggregationFunction", 
md.to_dict()) - ) - if md == "LowCardinality" and all(key != "LowCardinality" for (key, _) in annotations): - annotations.append( - ("LowCardinality", True) - ) + if isinstance(md, AggregateFunction) and all( + key != "aggregationFunction" for (key, _) in annotations + ): + annotations.append(("aggregationFunction", md.to_dict())) + if isinstance(md, SimpleAggregateFunction) and all( + key != "simpleAggregationFunction" for (key, _) in annotations + ): + annotations.append(("simpleAggregationFunction", md.to_dict())) + if md == "LowCardinality" and all( + key != "LowCardinality" for (key, _) in annotations + ): + annotations.append(("LowCardinality", True)) column_name = field_name if field_info.alias is None else field_info.alias @@ -552,7 +636,9 @@ def _to_columns(model: type[BaseModel]) -> list[Column]: class StringToEnumMixin: @classmethod - def __get_pydantic_core_schema__(cls, _source_type: Any, _handler: GetCoreSchemaHandler) -> CoreSchema: + def __get_pydantic_core_schema__( + cls, _source_type: Any, _handler: GetCoreSchemaHandler + ) -> CoreSchema: def validate(value: Any, _: Any) -> Any: if isinstance(value, str): try: @@ -561,14 +647,15 @@ def validate(value: Any, _: Any) -> Any: raise ValueError(f"Invalid enum name: {value}") return cls(value) # fallback to default enum validation - return core_schema.with_info_before_validator_function(validate, core_schema.enum_schema(cls, list(cls))) + return core_schema.with_info_before_validator_function( + validate, core_schema.enum_schema(cls, list(cls)) + ) def is_array_nested_type(data_type: DataType) -> bool: """Type guard to check if a data type is Array(Nested(...)).""" - return ( - isinstance(data_type, ArrayType) and - isinstance(data_type.element_type, Nested) + return isinstance(data_type, ArrayType) and isinstance( + data_type.element_type, Nested ) diff --git a/packages/py-moose-lib/moose_lib/dmv2/__init__.py b/packages/py-moose-lib/moose_lib/dmv2/__init__.py index cdc61e8f85..4aebad4940 100644 --- a/packages/py-moose-lib/moose_lib/dmv2/__init__.py +++ b/packages/py-moose-lib/moose_lib/dmv2/__init__.py @@ -15,6 +15,10 @@ ZeroOrMany, ) +from .moose_model import ( + MooseModel, +) + from .olap_table import ( OlapConfig, OlapTable, @@ -114,88 +118,80 @@ __all__ = [ # Types - 'BaseTypedResource', - 'TypedMooseResource', - 'Columns', - 'T', - 'U', - 'T_none', - 'U_none', - 'ZeroOrMany', - + "BaseTypedResource", + "TypedMooseResource", + "Columns", + "MooseModel", + "T", + "U", + "T_none", + "U_none", + "ZeroOrMany", # OLAP Tables - 'OlapConfig', - 'OlapTable', - 'InsertOptions', - + "OlapConfig", + "OlapTable", + "InsertOptions", # Streams - 'StreamConfig', - 'TransformConfig', - 'ConsumerConfig', - 'Stream', - 'DeadLetterModel', - 'DeadLetterQueue', - 'SubjectLatest', - 'SubjectVersion', - 'SchemaById', - 'KafkaSchemaConfig', - + "StreamConfig", + "TransformConfig", + "ConsumerConfig", + "Stream", + "DeadLetterModel", + "DeadLetterQueue", + "SubjectLatest", + "SubjectVersion", + "SchemaById", + "KafkaSchemaConfig", # Ingestion - 'IngestConfig', - 'IngestConfigWithDestination', - 'IngestPipelineConfig', - 'IngestApi', - 'IngestPipeline', - + "IngestConfig", + "IngestConfigWithDestination", + "IngestPipelineConfig", + "IngestApi", + "IngestPipeline", # Consumption - 'ApiConfig', - 'Api', - 'get_moose_base_url', - 'set_moose_base_url', + "ApiConfig", + "Api", + "get_moose_base_url", + "set_moose_base_url", # Backward compatibility aliases (deprecated) - 'ConsumptionApi', - 'EgressConfig', - + "ConsumptionApi", + "EgressConfig", # SQL 
- 'SqlResource', - 'View', - 'MaterializedViewOptions', - 'MaterializedView', - + "SqlResource", + "View", + "MaterializedViewOptions", + "MaterializedView", # Workflow - 'TaskContext', - 'TaskConfig', - 'Task', - 'WorkflowConfig', - 'Workflow', - + "TaskContext", + "TaskConfig", + "Task", + "WorkflowConfig", + "Workflow", # Lifecycle - 'LifeCycle', - + "LifeCycle", # WebApp - 'WebApp', - 'WebAppConfig', - 'WebAppMetadata', - 'ApiUtil', - 'get_moose_utils', - 'get_moose_dependency', - + "WebApp", + "WebAppConfig", + "WebAppMetadata", + "ApiUtil", + "get_moose_utils", + "get_moose_dependency", # Registry - 'get_tables', - 'get_table', - 'get_streams', - 'get_stream', - 'get_ingest_apis', - 'get_ingest_api', - 'get_apis', - 'get_api', - 'get_sql_resources', - 'get_sql_resource', - 'get_workflows', - 'get_workflow', - 'get_web_apps', - 'get_web_app', + "get_tables", + "get_table", + "get_streams", + "get_stream", + "get_ingest_apis", + "get_ingest_api", + "get_apis", + "get_api", + "get_sql_resources", + "get_sql_resource", + "get_workflows", + "get_workflow", + "get_web_apps", + "get_web_app", # Backward compatibility aliases (deprecated) - 'get_consumption_apis', - 'get_consumption_api', + "get_consumption_apis", + "get_consumption_api", ] diff --git a/packages/py-moose-lib/moose_lib/dmv2/moose_model.py b/packages/py-moose-lib/moose_lib/dmv2/moose_model.py new file mode 100644 index 0000000000..8974c2936b --- /dev/null +++ b/packages/py-moose-lib/moose_lib/dmv2/moose_model.py @@ -0,0 +1,165 @@ +""" +MooseModel base class for data models with LSP-friendly column access. + +This module provides MooseModel, a Pydantic BaseModel subclass that adds +Column descriptors for each field, enabling LSP autocomplete when accessing +columns for SQL query construction. +""" + +from pydantic import BaseModel +from typing import TYPE_CHECKING +from ..data_models import Column, _to_columns + + +class ColsNamespace: + """ + Namespace object that provides column access via attributes. + + This is created at the class level for backward compatibility with + the existing table.cols.field_name pattern. + + Example: + >>> class User(MooseModel): + ... user_id: int + >>> User.cols.user_id # Returns Column object + """ + + def __init__(self, columns: list[Column]): + """ + Initialize cols namespace with columns. + + Args: + columns: List of Column objects for the model + """ + self._columns = {c.name: c for c in columns} + + # Set each column as an attribute for direct access + for col in columns: + setattr(self, col.name, col) + + def __getitem__(self, item: str) -> Column: + """ + Allow bracket notation access to columns. + + Args: + item: Column name + + Returns: + Column object + + Raises: + KeyError: If column name not found + + Example: + >>> User.cols['user_id'] # Returns Column object + """ + if item not in self._columns: + raise KeyError(f"{item} is not a valid column name") + return self._columns[item] + + def __getattr__(self, item: str) -> Column: + """ + Fallback for attribute access (shouldn't be needed due to setattr). 
+ + Args: + item: Column name + + Returns: + Column object + + Raises: + AttributeError: If column name not found + """ + if item.startswith("_"): + # Allow access to private attributes + raise AttributeError( + f"'{type(self).__name__}' object has no attribute '{item}'" + ) + + if item in self._columns: + return self._columns[item] + raise AttributeError(f"{item} is not a valid column name") + + +class MooseModelMeta(type(BaseModel)): + """ + Metaclass for MooseModel that adds Column descriptors. + + This metaclass runs after Pydantic's metaclass creates the model class. + It adds Column objects as class attributes for each model field, enabling: + 1. Direct column access: Model.field_name returns Column object + 2. LSP autocomplete: LSP sees field_name from annotations + 3. Backward compatibility: Model.cols.field_name also works + + The Column descriptors coexist with Pydantic's instance fields because + Pydantic separates class-level attributes from instance-level fields. + """ + + def __new__(mcs, name, bases, namespace, **kwargs): + """ + Create new MooseModel class with Column descriptors. + + Args: + name: Name of the class being created + bases: Base classes + namespace: Class namespace dictionary + **kwargs: Additional keyword arguments + + Returns: + New class with Column descriptors added + """ + # Let Pydantic's metaclass create the class first + cls = super().__new__(mcs, name, bases, namespace, **kwargs) + + # Skip for MooseModel base class itself + if name == "MooseModel": + return cls + + # Add Column descriptors if this is a model with fields + if hasattr(cls, "model_fields") and cls.model_fields: + # Generate columns from model fields + columns = _to_columns(cls) + + # Add Column object for each field as class attribute + # This enables: Model.field_name → Column + for col in columns: + setattr(cls, col.name, col) + + # Add .cols namespace for backward compatibility + # This enables: Model.cols.field_name → Column + cls.cols = ColsNamespace(columns) + + return cls + + +class MooseModel(BaseModel, metaclass=MooseModelMeta): + """ + Base class for Moose data models with LSP-friendly column access. + + MooseModel extends Pydantic's BaseModel by adding Column descriptors + for each field, enabling autocomplete when constructing SQL queries. + + Usage Patterns: + + 1. Direct column access (NEW - with autocomplete): + >>> class User(MooseModel): + ... user_id: int + ... email: str + >>> query = f"SELECT {User.user_id:col}, {User.email:col} FROM users" + >>> # LSP provides autocomplete for User.user_id + + 2. Legacy .cols access (OLD - backward compatible): + >>> query = f"SELECT {User.cols.user_id} FROM users" + >>> # Works but requires type stubs for autocomplete + + 3. Pydantic instance behavior (unchanged): + >>> user = User(user_id=123, email="test@example.com") + >>> user.user_id # Returns 123 (int), not Column + + The metaclass ensures: + - Class attributes (Model.field) return Column objects + - Instance attributes (instance.field) return actual values + - Full Pydantic compatibility maintained + """ + + pass diff --git a/packages/py-moose-lib/moose_lib/dmv2/olap_table.py b/packages/py-moose-lib/moose_lib/dmv2/olap_table.py index 16d4097c7a..d8216fb9bd 100644 --- a/packages/py-moose-lib/moose_lib/dmv2/olap_table.py +++ b/packages/py-moose-lib/moose_lib/dmv2/olap_table.py @@ -4,6 +4,7 @@ This module provides classes for defining and configuring OLAP tables, particularly for ClickHouse. 
""" + import json import warnings from clickhouse_connect import get_client @@ -11,7 +12,17 @@ from clickhouse_connect.driver.exceptions import ClickHouseError from dataclasses import dataclass from pydantic import BaseModel -from typing import List, Optional, Any, Literal, Union, Tuple, TypeVar, Generic, Iterator +from typing import ( + List, + Optional, + Any, + Literal, + Union, + Tuple, + TypeVar, + Generic, + Iterator, +) from ..blocks import ClickHouseEngines, EngineConfig from ..commons import Logger from ..config.runtime import RuntimeClickHouseConfig @@ -33,6 +44,7 @@ class InsertOptions: validate: Whether to validate data against schema before insertion. skip_validation_on_retry: Whether to skip validation for individual records during retries. """ + allow_errors: Optional[int] = None allow_errors_ratio: Optional[float] = None strategy: Literal["fail-fast", "discard", "isolate"] = "fail-fast" @@ -49,6 +61,7 @@ class FailedRecord(Generic[T]): error: The error message describing why the insertion failed. index: Optional index of this record in the original batch. """ + record: T error: str index: Optional[int] = None @@ -64,6 +77,7 @@ class ValidationError: index: Optional index of this record in the original batch. path: Optional path to the field that failed validation. """ + record: Any error: str index: Optional[int] = None @@ -79,6 +93,7 @@ class ValidationResult(Generic[T]): invalid: Records that failed validation with detailed error information. total: Total number of records processed. """ + valid: List[T] invalid: List[ValidationError] total: int @@ -94,6 +109,7 @@ class InsertResult(Generic[T]): total: Total number of records processed. failed_records: Detailed information about failed records (if record isolation was used). """ + successful: int failed: int total: int @@ -153,13 +169,23 @@ class TableIndex(BaseModel): def model_post_init(self, __context): has_fields = bool(self.order_by_fields) - has_expr = isinstance(self.order_by_expression, str) and len(self.order_by_expression) > 0 + has_expr = ( + isinstance(self.order_by_expression, str) + and len(self.order_by_expression) > 0 + ) if has_fields and has_expr: - raise ValueError("Provide either order_by_fields or order_by_expression, not both.") + raise ValueError( + "Provide either order_by_fields or order_by_expression, not both." 
+ ) # Validate that non-MergeTree engines don't have unsupported clauses if self.engine: - from ..blocks import S3Engine, S3QueueEngine, BufferEngine, DistributedEngine + from ..blocks import ( + S3Engine, + S3QueueEngine, + BufferEngine, + DistributedEngine, + ) # S3QueueEngine, BufferEngine, and DistributedEngine don't support ORDER BY # Note: S3Engine DOES support ORDER BY (unlike S3Queue) @@ -174,7 +200,12 @@ def model_post_init(self, __context): ) # All non-MergeTree engines don't support SAMPLE BY - engines_without_sample_by = (S3Engine, S3QueueEngine, BufferEngine, DistributedEngine) + engines_without_sample_by = ( + S3Engine, + S3QueueEngine, + BufferEngine, + DistributedEngine, + ) if isinstance(self.engine, engines_without_sample_by): engine_name = type(self.engine).__name__ @@ -186,7 +217,11 @@ def model_post_init(self, __context): # Only S3QueueEngine, BufferEngine, and DistributedEngine don't support PARTITION BY # S3Engine DOES support PARTITION BY - engines_without_partition_by = (S3QueueEngine, BufferEngine, DistributedEngine) + engines_without_partition_by = ( + S3QueueEngine, + BufferEngine, + DistributedEngine, + ) if isinstance(self.engine, engines_without_partition_by): engine_name = type(self.engine).__name__ @@ -212,6 +247,7 @@ class OlapTable(TypedMooseResource, Generic[T]): model_type (type[T]): The Pydantic model associated with this table. kind: The kind of the table (e.g., "OlapTable"). """ + config: OlapConfig kind: str = "OlapTable" _memoized_client: Optional[Client] = None @@ -226,7 +262,15 @@ def __init__(self, name: str, config: OlapConfig = OlapConfig(), **kwargs): self.config = config self.metadata = config.metadata self._column_list = _to_columns(self._t) + + # Create Cols instance for backward compatibility + # This works for both BaseModel and MooseModel self._cols = Cols(self._column_list) + + # NOTE: For MooseModel types, columns are also accessible directly + # on the model class (e.g., MyModel.field_name) thanks to the metaclass. + # This provides LSP autocomplete without requiring .cols access. + registry_key = f"{name}_{config.version}" if config.version else name if registry_key in _tables: raise ValueError( @@ -243,13 +287,19 @@ def __init__(self, name: str, config: OlapConfig = OlapConfig(), **kwargs): ReplicatedSummingMergeTreeEngine, ) - if isinstance(config.engine, ( - ReplicatedMergeTreeEngine, - ReplicatedReplacingMergeTreeEngine, - ReplicatedAggregatingMergeTreeEngine, - ReplicatedSummingMergeTreeEngine, - )): - if config.engine.keeper_path is not None or config.engine.replica_name is not None: + if isinstance( + config.engine, + ( + ReplicatedMergeTreeEngine, + ReplicatedReplacingMergeTreeEngine, + ReplicatedAggregatingMergeTreeEngine, + ReplicatedSummingMergeTreeEngine, + ), + ): + if ( + config.engine.keeper_path is not None + or config.engine.replica_name is not None + ): raise ValueError( f"OlapTable {name}: Cannot specify both 'cluster' and explicit replication params " f"('keeper_path' or 'replica_name'). " @@ -286,7 +336,7 @@ def __init__(self, name: str, config: OlapConfig = OlapConfig(), **kwargs): f"Table '{name}' uses deprecated ClickHouseEngines enum. " f"Please migrate to engine configuration classes (e.g., {config.engine.value}Engine).", DeprecationWarning, - stacklevel=2 + stacklevel=2, ) @property @@ -323,6 +373,7 @@ def _create_config_hash(self, clickhouse_config: RuntimeClickHouseConfig) -> str A 16-character hex hash of the configuration. 
""" import hashlib + config_string = ( f"{clickhouse_config.host}:{clickhouse_config.port}:" f"{clickhouse_config.username}:{clickhouse_config.password}:" @@ -361,7 +412,7 @@ def _get_memoized_client(self) -> Client: try: # Create new client with standard configuration - interface = 'https' if clickhouse_config.use_ssl else 'http' + interface = "https" if clickhouse_config.use_ssl else "http" client = get_client( interface=interface, host=clickhouse_config.host, @@ -404,7 +455,7 @@ def validate_record(self, record: Any) -> Tuple[Optional[T], Optional[str]]: Returns: Tuple of (validated_data, error_message). If validation succeeds, validated_data will be the validated record and error_message will be None. - If validation fails for any reason, validated_data will be None and error_message + If validation fails for any reason, validated_data will be None and error_message will contain the error details. """ try: @@ -430,23 +481,19 @@ def validate_records(self, data: List[Any]) -> ValidationResult[T]: if validated is not None: valid.append(validated) else: - invalid.append(ValidationError( - record=record, - error=error or "Validation failed", - index=i, - path="root" - )) - - return ValidationResult( - valid=valid, - invalid=invalid, - total=len(data) - ) + invalid.append( + ValidationError( + record=record, + error=error or "Validation failed", + index=i, + path="root", + ) + ) + + return ValidationResult(valid=valid, invalid=invalid, total=len(data)) def _validate_insert_parameters( - self, - data: Union[List[T], Iterator[T]], - options: Optional[InsertOptions] + self, data: Union[List[T], Iterator[T]], options: Optional[InsertOptions] ) -> Tuple[bool, str, bool]: """Validate input parameters and strategy compatibility. @@ -468,16 +515,18 @@ def _validate_insert_parameters( ) if is_stream and should_validate: - print("Warning: Validation is not supported with stream input. Validation will be skipped.") + print( + "Warning: Validation is not supported with stream input. Validation will be skipped." + ) return is_stream, strategy, should_validate def _perform_pre_insertion_validation( - self, - data: List[T], - should_validate: bool, - strategy: str, - options: Optional[InsertOptions] = None + self, + data: List[T], + should_validate: bool, + strategy: str, + options: Optional[InsertOptions] = None, ) -> Tuple[List[T], List[ValidationError]]: """Perform pre-insertion validation for array data. @@ -500,10 +549,7 @@ def _perform_pre_insertion_validation( if validation_errors: self._handle_validation_errors( - validation_errors, - strategy, - data, - options + validation_errors, strategy, data, options ) if strategy == "discard": @@ -522,11 +568,11 @@ def _perform_pre_insertion_validation( return data, [] def _handle_validation_errors( - self, - validation_errors: List[ValidationError], - strategy: str, - data: List[T], - options: Optional[InsertOptions] + self, + validation_errors: List[ValidationError], + strategy: str, + data: List[T], + options: Optional[InsertOptions], ) -> None: """Handle validation errors based on the specified strategy. 
@@ -542,17 +588,13 @@ def _handle_validation_errors( f"Validation failed for record at index {first_error.index}: {first_error.error}" ) elif strategy == "discard": - self._check_validation_thresholds( - validation_errors, - len(data), - options - ) + self._check_validation_thresholds(validation_errors, len(data), options) def _check_validation_thresholds( - self, - validation_errors: List[ValidationError], - total_records: int, - options: Optional[InsertOptions] + self, + validation_errors: List[ValidationError], + total_records: int, + options: Optional[InsertOptions], ) -> None: """Check if validation errors exceed configured thresholds. @@ -564,15 +606,21 @@ def _check_validation_thresholds( validation_failed_count = len(validation_errors) validation_failed_ratio = validation_failed_count / total_records - if (options and options.allow_errors is not None and - validation_failed_count > options.allow_errors): + if ( + options + and options.allow_errors is not None + and validation_failed_count > options.allow_errors + ): raise ValueError( f"Too many validation failures: {validation_failed_count} > {options.allow_errors}. " f"Errors: {', '.join(e.error for e in validation_errors)}" ) - if (options and options.allow_errors_ratio is not None and - validation_failed_ratio > options.allow_errors_ratio): + if ( + options + and options.allow_errors_ratio is not None + and validation_failed_ratio > options.allow_errors_ratio + ): raise ValueError( f"Validation failure ratio too high: {validation_failed_ratio:.3f} > " f"{options.allow_errors_ratio}. Errors: {', '.join(e.error for e in validation_errors)}" @@ -583,36 +631,44 @@ def _to_json_each_row(self, records: list[dict]) -> bytes: def _with_wait_end_settings(self, settings: dict) -> dict: """Add wait_end_of_query setting to ensure at least once delivery for INSERT operations. 
- + Args: settings: Base settings dictionary - + Returns: Settings dictionary with wait_end_of_query added """ return {**settings, "wait_end_of_query": 1} def _prepare_insert_options( - self, - table_name: str, - data: Union[List[T], Iterator[T]], - validated_data: List[T], - is_stream: bool, - strategy: str, - options: Optional[InsertOptions] + self, + table_name: str, + data: Union[List[T], Iterator[T]], + validated_data: List[T], + is_stream: bool, + strategy: str, + options: Optional[InsertOptions], ) -> tuple[str, bytes, dict]: """Prepare insert options for JSONEachRow raw SQL insert, returning settings dict.""" # Base settings for all inserts base_settings = { "date_time_input_format": "best_effort", - "max_insert_block_size": 100000 if is_stream else min(len(validated_data), 100000), + "max_insert_block_size": 100000 + if is_stream + else min(len(validated_data), 100000), "max_block_size": 65536, "async_insert": 1 if len(validated_data) > 1000 else 0, "wait_for_async_insert": 1, } settings = self._with_wait_end_settings(base_settings) - if (strategy == "discard" and options and - (options.allow_errors is not None or options.allow_errors_ratio is not None)): + if ( + strategy == "discard" + and options + and ( + options.allow_errors is not None + or options.allow_errors_ratio is not None + ) + ): if options.allow_errors is not None: settings["input_format_allow_errors_num"] = options.allow_errors if options.allow_errors_ratio is not None: @@ -625,7 +681,7 @@ def _prepare_insert_options( validated_data = [validated_data] dict_data = [] for record in validated_data: - if hasattr(record, 'model_dump'): + if hasattr(record, "model_dump"): record_dict = record.model_dump() else: record_dict = record @@ -637,13 +693,13 @@ def _prepare_insert_options( return quote_identifier(table_name), json_lines, settings def _create_success_result( - self, - data: Union[List[T], Iterator[T]], - validated_data: List[T], - validation_errors: List[ValidationError], - is_stream: bool, - should_validate: bool, - strategy: str + self, + data: Union[List[T], Iterator[T]], + validated_data: List[T], + validation_errors: List[ValidationError], + is_stream: bool, + should_validate: bool, + strategy: str, ) -> InsertResult[T]: """Create appropriate result based on input type. @@ -659,11 +715,7 @@ def _create_success_result( InsertResult with appropriate counts and error information. 
""" if is_stream: - return InsertResult( - successful=-1, - failed=0, - total=-1 - ) + return InsertResult(successful=-1, failed=0, total=-1) inserted_count = len(validated_data) total_processed = len(data) if not is_stream else inserted_count @@ -671,32 +723,30 @@ def _create_success_result( result = InsertResult( successful=inserted_count, failed=len(validation_errors) if should_validate else 0, - total=total_processed + total=total_processed, ) - if (should_validate and validation_errors and strategy == "discard"): + if should_validate and validation_errors and strategy == "discard": result.failed_records = [ FailedRecord( record=ve.record, error=f"Validation error: {ve.error}", - index=ve.index - ) for ve in validation_errors + index=ve.index, + ) + for ve in validation_errors ] return result def _retry_individual_records( - self, - client: Client, - records: List[T], - options: InsertOptions + self, client: Client, records: List[T], options: InsertOptions ) -> InsertResult[T]: successful: List[T] = [] failed: List[FailedRecord[T]] = [] table_name = quote_identifier(self._generate_table_name()) records_dict = [] for record in records: - if hasattr(record, 'model_dump'): + if hasattr(record, "model_dump"): record_dict = record.model_dump() else: record_dict = record @@ -705,51 +755,52 @@ def _retry_individual_records( RETRY_BATCH_SIZE = 10 for i in range(0, len(records_dict), RETRY_BATCH_SIZE): - batch = records_dict[i:i + RETRY_BATCH_SIZE] + batch = records_dict[i : i + RETRY_BATCH_SIZE] try: sql = f"INSERT INTO {table_name} FORMAT JSONEachRow" base_settings = { "date_time_input_format": "best_effort", "max_insert_block_size": RETRY_BATCH_SIZE, "max_block_size": RETRY_BATCH_SIZE, - "async_insert": 0 + "async_insert": 0, } settings = self._with_wait_end_settings(base_settings) json_lines = self._to_json_each_row(batch) client.command(sql, data=json_lines, settings=settings) - successful.extend(records[i:i + RETRY_BATCH_SIZE]) + successful.extend(records[i : i + RETRY_BATCH_SIZE]) except ClickHouseError as batch_error: for j, record_dict in enumerate(batch): try: sql = f"INSERT INTO {table_name} FORMAT JSONEachRow" - individual_settings = self._with_wait_end_settings({ - "date_time_input_format": "best_effort", - "async_insert": 0 - }) + individual_settings = self._with_wait_end_settings( + {"date_time_input_format": "best_effort", "async_insert": 0} + ) json_line = self._to_json_each_row([record_dict]) - client.command(sql, data=json_line, settings=individual_settings) + client.command( + sql, data=json_line, settings=individual_settings + ) successful.append(records[i + j]) except ClickHouseError as error: - failed.append(FailedRecord( - record=records[i + j], - error=str(error), - index=i + j - )) + failed.append( + FailedRecord( + record=records[i + j], error=str(error), index=i + j + ) + ) return InsertResult( successful=len(successful), failed=len(failed), total=len(records), - failed_records=failed if failed else None + failed_records=failed if failed else None, ) def _insert_array_data( - self, - client: Client, - table_name: str, - data: List[T], - should_validate: bool, - strategy: str, - options: Optional[InsertOptions] + self, + client: Client, + table_name: str, + data: List[T], + should_validate: bool, + strategy: str, + options: Optional[InsertOptions], ) -> InsertResult[T]: """Insert array data into the table with validation and error handling. @@ -765,19 +816,11 @@ def _insert_array_data( InsertResult with detailed success/failure information. 
""" validated_data, validation_errors = self._perform_pre_insertion_validation( - data, - should_validate, - strategy, - options + data, should_validate, strategy, options ) try: table_name, json_lines, settings = self._prepare_insert_options( - table_name, - data, - validated_data, - False, - strategy, - options + table_name, data, validated_data, False, strategy, options ) sql = f"INSERT INTO {table_name} FORMAT JSONEachRow" client.command(sql, data=json_lines, settings=settings) @@ -787,7 +830,7 @@ def _insert_array_data( validation_errors, False, should_validate, - strategy + strategy, ) except ClickHouseError as e: if strategy == "fail-fast": @@ -798,16 +841,16 @@ def _insert_array_data( return self._retry_individual_records( client, validated_data if not options.skip_validation_on_retry else data, - options + options, ) def _insert_stream( - self, - client: Client, - table_name: str, - data: Iterator[T], - strategy: str, - options: Optional[InsertOptions] + self, + client: Client, + table_name: str, + data: Iterator[T], + strategy: str, + options: Optional[InsertOptions], ) -> InsertResult[T]: """Insert data from an iterator into the table. @@ -825,17 +868,12 @@ def _insert_stream( total_inserted = 0 _, _, settings = self._prepare_insert_options( - table_name, - data, - [], - True, - strategy, - options + table_name, data, [], True, strategy, options ) for record in data: # Convert record to dict using model_dump if available - if hasattr(record, 'model_dump'): + if hasattr(record, "model_dump"): batch.append(record.model_dump()) else: batch.append(record) @@ -860,9 +898,7 @@ def _insert_stream( total_inserted += len(batch) return InsertResult( - successful=total_inserted, - failed=0, - total=total_inserted + successful=total_inserted, failed=0, total=total_inserted ) except ClickHouseError as e: if strategy == "fail-fast": @@ -870,9 +906,7 @@ def _insert_stream( raise ValueError(f"Too many errors during stream insert: {e}") def insert( - self, - data: Union[List[T], Iterator[T]], - options: Optional[InsertOptions] = None + self, data: Union[List[T], Iterator[T]], options: Optional[InsertOptions] = None ) -> InsertResult[T]: """Insert data into the table with validation and error handling. @@ -924,7 +958,9 @@ def user_stream(): ``` """ options = options or InsertOptions() - is_stream, strategy, should_validate = self._validate_insert_parameters(data, options) + is_stream, strategy, should_validate = self._validate_insert_parameters( + data, options + ) if (is_stream and not data) or (not is_stream and not data): return InsertResult(successful=0, failed=0, total=0) @@ -935,15 +971,12 @@ def user_stream(): return self._insert_stream(client, table_name, data, strategy, options) else: return self._insert_array_data( - client, - table_name, - data, - should_validate, - strategy, - options + client, table_name, data, should_validate, strategy, options ) - def _map_to_clickhouse_record(self, record: dict, columns: Optional[List[Column]] = None) -> dict: + def _map_to_clickhouse_record( + self, record: dict, columns: Optional[List[Column]] = None + ) -> dict: """ Recursively transforms a record to match ClickHouse's JSONEachRow requirements. 
@@ -972,8 +1005,9 @@ def _map_to_clickhouse_record(self, record: dict, columns: Optional[List[Column] if is_array_nested_type(data_type): # For Array(Nested(...)), wrap each item in its own array and recurse - if (isinstance(value, list) and - (len(value) == 0 or isinstance(value[0], dict))): + if isinstance(value, list) and ( + len(value) == 0 or isinstance(value[0], dict) + ): nested_columns = data_type.element_type.columns result[col.name] = [ [self._map_to_clickhouse_record(item, nested_columns)] @@ -982,7 +1016,9 @@ def _map_to_clickhouse_record(self, record: dict, columns: Optional[List[Column] elif is_nested_type(data_type): # For Nested struct (not array), recurse into it if value and isinstance(value, dict): - result[col.name] = self._map_to_clickhouse_record(value, data_type.columns) + result[col.name] = self._map_to_clickhouse_record( + value, data_type.columns + ) # All other types: leave as is return result diff --git a/packages/py-moose-lib/tests/test_backward_compatibility.py b/packages/py-moose-lib/tests/test_backward_compatibility.py new file mode 100644 index 0000000000..29dc776da7 --- /dev/null +++ b/packages/py-moose-lib/tests/test_backward_compatibility.py @@ -0,0 +1,85 @@ +""" +Tests ensuring MooseModel doesn't break existing BaseModel usage +""" + +from pydantic import BaseModel +from moose_lib.dmv2 import OlapTable, OlapConfig, MooseModel +from moose_lib.data_models import Column + + +def test_basemodel_olaptable_still_works(): + """Existing code using BaseModel should continue working""" + + class LegacyUser(BaseModel): + user_id: int + email: str + + # Old pattern still works + table = OlapTable[LegacyUser]("legacy_users") + + assert table.name == "legacy_users" + assert hasattr(table, "cols") + assert isinstance(table.cols.user_id, Column) + + +def test_moosemodel_and_basemodel_can_coexist(): + """Projects can mix MooseModel and BaseModel""" + + class NewModel(MooseModel): + new_field: int + + class OldModel(BaseModel): + old_field: str + + new_table = OlapTable[NewModel]("new_table") + old_table = OlapTable[OldModel]("old_table") + + # Both work + assert new_table.name == "new_table" + assert old_table.name == "old_table" + + # New model has direct column access + assert isinstance(NewModel.new_field, Column) + + # Old model doesn't (expected) + assert ( + not isinstance(OldModel.old_field, Column) + if hasattr(OldModel, "old_field") + else True + ) + + +def test_moosemodel_cols_matches_direct_access(): + """MooseModel.cols.field and MooseModel.field should return same Column""" + + class Analytics(MooseModel): + event_id: int + timestamp: str + + # Both access methods return the same Column + direct = Analytics.event_id + via_cols = Analytics.cols.event_id + + assert direct.name == via_cols.name + assert direct.data_type == via_cols.data_type + + +def test_existing_query_patterns_unchanged(): + """Existing query patterns should work identically""" + + class Metrics(MooseModel): + metric_id: int + value: float + + table = OlapTable[Metrics]("metrics") + + # Pattern 1: Using table.cols (existing pattern) + col_via_table = table.cols.metric_id + assert isinstance(col_via_table, Column) + + # Pattern 2: Using Model.cols (also existing) + col_via_model = Metrics.cols.metric_id + assert isinstance(col_via_model, Column) + + # Both are equivalent + assert col_via_table.name == col_via_model.name diff --git a/packages/py-moose-lib/tests/test_column_formatting.py b/packages/py-moose-lib/tests/test_column_formatting.py new file mode 100644 index 0000000000..c11d7ac4b1 
--- /dev/null +++ b/packages/py-moose-lib/tests/test_column_formatting.py @@ -0,0 +1,80 @@ +"""Tests for Column string formatting and interpolation""" + +from moose_lib.data_models import Column + + +def test_column_str_returns_quoted_identifier(): + """Column.__str__() should return backtick-quoted identifier""" + col = Column( + name="user_id", + data_type="String", + required=True, + unique=False, + primary_key=False, + ) + + assert str(col) == "`user_id`" + + +def test_column_format_spec_col(): + """Column with :col format spec should return quoted identifier""" + col = Column( + name="email", data_type="String", required=True, unique=False, primary_key=False + ) + + result = f"{col:col}" + assert result == "`email`" + + +def test_column_format_spec_c(): + """Column with :c format spec should return quoted identifier""" + col = Column( + name="timestamp", + data_type="DateTime", + required=True, + unique=False, + primary_key=False, + ) + + result = f"{col:c}" + assert result == "`timestamp`" + + +def test_column_format_spec_empty(): + """Column with no format spec should return quoted identifier""" + col = Column( + name="count", data_type="Int64", required=True, unique=False, primary_key=False + ) + + result = f"{col}" + assert result == "`count`" + + +def test_column_with_special_chars(): + """Column names with hyphens should be quoted""" + col = Column( + name="user-id", + data_type="String", + required=True, + unique=False, + primary_key=False, + ) + + assert str(col) == "`user-id`" + + +def test_column_in_fstring_interpolation(): + """Column should work in f-string SQL construction""" + user_id_col = Column( + name="user_id", + data_type="String", + required=True, + unique=False, + primary_key=False, + ) + email_col = Column( + name="email", data_type="String", required=True, unique=False, primary_key=False + ) + + query = f"SELECT {user_id_col:col}, {email_col:col} FROM users" + assert query == "SELECT `user_id`, `email` FROM users" diff --git a/packages/py-moose-lib/tests/test_moose_model.py b/packages/py-moose-lib/tests/test_moose_model.py new file mode 100644 index 0000000000..c0fd932fa3 --- /dev/null +++ b/packages/py-moose-lib/tests/test_moose_model.py @@ -0,0 +1,153 @@ +"""Tests for MooseModel base class with column descriptors""" + +from pydantic import BaseModel +from moose_lib.dmv2.moose_model import MooseModel +from moose_lib.data_models import Column + + +def test_moosemodel_inherits_from_basemodel(): + """MooseModel should be a valid Pydantic BaseModel""" + + class User(MooseModel): + user_id: int + email: str + + # Should work as normal Pydantic model + instance = User(user_id=123, email="test@example.com") + assert instance.user_id == 123 + assert instance.email == "test@example.com" + + +def test_moosemodel_adds_column_descriptors(): + """MooseModel metaclass should add Column descriptors for each field""" + + class User(MooseModel): + user_id: int + email: str + age: int + + # Check Column descriptors exist at class level + assert hasattr(User, "user_id") + assert isinstance(User.user_id, Column) + assert User.user_id.name == "user_id" + + assert hasattr(User, "email") + assert isinstance(User.email, Column) + assert User.email.name == "email" + + assert hasattr(User, "age") + assert isinstance(User.age, Column) + assert User.age.name == "age" + + +def test_moosemodel_column_format_spec(): + """Column descriptors should support format specs""" + + class Product(MooseModel): + product_id: int + product_name: str + + # Test format spec + result = 
f"{Product.product_id:col}" + assert result == "`product_id`" + + result = f"{Product.product_name:c}" + assert result == "`product_name`" + + +def test_moosemodel_adds_cols_property(): + """MooseModel should add .cols property for backward compatibility""" + + class Order(MooseModel): + order_id: int + total: float + + # Check .cols property exists + assert hasattr(Order, "cols") + assert hasattr(Order.cols, "order_id") + assert hasattr(Order.cols, "total") + + # Verify .cols.field returns Column + assert isinstance(Order.cols.order_id, Column) + assert Order.cols.order_id.name == "order_id" + + +def test_moosemodel_instance_attributes_separate(): + """Instance attributes should be separate from class Column descriptors""" + + class User(MooseModel): + user_id: int + email: str + + # Class level: Column objects + assert isinstance(User.user_id, Column) + + # Instance level: actual values + instance = User(user_id=456, email="user@test.com") + assert instance.user_id == 456 + assert isinstance(instance.user_id, int) + assert instance.email == "user@test.com" + + +def test_moosemodel_backward_compatible_with_basemodel(): + """MooseModel should be usable wherever BaseModel is expected""" + + class User(MooseModel): + user_id: int + email: str + + # Check it's a BaseModel subclass + assert issubclass(User, BaseModel) + + # Check Pydantic features work + assert hasattr(User, "model_fields") + assert hasattr(User, "model_validate") + assert hasattr(User, "model_dump") + + instance = User(user_id=789, email="another@test.com") + dumped = instance.model_dump() + assert dumped == {"user_id": 789, "email": "another@test.com"} + + +def test_moosemodel_empty_model(): + """MooseModel should handle models with no fields""" + + class EmptyModel(MooseModel): + pass + + # Should not crash + instance = EmptyModel() + assert instance is not None + + +def test_moosemodel_cols_bracket_access(): + """MooseModel.cols should support bracket notation""" + + class User(MooseModel): + user_id: int + email: str + + # Bracket access + col = User.cols["user_id"] + assert isinstance(col, Column) + assert col.name == "user_id" + + col2 = User.cols["email"] + assert col2.name == "email" + + +def test_moosemodel_in_sql_fstring(): + """MooseModel columns should work in SQL f-strings""" + + class Analytics(MooseModel): + event_id: int + timestamp: str + value: float + + # Test complete SQL construction + query = f"SELECT {Analytics.event_id:col}, {Analytics.timestamp:col}, {Analytics.value:col} FROM analytics WHERE {Analytics.event_id:col} > 100" + + expected = ( + "SELECT `event_id`, `timestamp`, `value` FROM analytics WHERE `event_id` > 100" + ) + assert query == expected diff --git a/packages/py-moose-lib/tests/test_olap_table_moosemodel.py b/packages/py-moose-lib/tests/test_olap_table_moosemodel.py new file mode 100644 index 0000000000..85741ced15 --- /dev/null +++ b/packages/py-moose-lib/tests/test_olap_table_moosemodel.py @@ -0,0 +1,89 @@ +"""Tests for OlapTable with MooseModel integration""" + +from moose_lib.dmv2 import OlapTable, OlapConfig, MooseModel +from moose_lib.data_models import Column + + +def test_olaptable_works_with_moosemodel(): + """OlapTable should accept MooseModel types""" + + class User(MooseModel): + user_id: int + email: str + + table = OlapTable[User]("users", OlapConfig()) + + assert table.name == "users" + assert table.model_type == User + + +def test_olaptable_moosemodel_direct_column_access(): + """OlapTable with MooseModel should enable direct column access via model""" + + class 
Product(MooseModel): + product_id: int + name: str + price: float + + table = OlapTable[Product]("products") + + # Access columns through the model class + assert isinstance(Product.product_id, Column) + assert Product.product_id.name == "product_id" + + # Should work in f-strings + query = f"SELECT {Product.product_id:col}, {Product.name:col} FROM {table.name}" + assert query == "SELECT `product_id`, `name` FROM products" + + +def test_olaptable_moosemodel_cols_backward_compat(): + """OlapTable with MooseModel should maintain .cols backward compatibility""" + + class Order(MooseModel): + order_id: int + total: float + + table = OlapTable[Order]("orders") + + # OLD pattern still works + assert hasattr(Order, "cols") + assert isinstance(Order.cols.order_id, Column) + + # Can use in queries + query = f"SELECT {Order.cols.order_id} FROM orders" + assert "`order_id`" in query + + +def test_olaptable_with_basemodel_still_works(): + """OlapTable should still work with regular BaseModel (backward compat)""" + + from pydantic import BaseModel + + class LegacyModel(BaseModel): + legacy_id: int + legacy_name: str + + # Should not crash + table = OlapTable[LegacyModel]("legacy") + + # Old .cols pattern should still work + assert hasattr(table, "cols") + + # Note: LegacyModel.legacy_id won't be a Column (no metaclass) + # This is expected - only MooseModel gets the new feature + + +def test_olaptable_model_property(): + """OlapTable should provide access to the model class""" + + class Analytics(MooseModel): + event_id: int + timestamp: str + + table = OlapTable[Analytics]("analytics") + + # Should be able to access model type + assert table.model_type == Analytics + + # Can use for column access + assert isinstance(table.model_type.event_id, Column) diff --git a/templates/python-experimental/.gitignore b/templates/python-experimental/.gitignore new file mode 100644 index 0000000000..19dcb0d162 --- /dev/null +++ b/templates/python-experimental/.gitignore @@ -0,0 +1,57 @@ +# Moose specific +.moose +.sloan + +# Python bytecode and cache +__pycache__ +*.pyc +*.pyo +*.pyd +.pytest_cache +.mypy_cache +.hypothesis +.coverage + +# Python virtual environments +.Python +env +.venv +venv +ENV +env.bak + +# IDE and editor files +.spyderproject +.ropeproject +.idea +*.ipynb_checkpoints +.cache +.cursor + +# Build and distribution +*.so +*.egg +*.egg-info +dist +build +develop-eggs +downloads +eggs +lib +lib64 +parts +sdist +var +wheels +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# Coverage reports +cover +*.cover + +# OS specific +.DS_Store + diff --git a/templates/python-experimental/.vscode/extensions.json b/templates/python-experimental/.vscode/extensions.json new file mode 100644 index 0000000000..dbc86167a6 --- /dev/null +++ b/templates/python-experimental/.vscode/extensions.json @@ -0,0 +1,9 @@ +{ + "recommendations": [ + "frigus02.vscode-sql-tagged-template-literals-syntax-only", + "mtxr.sqltools", + "ultram4rine.sqltools-clickhouse-driver", + "jeppeandersen.vscode-kafka", + "rangav.vscode-thunder-client" + ] +} diff --git a/templates/python-experimental/.vscode/settings.json b/templates/python-experimental/.vscode/settings.json new file mode 100644 index 0000000000..48a4637229 --- /dev/null +++ b/templates/python-experimental/.vscode/settings.json @@ -0,0 +1,17 @@ +{ + "sqltools.connections": [ + { + "server": "localhost", + "port": 18123, + "useHTTPS": false, + "database": "local", + "username": "panda", + "enableTls": false, + "password": "pandapass", + "driver": "ClickHouse", + "name": "moose 
clickhouse" + } + ], + "python.analysis.extraPaths": [".moose/versions"], + "python.analysis.typeCheckingMode": "basic" +} diff --git a/templates/python-experimental/README.md b/templates/python-experimental/README.md new file mode 100644 index 0000000000..9de49dd7ff --- /dev/null +++ b/templates/python-experimental/README.md @@ -0,0 +1,104 @@ +# Template: Python Experimental + +This is an experimental Python-based Moose template that demonstrates the new **MooseModel** feature, which provides LSP autocomplete for column names when constructing SQL queries. + +## What's New: MooseModel Autocomplete + +This template showcases the new `MooseModel` base class that enables IDE autocomplete for column names: + +```python +from moose_lib import MooseModel + +class BarAggregated(MooseModel): + day_of_month: int + total_rows: int + rows_with_text: int + +# NEW: Direct column access with autocomplete! +query = f""" +SELECT {BarAggregated.day_of_month:col}, + {BarAggregated.total_rows:col} +FROM bar_aggregated +""" +``` + +See `app/apis/bar.py` for complete examples using both f-string and Query builder patterns. + +[![PyPI Version](https://img.shields.io/pypi/v/moose-cli?logo=python)](https://pypi.org/project/moose-cli/) +[![Moose Community](https://img.shields.io/badge/slack-moose_community-purple.svg?logo=slack)](https://join.slack.com/t/moose-community/shared_invite/zt-2fjh5n3wz-cnOmM9Xe9DYAgQrNu8xKxg) +[![Docs](https://img.shields.io/badge/quick_start-docs-blue.svg)](https://docs.fiveonefour.com/moose/getting-started/quickstart) +[![MIT license](https://img.shields.io/badge/license-MIT-yellow.svg)](LICENSE) + +## Getting Started + +### Prerequisites + +* [Docker Desktop](https://www.docker.com/products/docker-desktop/) +* [Python](https://www.python.org/downloads/) (version 3.8+) +* [An Anthropic API Key](https://docs.anthropic.com/en/api/getting-started) +* [Cursor](https://www.cursor.com/) or [Claude Desktop](https://claude.ai/download) + +### Installation + +**⚠️ Important:** This experimental template requires the latest development version of `moose_lib` with MooseModel support. + +#### Option 1: Install from Local Development (Recommended for testing) + +If you're working from the moosestack repository: + +```bash +# 1. Create your project +moose init --template python-experimental + +# 2. Navigate to project +cd + +# 3. Create and activate virtual environment +python3 -m venv .venv +source .venv/bin/activate + +# 4. Install the local py-moose-lib in development mode +pip install -e ../../packages/py-moose-lib + +# 5. Install other dependencies +pip install -r requirements.txt + +# 6. Run Moose +moose dev +``` + +#### Option 2: Wait for Published Release + +The MooseModel feature will be available in the next published release of `moose-cli`. Once published, you can install normally: + +```bash +pip install moose-cli +moose init --template python-experimental +cd +pip install -r requirements.txt +moose dev +``` + +You are ready to go! You can start editing the app by modifying primitives in the `app` subdirectory. + +## Learn More + +To learn more about Moose, take a look at the following resources: + +- [Moose Documentation](https://docs.fiveonefour.com/moose) - learn about Moose. +- [Sloan Documentation](https://docs.fiveonefour.com/sloan) - learn about Sloan, the MCP interface for data engineering. + +## Community + +You can join the Moose community [on Slack](https://join.slack.com/t/moose-community/shared_invite/zt-2fjh5n3wz-cnOmM9Xe9DYAgQrNu8xKxg). 
Check out the [MooseStack repo on GitHub](https://github.com/514-labs/moosestack). + +## Deploy on Boreal + +The easiest way to deploy your MooseStack Applications is to use [Boreal](https://www.fiveonefour.com/boreal) from 514 Labs, the creators of Moose. + +[Sign up](https://www.boreal.cloud/sign-up). + +## License + +This template is MIT licensed. + diff --git a/templates/python-experimental/app/__init__.py b/templates/python-experimental/app/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/templates/python-experimental/app/apis/__init__.py b/templates/python-experimental/app/apis/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/templates/python-experimental/app/apis/bar.py b/templates/python-experimental/app/apis/bar.py new file mode 100644 index 0000000000..9f7f3c0884 --- /dev/null +++ b/templates/python-experimental/app/apis/bar.py @@ -0,0 +1,147 @@ +# This file is where you can define your API templates for consuming your data +# The implementation has been moved to FastAPI routes in main.py + +from moose_lib import MooseClient, Api, MooseCache, Query, and_ +from pydantic import BaseModel, Field +from typing import Optional, Literal +from app.views.bar_aggregated import barAggregatedMV +from datetime import datetime, timezone + + +# Query params are defined as Pydantic models and are validated automatically +class QueryParams(BaseModel): + order_by: Optional[ + Literal["total_rows", "rows_with_text", "max_text_length", "total_text_length"] + ] = Field( + default="total_rows", + description="Must be one of: total_rows, rows_with_text, max_text_length, total_text_length", + ) + limit: Optional[int] = Field( + default=5, gt=0, le=100, description="Must be between 1 and 100" + ) + start_day: Optional[int] = Field( + default=1, gt=0, le=31, description="Must be between 1 and 31" + ) + end_day: Optional[int] = Field( + default=31, gt=0, le=31, description="Must be between 1 and 31" + ) + + +class QueryResult(BaseModel): + day_of_month: int + total_rows: int + rows_with_text: int + max_text_length: int + total_text_length: int + + +## The run function is where you can define your API logic +def run(client: MooseClient, params: QueryParams): + # Create a cache + cache = MooseCache() + cache_key = ( + f"bar:{params.order_by}:{params.limit}:{params.start_day}:{params.end_day}" + ) + + # Check for cached query results + cached_result = cache.get(cache_key, type_hint=list) + if cached_result and len(cached_result) > 0: + return cached_result + + # Import BarAggregated model for column autocomplete + from app.views.bar_aggregated import BarAggregated + + # NEW PATTERN: Use MooseModel for direct column access with autocomplete! + # LSP provides autocomplete when typing BarAggregated. 
+ + # For dynamic column selection, we need to use .cols and format it + order_by_col = BarAggregated.cols[params.order_by] + + query = f""" + SELECT + {BarAggregated.day_of_month:col}, + {BarAggregated.total_rows:col}, + {BarAggregated.rows_with_text:col}, + {BarAggregated.max_text_length:col}, + {BarAggregated.total_text_length:col} + FROM {{table}} + WHERE {BarAggregated.day_of_month:col} >= {{start_day}} + AND {BarAggregated.day_of_month:col} <= {{end_day}} + ORDER BY {order_by_col:col} DESC + LIMIT {{limit}} + """ + + result = client.query.execute( + query, + { + "table": barAggregatedMV.target_table, + "start_day": params.start_day, + "end_day": params.end_day, + "limit": params.limit, + }, + ) + + # Cache query results + cache.set(cache_key, result, 3600) # Cache for 1 hour + + return result + + +def run_v1(client: MooseClient, params: QueryParams): + # Create a cache + cache = MooseCache() + cache_key = ( + f"bar:v1:{params.order_by}:{params.limit}:{params.start_day}:{params.end_day}" + ) + + # Check for cached query results + cached_result = cache.get(cache_key, type_hint=list) + if cached_result and len(cached_result) > 0: + return cached_result + + # Import BarAggregated model for column autocomplete + from app.views.bar_aggregated import BarAggregated + + # NEW PATTERN: Direct column access with Query builder + # LSP provides autocomplete when typing BarAggregated. + query = ( + Query() + .select( + BarAggregated.day_of_month, + BarAggregated.total_rows, + BarAggregated.rows_with_text, + BarAggregated.max_text_length, + BarAggregated.total_text_length, + ) + .from_(barAggregatedMV.target_table) + .where( + and_( + BarAggregated.day_of_month.to_expr().ge(params.start_day), + BarAggregated.day_of_month.to_expr().le(params.end_day), + ) + ) + .order_by((BarAggregated.cols[params.order_by], "desc")) + .limit(params.limit) + ) + result = client.query.execute(query) + + # V1 specific: Add metadata + for item in result: + item["metadata"] = { + "version": "1.0", + "query_params": { + "order_by": params.order_by, + "limit": params.limit, + "start_day": params.start_day, + "end_day": params.end_day, + }, + } + + # Cache query results + cache.set(cache_key, result, 3600) # Cache for 1 hour + + return result + + +bar = Api[QueryParams, QueryResult](name="bar", query_function=run) +bar_v1 = Api[QueryParams, QueryResult](name="bar", query_function=run_v1, version="1") diff --git a/templates/python-experimental/app/ingest/__init__.py b/templates/python-experimental/app/ingest/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/templates/python-experimental/app/ingest/models.py b/templates/python-experimental/app/ingest/models.py new file mode 100644 index 0000000000..7a78ef7745 --- /dev/null +++ b/templates/python-experimental/app/ingest/models.py @@ -0,0 +1,50 @@ +# This file was auto-generated by the framework. 
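    # Note: the ":col" format spec used below renders a backtick-quoted
    # identifier (for example f"{BarAggregated.day_of_month:col}" becomes
    # `day_of_month`), which is the behaviour exercised in
    # packages/py-moose-lib/tests/test_column_formatting.py.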
You can add data models or change the existing ones + +from moose_lib import ( + Key, + IngestPipeline, + IngestPipelineConfig, + StringToEnumMixin, + MooseModel, +) +from datetime import datetime +from typing import Optional, Annotated, Any +from pydantic import BaseModel, BeforeValidator +from enum import IntEnum, auto + + +class Baz(StringToEnumMixin, IntEnum): + QUX = auto() + QUUX = auto() + + +# Using MooseModel for LSP autocomplete on column names +class Foo(MooseModel): + primary_key: Key[str] + timestamp: float + baz: Baz + optional_text: Optional[str] = None + + +# Using MooseModel for LSP autocomplete on column names +class Bar(MooseModel): + primary_key: Key[str] + utc_timestamp: datetime + baz: Baz + has_text: bool + text_length: int + + +fooModel = IngestPipeline[Foo]( + "Foo", + IngestPipelineConfig( + ingest_api=True, stream=True, table=False, dead_letter_queue=True + ), +) + +barModel = IngestPipeline[Bar]( + "Bar", + IngestPipelineConfig( + ingest_api=False, stream=True, table=True, dead_letter_queue=True + ), +) diff --git a/templates/python-experimental/app/ingest/transforms.py b/templates/python-experimental/app/ingest/transforms.py new file mode 100644 index 0000000000..f020f0919e --- /dev/null +++ b/templates/python-experimental/app/ingest/transforms.py @@ -0,0 +1,69 @@ +from app.ingest.models import fooModel, barModel, Foo, Bar +from moose_lib import DeadLetterQueue, DeadLetterModel, TransformConfig, MooseCache +from datetime import datetime + + +def foo_to_bar(foo: Foo): + + """Transform Foo events to Bar events with error handling and caching. + + Normal flow: + 1. Check cache for previously processed events + 2. Transform Foo to Bar + 3. Cache the result + 4. Return transformed Bar event + + Alternate flow (DLQ): + - If errors occur during transformation, the event is sent to DLQ + - This enables separate error handling, monitoring, and retry strategies + """ + + # Create a cache + cache = MooseCache() + cache_key = f"foo_to_bar:{foo.primary_key}" + + # Checked for cached transformation result + cached_result = cache.get(cache_key, type_hint=Bar) + if cached_result: + return cached_result + + if foo.timestamp == 1728000000.0: # magic value to test the dead letter queue + raise ValueError("blah") + result = Bar( + primary_key=foo.primary_key, + baz=foo.baz, + utc_timestamp=datetime.fromtimestamp(foo.timestamp), + has_text=foo.optional_text is not None, + text_length=len(foo.optional_text) if foo.optional_text else 0 + ) + + # Store the result in cache + cache.set(cache_key, result, 3600) # Cache for 1 hour + return result + + +# Transform Foo events to Bar events +fooModel.get_stream().add_transform( + destination=barModel.get_stream(), + transformation=foo_to_bar, +) + + +# Add a streaming consumer to print Foo events +def print_foo_event(foo): + print(f"Received Foo event:") + print(f" Primary Key: {foo.primary_key}") + print(f" Timestamp: {datetime.fromtimestamp(foo.timestamp)}") + print(f" Optional Text: {foo.optional_text or 'None'}") + print("---") + + +fooModel.get_stream().add_consumer(print_foo_event) + +# DLQ consumer for handling failed events (alternate flow) +def print_messages(dead_letter: DeadLetterModel[Foo]): + print("dead letter:", dead_letter) + print("foo in dead letter:", dead_letter.as_typed()) + + +fooModel.get_dead_letter_queue().add_consumer(print_messages) diff --git a/templates/python-experimental/app/main.py b/templates/python-experimental/app/main.py new file mode 100644 index 0000000000..5d8c10d966 --- /dev/null +++ 
b/templates/python-experimental/app/main.py @@ -0,0 +1,5 @@ +# This file was auto-generated by the framework. You can add data models or change the existing ones +from app.ingest import models, transforms +import app.apis.bar as bar_api +import app.views.bar_aggregated as bar_view +from app.workflows.generator import ingest_workflow, ingest_task \ No newline at end of file diff --git a/templates/python-experimental/app/views/__init__.py b/templates/python-experimental/app/views/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/templates/python-experimental/app/views/bar_aggregated.py b/templates/python-experimental/app/views/bar_aggregated.py new file mode 100644 index 0000000000..b19ae12107 --- /dev/null +++ b/templates/python-experimental/app/views/bar_aggregated.py @@ -0,0 +1,36 @@ +# This block is used to aggregate the data from the Bar table into a materialized view +from moose_lib.dmv2 import MaterializedView, MaterializedViewOptions +from moose_lib import MooseModel +from app.ingest.models import barModel + + +# Using MooseModel for LSP autocomplete on column names +class BarAggregated(MooseModel): + day_of_month: int + total_rows: int + rows_with_text: int + total_text_length: int + max_text_length: int + + +# The query to create the materialized view, which is executed when the block is set up +select_query = """ +SELECT + toDayOfMonth(utc_timestamp) as day_of_month, + count(primary_key) as total_rows, + countIf(has_text) as rows_with_text, + sum(text_length) as total_text_length, + max(text_length) as max_text_length +FROM Bar +GROUP BY toDayOfMonth(utc_timestamp) +""" + +barAggregatedMV = MaterializedView[BarAggregated]( + MaterializedViewOptions( + select_statement=select_query, + select_tables=[barModel.table], + table_name="bar_aggregated", + materialized_view_name="bar_aggregated_mv", + order_by_fields=["day_of_month"], + ) +) diff --git a/templates/python-experimental/app/workflows/__init__.py b/templates/python-experimental/app/workflows/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/templates/python-experimental/app/workflows/generator.py b/templates/python-experimental/app/workflows/generator.py new file mode 100644 index 0000000000..29394c359c --- /dev/null +++ b/templates/python-experimental/app/workflows/generator.py @@ -0,0 +1,52 @@ +from moose_lib import Task, TaskConfig, Workflow, WorkflowConfig, OlapTable, InsertOptions, Key, TaskContext +from pydantic import BaseModel +from datetime import datetime +from faker import Faker +from app.ingest.models import Foo, Baz +import requests + +class FooWorkflow(BaseModel): + id: Key[str] + success: bool + message: str + +workflow_table = OlapTable[FooWorkflow]("foo_workflow") + +def run_task(ctx: TaskContext[None]) -> None: + fake = Faker() + for i in range(1000): + # Prepare request data + foo = Foo( + primary_key=fake.uuid4(), + timestamp=fake.date_time_between(start_date='-1y', end_date='now').timestamp(), + baz=fake.random_element(Baz), + optional_text=fake.text() if fake.boolean() else None + ) + + # POST record to Moose Ingest API + req = requests.post( + "http://localhost:4000/ingest/Foo", + data=foo.model_dump_json().encode('utf-8'), + headers={'Content-Type': 'application/json'} + ) + + if req.status_code == 200: + workflow_table.insert([{"id": "1", "success": True, "message": f"Inserted Foo with primary key: {foo.primary_key}"}]) + else: + workflow_table.insert([{"id": "1", "success": False, "message": f"Failed to insert Foo with error: {req.status_code}"}]) + 
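# The Task and Workflow definitions below wrap run_task so Moose can execute
# it on demand: WorkflowConfig sets retries and a timeout, and uncommenting
# the `schedule` line runs the generator automatically on an interval.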
+ingest_task = Task[None, None]( + name="task", + config=TaskConfig(run=run_task) +) + +ingest_workflow = Workflow( + name="generator", + config=WorkflowConfig( + starting_task=ingest_task, + retries=3, + timeout="30s", + # uncomment if you want to run it automatically on a schedule + # schedule="@every 5s", + ) +) diff --git a/templates/python-experimental/moose.config.toml b/templates/python-experimental/moose.config.toml new file mode 100644 index 0000000000..3257372ff2 --- /dev/null +++ b/templates/python-experimental/moose.config.toml @@ -0,0 +1,57 @@ +language = "Python" + +[redpanda_config] +broker = "localhost:19092" +message_timeout_ms = 1000 +retention_ms = 30000 +replication_factor = 1 + +[clickhouse_config] +db_name = "local" +user = "panda" +password = "pandapass" +use_ssl = false +host = "localhost" +host_port = 18123 +native_port = 9000 +# additional_databases = ["analytics", "staging"] # Optional: Additional databases to create on startup + +[http_server_config] +host = "localhost" +port = 4000 +management_port = 5001 + +[redis_config] +url = "redis://127.0.0.1:6379" +key_prefix = "MS" + +[git_config] +main_branch_name = "main" + +[temporal_config] +db_user = "temporal" +db_password = "temporal" +db_port = 5432 +temporal_host = "localhost" +temporal_port = 7233 +temporal_version = "1.22.3" +admin_tools_version = "1.22.3" +ui_version = "2.21.3" +ui_port = 8080 +ui_cors_origins = "http://localhost:3000" +config_path = "config/dynamicconfig/development-sql.yaml" +postgresql_version = "13" +client_cert = "" +client_key = "" +ca_cert = "" +api_key = "" + +[supported_old_versions] + +[authentication] + +[features] +streaming_engine = true +workflows = true +data_model_v2 = true +apis = true diff --git a/templates/python-experimental/requirements.txt b/templates/python-experimental/requirements.txt new file mode 100644 index 0000000000..df5e13942d --- /dev/null +++ b/templates/python-experimental/requirements.txt @@ -0,0 +1,7 @@ +kafka-python-ng==2.2.2 +clickhouse-connect==0.7.16 +requests==2.32.4 +moose-cli +moose-lib +faker +sqlglot[rs]>=27.16.3 \ No newline at end of file diff --git a/templates/python-experimental/setup.py b/templates/python-experimental/setup.py new file mode 100644 index 0000000000..36c813a049 --- /dev/null +++ b/templates/python-experimental/setup.py @@ -0,0 +1,13 @@ + +from setuptools import setup +import os + +requirements_path = os.path.join(os.path.dirname(__file__), "requirements.txt") +with open(requirements_path, "r") as f: + requirements = f.read().splitlines() + +setup( + name='py', + version='0.0', + install_requires=requirements, +) diff --git a/templates/python-experimental/template.config.toml b/templates/python-experimental/template.config.toml new file mode 100644 index 0000000000..5288b3299f --- /dev/null +++ b/templates/python-experimental/template.config.toml @@ -0,0 +1,33 @@ +language = "python" # Must be typescript or python +description = "Experimental Python project demonstrating MooseModel with LSP autocomplete for column names." +post_install_print = """ +⚠️ EXPERIMENTAL TEMPLATE - REQUIRES DEV VERSION ⚠️ + +This template uses the new MooseModel feature with LSP autocomplete. +It requires the development version of moose_lib. 
+ +--------------------------------------------------------- + +📂 Go to your project directory: + $ cd {project_dir} + +🥄 Create a virtual environment: + $ python3 -m venv .venv + $ source .venv/bin/activate + +⚡ Install LOCAL py-moose-lib in development mode: + $ pip install -e /packages/py-moose-lib + +📦 Install other dependencies: + $ pip install -r ./requirements.txt + +🛠️ Start Moose Server: + $ moose dev + +--------------------------------------------------------- + +📖 See README.md for detailed setup instructions. + +Deploy on Boreal: https://boreal.cloud +""" +default_sloan_telemetry="standard" From ec7b83840e973fd0f7e00f08009133a778521dbd Mon Sep 17 00:00:00 2001 From: fiveonefour-github-bot Date: Sun, 23 Nov 2025 16:28:31 -0500 Subject: [PATCH 41/59] Add release notes for November 22, 2025 (#3028) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Auto-generated release notes for November 22, 2025. This PR adds: - New release notes file: `2025-11-22.mdx` - Updated `_meta.tsx` with new entry - Updated `index.mdx` with link to new release notes The release notes were automatically generated from commits across the moosestack, registry, and commercial repositories. 🤖 Generated with [Claude Code](https://claude.com/claude-code) --- > [!NOTE] > Adds November 22, 2025 release notes and updates release notes index and metadata. > > - **Docs**: > - **New page**: `apps/framework-docs/src/pages/release-notes/2025-11-22.mdx` with highlights: > - `moose query` CLI for SQL execution/formatting/code-gen > - ClickHouse cluster support with `ON CLUSTER` migrations > - Boreal database performance metrics visualization > - MooseStack and Boreal bug fixes/improvements > - **Navigation/Meta**: > - Add `2025-11-22` entry to `apps/framework-docs/src/pages/release-notes/_meta.tsx` > - Add link to `2025-11-22` in `apps/framework-docs/src/pages/release-notes/index.mdx` > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit ec5acb5d19dea2d386b7a1fde960b2f85afe974c. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). 
--------- Co-authored-by: Release Notes Bot Co-authored-by: Dave Seleno <958603+onelesd@users.noreply.github.com> Co-authored-by: Johanan Ottensooser --- .../release-notes/boreal-hosting-telem.png | Bin 0 -> 151318 bytes .../src/pages/release-notes/2025-11-22.mdx | 110 ++++++++++++++++++ .../src/pages/release-notes/_meta.tsx | 3 + .../src/pages/release-notes/index.mdx | 1 + 4 files changed, 114 insertions(+) create mode 100644 apps/framework-docs/public/release-notes/boreal-hosting-telem.png create mode 100644 apps/framework-docs/src/pages/release-notes/2025-11-22.mdx diff --git a/apps/framework-docs/public/release-notes/boreal-hosting-telem.png b/apps/framework-docs/public/release-notes/boreal-hosting-telem.png new file mode 100644 index 0000000000000000000000000000000000000000..e99ad7b39156667d381e57fc2becc8249f3cb722 GIT binary patch literal 151318 zcmeFZWn7ip9xY5scQ;6PcQ-06-QC>{(g;XOH>jw9bV*8gD%~l9q?91MbM13>oUMDm z-|mO|yYhj@#az$qe~dBae5$G}kA_T)3}w5 z(%0j{AKq#RZL_{nBb1wrD|U8myR_kY=Jnz0+4bEDw;s=4YaPz7y7}jvo6%5;4n#Q-R=;@1lA`ZjpQg;HCSk0Gqb2=kk^oJqA?#G@vS=T2$=(K~gleA3nMWKu>%4lJ- zQk|x0G+K*+cZ=p@!tX<7E1xv%&?+{HgjMb3@%tlh2sREfFHN@B@7en9Eex%(U6XKl z>Lc{gZns1+c$A!zM!h*xfv{2fnCltD)neqogQg?(E2B zYT;~V$>!te0x19`;v)ne9W6afDSaFroZN+cM5%r&Aq1WwAG1?Y{#L}pUX)5lNtIIC z+0BxYkBx(kgGvmUl9E!y&B99Pv5ee*zYhK}Ji*DJUq& z&cVgb#l;Fru)6y?d6@dJI=NH-=Ow?tN5<0K+|Aa-!`9h}67pVCGiOf^Q7S4(MSuMJ zPdhzqt^TaZ$^F0G0v%+B{Dqy9jf4G!qNl88Jw9eRx z-0AJK?zs`nBZ2&AVjV5Mj)}WK8$%m=_7>5{Nkg{xIuaC;(}WY@e@nFy*yY~kUd#jo ziYD2{UCWieGi=skq|^Syv7;aXgPr&Uj)oE%>A~L~C5EU}+im+;V{`qPfBx%#zf;hI zsjBG288uIq`s=rVPgT8PVg@{|ce4JerYCTEsL(x|`0lTl)P8ElAFuVKFUTQv~9+U2SJ0+qaJVV!IY#BQelvudCIJ;5{sGT&OSf zzc?tgpLtg(9fck;hDPSTJ&eR{J&Kw_uY!(4uZ$YNHuHJ0$z5Re32dh%0vT?aanM!K z2&tf(MdR)Fy{=E%#$Sm1W8JiD+Al>+z z0UDQq)7uI!ch0t1aMFTdjg-h23zRxsZteQEJJnUDTVv++wZ-?8IN)x;h_fvVPI*X!XNdFNrfM^Sef~hf3z+n7PJ2YZ1wV{0&m1Gb&3T!~nZusjA(=@c>)C!O0JbHqsN zyUsByv8o4dzkVv!&Jgmn%P~eT0ONn=vJ!X^0SybknH{DRu;ADb(R|n@EOK+ywbq3p zjwJN?573!@`#ho4M50vh$hF)T zPo_V8POAjphxQ9Up?V0XH{<^-R(0*g!xX8tSclOB@mqmfyH9plc3zrDJYvxBiPAD` z^!JI7-ze2qyxAaVzmzq4@QSsM)Ljubc3_na)j{&0aU-6Ap+xesU!!d`6a_i9K4VNL z?RgD`SF9lB!RwzKQ$YkKL+P+Z_+}@idXoGyQDUsEG)$_*lQ=aNTEfeB24NvkRn0a` z7?xbmRsv2(#v7bhqCYSDAEF_U` zsnG4c`ZgAP%Z+b7E*jAIls-RzE8w_?!1etRhGPF`Bhv`H&SV8PqJ#?siU3Sj8RWwz zi>axg(Ce=w{3`|@(Jd=l&tt?EpI=|J-;uEzWQ_L43m*`&YGz#YQzwM4bwybd2($j| zB_-}*zb51)I)2H-Nu)CS7`Dpd8SivI0lflFNHpqaH{<}vG0}^~fRrO?(X)^0r}_56 z6Xp61Pxu|%gMO@c7*@33`jx>bK)2Ya-+Z5Gk0&qNb(e0fA(QFuoi~#HDqrQdQ13|6 zBPAsHmc-()8%JfhVzf!rX|eI>qq<6u;>}Gc6P-Z3Mkxgn1mOBd zGcX%sZ9wg#2o5k>?Tv?3z%Wt~|KY*XRb}ix!dr@~Z`%i^m11bwr3=j$i_AZfSzL~Qre%ARa+ld%~c*shBx}Md-mUKo~uMn;Q1WAR?Wftz22{?kD?!(dqA~~D_)E>+30l~nkOpk?<)0Db zIlbR~)q>gYDi>YK*dIW*!29$KZz_9z&iO>9lz%oACX;#q7++eHfimfOS8VgEI2(oW zDw#;vm|U-UMkTgyd4^usUmJGP-O&=&hIy0rJuzi!h(@E!8y4LZAWrFI4=;PxD%r8L%uz&Y<;e+k>+9axQB#g#HPb5k&92_x5o*l5U*Ac1Exvj zF4Mh?1M^3X+GVDtMGQ3!jt6F~hRPmdhxp&!24#qqOo1cqK1xv^d+^mzky0+Ny;kGp zdTcCPsnhmOelWR@MjmQ!{*Qg@lJJ#zORVq?VE3miXrWtz8y?5$8}LR;t^!u~9bD)4 z#QV}kvM{>E(SwMALu}0kqd}&2wc4u&l%=W4@Mi5l&QZq1?*ee?1cS1j+kA1O2Hfb} z>*(yE=O6kLb(dF#?SH|oXgPV5#d4MFnM|3>Wvff{;f!AV>S3AnUU@^j2#RQiuhf0! 
zCPM_D+j6Nf3}SA1C~bDU*U)g!{&=R`IEZ{6>MzPD(7fG@=z$lS8!M7pU}%qIqxxc57gvVRkD?uF zM4(e{6u=35)O8)g(L=o%&tcQN$vjV^vxs^m;N5anHnb?9NORyy$w$++?1To?9eZaG zxz0B@ae&dlH{Di3QIe@=(7b@7M`$vnCQ%1nK~_Cv$C0E6iMkFlHCc5VbwENE;T}0t zx!E_R?`=Ik2K;DFHFoDHvcCbAagr_%IP+BKwJY-aJ2R%B-lYF|LH~1gz?}0C&}b_M@R<()Ux2L#v)C?pn0*fX zj)pwi@c}F7)MeI-iwKDzlef;NpTzdt7Or5+-n}`!YijV&Y5uJ05F>L{N+eEG4I>`>qRQII!kQMRHrLm%V^S#@ajigrS_@ncY#|M6Mz{qNoR!q;gfvKO~m$ zFA$X#R%KU#(VZ>qpcz~c`Dh#yOG29u3CPE|v6$c;Qd*FNTN#xw;yBx|C z5^`p-3UUxFTQyn*TN|9Eyd53e_W%2u_{ZAX*NU`8%rVnYQ#NZt{?_p2Ev4A+9R8Y2 zpwH+B1TdP<>CUg`sO-soGFK@-z38~qwwbFdzg;DJ2oCS1@Gf++f3)mk&u%%%6srMD zcgA7+GhL7v<@<_DQWjRgm=kgX;2eG)T$esT0ba|CRm5I_#tn$KJ3S7AzXuyj+#%EF zK=g=<`-3jux_QP|0*sH0!%%uCsOtO{O@mE>ErM+ZU=l3;_jB;a8BsdK?87(VC=;Vt z732ME;!tZtOGM2Y)_;ZOJye*0!w!6pdXm*QlRCD^uhjTGn8emZMa6U*1B(fKiFmNU za^>(U*%Oag{OTt2;a2K!b*|+OOOh~RHUgSuQ5Yi?PCyO-0*y->IW7kafo$ir;`5N| zJ1FX8^*k4_lI~TXc!Hp#)QF1I+atz^$hSzg|LC9oS@it14%4|J;U3`y&F*2kl1*W# z;J5bl*OhRkqp!MsLgOt}H?`Yx~E`s_2c-Z(VCL046TkOz~ zyf&E9ks8{?`dIJ|eLEt>I$nbDDzvX}2Q`i~ybmDj_W>Xf_LE{mTp!U8nCB{B?Xtvs z!5jzkE+2>h|2mYvj?WU34iC~Rpxk`oR0LAD9?UD#YY@vrTX12|Akl9lSDx4>Y_#)yQn_79c`q=$(hT!ocOZCV<1w0Ja+~I*=Fh#(l^?9S;488|; zdUuQt1S-s&qhxn2`d=19AZjzumsiABQc?c|WX~c{l!OP_Xetk@dcVcXP~jFS&^KJQ zJVCK~4DHiZc@W^ficpZb%-L`3fQJ+_fZ%2cO0bVaMP7ae1v#!fTD6f-I64ARdwWLlg>}jKGl8!;}{~>M=(GyyEm4x!{-v zzypv5u*bRfQ=lfba@jRdvJC(tJ0XhPFH|LT0ybe9g%m2F_fb~i3;eYIpHGcHzuRSo z58eA3&Pn#~qlE}j*nw1o-PXA?yr#z)MQ|$()>p+Tf7URL|62GAKDDnb*KwPN5 z409)l^!E+*IeoYy_bD0HuJAnLo~ba1HL;$K zCJQYwRr^Gh$f8osggf0r!g`g0ASJwnKY=Yj~<|*$0j~N9ik`3kAsAL1(cKt zZ!_F_x(shFS|9WvA&E@dMjp>AF{!6Jq`unpy8;1sr_aJtAA~+G-j}) zh%WLgfJ{v8f@rTXD{cNWTko6HXTa}P0kVk+a|ksF47uIUcc+U$$slKDR-pFvwK^V? z7A9XQ-``KIsORtl+~4T~4gL0XP}xQZmtPGr?* z0a?8$!yt)>(HY&e0(v|CdClb^z(Q^CJNPAX3cgn_p0a&ZLcmd{+guGC&tZhSfq`8} z&tbx{S5)&5NEC$H`30@Bsg?ga*-!(z1IR8*!p_3%seT-4;z4#IlMhx{S&*S_bW3YH zUDi%D-{j^eAg zFr~0=E}<1*briw;!%}|wmFuB4dlGstV~b~js@ehA_n}KXD=>+NcKdcg2?EMN_dtBT z!XOOP&U8Q%77L2@e(<280!c2Px{W`Bimhi}xW;cG5i}^Y>t0#nA=o|oELUI`EMWRJ zF!X665>rngUjPVq4`Wcfqs!lLkEW2o{=>>_XD^Zpa&{G=Wj7d(MT4qXLo$*y{U;UI zU*iVwq=SLQo9&c3ZmD7zF7BUHPc^;EY0eEV{ZU#|l2NTvQ}RUL1y;VzY&E=DwZJpP zTcocM_*_A|X9>L_kVT#3NkCLQ?#Y`|fb_&qdiiubRIB#mX(wH(kar7aO%`1oQ2nxi zm=PZJeE`#8YpgCyis=u?&#%q^*B1oj=thH0?mj5*|2y65s=}tzF*#^4H0b|iH6N4} z$H3PF?Ig|dxcrn%3^~0{A(;35<@eJyDK#_b5 zXg+3qr04m7iPO^w z;sHV%Xre*&EAWcU!RzE&87xw301=ZA9*+9+00D!PLcLgRM30)Y=I`s0DnR(?PBy9g zd`04=`O+3Jv)bcW?WBA?-W+;*eDhu3bp3SwD5pojyRUxOWNS3c^e#$&B~U@)<0KC$ zxsbqBx)ouj9@TO|v-O@vzm?JtfxH{@aHv&$eIwO&M3p0%OAxSoSfCXeigZwW`)&)! 
z=#(-WNuk~ZWNgxQbda=gbsNT-C%-i&TCe?l@R5_4RRe)4x^?7$5MFs4Ko$hOa~-3QNrvya$EPIw)%5pe=Br7q}N0FDtt3 zL=J&luwUrm>eUv=Y2=`W#O z1T!?b8zgl}L7MeJR?*e=m*tG&Yr&a%$Is-DJy9F`=igzTkhhI93G#AXhgyOGebZsN zhSGr2M5ZC-Py+5BArKzQLfIGzu!IEHkkTwjZ8 zbJ%3Z6}?g1b}nMwJJ@1cT;!3}aAR2XHDl^Si4oe4&R*|ZF!wGAET0dC#ISt*Lmn1hG z#JJE#YZ+~tRbHctO}4W88lq5X2Ex0OPDna?%)fNPuR%O%vY{aGq99ttq%EVZO1q;`%jpFsaYu9xxR z&G!3JR=t}{1}Py=-*Ns5MWzhz&9?oB^`sVB*(uFNxf90Ma?#-wDxA56&raldiWX`# zm|RyKQ)Iqax}cdQ7HMewTAYwkymfiSyRyKdQloLD_2QVic~yUlrJCY6LgIURm3S6N zlZRSTlP^%sR6$n9S9eUObUv4a2iGvZS7|SlGe8Gyb-*0;Ay_LZB1-Gd-+eqg^26K89z(Nb&nJ{2XN!+9O1Nw(d;t7r)-47LMpuqtx}VcHzF1SG-p_z{>~7%y0jAFcKd0)E(9 z%gz4t(ybwx#a7onWdP>Yn!%!4ei?ni-zB95ytfsDn)+@SpAWt}qmsgVzTj~^TvGBl zkqbyxzTK@H-AG(1C`ps0`HU*7APrW7F*F|l^c;3Hv`^y)&MC4HHr+(6bKk8o(&0+p_I0BE{J?l3AcCtvlUVA(2q!}5Iq7~$ zHm5WG8?lff7gAR?(idj#;+$cf+3=T_tFPdFQ|$gn|8WE3(<+{DU#G;tJw0mjra)5L z+}#pU`}7X(YB}0APd>td6l<<0ZIDh~p22n5!l{H2C*ww0Ls5t2tL-~hQ_FTpI@`)jLJu=mu76aM%)iLSXd=anKQ!-kOEy16}PPE>e2XFj;T-WgLudbg8 zbkwIarIu;Y2nZ=|lguh{$QG|m8zp7d45^6+!z_mx1TwQwmx!RAv~)IN*jyz!W3XGG z@eufLHp8p4ErU`pe%c*rU={f(!}OBb;A`fnF6gw3A7u1w5V)NlJXqA&d1!-XIXQl_ zRA?39<>iv!@QUTovHTy_YY#AWB3d5(@@;>F=Z$Al_vXl2SG6u6?-;@JJ9Q4vf6o?M zhpMC{R_=}fzKmnN>&y^j_N;*<926!SHTNi1(*V@gwHUdfM!I5~+n7v$}G z&4yI)?%ytKvhNA=E`*IVSwDT$}-Y*VHpE!py~xR42H$z z@M8=I4f>C)rYm!HooEGuXU*MkZkHeBdp1k|Q96MNs%=5aVD&gcrNT%iSGaVcAi?So z@B5AMw3TX<_Q9)I05Ca4S|%HI8Sjp&XZlcUG@w$n&`_|uSkYjml{T+$^$*XreAAzv z>!_k{3Q>Q#u6Zn*pz*~KaB(SY?lCSTjWsBS%2<^> zgP?gCa>qE}bRK>>b_4ovv@JyKXQqH;6IK-Qb|?w!_XY)n;3g!R{M!*DN>NI1_V4Ng zhq_4u&@N%!GwC&OYMzz5K|9?)kQ!cGmdhWX?4%li`^llufxB41P9F*Cz}E9U%m`wX z2)}r-Am;dslqQ<Pl;?`Gng?1723YhJMsbC~-E15g5d<-_`MhZW<*(LAS|9H0>Ju9zQX~Sgq z%}3>9^cAQ$+o>Wxq*O2_?^1ld`MYS(LfRF8ANXLgHq6N525*xPcY!n?il`BTv$$)H ztisMD*4x`jP3pWtl` z)|DoyS!u~I@RTwzer8Ghx>F-Nfj6PxI%iZTc{T9CEh@1(LXM|zgoJNTbAE#o-nF+4 zk-3yOiMPM87nsM5xeUS+t2SX4?j}=lf=L>dJeIf zPR<{m>~>~^ow=n?6i?6We}2{-V%z(3lL_)iVx3H_9u$&RO8Jo#p;-hcUb-||QM5_N%;#aTL$ zU0QPGS>2D?%}RB6aS@3=^o*lU%glbJ+^vx}Q#-$^B(23V`Ylh#|EqnwawWD;%QG1!enV3z)l!=d42F=pwNObOMl_ySp}%~XyI}5nDL1{ICm1sR zSZH8~Xb&V4Ca;GUr@9E?E|hj#Vgz41tXE**)h{}#c=348zrujm!gwMGYuw6~d>@Ki zFge-402hD+i#Ou;(FxzY%{FP#Rd{)M32e;pps>E*`^7a|ST4kmVTjqq-&YUifD#J5 z0h32(jMQkaxFP=FvZzOt(|}DofEUOrFR;4&cqKUqqhM(Iho$7uQTDDPqA@4HoanK# zx>FS({yjW_-j*t#VBp!1*N8F}S88gVPH(XeXrCh}vR{=XRmd}tsZ7+Betc*1{Y;p5 z<>4i9B5n=j|K7Vj(x6r7jNeyUbUh98-&fTvjW6H%q>B&T+(}hBr1O>GxR^a+lC3Bfp_}@jgjNV z5PKN@@tYTROKnMwVmvl~dAD=ixfg&S-rNPpB2sI~qwG=i0P3%+lTG}W5AEa4Wmx_1 zWvF9M+5?Itz~sbd4Qt8p2{n7}08>s|B(MLsMKG~3xeYtY3LrBkMFor$G#u0m+^*$z z)MJ9j&%+McKkO@N8^xt!$=tftW4x#8@PjvP|4i6YPT=`gS$N9M)2H>hWSutB3b?6x zWFO(Bwn1TECnT-9aj`}3_~({l#Qt;Z#oz$){qz>kL4MI@Y-ND^SaVYUxUoV`#Ez0^I>-gB9Y~naS@WM#tN zNE^uKCYOR|SEIS6Kc=rLW>k3}wu&;}qNmU>m%d{Lb+g6o%Y*3{)0M5(TR#}zIIx2U zjid_i^S{6rEESoqSR&o|dT5wA6E2s)#C||mcUc+MRz5Na%t*ACq8sxFH{Fi4y^UCK z&*Caee!yj0S-1N>_Km3r`-$0J{?=L#2`@lXAL3~@6_gH&Wa!Ar`><}&ZBsT+@RG?4 zfx!r(i_r6Yj%>mE;jZAmDb?uhyjVHpI_x) z))J0^MLKzzKR!BfjOvC3EyS zlu~H|Qr0AkQ0h_5_pjf5()9AqxLS5Mq>Ld%tzS>jk^h;M@Bp?HkHwzn=Y>n~w=zEv zGb+e@ND2O9eqoOVMGk9+ys&A*eQdxw_V0J>_dbsKV>}RM&06=m*L`2t`8rQ>zEsEN{;7+F{hz@m7s~0> z7kmc>5`)wfOpA4=Cf!)h!aL$-4C;`p^RJ)C^AGgH{+%p zM_%J4Ycy~jyab5eS3eBb`_8&smT~!AST!x^N;KZiIA}gQk(YUEk(Q85Fp;WFKl5R}^4O1)7ACOioH;!fYWnj3wCeNhO3Q4hKJJ_x*E z*<0c#E)y+`>%j6;&JFf4J2-20m(kVn{3%;s<4AI32JZ%_oPqjQ$BIbp`@-AFGfIZ% zu4iR3>p*Je&BJA%*fAT%kV_O}Osc>ztM7*wfU$sBND;RHmEPN-N3q*e8AwDsBY~kZ zELJO~|C6(su&qpDKuBVF5c9&Pya~sy?fWG;1iq)j*o}c`@6oB5oXp~0bCdaau>4lj ztqn~qGIQxQ#Z%FznRdZ4;5BQ0IbV#sd(YBzb( znUOsDKKN}Y8L@EWU4@oK%ZsA{fVGTV1fgxZ^a+z3!`_}kjS5T6} 
zPbYb#gSKI&%Bey%`jg7X&})~&wYjQ6o`Y9FtiTVroFHKJOn`zIA}BExLwz~(5xp^fG9hX(P{^|-E)u*uoy74&${D_M`SUXemBicqcB9g|oRO+O`!NW$9s!HaH$`yl z!-0zERd?hwptl?b80s>~*^!>>C9(y)xU973V0t=TX_^%`Fx1PDq}Jko=C`2X@i(|_ z(Y1Cqo5klGZ>h5|{4BN7TYXfT(8PKNzChoO_)$%oTL9qSzys%>etNYhu|H!sso#?B zsoUNwVY3K(rlRcKL_oy}aND;prKowDK3pEHZeArFasc*<)C^wD$A@f`Z&L99aNByV z#fl(!dF=K*;I}$lEBnT~L{53I#N1g~b&~&|{eUWd>#d#uL- z9SJ87&mPvlsPHQxu>&4q$?3IihYT)M_1oR^eanm-z1RqPX6^lkQu>Fi;(@E~ z2~X0OeZ~$xBW&Pfx)VQqU3gqzzltX#Qhe$j=}H4t>miLXX?!87q#Ljbw6$MWfi7{? z{MA!+X(R^e(H)2xELYj&_o79wS>oO~52=!#SMlz}*bUWw#9B&BKEtZSn<)5kU;m?j zu2Eom!0sRDSsPT208H9Id&Dn;)^S{hBvnFqf%RC=SD{-Pb=4(F?_I57*sp&36VB~`@%odQ6eW9SO(7#)NnuhRSrui0_-L&1qD2ERd<(KnzmFNo*B^(Ss_9nLRGwUzo2v_ zQ!`-g;7&XurV*0SJpwFPCbU|PW8*+2K5sOj9to>}LEUNxca==KT;fQgR=m;|V@6dh z;gd10wW)z&^`k{%giEtK?4aU{Bhfibk^{EKH#I&4V1nnA_h)o)5zM9fOCJw$kYTqC`FThQ-1}f?`lc zL=B&7KOso`=rC#rocr0^8Hn&mYehTwq=TIxBfINa`pj|J*Fn53=SO~qF@{NRIA}+; zfS$XAIu|>UaUaG<_&p(7MoCRI7SE#dj|~eC2GV(PnzCNbl8FLkKkpzJ;Z&I@9WIA0 ztS>s62nN&)U)rI!RwH@gI!8Od!HWtY+E;0c#S7oQ{B0O0ml}qd?!0P>zJ|M&%=?W^ zzzRe7n%H$?1LG63N*3jF0wI&>Ws773%R$r*l?DiF*IdendiO8A8{{D|`8a<#jFo5G zyW>)-!F~I#lETUB`454Mp8Z4Q7mO3DS%`-9e5-ZB#SaA0G8y$Wj?pe~R0Yq>ZX2-! z&zM4eA&EP`%lp$QiS@G+x|U$=em8A5^Cpv~ zSgFHziOY1IF$FgI;rShRlP~8!SH^1pYKi9$yVq*i{x)=ft~sA?UZeO_t>eV?woPV# z;;XG^6XhdsEavJ^Q5QRXi%E{SP}K0PGUXa5ahe7Mpp;~poUNvU`83tC3w7m8ws+vb zQvYI6G@XG)h+lSmrR$>xu#Ng_Q^nGZF1`d`8wY%*90l@upMqBcKcREWQ-#uF=MqW=p)iDD(Gs{)s`n@*SHf5P z0BJDt<+7MOPUzDJ!MCPKhb+4LFQg_Iy$D?kzkBqXo4mde$8f@^=#%6DX@RmOP-8!X zaVx{D=cXXTkn|EitV`nzBqL(Yjx$7t7lFCU3$<0|gd1OrK_)TpNf~fj4^@f0f!1>Y zTL({}cMKF{TYPZ0q?&kmld|W3!^{1*yG7Z9v%uYR4=A9A{>Og27n$*Hb`@0M5aE{jcasJW|ef zR$e4)dmKetSK59Sp+I8NBj0e-%vuDa{-W}7Ubd#MSuy6Neoy+W-Aic}@MCB$b0)eU ztGe-h0JU#ycnZzH7rQ(_ej7kQN3esZ_s3JGERzx^>&`PD)|NpM#(_-e_Mel+;$5~O{zE#GD&jVO|Kc?a zidxYhE@n>dWOrK*Q+t%CNA`WN)$oX61le99S}2I9wq0wcfx*b=0e3NmZ+5w&XoDLj_sd52!W z0wTe1fz6Gih7N_;5;0<&TaeCr8c>Vk|KPOYN{8T&bYeGAR=)V9W+5Yo!vrA+qP}A5MIc9M-Vp& zkcWozr&i_pY+M>Epn?Rh9}ns64>x@XNbEZGqL-X35@HdJ2Md>KcO5-rkzJw|cwH0h z)MRdv_10?2rBs7a;}6?!s?nweyM=kk+Zl&RQ7vnrw>O#ebNH|3VLuIeK2_>1(CSPHF`>pxl?|)t2jMB4 zHw7gnk2D=L4tYm5^@x$;bpS$`FbXZCzeNZvhy!#8Cl%j;PNR4pZWXUkbI)r2HF zO+f8QgfbPDL@gNspWyLez@R`HDc==fNdbtZRqA6P)kU9sn_S`;PM^ceA0p_{*Gn2c zT6_h{F{Gf(F!qS8ALkHzNX0E+8Zh{)(Q8GRpKDPV?7|g9h^Sp;j{O;r)?z?vRLXW1 zg-amhH8~v>cJVV_7AkZ_(b@$TMOtJ3VK(}$E8H+?-McMd9_UhGdZWCH8&p}LvP>7r zLeIvBw>H7fQqAT^M!dXiwfG@a>OpSgwLoE83$Fi!0+X%kr@`UZJ1h^|9H8%oCEIUU zgchw72G{ThB(|M~qa%Bkf}yK0N8Gu@Oz`BtHJ>qWko9}G7%=P+${*YQj*TBxMQ;t^ozxaAAGkF1V_0H7gUD^+ zW!rr6sLAW=7a&unEV$?33r}=VXcLefJn*PpK=1eh12KTqr$VjRethDMSX`Tox1Iqp zf#hOnSOlKpnalmajzgqFVP!R0jZ;@U%fYtS$r6wc;dP8|vIzQgtRjO(zY`dajc$^# znSkNbViJrVp-p2dI_lNS?fCia$!yT?<*cQ9kHcSg@>G2X z=?d6%WdwqG$J39Gqo6lQoDl2P|K>sfUq+aw-<$BE9-Vam`vdKiQ!FBFQiNZm9`iXt zkLVr{=1y@h%wnV=8k4#}q5Ad>%}$uaA9&8elnBy}jc4}j)476&raQmtf#?tT+YhI@ zof9Ff6-pH2hVZ%Q=5pzquF{`P56s~k2Pv}`e>iF#)JO)$_K?xdciP>LB6d{rJNg(6 zttpbfSRnY$G7mSFqrMt$rwpJaZF@QSO@*(GK1p%&-tI7n*N|Heb*P^Dt`{LmPqgmv zjg{x&@U}yunAm^vtJ~4yqm-5#CM^f8H}*VAi6P#Gy_ZMj>)v&J4$bi1grx3k&I}RH zD77~gN6|Kql9xwi0;_k2hGc>5TO;d0*hVVwhzP8VI|1ZpE3(sG|LBwa5iXqj4Mq39 z4MON39;&a8g-pEeb6-erSl$Iol9zK*1^bgR7BM9zVvzaHC@BVU zZrz7b_6`zFQ6F7MylyC-Itq*r{4&RYe~^aS8OOAz%=;t+>l&9g<{c?AS{rPt5U-RR z(##4r8>>KhqftxxOneYag1|CinWj&aS$s`gM}lGcLgRA`$JFf^65j2uWT$bxoa2xk ztb&m$6dGZLEzevqDTOIHsJq-&P4U?_ydf;6U2HJ;Jx{-_mtC99a%+oPvxo!eToyfuY9@9cZA<2#=>ITg{bY4R@XUiaGLLcvwL__u z)YZH~FhbNn`rU40HzoF!9L3yqEb$pkhmP3o4m@oaZ9Y$0-2Q|v-sZ_P@l^fvPi-ey zM^(|Tc5*Fd)5XlCEv@1}*SXv^<=E=VAiVtku)uE!=TqV&Z}kZfwqSK_r*YE&&$|w2 
zJFJJt^-M(WE`KXNPixP_*pJ#J)Q|E7dxv#dlFwADvbm@rqv@OZwlyIzU8o$)5|gZr zm!4920zR*fCtY{#L$r z&Bl_&P2z~uf5r$bq336Edl+>D;Ykv6`LXumpmNatRiEUS&FsZ``ixwD9q6sA=n&ll zX&XblnvpJ-bY`!)ID#*c*ZuItLjK)RN08>^cxbLZLBwr9qBYk3*t%Ca6w!&Z=8KZa z+0Q||-U#GF;m}54vVl8&x36Q&E~f)oV0956x}x5&|EdH@v&b$6W%7rl7k)Xu(n~Cl zAy3i|DE24Bp^v9cY2N@49)&p^qHdh~gvSIaDX92?xVj057qs>m82ueDrAbzYxFkS0 zB$OObICy@H_9Ct`MYsaWeZ1xjcE)JrstuL=RQXt~uV~APuXaO>4CGhS?HdP>JBXf` z8^!pMjnT87h8%6vCW^(K3yN%|eV$~MeXI?2Th2^?HUe~HMzenY!;s74jE(e;Ol&Alb z?hGo@_6-E2rz;}@qHY}$M+~^rwDsdw@3x?_6#l71FMsxRJOg7j-G>hGFzmm2FqRl7 z=fxcisUH^gO~`XlXR^6O+Dt1w#Q89DFMDGQtT~L|4N3U~zC#vsEI2T^TTb$0o8EnN zGtY`wi<>;$N08S623h>JkbcP2Lgh^v$wA}*Y5no2abjaN5d$|8iN(@ zySGGkSce%${`+Mc&gq*{Pj44U;&%FhgL1CLl;b}&7a&N;0mQQQoy1Mxeva?zFm|`~ zI5k2LF(}0#HPfG~CKXLk>%wxJOoPRZ(Pi{u`Qa{GjV2F&#V=Cj*DTVV5IaOxTNV)?1FeJC6pX=6QVpEkna{`-BKJ(Nv^@2xRNlPKvaY zY834^V0dVMeeX4qS8OuX(?0$oE?eMwyt3CGh6#3>aob`yNj`V-7 z(J>vOxr}9LE-sp$m)3VC3|;5euDVppEGko6VH`^zWLBG1wrRrW*qB{^{=4P{oPVq* zmPA)T-7S@(>*!n8-(*@zq zwFVRC%52u0wN0?>3O>`0KHb*hr#)|wE(RBIVse}JK(&>sx0*%87XIS(&}MXU{NuUB zQlKclHP0QH_7Q$o9NCrB+*zKh67tVEJq~Zwx?Np7RGW@%yEk!lo~*i&*gj^(^!o&a z8I!+!nGXjDI=h4!UKabk{3-&CEa=^GnEhObk=&?UO*Vzt4>9DgyS+Hinmk39L=TuD zekDcW(AB)rNp_QIISpZSL!UOO+3-POB57`q6I>W`+?@qx)3VyPa+{It;5+KFMO*$w zK)3QY@+1Bc660piew1oE(&iia(1erx*)E1VEQEm&J0gi^WazrG`s%8UXEFABykd?g zz)q|KaR)DMzEeLbzxvNisyI30;S{bMY0Km98%o#({x=3L>q^RL?4j6lV}byJtF-I%Kl$peJO$)#e@X$a#|{LkNk%@@dL~c zEcTH#*c`*Wz%l38G1Bp}zU$BoP~1k0Nq>Sr>Hz-vF4vHDV<&r}&mIO8mKGAXz>mI* zBQQR1^-7c<6-iqViql-$6rKJg|#$ATJZapZr!jwK-*!tt;k#m$@@ z02th-_Uh7u{`?~0IjnFVfXpcxU(TU$91WxiHWW`hGH(in0p*`|OX^Uld-3W`8K!|? zsbmFXX+`tyHAhP`{E8UDFmrN-4q73e&MFo0^}4SPV9=BQ^Cd5b_;f3klEac4Lo=66 zj4S@;Y*yU^fpyUC&zt~&dU?hju?HygsvcWuJu571W5v-fYNpj?fPl;hd8wiC_dxjfi1^PpuTh_PFeOLa6&n7ZXA=0{;yL&+Df)#_ z&4&N^(b$NXE^6H8c^_#J8M6?`k4qTw*Um`_+-%j~#egM}z1K`#1wfov#GXG3k|NcI& znSe9L=+gE>`(KB61^LV4PBHGhJ$b4}ea#KqK5U@#!D3M^sUi`@ehfCcQhJ_YIg*1_2t};c+9NS*kxv{qYa&KOv%m?gy0C-T|mM zwm=(>*J_mV24I`*0O2@TWgXxlW&wR0y~Xz?Ab^h7v>QEL#Isfe2xXfBfbM?h|2ENy ztAXS_#@2y?!{VhekQP#YLx@6ZC-wKpfp7wH4$q54cRL_!!3_|e^#)AQnRbseKMs}PFTmq(e+LRwx$S`TJ2yb$`WBGdEq(+v2^D~% zSODAz__M{@sa+VjFKz^GlNdrf?CasO6#R}~u!6u(^D+K8XF%{*jM~=>3*M!%!RcaS zvPAClhSm>-5G#c!JEOXhTK0Kc0SfngsrcWrfBlM|DMJjgDK{Orp&qam55BQ%%%6EQ zOn>yhjieD19WxLFq|S?i0F6F!(m0@1sQ&dcf#{L{JV4u2uDxY{ysV5#2qtWgc}vW= zJ^%oVar5dOW}p=|_zD0oD<89r?>27$Q0#^D8qssD`k7%}UENxhbUU!-JuGIY+ZZa`AJm#R&0yGqh!Y=B;4h zZ|BsFV#%}5S~adX^GS(z=_Gdj^a4aAe+>~9C1iDFMZocI4_Q(#uq74@lvc1>uSSj+ z<^Ze#2cCrHZVRA;yk6HIw)Sg2T^%oIz4@{c#lMEWWspR|MaH!TbQx;)=j)ni#{rdj z4S=7|GbyIT6xG(e8}GywIaR%xU%n@PN+W`&3{dPE-8Qe&`_0bEM%$QD4AJg9HTfEd5{^kSWg+#ACtx01|H#{-W!@I|^+1q5jrLEtfB zwzoj`AfU_v`0TjHID4SuPMliy|E}{bxX{@gXP)dF*Pvi1?I|=_ZEQWDIunqSHyo)-r8ll{?IMwL^hb!Bolg5%f>LKgoWu z1voj&T+KI@y1-L!57@iVr^kMqNq`U0fOyLI94)!#psuGz<_C69N@+76oIuqyL5hLYV^SwVr{P_u?Z ze*){fN}zq(v#6-7tZYt}HSq77-_M`cSB_iqIM_mrL@~%(9}63lU_ytt@nR|VeIq&{ z(vz%Ad|mqt7yObki|~!G}V3m*(Lzq~&srvuW+~P-Y#eKTlVv&%@2~{G&5HGy$MA%;(4f46?xkiD0OU3g;Ek2sIV$f}au@=zKN$@ z0BGSKfR}x*hkupQ-oyRj!ydq<)d9rXvPa-DkX`Nx-EWY*R(YJGK8GPpTa-?cPV-9Y zt;8zsnTeuUH<{TNi>1tz*=CT zE6x#N?6P$)aNKi^!WG=p<=aiZb3icAAb1tcAb+uN;z@$I2I%&kPrxE70MtaXjNg#} z0)^s|;@vByy{LGCgj)ZTpzUVyFk|L53nnYzJMsIlOBOOncS=X;X_j&Fk0hvSeZp>#tgJbFC$i41|jP6a# zTF%q%05%i{`Q-T?kY5lkFm@3RA}5`f{vZb_P@Z}?$iX}*=i15|ibLvwzIa&uPAPu^W-^971!Ni% ziYy%|eSlGVFj;QWrC;(!9rJ4f7NO*KJPSWG?xjY6aC2EQL0~TBJUL2}Q7mBHU7zAF zM8l>06KE6&5rtyVmj>Gb&s$IQ;YTb?;mdkU9}qk*psfO>5(YdF4t<(*(d8DK-1!E< z&-^(FL*>QpFZ1+9kE}U9^B|3FJ(jvc%fjk7P4Qn{fxPo@9CiQ(P*BC>TDjQjT09*9 
zc-KC|7$ax)rsaMy+V-f)QEdqO>skS}1i&LcL;h_1#55B<;UX*_P()DD0$%>2T+$v% ztFaMYz{Qgle&+zyAP9y5UIs4I)7@shL%K|eGOv8o!Sau@RW=E?z8pV^2UoXi zpiAh?pNa?@?b7kcpNk8_V&t?<^o9a&Awa3^Klf& zfS$)CVk5qDW*qoA6Uy$JbC+|LNUZRbYgVeSiZ^vbM*7^fM89DotR`CcI8*^R7pR@w ziojCGgYaX;rC5gyhwWpKj438P<_U2mCmxhPn5}sf5FTd!9CYr6Bze%LdZ71yYIbU; zW$-k+lZJnO^|>#^WP-s#vVR}I^-eDDfd22}s&UkFj^@{|p&%NGWD}ID8X!_tT5Du< zl(#lWo7a@vr}^jyE=4C7^(-mv>+6fd!)~5Vt0SRd*5<4N83+-^kv}@#1CaaX5xog+ z3=*RQ9zd=+4)vuFNkhjDl=H#oWzvIm#?ho)bjL!%9WCKMZhl z`Xd|%e{GrOL5jC3`YR|sHvEBKo`hZh9gv{zyxfi7x|wDZ5l*+%zLaZ5L@?ox1%#wV zE~KNHGRf)boB~{^l{r@zPn-d1*M=_FwR?VLAx5(gxEEYGgX)0L5<856C->l|Jwq^? zJT&Ca-^;TbBmfp`d|M+DVi^*w-BOXYsVI!NvKAl%n!S~6X+ZI$Bb^ZTiP4FC3t{t{izjsz zE=NtM*Fli^rc(g1Xdfy5>6p*`WM*H+#pe>Nucv&ai{p~c*B7rv{@%Fw#l3>ud@HFH zu!%3Zcm6s;DY!UQ$8v=5j~9S<%bfROlSO_NTJ?8_Od8=HWzQ>0p`B~DBsG(ZJJZFy zo%t{y!QbTBKf4=ogoczqHaETe^OL{Q@Km~r2d|1<<-FXUBgwzU$1b<@0m9b*B7po zWKh5)$Bx`3b)m3asfCqWQrhRFnE188zM(NZlces5upn~jd6dI_%*xZfk>sfe@`HUNslm}lN1pZeh zM_dZ7!G;DpR+QC`oxMu<{(NVZ)bP-E;p z&;AIBpo0YQR*5=DLdaUXa4m!bPMW)_#sJ%&Jy{_!0N{)r1)~NWqnAgJ7YNgRo0fE} z#1aaupj&%J$3>yPY0_o*VRH4Mzbx>4B|E-z=tz;NmUT=TeK2q*AIAjFr@y2! zY1y27WvOurXjiw0$2Y~$R@Z)NJ=*i&T8DGXC=;ZQNb>k@wmg4sDjM%B#22ds+E$`s*vtkLa zE}FUc6_WD#e>R9LsXrS=S>s&8BO!J`AG-JVb)+=xb(k6`p`~1P&qaT|%OMqbronb`t70izI z;EdE^N*+0sUuI7$iB=jMl6GFcZ*;e#Jvte8ee@$LD|y;&)goexnZ$HyZB>RE*Nh7Y z79~jv$TtBXNf#2e=7LENc_~A(@1Mr;SD5@x(kT#;%_8c??>}}rEX5Es2J=mH?;ZCl zP-LgXk7JWgBSy{sNikfob=0rI8ATFK_#U19)S-D{HHef?0exTr>c~b4r1m|RT)79f z7z?3c%9IAkN8k<+*Ql=HuOqt1bQ-|+4DF=8BlbT_XM{9*20XwEHp6mz}e zdj_}XbkkoRA~J3Imd2lg0^Nm9R}3Ojnq2R+^zMGYE}E|Oqx#vHWv73yP%ourJ5y1? z=CP%BbD#In_>njw%Z!cP#=<3dtdd65@O?=sF7ggHao}8`Y2vO{Ezx9K-48Uay7y?B zb$Ewq?*-=FW}3lO`={euO;`355>1ST;AmLqo*Wa4<=Df5loI72s*b*F+!INwe*U?s#hjzCB z&en(R0w4xThiHAI+B#zo%DTOXEG{pmO_};J+LA}19t=IHUe-1PYhWVs>Pt*H{77OaS6`A z4_R0JLP2o-dXAee&A847j}B2jrD28HgiHW#bFh;NF|pRHpL90mSL_F9EfS(=FkFSXlK{~b8*=-v9- zpdk7{%7wn=44RJ)Xe4(FF2~Fvs}sp?(5N#e&Qe6uJl=f0Q*q#7{3j44q% zWHzX+qy_d5DG&wPwuvlBdctna`SJK@3>9S|o57z=to3rIm^68Cf!yjL=Mz&rh^xQl>dylw2{PZ z&hcM$B!?e~9I%e)kI0csLPTQAoaC(Tpen4|U?c z=RZ4&2w4pcsNV+`EaQBX=wxSPhS3&q*5Pm~R?5IEysUjDw53U5`p1yfgDcjlYhaSw zPbV9U@3{_y9hq_)PW3d1nV!j=oJacNEmu0!R$O{&dZv^8Z(z9-8+b5t z;4Zz1VKo;v5{GFwb#S9g3}oFjv~b)m*LG%?bWA2V-=$&&M|XyTWIR-5hSV*tAL`O8 zlT1rqZ2=hj#nwxCi?H{9s-L;$y|JkF3n!J5x&<=dUgI^fpFdN3By#T%mcy#`re?8DM_5g_ax|~-9~ZMg z_f?rlpFAaDmS7z??;KEGI&)N74Z`1oY*F*k{ql?1v)dd_jWtnC2)FlareJStZT-*c zHD)wIRxgea^}F4+d9Sh1J`TDg<_o3p2DR=L=fuJJD7L0dq-VdE#-@h#YP4rZium-t ziVL47=@zMjT1}hwXPP}*60;vxB#nN(HxxquUh#`)zWHN3LG^N@x&Qtg8Ov|lqKU%B z-y-GS=E^-gsam!Av;7WA`DjnF?2g2l8Ww*+n79Rz8{T%#NkZo?K6?oJoJjmU8-406 zFh;Yg3i57!U}q^)%QyzLx5OykHD#_2)`KU7Z@McTsnd>Lm`LZ{47m^q$~NDx7jXPh z@c{~q8D^AYHvG%*<~s9%IFYnuTOY_T?UKf=pOA=!K#^sN?~(*9JQ;w-DxC- z)DCG_Le%rncWWq03)+FUhaTJ0u;|1C#~&9fK;Lvv`&t0HqbX;&?U_Rjlszn=?%%VvD+*uyL8+%ea{)7520H-mN#XWF`2yGMfTA5bJ+ChbS| z>X$QDB-Jlbfz^yFh?ssdE#sp+C@D9%B(u()I!m)kdq%X+nPq?frB~7s`P?biJR)kmziC?dpa#_0mUkh&+4d z*HN7PcwJ4|>t$={F90v8{DHB1BS0#+-pb(=|3O$nTb@;U2Hin;3$ntlQ{Im5FmMt{ zBcMn|sfE6~5d+sPR2?>0n?6g|{%lpp!CZL?S8}Zzqd&~*wBV-wgXy4GQ z^W!jq)I9`z^XL~|$B$o3Ffmm5ym_lw;D_EH*s$^@!<5@Dwuf0f1v~f;fx2)0N2sz3 zTzW4lOxy{yVz~{h(>_plcB4B?6{ns=;nfA1DBk*QmGEGYa>mgy49-y)W?OU9608M@ zr7bwd@|PJjx!IEv3^feU26OW{GgKREAYkynTG)zobA-uBVlJrzQN<%Ti>B|0WW|nd z4*}P@6In=vB7b~EncVWfOZUK=Ah|C+UrUOI;=kz@dCSe0W*G2#9o?LtOk^*hz6#{y z3)NgW?}hCRJGJaLi8S+zJm%7S7@2)wFW*cQ}F5a{57N@{l zGLIa!h0cO_SeZd#R$@Z*>h`~fiL`QFqv+k7y`LuztZ#R&Jt!Z$zs#Y6s=pC}onaMs zi7?ON(XiLB`6ZvswcmJjq%=(Jq}5K@&FHe1okWK{1wswtZ!$K9#`mPs12p95gyXI{;GCxOoN 
zi8gN-LocnLG&K?wc^(*#7Xj7k#uA>3+mC+C3=u$uQ_4wpx80wWjq)r^$EV+|4iyaX z(T~F?*z7uYaQGHtb~)>}D>+;yM~zCc8=e{{T0nUi6ub}_@aWkq6_M405S$_aGm8xf zyj!RP=a*Y@Ob%ev6}!25HDBjoL=r$rOA>7wta!yh9P5w46l!ExYa1MnHL8Re#(wnk zBF8aw{#Nx!z1SIodQy;%#}z+-yN9Fl#I(vK&k3*(Y)I$rv~-;GbF#lO(E_3d)%Jrd zjY9r^=H_GN$2L^r5VmAAioY$L=k6Lt7;%$DI5P*gXlM(t3z;vv_2+W29q+!X<+1uk zkmu!xhk8GxA_(_5uX?EE$bMMxAlouOe0pn%T#*tse!KjA`L0r#^9<(y^1STcLHBBq zNS`Iru^ub#340pn@?Zj+nZUe@tqEu5urVTq?t9S6eYW)CV@2`=Tb7%l%?RagzQ5*C(lkLfkhkrxGAHU+JSh2yxg(jRFL zWkT)&)eBL4hv7j3XmbKjsxbrnM(8d>nhJV5APZO! z37}0OXoUf#`8uGWEv+TRD%af$j~pLo*zAHcXVJcg#SZhNdTL=IZ&XM)*OU{)81c>C zi+^BtC%*asy=Ne#8%tZCI8L(lxpdw>HJA%r2Rzv7x1i_XvD3gRRyHjr=;U#m``52u zhjzXJ`T-{bNDgJLjT)IlUV|T;?{!7(Q2IOJ5R9kbs12eb+hULA7N!BK;(V{>ajO(E z$(IhNu-{`bTuVO)a_P17bhB4&e$gt>AVL#i9?I`>{kc1HIgjautGTomp^=WaFf}#O z{m_~bjjRw7?5+3S!p7yDRx+V)+fm3H^% z6RnOJn>u!%>%`DKLP@TG;34|?9_vED|Ct@trM}eshF+i7pa87vRJu0H8|$J2 zhv!fcXg=s-7Jv}J8DHpa?^xg8XY+jbwR}_^+GiipED&Hd_hE?~N`AEZBufS8JYrS* zbq@gz`lg)$F++2aLH+baaq7^1i{8?bm3h&Y`K+9~lIyGc z&aA#0eWwV_ziv-(I}%@PjG~Rlc_=f(%?3P+PiM3-EK3)95XQVv=}iao*aEyxpK{Ws z+QHYj)I6aAsU)S(Qj&R`1+{#O}^1+^)u} zj9FL8_zA&GKXCbvl@Yjr=|zQ?Qv6Z~4(RdfFGs&_7*eXhvDoX|x!U7JlHlry)kiMi}@Iij|!nXu3klfT3m|&0LE|5_fay z5?^~f;fKaB`lK~1RX&?PN<|nh?d}bC_BZM7?(SP@=Nc9m@aQc>^uhU(d3uv#7FCfY z;@N0MfLbA3@V3@=dz1~IrGld{+d0F*Hrs_8d~(Fq@sgprY>ld_6e;Hcf4@$;PqIET z0%FX7+lPwbb)lAC(B{nT=!d4*LW5$8L;a&^w88vJy9cLq&0@WQXBv2> zR1X~A4W8B2an1+F=<{BFJ0cExrR$oEim3k}PZ%0Ae1~#z^L5||)ymvHSH)1ISAn3- zfA9H4#=XanFFTqk;r?{rKmJRj^cwp~H&*Zp+~lKC1phm9lo^2U&)^&m5S2v6I}61M zeG!TYsFIxEJ(rQbA++eUmw&YhJkUDTn?WS#oakBf{y87adLx|H;)@tAITmn{aRdDT z_klsfkK2sQh>&(#4G0q_r5#`|pxiF;%R)34)Ahz86q|u4$1pAo!cH#^kyg!edy^&2 z#=xFaOjE184D3q*mBe`p!R$88=CqkX@Dn=Jo1aKwbHToWD+f4!bK$SOvmRuJr8VdP zBTIToJlVTY-94AtW#7kg>Gobw5hxs#LjVt}W82Al zs35J!xjk(2vP1gaZDRx#I+EQ8eC$wbX3!v}gc=K|h6xyeEnSrjk6_vR_ zsS+Am9joJ!ffO2)F@IlgOoRU|*2NbVyOGh>6^!HZDvDoOfP{z!cbL7vM(5pE z5#fk6ufa29_=~xf-UeiaC=*f|z^u5G6c4QLP%b?Th6#iSJj`f^mY>p(U#&jFxl4Z6 z#wbYKX?OY?KJg?cyBU>qvbwJ=<#>aiMp)t8&qtu$dF>^4H(9tlD?=kwrHJVGbUxs1 z)MTzA%nm%AC=fE$A?wgZ& z^~Po5g^hYf;Y2CS$#niW5{N4_2pdJ2E_`w20MqX1&t+>W1{4jOYbak{pojQSni5d(e#E&(AR- z?$lu{pTt7_f-fL6Q&KaM`7)d*!F{qHEbX`xx-eD5g3;GTLi@pGc2F;vNJyyvk^jmu zu&TXO+Ji^VV5mfJcQ?oe=6PR-tweXk)=!t4w!Ps%tRRU1Re?%URD4n`y$;6}so<9@ zsrU|h#UVYWpeQ4Gq4Tde=*GY_%8e~S+=1mZ;rXT!<>AKRV4*?z7elz9DgORRpJ9E5 z)u6DUtfMdl*O}G0_%ND47z&=^rqX+DAMgC%LA8>%YCg?rO!3h|(;x zD7zSk)NMGK_NC8*leZ%-H-v{AM^c(akZyz8*Wno{t`xms&wtCAt_bzEq zC)A#~ZJWcD``FjP=RSDoW0K>>)+2Y+L$|iJ?V>~GX%pm+hP=Ht%s0Jyl_#{b9%CNs zavqn0ckaz@j6x*0*V{)e9^B6o*!cX;bA9uiMI%Le979A_erEZEqUwYE8~R z2G#a+NS81_c80!&0;*H>>Z;$G|B~x!W8>M?_LAn(rqy$Vtx`&XI7U>#bNa>-QdyVeB&pzku`+q(0 z;<;ZuZ@3077}l)cif?>^RB_;I3g3j5q41Oe!uX94al%DHS;SW&V>>`_A{8~2F2pC2 z9D}_l`fMdFD;;n{R%{z4NrE;`gO8(q?Bhu&+JH5|)JH#trVVTm%>1Ct;yb3?&|YR%^B0cJ)K;0xit*|QiJ6seODPaP zAy@>56jgnyG*V=*v5ne?056|P!(m$F{dpekULiq6@>7VO{mggGO<9p>p$-IqUlR4& zH{X|k8vW5KqK#^AM#|&f%ULlbAElJ^tF;QVHdz<(!*0i*wp#(*Z4^G?LfX#Ae#HY6 zSC8+a9wQTeHe=zh3zK1GZqNIj&B&7Vf#+<&gZQwzbVsB>uWsIHQ?dNo&I*ed?W?r? 
z0V1hjpR_Y^gPK=qc)q7Dhb-!avJ&q)k@v-a*DAwbG$7rxTKcZoKKKLl5uPkFGD}X6 z%fq`0l&1=dOxqT6TthE~-U!3LceyDJ4}HA|-(IcG)T5ul?P#VmFC;Y3h@_LPQ?8cm z`Df{b89tSBWl#To8o44ON!YE%aTEP=vRnfMxRckII4^C zoDzyeJ228&G@4J-)HdSC$@=)m<#M6|AK&9u-9`j&GYt>gN-lD79xHvikkHE~xRXI- z);x?=YDEv==Vn^?J`$C(br!9+$D?x#=jG1CjCep2-X4YQRgE;hXuWlgpooGFjLWbD^BJae4ZM7}f<1<7%{7TZ-gqfOw|QRUDCqP+#2D`(}N5ljqO=Z0r+=uj8dV`EH*D z|0r!~+CX42L|%t;TFO0lO^LLL;4sRM8X4<|YdZESylUw)^9D=JUeV3!k&!ch$atsIN}U@iOZuCC~N@vF1$x?LSRBf8lzEcTlLx*JylmqfA3hk zwFoN(0FA47nTjL=*h7XtP4$La7DbwdSHnVxCYop}iaL%xsE(n9tduV`Ppl5(z zBxO*IczDI@bnyc>qMJgn!6PEp4`fO;vl2XQHsEBQGWAa++TrS}>ONHDe9CiS_E9ez zY{4`vItV`8Y}QrDO-J6uF*d?IZ1;tkeApCm$~U%9Fu2N?z{it3{~;}`2WL6PUH?w` zntbQB6ZfWj^!BA$#4h@+DAIdkNq+ptzYwM}?dT9GkaXRs5w?3`zN;ISilZ+x@ZTv=*d_Is2iJoVLutB&bTXXaRI3L=un zt0iemfekM{R_=FvIK?0aNwQQf<8VOkOGi#dd^4ytIYu88aw5wS@Vm)oLqB1*;Fq(q zGV++J0K;`%5niK|_VqT%aZuS;zS{1gDh~fS-!cj=;+@MNXE~w4ORdn8#x*4Btt@vk zI9|c`_Hdl(ZI^GuHaFELsY++!Z+0JZ1t7F>;6zv5R`zN(wvQGguq@6!x_IU(XJgjv zXE0=JLo3wIX&~NY;+D1Tf8^|pWVKe>*+Qh^MfS{?v-ow2*sFtli!399?4{baFnynD z@)8i0{F6H((?`Na0#{tUdDQ(Z0(0voUuarx;v-cRz}wY_WZ}a4`4h;)m4m-8)f!|E z+_B}<6uNrFwyDjr1hl}PX` zrBVToa7U{tjlR^0I8P+%b$2g1VHGZ0*}NT}C_+UNY79VZRgwT>jT`e)d+|VUSZ`L5 zlWyY2&_wE@vEt0rFC=Mw@E}=O^aRi=dy<|1qT|TWlY}xem8b-U%bO@v>Qr5SNgUP^mJn3Y;*z!2 zLJ~3C9rHzeXk)3IFESN&G1Lmbq9=-(SdfG(g82*cxB-JySo!NbB!W|8AXvVW*!D#s z@yArB@NFhFCZ-2C6Iv-VP9hm!Bthm*MOTGHN4c z*rvB<8k(;5ipZ)5289LW816Q+$V5IJAz9C6(lLl(Mb#0Dr|^;-UdJ&G`-UiBa8MZ| ztm6q>rkB4$tMiP6E!3I5%!Ts2=Uyy8u{6?1dx)kiT^Z{~FMr zFO*#YL+pVM;b))!7|OqW8;gyZtOj*WlK+^dfBTkh5I0_R_j1m!x4PgDkU>`h|9OP* zPp|=Tr3L98*4|QwH*f(=&;(352+zW9>@)wr{pUaVZVa0DaoM^Q{(qJln5zFrEVcg) zClL$7v3t(@==DE8sNa7T{}3z^)#da@_5b#~|6^^wy@27^$t306`JWc=otqdk+`mYW zPgSP@__iF}=I?Be2Oa)vtpXe=*c0s&1nUpX4YmS|CxLip$2Ce|M`B=p z#j|y(FFPtm?H@kK{jlc}aSxu_f8zSRPM@k8fx695wvq;=;IsJd4CyIQsePKOS5+(t zFnmJp>v}0-t~svj6QV!rft%aN=1h}qPpar15bhs;j&+eC{TrZ7l>qvLV#j^={q;%P z=1?NwMP=g!h%IQE!VAlsY&HM1>Ca{0%4>TrZg(6$;9)r+ zTY)KLv1znuYgoHo(5G`?^kfQ@hn<{>%F*}#V81E89?^ZZ=%hjnAsOoU}4M6>`U_gF#=RygNc@4ku%X=+{BX#tg=q_=-B;F8tReFc;I{ z*hXy$=xh~cvixO0Ay}mNB>|<7NUy2%EEv609AM|UOaXL#8Ad`Cl$@V}rh$vBKqfD0 z6nK9zL@actSOS}-Vjvqng26U!3;s2cvC`O&qVtT!U-(%s=C>OHP7!IAXhyeAO5U^C zXO|DpB zix6eU!ph2846eq%kYF-jVGWEF^d3#c8F(KWZ+*FR8QQ`7GQ{b|hO(eOB;5Z3ASZDO z_lSlK4Hpm4gNw~|FX#!wpHDRypTY#_4mol)$R&Lzg)@;HCu!V%0yuDaptZ&Ay9I1) z^#_6T!@JiUAEi4lwd=V4xh46ro0VBg6`t-WN4H)6-}9nu0M$15Vhrag?$T$$)BQ|O za7o1CVFOF5A`B6CQ-m2IGXg^cKQk0UZV_-)8^u8G-I}9)Nf3f&SMxr~q4Gf-3-a;i`Z1O!BW~=MAG9rQv%O(f^G0 z-f*$sgbAfD+l}6(qqQ5O)#rId`75)F`pTYg7$KI~j{jl`evo&E z@8tlJzE|BC;0AOF^(}yhS#v_qhX)a;GF%6Y4h*M3$ad^nLDB-502VE2tk0Wo3EcY6 z;MU`P>kcaXPnH8r1 z7c#e^oJO0-hw#R&Z@?rQRqh5H6t6Acy?VS4$b{vf@P@3-o2~>YP=I({_@|n&$UiSF z8JV}<-B-9r4fx3=4aIvqHXb9dcN%2aT9f6KZ?JHc3T|sL>k9tdYT^-V)bg#H5K>FZ zD7CqL&C09aac@IOsytsHu`SO0%yn}+3Rub=&#!YK;wb* z@r8xaFiYAoxii=h#&>9PJC0Q`;8}X6GSgjdqV#T{3`|rHkJ$h$ir%x$5a|&BG2sbo z_li~NrEPCsa9J4y>?(7?XRCGUS*v1E>m#nS^XoRjM06H z+4X6OGL_$fJTF0vc}8rM`O0v?RIVPJ`YxFiR&`ad`_?J3{KZ5Anr}&vyz4I3NqC7sv92Va8 zuU8(GUDU8KS=*5MDpj>FdCa%D#r0HCRtj`p8%P@Z$t@`mfj&t>^<91>LUzFQbO<11 z7~7P9$MqVpv@62&LFaLd9|DdKFzl{ye}SMEnNfRlt33*++|4KY2p(}`LE!fqoJD?d zZ*X{UiRUB3$_#FHAGb-L83Q%sPvS$#+;_FgZ@FsuTnC~DJ`kAiM%5zCe%QKX`d2{B zNTX*ml?J3ZA7D7{)=FVnIR*U=#6Z8$9|4-wCA3|-_K-Mva2c?SP7$!YD!tFYYz9^H zl~)}DM$2;za!1nYJ9WU1Y(K|P;77y-9+9k|xt(SICmerVeeXkL_B3>DorhR-Pv`HX zh`LngpL&*^R=amXpI;mssOTX~H~CoW9Zl`(Bi{L`P&YE)>!FW0TI1uBS&}!}6~>mM zDlzUAm#J?k4Mn6|xh)tE&9~DGt+fcOeX|~Wk{rpeC6t}#>VdkI>u%GejBYxjGPeCP ze#&3tGv7u3VlOR{2qey=l1Lev$4p>vv`YwNm7^7`Z0BZJ^rYpH~>E1?yuAL(z8al3|9t%jw3JKd?Bx`H}APNu; 
ze}!NGrf&}BV6Xd)0Wtxt4v&OCBSH3tcd-?fE7aLPG!mUEXxfTDtmrGiPohLAtHV?* zcU836L%%mod>EEiE6ldS8)Yf5`)YpJOEMX%_)^eOo6*4D+XB?6&AWGe)>7Wd-;2u4KwuVYQ>X6+sG z|J=I^Cyu68#1iNb29y|}knEnR_9NeKW)FibS|`8S%R$f&j=UxVEn0=Q*GQ%XrNB z?Xm%Z{XkWiVEqA9SHv#e2fc%z>-F%WwTB1gcydX9%R@U@v-sZ7r|sM2b~8^{Z&gsj z5w_!%-5z)B4a)k)aO#QP<_dl$g7&O&S`Rs;oOa=hFU8AoW@kx6OE$S2wPuqkK_G_d zcn2)TrNV)~^wA^>CcL-1xdSr@yOk&!jWlUyuJ;;sFxnkKzQbu|4~T{2A1U_v01HI! z2}tKiYoReuaD>7KT>m0)aeUi|rWSv)S8f{&jTFS)8USx3{BmAxdZg(DPLmg(xrXWO z9)DN`E2+(9cuyL~coyzk1Ek0F*<>Ns>92X^yO$6t zbygV4>)OBr4cEL;8hQc~ovhQwPmjoZ50^Pull->YS$k?y7S-_g!p3N=y?*k>=<$l! z@x9{j3*pCbq%Pme_+bGQUdHw+k2pX_=z+nmK zVH2B>f*<;h%})Z&>>4qiE{SYw7)tmAZL_w)*Ax1=Zl|nfGK4ED&n_D?YFd3F@L+2PJ4eno+QcN)bMPK-IkR?|;J8N6!i@S^5I62dI-IKj_ zBKva+@!{y&><4sEzl9qi?tCWPDjQmUWj~h7fRJ737fZ@S`;lxkfw)jP^M<$el;4h_ zu;14u(IHf^mXPHE=kd(!`=RA!nYY}gw?9SQ)T**G*X29)%JnpI&2`gsJ>LBCIibWh zq??j&T%FdZE@h*fsMH$yZS+gsT^-8&73Yr|MR!k?39+FKAU$6fHQ8Go?J+$7_z6dv zw>uYJULzW0MStMz;caGRc}D{b+<-SEWtfV`GUEv(fX2j;!8HB@nZF7{y4AZ61tIYE zP^8?&-n-Jogfn}>H=`#(dvcfx9C$*%d}amxeT8Zuh(Z-bWxmGLO^pPG>Azwa3*5J;Ds|ls)25qjNjNI4!U^RXfqO|kq9mea^j`}UK6ob9e!b;qIbw_$PO%VcbrMt|9%_Pg!OB3k28Jx5X zND}l%{0v)wnC;xl1t;%)BiKpij=ROC;nUtc_w7zq>a{1}L6Zy%#wci(Z_6)%GxeCPf=nA2z-)jZc|n)?QH_QGFYDEdr(-+8ND zKTkC=PEQryspce!2%(5>j**V(vNQXFH>=>hrkw|ibBr?zBJ>e(#hgTxncUOjm zP2$n5(&>)<4KN3&|hiXcG33KePv>A z^FRS|HdptQuOfzA@5_CU@gnmYU4zeizU3CkoZ1dMgWA!Xmrz9&T~A&IY+fI?ZCjXJ z$v-*4Av?;_+8(>D9QFRo$iiSs>f6&HW)hp>o=L{llcPsB9#vH14DCw;=NL5(UG+YI zdii07?HEs93g1*GARgudHZvaR;JCz~k3JPiN({H8nz^IZm7`@kis1={fahS(FCY-* zKxzEa7UbqHVI;mO5+O4RwdS3q($)p`W9wWJ-9lN%QF5^H$l(HQpDPtF*I+ka_Pv{{ zHa25JPD&{KI4fix=aHy?3751P%sD0Qm5^Q|9De^3#r$0TdHVAp!51joM}0Q>ehT5i zCFLNW(`<%>Itl{+5cjw;&)sLUaywU$!2(^KlmYxr^~#Mh0YO0ZhXR+K*N(JO6t{km zMUgCcgc2ibR0Z(`y+ypca_dR_@r*bU{3SU&ym;mjTimo@W0XyMMO`u;L{{L(8f?eP& zMqQKe@>O0Y6yuz}2tKwk19|rnfhAcwyYkiOMXp{+IJ=iAgk^>bez|eeb3ucaE}0WwOb6+luWOssy<^5p54^GIT|rV@ z?(*};eKP5!xGjew%>B(vveHN#)*bvMFk^l(qof50nH}qy2l2nr=#!V7VasB?d}d7Io=U7x%a( z%5^KoI$~kB2?&uua>Z2O0-f3A*6yy*w|7JPnkP}gp-EN~Y8`*C!i5LRM5yZOB(iFh zXq`rv>xL94#D#f2K1_>rMy_>vxO_Un(qTJ0k|z5;HZe|x{p};oqWG4@%eKa z`smWoOnTgE2w~z_LPMvA?erT;*m{|CF*#Zx|fa_GMQI~SJ*GBEI=$JPU!?=n)!w45*xJ7GF z%T$XOQO7jY6h~pNFYUzthNk!Coaf=>hVP={3y`G`9281+vEHno38tJVA>_04AoT=k zUUqKBpgo8-431j1sTkGHK?Yd~SuK5SDP%%z=-6_d8*y2-WS=Rj0&8oDPJVl}#hJ!d z3BQ#RLu?=;CVoY`h+(CK5w+)&?B%Hsoh_}s-?C55HPRgm1g$kinZ+n8G zhZkJocpgqAa1FmNiIVh{#Lmiaug@_*27no>QdQh(q9zjE)pchtu^zD;Y=#-_!eSd*Fd#X)VaTAte)N}X42^Y^r$&8NrBQ{3jXFu8T^zU=Pnx_|nA znZwXuHeOxJYP~{u8kSGN?;w` z9796hY2Uc&?$rCc%Im2R?kx7+R$NBu{NKc;0X0j;dK$ z?JkR)Wide9{z45WYtgN<%ym0QjeT?FJK5sY679xF()qivXjXX$ju&yX30ba@4qVCR zArQOehU|(vK6A~;vHx#!0z%`pQa<9}$O-opc1p;hoBOF=Ar6gIOBS2^>0a21I<5Q( zvmUDob4&b8&2oVX2T|q|&amYG{$rECf$7m_hAU{>=XfZWydx_Cm2QvVjtHWK7n{Mx z_9*R@wsD@hhOa499~u3mfsD7i4yJaFzu=$`at-dptNo#QOa?nup{TOtLUO<}vK_V2J>m(DLaqpC#X@= ze5>3#vTd7UzHcZ^5lov96{YdSx;U9j8`u2gXbaBqYk%2)>NZshHX)|?gtWhk!k_^}@u)**aHyFFnFg=o zJxadE*VKW7?9d%<)!wgrA?eN1#?1ga);NsFMaZkD3 zRUeX9#`U{xT;+p%wdrQZM5MegV^bsJ6Cz|xu;GFkK#Y!im|%a6)_c=GVnYoc`O0QO zn1Q(89nok?vHw2Myid}CQ|z;}hKbYi24k7#?_Nj?E-%$4M!?H)&ig?@&AJ%tCOUQ-{R0^moV#i$3)StS^3B9)ioe+SDhk&P$xQ0 z_grRQbPZRAgX^Ux6K?kKYFoz2kM|L(ykUzs9rsWa=vo!AfTTO)%GUxuwZkNvJ3!ro zqG+xVRD7$FJfLqds3EMR|0_@&(tv%wzI|S|IBB z4Mg}>oiM$v^_q=pF_KdylKGWcFL-ZJFfi#e;a;Nx# zK44ZAHnA%ua=K`T43++NmMsa#E#YqM7K+4V+-*TVKcmEylbY(jj#PjWaSh2kidVRn zC`M1LH?yo`u(w3fqCz8y$W60_B(qz%ZGpe4E_ZJFgZ_>7YsME$xoI><-4;nHt}^YHv!5oyEKSIITtXtqt1FK=-%QAIyJ%QAP*_ zh%l`_N$M8RaCmfZ)JR*`qMSA^;-ShD*@G(x1>C{s^t`4_j}9?)SF05Z$T`t4IlcQx 
zp)oi66=M<@IT0bQq)b)_sI9l+wsSIaelLy%9;q?x4Sqp{ zW%Gstlt#+ClEkdNFzQn%UD#&WbQrzZtD<87-F3EA3d8`G;<&Rashyha{+hvuvtPo~#9fzDY^VVwzhwSD&%4LXvxh#SX6~?c7dw@<{RCp%vt4xEE zQH;>uMV-c4ba6nr^9lVOf$rEluX^eD8J$9+Oyjy15YwUN%}xs<@tDwFMuoKyohu_T zS=sI1LNQCF_SR!|bOrvL9iZd^^m2>j>LEml1O<;I($EbG13M~8u&X5I zMXlKsJdehlKzbrtpj|GfOz=w8DD)E^DQ75r2)OT#bJqx`e+^6M6MhvxeW*&TLQ9wg ziO-8*T@#TTYgc{2`+ z^U$oSeRbB=OFR&i?F-Xl^)#`*l)+z>c(#|2P0gNv13ptQ7iaXPb&S3;9Ur!R5IJYX zd-Pij2NnmgB1zh}{=~*!iN26U)A(6fLZ#hK@ALgwWmeb$`Po90Ip`2MOShLsEjq=~ zwcaq{NA^kuf8p7vCz3rpsNUpljUbEIGa&FPKH6=Xk}9HHYejqytyB@`g$y}Ja>~Kw z5o)FQw{JM6f2R$!kdaVCSWwG%>e^Mc4EFl%WE^cq-!C<-z3*cZWMiq-N-?=ceBSTkZTMJ z#7h>keIl=BLAjHXco~gPpW*f^(Rf=5qf_7fP=Lqk=JeF~lke|NZM+Qzy0CZlXybP> z;-o^i>KLh17{lcSnoA>qUPv+&K*o{9E6dT(q`56i!?8uFp+|d8hDBIJNO&KjD{uaR zFTtbfMZ5z({58XJ8lYkM_8CmUhZs7xsBLZV=->Dn&)CW*cCXntJIb=dvp3*l#rg7` z(-(D#?O@R;jLacTOJ2xFw-Mcu;k4D1wV>l>7i~mL*%{!Wtu~Jn-CiPuz-7(pINOTN zY$FC~O?X)a_Cy0jTYdKwt*+2szGPtBYh&2z$H*@XqO)_CHVPDv&sL+fY`t*cvwALx zYN8Gd^|>NxS44|w)6i>8P?$$RK)}i7NNnftib$YKs_BW#X5sX6a)0?Z0854nJG8Bt zU?EOT`NI|3oC}WE7_SzRZB0Z@*Ms#|%OoFLnl;s zhQRnnjA=bJieh$fS;3}J;-Gp&3veXULcci?@2=(ks%FS~-A+D2ziI6k>f@K!9Sc^z zQv-WO#2ZG4PbqJJe>=|Xd7bpkUh8tr2CZvfy@N^3X0`8)a;D4vJS&(6A&=9PO_P@} z>oZ-%nWjo*wB*h6DZ=u#Lwes?7411o;wYIwP*g z=ESDLX0`p}EMFw@^V|$=?+#(|EAH{Qw6^n%>G>Jaoa{0q*c>rta$ia2URjnduPjVGyhl_wJfB!)+smX_t)*Q66{4l_SY*^HwBdby{yB zfsP+{s7hLw2>0{=XvDd9p~tRz|CjX=`N@J@Exe|T>o6W>ggcB?C;$?htBVZ2g z)(O)z#eIi-Af%-W6@-zVHkyxoh_UVR_GP?rq>Y#<4lT72*ONj>)b|fiT6+zU*;DAj zMor6@i6R^*O?Rm`#IE3)P{*IaWV^Oy4Tm zIt3LllA|CF#xC5`NO!8B&ZDQxH9%bl!#wli-R#``kNprk23a1;Ur~%EHRHIxkVW@2 zhx=E<-v9pH3&yuNnUtDjB%)Ydh@I%unTaoCw`lvek+>z(>I0R+ZGLIw-X}&RzetO< zDg)_^n?gbF2K@IhwX`{}@|cQw>LALQmo;3(PlYxwu;ev^hWNVW{BeqHpk#P9N29Yu zpW`+C^eK33J*e_{@r6Fp+WvdR1iZe3gHg7RKFu=-m$`!hO5Rz!D)<2kg>57STJaNw zvOHJfL-t5vneJ?%PN>yy=%lWFFaKi&|B24pQj$q;Kb-x`P2sIJuCg+>)(Ly9qeS?F zd&-w@;6naO@4#sX6-0H!UkORyMrG->;L2DWYAoK z6c-m?@u6%N0&h)0j}dxS#>M{Z@Kp^L_UwNHn7l@U1+pQQ&9S`yX|V4|zz6?XS}~yY zC+y^(M$N_ouEYpjgwUV;>wg^QPkHcc#n!mW9{mFd?ytEH4+E2Ef?d%4`?me(iIfo| z0uWhsFI8L6o&WF2`{O76CJaD)KdHgG>bEla_Su~Iosd3IAvXIpv_tebTh_4GGTLea=5qCub~TM z_t!E*Xo{El)Q>CFd}d_z|ivRL-{G7*T;W9p}Gbz zzX8UeaJ0k{!%uhykXjL(!~e7)u3ZAhBH1PLKdb`iY;o}cWC-|k>yMXNCt?sxj>mof zr&|Sp=Sn_f{l}^JS>t&jDF8Utk%Ry>`~E-zmRpw^1%gApf2gpIl4wZG_-^}P%#Ik) z;nz9zY!>qJI58lFZK3sn&vDewU3}_lZXEKb!WLcRDYu@!N{J|pN_4WY_yBNif;sR0 zh=sxKoC|624bdvQ;>t0WAgF7zjoK|Ywf+gy>NHO%xiZM{WN=8(Mpvuc)}WML`k*xW z*I^^Wj#-28t5&YRlL$IL2%y!8U@Z2SNnU;SL8Dq;M*V-K#SiRF;=U~ZzNVBZu`%0% z(ysDr*?tJEuS3`#>=m$Jc3pg}7fa1ece0h7B!r+HWlsjmSFh_{lM|(3!ki2vFD_(ad4PY7@rda( z%v~guC~OEat5yodi%tPuZ&9q|*?@n6qu*NL#@fV?x3@eki}_^zh*FEAm}S6z_#3~q z8OsEXnh~QaA@)Sc7H8Z|t@n4Xi_gW%?ZJ`l8sn9QEMczTsMs#49%5&}mg1yqGtMG3 zapvEcXL#o1Z}S245%&fk8&(naZohAz!+!K;ul(pHHbWs97HSY9(+u-W1o%dPv~Yeooy-uY==8R*w<_X9u*B;dw*8BF1yAFK1 zSN^o0WjphVUHz$B&EA-`{-=v$_wt_zdI8*-zNFWqEQ%mSBVaQn1d!p6&w;PN%U_Vs zE>MTE#`vR1o}X;5&wmBNM!=9=qY&64%~#y{%{9gx*Rh1y^VV7p?R&5C$9`##pI5lE zOgmkhNAf^bWwq#@)A>JtE7Qyrh!e|&#mspHu0wOZfb4b4B_f>(se4#xl6%dR5CoLm zj0pGTrIVIF6la~^T^l1Ma51TbLNzmT+Z1#;bcTZC=p{Lh_7$hRn7!|xE?7e1?;C{9e|>J+og9L z)K)JDghC@U9?%RC8fO6IJjn^CPOe-gjDl?wa2SPwjtSbK0ShN@OWZIIz|m(wndB-U zu|D?p%}M&k<~GKTPf@%j7IyJQCb3rpJIj7TLU}rIY{N~+CVt&gRzxG1 ze~KQ7?Z{0aT&`%$!xdZJarJtg$b9?**{M38CP2og0;OQ`J)n*0F!<~l>pE$ z+r4R)pWz%|LS-G#LXJ1;rPWeJ_*%Q>k2QR^;%2<2jt#CAeN<0-qp|Epg3Buy@mLAt zB=fXn1t`^%!VJe%mS za7$xkGJJhdDCgZ(!P70J#%)3=vZH!aqY;yd&BF1YrcI}o6C?&_x+`2`Z2{heeN3^? 
zY58r?6ch%^?xOE_ZGTqm^yKw`dq9c`7-#f323ux>YGLAxKMoC>^};L~$5=dnfLITk^WPhO_sW?TMze63=N#&iyq)+JPMtp~f; zlGIQ&7#8(9A}EB~O*@I-{LTT!-6d#2!)N&cU)uSu9yYKS&{a@guPQ5=`#pXBLo9`J zWi7@tsh5nA**t-tNzSMY-iY!b7PB|F9(m68r3Ba3pMQ0j<~d_rbwPc}>=ol^8M*5= zb}RCD=785`b+hpNeMqoRk2IZ4`ElV7-+9V$s$%_-{d&SH$LA-pn{~5ofr3YQL_@13 z79;ztKwEfqYwzLmnpC+d`Dx2*S-tbSmjfrydXY144QWV`=WC9}GTb(o+nDRoiB!Z$ z9fyf6r-LdlqG^f+z@5MY=P+T{v*V7(b;aSD!fvZ04({cZ+n~({#DAJ-%W(<)KZP*j z3!uUCtC#;6Od#y-r9~BeRitdr_QyLF<6g=A$d&NvlYmLUBuHlL(Q&Lfkxmav@=3HX z+Dd)V)B|i!b?=1B5z@4a=y#FpaD5gn zrH^$gLBWHg?qKa`r64Br+*5mZ>{_BuI|+-L)waI$|s{@cituv8{OJzrL@P_ zwXbzGil_2Ve0vU9zX!frq;CQghRKcV(XA-OmglKWkoU( z5k~Rif2LuN`fFP%%V4L;Pv^r7;L}ySX1TkwSjYBabgNshB^&EE{De*p&(G+0rd*6z zARAPEEB)^IydtXmJ(3krLc>iNxMS-C@4~yu#^>FazuX_`-lw4{KW~nn*@&aTi#gf( zTG+0h614C8%mwb;k}?zJS5G6N{ z0?YIJCVlcW!a8YtbCq|k5vM7E*8A~N$rqZoFk=l*OGV~rD6~$3Y_lhzuN@X#e=^S# zQqm*FkqWv=j+2SBC(}4T7s)Tb*nt*E49C(q3P^TNdq}`Hr|)|T!5$?m>;n*fivx*w z8z12FEZTfA32eaFVX01)eCc}iUNsw-< zL=_WzdHym_>T6hDjL69*+fBy!kEUA)+}WQxdXq6etET=GpW*I=l;uvmpS2D6{ zm5}21mWn7{uT}2vAC98G?84FF|D_^^Vj;`m{)f2d5Kk)qrV{C;lT3@2k!$sAw4T^} zq9ji^DSnxK?%Bwzqi`gPk zrI=l?2T(@Ghho0>W~5~;97^~dQP=s-<00r|qKysjMe(1xOD{E#rh|VS<6ylI$I5&p z*5-A;t?mA;y6@s4wRpWiS8p@YZ&HdQVa9&A8(QOS+p7lirSomAA?I^P=SDOk>g!-g zK8un<^e6Yg!;&eh2Mg}&e4G-li_(qG=f^H5XJLkS2^r6<&60dan`Wl`yjdb8xJf<@ zLv=nyVP&4PoHlLy`Euh=m^hG^&QCi`3Hy<0aq831q=;~NA3RoS-k9Ypc9L{!DNULR z5Z!L_CG)H7<~#i^0HwRon=HWE%8?EV<2&RQM2-?NeDe3@2XG|j4tJL$0=b{}Hx@(7&Ao_@S! zVXfty*S`3&oX84I*Sg7a)@3P0QlWEh;iBb>c+t-X{pzn7W$9Mix=p6WXAx7kD7g_9 z@nIXk9k0j%WCporZSGxe%%wQ3YFhHMe5S4Pd^cF%Dhbdh!-B~p%Fy^n=omi<-!G5O z0H3AJMRKkFSti%b!EDK=9BWgrr}}stNSmaN`eKA!yVJhpS-v;rV0oN{atF^x6=w9L zA6U!Xk#+KTe=yqxmB=1}bDBop_d}6nSTV9Jl1-_OtEersUhcK9Yo_V^z-^m23Flqo z460?5w&=5zxB1WxBiv7Q)`|{~OAq1{>S=W3ZMg%M9p);>tbrvO$YnF+w+I8hB-~1l z#&Nh7b6I1oyDdy;!Y+FgwNqu>zM|Tw$oROYnvqix$u{+Tm7I!W zG=OX&L^!cNS?UP+0*cuAp~@$ZjlpULFd0+7qW{1`pS%I{(b@5;Su4lG5aL{XJq~6< z9&8kHz>8b*TsDo2h+T3^gUh!X*?oaqsLAN`uQt?uGbm};X@r?hef4E z%h3#6jvKKUF-Pem0;JXSyrmk^gv$@+AiF+i-i>0tkm6M`wBW!3>B@<5-I zXgfugh)M5c61E5$Z)G{usl`j`_@_|79sCYpO9Eu{1E-{kr{@ zHDHfIs77h(UuXTLisee3n!EA6h&}S6wn`oPU~&d`WKvc+9bP7%-9+Wdu$?OAp!~gG zTo``gH-)=Vq#Ijf=6nXvfRZ6O(FH<}=$F$>Zm(K5F%j(7aLssJDHHx;EjYMnG{Emv=n1=a!TCBtQ=y~oCB!== zD#@GX^In7uyq-HtODuo<>m1yAfK`LO}GhGI`+7(-Ay)v5x!5-ea#(3riVN)CWJd zs4czob#(C%Cj-y4cakTver~#ufh6ps{*Dp*t(85UWRDFX)h;k#`hz8_ z$E%A`T8`?)hQ`n1CC+4aV!OaEY@4L$B!;Gh_JEe1vo}(PmxkX=S%fj=x(er9>5}ja zqxShFr5gxO9o68)XfxTIWLi5(xHiWuGjpPMT83EHe~wDira$Wl6f;S-_!-uPy$AHT|*yC@wp8P#hOJTs&O-XgI%} zQyTTWJ8DeB@2Fo$Is+h)`GWZk&*oUA+fD3uV{*w!6tnH8d}n~1Xk>rE*yWo2OmMkT>X_UnoIh7O`5cp~ihAdbU@Ya_V!#%0dp-v41`MTIld7<06 zs|j0_cO@R_#Z%i-sT7mVJ<7N0Pg?-gH{(dL3v}hfFg+?@ft2Ky&SVJ=9 zggLLr@_PL$POR&=Pu7h(w=>QFD^KsA`vt&}6l-I2yhPz}s6E~KgGL5XMi5-F(f6fj z!D`vC!95p6ZwszO?*9ttn)9KO5QY(2ST2f>d0b8!44A!CO z`CyUAPraz;R4c&kG0HLPfmpNq(CiExl+B^Qq|q_8-Lp@Dtc9tpBV+3)`$jF-z{Qy- z+M2|snvlu26(cuBp{nC=@6^6u7d-L=x z_r;nGe(*9Fa$ghRmj<=nO8YVqL-ve8AEoaW&1v@>8m+A)UtBOCfTsvdevx3&Wqh`| zZu2#!)Im8rrfEQ~BXU@YG4hhiHxiAPgM1SEGf&o1_0tgIrc)OCVLhMZ`uLJQ`-iA6 zCn`K0-_|V|OG^p?^)#23dcrk!!#k>zhc!(uy*)xB0-MP(`%^mRR$9j{iibFs5iCSC z#kKpaQPm$_4;N`=1PU62=GLTR`?!Koli+x~`SknhXvwV0(UzF&aPMga?X`sZG6v4n zlKYz0ilV~fKH57L=}tQYxvx(Pi<+9eq$R}==|N?c=XqX_!(tFmk`vbuNDzlVvEISO zv#6_u8|n~CyJ2CaU#eWcCh5F6(YT9ga$HyawgIPMC90AjqCrUTINE+Pn`ol5dhGcj zC%&$v?gid(%boeGF3x8?0uQ|+t-0u7of+pH+s`SY?;IA=yc-3zxoqJ>fFONb&F#CL zGKCMk-@^Hlwhynpmd^OXYQtDuyvgvt+Pm^_DBreiixHZch>?Q^il_g}!PPWQkvhRuziLy0Awq$JCzo+_b-~0Z)zu)6Kj(7fY%;C7_d9M3@ z?&rGC^SsV$uw1Pxcz8i!r-4Jb&3iDJ%HdoG39cKv^yx!0ASHd^bhwnHGDsNBS!e>X 
z;RJXX#FRIRXwPN#gh`4I#E^`C#N2p7wjJiaR%Y+39&Aa3Vf9$EIw$AVksc!a5s%v= z&S5;?M)!G$Z7j;Y&{>f|UBq~rfBNj!e7NoPnEMm`dfKDUxf@0DM1BsAoNE)p{8gMI z^cHF7+FwS}JUKW&zbd3*RzwWH<~#YMbXkA(JBn)+XmX46v#ycY3bC2ud^GwM%V3vs zawIqopZfi^Vu>#Y4{w#UaFwO(H3wDgqm#e3z7r)xg!$UPkbEZN0bJ*IcIDHdauCHHFrxb) z5S)Fu;`BLbU4pDHOfD-F27V3bT!mC@USZFij7l$i}}-hC+=y3NMGMpkT^6{50#N2+kKUjMG1S@2+$BH*XpfVo-1 z&s#JtOj}W+tG53jYFT;cNGN+C1aVfi;PvX62STB*_KoeMAi>_XAowi0zRR1lJyW*gUyZtec({h|pu3DNY$PUi#`j{DmaMFO^=rtNHWnS;^Bvxksb$ zE&RJ;mT7MnB?TN$#Y4b_Xxx=K$00cn+mq};K3 zwMkX~_|O5g8m5>g>+7mfCAexf#rYfh))4jj2K0+-Oz&0WMqRX9-ax-Pab%I~Ii=rb zdabPECzqnkIZD~c5516@5nr|}5iAyZoYbs#eBd;B4+Lt{wrS>h^mSlnjHT(@om3~% z>-m>J`F~xm6o>e$idWkcCL{tz{Q7HcrLeTE_@|P2_yZ!CMGQT}Pu||DID(eD= z{*p60$mE$_Z*Kfo>j{tb{vh*5>8E@YAFat+x7H~fKIs9~%8<{FJEOikH!47+ah9JZ7~H2)-x9~>Rp`iVXa#i4ME=&eh%-2Kqw!mhc+4*5p7*&zKJF*RjFQ*F zqD7}}c{TxXoPEzDpt3sujoqWN5M2;-f(GnTxQ47@NT6dj$pP#818!^UDEQGH;P;Z< zDb@Uh09Un==MeeU(X$id8o&W8pP|G%5IIjrFUGkQFZv}7&Rlkq`jr;;vBs=EZyPz967q29_03heHGNERz!~4@`hrU*WSL1EFBDJRk<<5N@$9}piH3ci$ z;{c2ZkXFNhhKaq?o7t;8MUtp-g-emqk79rjZ8Di4nNvAJU9!e+@YIqshShX8;f8XJ zPfC*tx;rlEDHV<&B1#3WfMT9|16^j)+S2;6PL1B*cy2gdnxwbun|HR7ib&Clcum?YsXQjNfXxTSSUZLr!8*26?ZnIS@D%I*jBegq?9*ne8UNqij z;ejPz>u3Jpn)~>pk$H>lo0aJUi$K1eB+06bPmjpn_aA6K_?plFB3(sS3|my zKm6%+e<2+4k8>0Y1;`H|ZT}^;LA$exuS9z;R>=fss!Xmj^b@=G+bK&6wb|4uX zx))=sq1Xv2fH?oiWi-~TNW~6|(x3sL7WP^Yw2B^%lpp6YhSDV5F@Y#U*#&9rSdcf@ z)ti7aYx`}SRuZeqN0l%*+O&6GoWm35B?Xm&o)ow~H4ZXv?7O?CkHFiv(JC>AGhKU7 z5=xBn)Jzj(^VL#joTIa&wVoyb(&X7Y-C6`tTJr$2wh~0+n~)ThLm~SVuq${Ou%&X% z*yHMmtC})UbMun)Bc=UWS_}YGbO)2RW~Hz&EGstKK~rME$F(#U1&xM8iFUA3>8e`L zHvwvayOX=e&UAZpbVK^5u-vNRc$b5xw&5^UJ9D`5HJm6v$4C{U2 zsYYkZs5ihx#huBGsCkOnBGAgbj!KQ!UF8pP(>S%ua!((FUvHR}ph^2nwQ}8rgPbxE zEM)@k^C9#)K{ZIv{qk{ZMg3fC&uK}_RptUdc}jGK93j#+qywtav61hLo`~>MC%gn` z#m`**kCh@%?z4A4xvsV#LJt$1GtaPjC+;;4+;^UNPlTqYG6qq%o%L{Z&$=dPcs&-A zoDDeKuO4Xc7Wh+3i!wJm&gNhB|8(i_-TZ{=zL)zGKZe4ft>4$7#d+hKk;^w_ccA+t zVIWFZ`V6mu4faT>3?Jgikc-m*dRwO}jAntm%a>CRSrOf_?ZCx%)&G0uo*1Zw+e#n{ zV|OlNZr{Ob$9XO5`o?j3Oi|p_Nw#>{S-Zv=H}V%#b|SSJnI&@c+;LNe0n0)^fv#2l zx3`a!YuWC~{X#xHxIKSB@ZoW?jNSP9ym8=NeKk;Ue)cS~>H#z)Tsj#!YL8E z)&Q8`I@9TOn7!I!ph)&9FEa2Dx5~j~>lZjK?V{i45ReEiRth1YP249qo;!>il@tfg z*RWE%tQt0R$2nX-mgOvUBw@5mvdz5w#5m*?G*OhKGNYvW`pS96wG!O}-6^3dnH=@h z`MZ42pkP}(%r{AVEud#72|O#VbF40*a!%=O(E$cGZ2DBSzVju;Q1mpTimLRs(z(v! z;~!j1-EIQ;3Ds>t4ImvG=b2}sq8=<5k!j|l zm}fbW#G!Y4dp3gz6VX-A8r{z0gx}*H^9tsQ7w_7&D4#a5WAX1h5|(w65rF9{WAf1k zUA+88r>OZL?XyYd1wjFN?nY|?7VmZpn|rgT7tPh&mD3Mo-gD?~`cpFwZ{b^C+~Rn+ z(7zB^**aR`qPC!KkWCm5_>59t(OH*q3EewMDQVRXEpMQP^O}iM7Yu9PNs1Z_N*FqZ zb8~=l1HYM=3l< zuepu}bK5R>>B2^=nsD25v*#z+i(a`p#|7r3>Hfge5&Qrv6vsCG@HX2 zLnL@uB)*L@q+y*6ywJ-ABd0;$rP<)N-$Uk#g1_UmKqt3g)(>%oyLVb3RuFe66x#Xi zo|?Ic7?uy4a`5iDs)6b~2hEQ&;#=BTb&ES%Ys7s*XNK0|#6c7NT~R~fdE&e;`Eg3A7kPL@bEVtVv8LxrD? 
zuxzyJRkHOS8n@;&Aw&**B8r6PdrZghNl{8N@%B{0G)I=i`~VkOl_fN zsf|t5bBdmtQO;rO@nJX(A@En!rkUjl+0}dykUNlWsLkRPoCHQlKyTw#*}?6TdaeHS3S3$640s>>1*0id zYxL25dh*!CjZLHox;)dq3Vls`!d`UX;hkWhFk{KxAG0udlH*&evw(8G_G{%}eH*G< zSfZAx@P)acsbVRPCbfMphM5T!9K}Rx>IY2F*b>j~=ls=NpA?@|miLdDE{1QhO zv}sZy|1SIcvPhR{ua7i)<(QnZQ%#duwFk74bht#MVjIwzkdNsi9!g~UYgD%GqJAYX zgOTob`eKll0G-?G17VB{h{Nc?bEVGrqEHy3n%kc8S%>fwlvY+RAKd3$)i?rm@3O)U zx~8KX+yp8X<)-<5=RBO*DYhCe)#wJ7$Nb_!9`lo{GNEn;n4CU-;;86M-q;TEy5G@F zszyELfvhXW<03sX8}ZP)->f@9!T0QR`3+=f>pE)7yT9T1YK3>_8CugDAx46TZmlRB z*58ItrBU^J7v$=Cl<;W@VdBuxnjlM?#s#n|N#iNckq{m^_lg^m!se)vuhTYn^W`dY zQItR{emftW?+IH;ux)4Zpt?w(;7}M^WZ#MCKgvTu23eO@mvy}lSwhBCtp`5;B4 z+DeHTDTV*S-07*cYI%-3I5bFIQ=~{6OUWvA)5@wiE|_$s$6>!%l-elQc+L(MbL&veAiE^L!!>B73H zxz8AJ0^yI{%RPitE>_|qM2njB<1K@ElPeE06&@^^g$z7@$|+nvG5}yYh^rnwkq|^R za)Dx22Xa9;^Y*F?B5u#xKtZjsZ+Q~fw#XZpUBAC7sVVQHvEXu6Dl%neGMdM-oDq>pA@|g_!!FQBAwC$ELY;cR{{NljPXKopob_`&GuXV%(8ZJ zYfw{$GvH&?3J)0<%(f$)uY=n9twbyKi^_zPxLkjh1p}h6s3iwnxZ>_LQ%#OXWC9awY3pbRl{OB zm}`-$DVmA=Y9SzPP%vhBxNwwS30x>bGU;_CYG&`v(4fsvCqUyNMg1&g{R=vBr@NDy zE@!vOv8#vzw?Yoy6ATc@L+FQMO&d*%w9^bNtsow5c7jz97G(iDiMZfVvHE)gbnIgF zN>@Dq$&-0uU|}oERyWE1{p45r8POhPLE1hI)bg#Sxfo6S@`$e71u1F)A;R5$fe>LKHG_%XBm7c{+CxnBHjPAOo zs*)XL9%yxSw=Bt6e5rNF=3z)(2N6Sd zC{Z&(CK5*{$5vgkb5U_5p}FL~N1>Gujb1JGyfk<}<4Pa9=g@Q$b64Cv)rLv*`1b8u znFVOS@Ybci$oJj>2Wv^_W+Wu>_!0JT@JFxiJG6}aNdxzj6kY*#ldh9O<$r2V$~efW z-QuoG`0@J7C(0?m3I3fvDb*xP=y9DY6XN&Ug_FQ5#O9hEab@#M@U+JBv~-KGEnJn5 zd){Ro1oJ%DG&yl)=E_ zy7J}O%s!(eryrY<;neO<;K=(&@r`W03>96Ei}mK0qgX^aR++Oa{0vDYT{Qk z0e(0RsIQ7qG~&H%Dty*D0>fs9i?h7J*y592Uyms)gix1X{XAB}KD7)?Aoa-?&r`5t z&k(+jbQrf^RD$9aia`E4;;g{mm<2J4)%9wT$xE?Du{}g%%N_A59$Zc77K(zMRb&Zo zd7jsw+A#tCJGXC1al>zQR{q5w*ZBvL`SS?$i__es0`eI7oMe%_|MQ>1FaF4CI~itX z|9FA^evbctKFm{77S8GRADMb_TnqmrH$yX7#-GsSU+RcoY>p|fsMA2k#mW>r?}t+1 zCx9=P;BVP4rfwyTNc=-cFF!V+h0W)hTp0`er{R9hjWZW{CxN<6Lin$_{e2Jm^ViF@ z6p;=89p(3=?XNZWpNPw2g&$&0I)JLu4UW;_``4-ZbqKgf@#W$d#9hfwf;!_RH1PcM zhj+%YL<4QV-fKHi{NqH2Z!;=ttB1N&|A~xWqHP4Qt08fVH9~r^_P^cyk6)V5Arny8 ze5n4&&O0wAJ)4SGKe1)F;J0?by|vKaTgQ<9uUP$Ud9Lz^+O~uhzkJ**F~O9`rt|KA zmn-}Qs1d+>kbYnmX`MfH4u5;DFWYW`L@9rCrc6kC{8FNQ3?}V4bLCCXIO@k<@J_`R z5JrQ5>c>++ML7tcdddD>S(bOI`acb|sY-$C!Pd-Vrp>%L{oq<&>PN=gIL8#lIOvR> z6yV?aw$5YvDIid|EJB!nIqh>Kc%yC8+ZSsn}B>LVZJ6$=5O+hzh5@+P&>0y zbZOsAE3D@JTM+)`&vXso5esm8WARTH{Bt}-pse%1qx}1N|8HFW+^7H7n)+{C{_}B} aIVPuaDU +* **New:** [`moose query` CLI command](#moose-query-cli-command) for SQL exploration and code generation +* **New:** [ClickHouse cluster support](#clickhouse-cluster-support) for distributed deployments +* **New:** [Database performance metrics visualization](#database-performance-metrics-visualization) in Boreal project overview + + +## `moose query` CLI command + +Execute SQL queries against your ClickHouse database directly from the command line, with built-in validation, formatting, and code generation. + +**Why it matters:** When building data pipelines, you often need to test SQL queries before adding them to your code. Previously, this meant switching between ClickHouse clients, manually escaping strings, and copying queries back and forth. The `moose query` command streamlines this workflow by letting you validate queries, see results immediately, and generate properly-formatted code ready to paste into your Python or TypeScript application. 
+
+
+CLI based querying (automatically set up to query your development ClickHouse instance):
+```bash
+# Execute a query and see JSON results
+moose query "SELECT user_id, COUNT(*) as events FROM user_events GROUP BY user_id LIMIT 10"
+{"user_id": "abc-1213", "count": 37}
+```
+
+Code generation for using the query in your code:
+```bash
+# Validate and format complex SQL for Python code
+moose query --format-query python --prettify <

Date: Sun, 23 Nov 2025 19:19:44 -0700
Subject: [PATCH 42/59] icebergs3 engine (#2978)

> [!NOTE]
> Add end-to-end IcebergS3 engine support (Rust CLI, TS/Py SDKs, generators, and docs) with runtime cred resolution, parsing/serialization, DDL, and tests.
>
> - **OLAP Engine (Rust/CLI)**
>   - Add `ClickhouseEngine::IcebergS3` with parsing (`Iceberg(...)`), display/proto serialization (mask creds, omit in proto), DDL generation, and non-alterable params hashing.
>   - Resolve AWS creds at runtime for `IcebergS3` in `InfrastructureMap::resolve_s3_credentials_from_env`, recalculating `engine_params_hash`.
>   - Extend partial infra map to accept `IcebergS3` config.
> - **Code Generation**
>   - TS/Py generators emit IcebergS3 engine blocks in generated configs.
> - **SDKs/Libraries**
>   - Python: introduce `IcebergS3Engine`; update validators to forbid ORDER BY/PARTITION BY/SAMPLE BY; wire into internal JSON export.
>   - TypeScript: add `ClickHouseEngines.IcebergS3` and `IcebergS3Config`; include in infra serialization.
> - **Docs**
>   - Add IcebergS3 usage examples (TS/Py) and notes (read-only, supported formats) in modeling tables docs.
> - **Tests**
>   - Add comprehensive Rust, Python, and TypeScript tests for IcebergS3 parsing/serialization, hashing, validation, and config export.
>
> Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit a21847af4e0549323912e6c193282fb9fc500a8c. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).
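A minimal sketch (not from this patch) of what declaring a read-only Iceberg-backed table might look like with the new `IcebergS3Engine`. The keyword names (`path`, `format`, `aws_access_key_id`, `aws_secret_access_key`, `compression`) mirror the generated config emitted by `tables_to_python` in the diff below; the `OlapTable`/`OlapConfig` wiring, import path, and bucket path are illustrative assumptions rather than verbatim from the patch.

```python
# Sketch only: field names follow the Python generator in this patch; the
# surrounding OlapTable/OlapConfig usage and the S3 path are assumptions.
from pydantic import BaseModel
from moose_lib import IcebergS3Engine, OlapConfig, OlapTable

class TripRecord(BaseModel):
    trip_id: str
    fare: float

trips = OlapTable[TripRecord](
    "IcebergTrips",
    OlapConfig(
        engine=IcebergS3Engine(
            path="s3://example-bucket/warehouse/trips/",  # hypothetical table root
            format="Parquet",  # parser accepts Parquet or ORC
            # Omitting aws_access_key_id / aws_secret_access_key corresponds to
            # NOSIGN (public bucket or IAM role), per the Rust serializer below.
        ),
        # No ORDER BY / PARTITION BY / SAMPLE BY: the validators added in this
        # patch reject them for IcebergS3 tables, which are read-only.
    ),
)
```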
--- .../src/framework/core/infrastructure_map.rs | 93 ++-- .../core/partial_infrastructure_map.rs | 24 + .../src/framework/python/generate.rs | 21 + .../src/framework/typescript/generate.rs | 24 +- .../infrastructure/olap/clickhouse/queries.rs | 513 ++++++++++++++++++ .../content/moosestack/olap/model-table.mdx | 43 ++ .../src/pages/moose/olap/model-table.mdx | 42 ++ packages/py-moose-lib/moose_lib/__init__.py | 1 + packages/py-moose-lib/moose_lib/blocks.py | 55 ++ .../py-moose-lib/moose_lib/dmv2/olap_table.py | 9 +- packages/py-moose-lib/moose_lib/internal.py | 25 +- .../py-moose-lib/tests/test_iceberg_config.py | 103 ++++ packages/ts-moose-lib/src/blocks/helpers.ts | 1 + packages/ts-moose-lib/src/dmv2/internal.ts | 37 +- .../ts-moose-lib/src/dmv2/sdk/olapTable.ts | 44 +- .../ts-moose-lib/tests/iceberg-config.test.ts | 56 ++ 16 files changed, 1034 insertions(+), 57 deletions(-) create mode 100644 packages/py-moose-lib/tests/test_iceberg_config.py create mode 100644 packages/ts-moose-lib/tests/iceberg-config.test.ts diff --git a/apps/framework-cli/src/framework/core/infrastructure_map.rs b/apps/framework-cli/src/framework/core/infrastructure_map.rs index d1e5670839..10811884b9 100644 --- a/apps/framework-cli/src/framework/core/infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/infrastructure_map.rs @@ -2195,68 +2195,65 @@ impl InfrastructureMap { for table in self.tables.values_mut() { let mut should_recalc_hash = false; + // Helper closure to resolve AWS credentials for S3-based engines + let resolve_aws_credentials = |access_key: &mut Option, + secret_key: &mut Option, + engine_name: &str| + -> Result<(), String> { + let resolved_access_key = resolve_optional_runtime_env(access_key).map_err( + |e| { + format!( + "Failed to resolve runtime environment variable for table '{}' field 'awsAccessKeyId': {}", + table.name, e + ) + }, + )?; + + let resolved_secret_key = resolve_optional_runtime_env(secret_key).map_err( + |e| { + format!( + "Failed to resolve runtime environment variable for table '{}' field 'awsSecretAccessKey': {}", + table.name, e + ) + }, + )?; + + *access_key = resolved_access_key; + *secret_key = resolved_secret_key; + + log::debug!( + "Resolved {} credentials for table '{}' at runtime", + engine_name, + table.name + ); + + Ok(()) + }; + match &mut table.engine { ClickhouseEngine::S3Queue { aws_access_key_id, aws_secret_access_key, .. } => { - // Resolve environment variable markers for AWS credentials - let resolved_access_key = resolve_optional_runtime_env(aws_access_key_id) - .map_err(|e| { - format!( - "Failed to resolve runtime environment variable for table '{}' field 'awsAccessKeyId': {}", - table.name, e - ) - })?; - - let resolved_secret_key = - resolve_optional_runtime_env(aws_secret_access_key).map_err(|e| { - format!( - "Failed to resolve runtime environment variable for table '{}' field 'awsSecretAccessKey': {}", - table.name, e - ) - })?; - - *aws_access_key_id = resolved_access_key; - *aws_secret_access_key = resolved_secret_key; + resolve_aws_credentials(aws_access_key_id, aws_secret_access_key, "S3Queue")?; should_recalc_hash = true; - - log::debug!( - "Resolved S3Queue credentials for table '{}' at runtime", - table.name - ); } ClickhouseEngine::S3 { aws_access_key_id, aws_secret_access_key, .. 
} => { - // Resolve environment variable markers for AWS credentials - let resolved_access_key = resolve_optional_runtime_env(aws_access_key_id) - .map_err(|e| { - format!( - "Failed to resolve runtime environment variable for table '{}' field 'awsAccessKeyId': {}", - table.name, e - ) - })?; - - let resolved_secret_key = - resolve_optional_runtime_env(aws_secret_access_key).map_err(|e| { - format!( - "Failed to resolve runtime environment variable for table '{}' field 'awsSecretAccessKey': {}", - table.name, e - ) - })?; - - *aws_access_key_id = resolved_access_key; - *aws_secret_access_key = resolved_secret_key; + resolve_aws_credentials(aws_access_key_id, aws_secret_access_key, "S3")?; + should_recalc_hash = true; + } + ClickhouseEngine::IcebergS3 { + aws_access_key_id, + aws_secret_access_key, + .. + } => { + resolve_aws_credentials(aws_access_key_id, aws_secret_access_key, "IcebergS3")?; should_recalc_hash = true; - - log::debug!( - "Resolved S3 credentials for table '{}' at runtime", - table.name - ); } _ => { // No credentials to resolve for other engine types diff --git a/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs b/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs index c2937c5400..29f05d4343 100644 --- a/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs @@ -156,6 +156,16 @@ struct DistributedConfig { policy_name: Option, } +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +struct IcebergS3Config { + path: String, + format: String, + aws_access_key_id: Option, + aws_secret_access_key: Option, + compression: Option, +} + #[derive(Debug, Deserialize)] #[serde(tag = "engine", rename_all = "camelCase")] enum EngineConfig { @@ -228,6 +238,9 @@ enum EngineConfig { #[serde(rename = "Distributed")] Distributed(Box), + + #[serde(rename = "IcebergS3")] + IcebergS3(Box), } #[derive(Debug, Deserialize)] @@ -853,6 +866,17 @@ impl PartialInfrastructureMap { policy_name: config.policy_name.clone(), }), + Some(EngineConfig::IcebergS3(config)) => { + // Keep environment variable markers as-is - credentials will be resolved at runtime + Ok(ClickhouseEngine::IcebergS3 { + path: config.path.clone(), + format: config.format.clone(), + aws_access_key_id: config.aws_access_key_id.clone(), + aws_secret_access_key: config.aws_secret_access_key.clone(), + compression: config.compression.clone(), + }) + } + None => Ok(ClickhouseEngine::MergeTree), } } diff --git a/apps/framework-cli/src/framework/python/generate.rs b/apps/framework-cli/src/framework/python/generate.rs index 312cd88f86..a8be4e26d3 100644 --- a/apps/framework-cli/src/framework/python/generate.rs +++ b/apps/framework-cli/src/framework/python/generate.rs @@ -947,6 +947,27 @@ pub fn tables_to_python(tables: &[Table], life_cycle: Option) -> Stri } writeln!(output, " ),").unwrap(); } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::IcebergS3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + } => { + writeln!(output, " engine=IcebergS3Engine(").unwrap(); + writeln!(output, " path={:?},", path).unwrap(); + writeln!(output, " format={:?},", format).unwrap(); + if let Some(key_id) = aws_access_key_id { + writeln!(output, " aws_access_key_id={:?},", key_id).unwrap(); + } + if let Some(secret) = aws_secret_access_key { + writeln!(output, " aws_secret_access_key={:?},", secret).unwrap(); + } + if let Some(comp) = compression { + writeln!(output, " 
compression={:?},", comp).unwrap(); + } + writeln!(output, " ),").unwrap(); + } } if let Some(version) = &table.version { writeln!(output, " version={:?},", version).unwrap(); diff --git a/apps/framework-cli/src/framework/typescript/generate.rs b/apps/framework-cli/src/framework/typescript/generate.rs index ba37846f47..cffd6cae21 100644 --- a/apps/framework-cli/src/framework/typescript/generate.rs +++ b/apps/framework-cli/src/framework/typescript/generate.rs @@ -826,9 +826,29 @@ pub fn tables_to_typescript(tables: &[Table], life_cycle: Option) -> if let Some(key) = sharding_key { writeln!(output, " shardingKey: {:?},", key).unwrap(); } - if let Some(policy) = policy_name { - writeln!(output, " policyName: {:?},", policy).unwrap(); + if let Some(policy) = policy_name { + writeln!(output, " policyName: {:?},", policy).unwrap(); + } } + crate::infrastructure::olap::clickhouse::queries::ClickhouseEngine::IcebergS3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + } => { + writeln!(output, " engine: ClickHouseEngines.IcebergS3,").unwrap(); + writeln!(output, " path: {:?},", path).unwrap(); + writeln!(output, " format: {:?},", format).unwrap(); + if let Some(key_id) = aws_access_key_id { + writeln!(output, " awsAccessKeyId: {:?},", key_id).unwrap(); + } + if let Some(secret) = aws_secret_access_key { + writeln!(output, " awsSecretAccessKey: {:?},", secret).unwrap(); + } + if let Some(comp) = compression { + writeln!(output, " compression: {:?},", comp).unwrap(); + } } } if let Some(version) = &table.version { diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs index 19bcd760c7..154fc53b57 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs @@ -322,6 +322,18 @@ pub enum ClickhouseEngine { // Optional policy name policy_name: Option, }, + IcebergS3 { + // S3 path to Iceberg table root + path: String, + // Data format (Parquet or ORC) + format: String, + // AWS access key ID (optional, None for NOSIGN) + aws_access_key_id: Option, + // AWS secret access key (optional) + aws_secret_access_key: Option, + // Compression type (optional: gzip, zstd, etc.) 
+ compression: Option, + }, } // The implementation is not symetric between TryFrom and Into so we @@ -411,6 +423,19 @@ impl Into for ClickhouseEngine { &sharding_key, &policy_name, ), + ClickhouseEngine::IcebergS3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + } => Self::serialize_icebergs3_for_display( + &path, + &format, + &aws_access_key_id, + &aws_secret_access_key, + &compression, + ), // this might sound obvious, but when you edit this function // please check if you have changed the parsing side (try_from) as well // especially if you're an LLM @@ -792,6 +817,7 @@ impl ClickhouseEngine { s if s.starts_with("S3(") => Self::parse_regular_s3(s, value), s if s.starts_with("Buffer(") => Self::parse_regular_buffer(s, value), s if s.starts_with("Distributed(") => Self::parse_regular_distributed(s, value), + s if s.starts_with("Iceberg(") => Self::parse_regular_icebergs3(s, value), _ => Err(value), } } @@ -838,6 +864,104 @@ impl ClickhouseEngine { } } + /// Parse regular Iceberg with parameters + fn parse_regular_icebergs3<'a>( + engine_name: &str, + original_value: &'a str, + ) -> Result { + if let Some(content) = engine_name + .strip_prefix("Iceberg(") + .and_then(|s| s.strip_suffix(")")) + { + Self::parse_icebergs3(content).map_err(|_| original_value) + } else { + Err(original_value) + } + } + + /// Parse Iceberg engine content + /// Format: Iceberg('path', [NOSIGN | 'key', 'secret'], 'format'[, 'compression']) + /// or simplified: Iceberg('path', 'format'[, 'compression']) + fn parse_icebergs3(content: &str) -> Result { + let parts = parse_quoted_csv(content); + + if parts.len() < 2 { + return Err("Iceberg requires at least path and format".to_string()); + } + + let path = parts[0].clone(); + + // Parse authentication and format based on ClickHouse IcebergS3 syntax: + // ENGINE = IcebergS3(url, [, NOSIGN | access_key_id, secret_access_key, [session_token]], format, [,compression]) + // + // Possible patterns: + // 1. Iceberg('path', 'format') - no auth + // 2. Iceberg('path', 'format', 'compression') - no auth with compression + // 3. Iceberg('path', NOSIGN, 'format') - explicit NOSIGN + // 4. Iceberg('path', 'access_key_id', 'secret_access_key', 'format') - with credentials + // 5. Iceberg('path', 'access_key_id', 'secret_access_key', 'format', 'compression') - with credentials and compression + let (format, aws_access_key_id, aws_secret_access_key, extra_params_start) = if parts.len() + >= 2 + && parts[1].to_uppercase() == "NOSIGN" + { + // NOSIGN keyword (no authentication) - format is at position 2 + if parts.len() < 3 { + return Err("Iceberg with NOSIGN requires format parameter".to_string()); + } + (parts[2].clone(), None, None, 3) + } else if parts.len() >= 2 { + let format_at_pos1 = parts[1].to_uppercase(); + let is_pos1_format = format_at_pos1 == "PARQUET" || format_at_pos1 == "ORC"; + + if is_pos1_format { + // Format is at position 1, no credentials + (parts[1].clone(), None, None, 2) + } else if parts.len() >= 4 && !parts[1].is_empty() && !parts[2].is_empty() { + // Check if parts[3] is a format (credentials case) + let format_at_pos3 = parts[3].to_uppercase(); + if format_at_pos3 == "PARQUET" || format_at_pos3 == "ORC" { + // parts[1] and parts[2] are credentials, format at position 3 + ( + parts[3].clone(), + Some(parts[1].clone()), + Some(parts[2].clone()), + 4, + ) + } else { + // Ambiguous case - neither pos1 nor pos3 is a valid format + return Err(format!( + "Invalid Iceberg format. 
Expected 'Parquet' or 'ORC' at position 2 or 4, got '{}' and '{}'", + parts[1], parts[3] + )); + } + } else { + // Not enough parts for credentials, but parts[1] is not a valid format + return Err(format!( + "Invalid Iceberg format '{}'. Must be 'Parquet' or 'ORC'", + parts[1] + )); + } + } else { + return Err("Iceberg requires at least path and format parameters".to_string()); + }; + + // Parse optional compression (next parameter after format) + let compression = if parts.len() > extra_params_start && parts[extra_params_start] != "null" + { + Some(parts[extra_params_start].clone()) + } else { + None + }; + + Ok(ClickhouseEngine::IcebergS3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + }) + } + /// Parse regular SummingMergeTree with parameters fn parse_regular_summing_merge_tree<'a>( engine_name: &str, @@ -1117,6 +1241,16 @@ impl ClickhouseEngine { sharding_key, policy_name, ), + ClickhouseEngine::IcebergS3 { + path, + format, + compression, + .. // Omit credentials for protobuf + } => Self::serialize_icebergs3( + path, + format, + compression, + ), } } @@ -1393,6 +1527,55 @@ impl ClickhouseEngine { result } + /// Serialize Iceberg engine to string format for display (with masked credentials) + /// Format: Iceberg('url', [NOSIGN | 'access_key_id', 'secret_access_key'], 'format'[, 'compression']) + fn serialize_icebergs3_for_display( + path: &str, + format: &str, + aws_access_key_id: &Option, + aws_secret_access_key: &Option, + compression: &Option, + ) -> String { + let mut result = format!("Iceberg('{}'", path); + + // Add authentication info for display - uses shared masking logic + match (aws_access_key_id, aws_secret_access_key) { + (Some(key_id), Some(secret)) => { + let masked_secret = Self::mask_secret(secret); + result.push_str(&format!(", '{}', '{}'", key_id, masked_secret)); + } + _ => { + // No credentials provided - using NOSIGN for public buckets or IAM roles + result.push_str(", NOSIGN"); + } + } + + // Add format + result.push_str(&format!(", '{}'", format)); + + // Add compression if present + if let Some(comp) = compression { + result.push_str(&format!(", '{}'", comp)); + } + + result.push(')'); + result + } + + /// Serialize Iceberg engine to string format for proto storage (without credentials) + /// Format: Iceberg('url', 'format'[, 'compression']) + fn serialize_icebergs3(path: &str, format: &str, compression: &Option) -> String { + let mut result = format!("Iceberg('{}', '{}'", path, format); + + // Add compression if present + if let Some(comp) = compression { + result.push_str(&format!(", '{}'", comp)); + } + + result.push(')'); + result + } + /// Serialize Distributed engine to string format /// Format: Distributed('cluster', 'database', 'table'[, sharding_key][, 'policy_name']) fn serialize_distributed( @@ -2019,6 +2202,36 @@ impl ClickhouseEngine { hasher.update("null".as_bytes()); } } + ClickhouseEngine::IcebergS3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + } => { + hasher.update("Iceberg".as_bytes()); + hasher.update(path.as_bytes()); + hasher.update(format.as_bytes()); + + // Hash credentials (consistent with S3 and S3Queue engines) + if let Some(key_id) = aws_access_key_id { + hasher.update(key_id.as_bytes()); + } else { + hasher.update("null".as_bytes()); + } + if let Some(secret) = aws_secret_access_key { + hasher.update(secret.as_bytes()); + } else { + hasher.update("null".as_bytes()); + } + + // Hash optional parameters + if let Some(comp) = compression { + 
hasher.update(comp.as_bytes()); + } else { + hasher.update("null".as_bytes()); + } + } } format!("{:x}", hasher.finalize()) @@ -2450,6 +2663,31 @@ pub fn create_table_query( format!("Distributed({})", engine_parts.join(", ")) } + ClickhouseEngine::IcebergS3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + } => { + let mut engine_parts = vec![format!("'{}'", path)]; + + // Handle credentials using shared helper (same as S3Queue) + engine_parts.extend(ClickhouseEngine::format_s3_credentials_for_ddl( + aws_access_key_id, + aws_secret_access_key, + )); + + // Add format + engine_parts.push(format!("'{}'", format)); + + // Add optional compression + if let Some(comp) = compression { + engine_parts.push(format!("'{}'", comp)); + } + + format!("Iceberg({})", engine_parts.join(", ")) + } }; // Format settings from table.table_settings @@ -5367,4 +5605,279 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; _ => panic!("Expected Distributed engine"), } } + + #[test] + fn test_icebergs3_hash_consistency() { + // Test that identical engines produce identical hashes + let engine1 = ClickhouseEngine::IcebergS3 { + path: "s3://test-bucket/warehouse/table/".to_string(), + format: "Parquet".to_string(), + aws_access_key_id: Some("AKIATEST".to_string()), + aws_secret_access_key: Some("secretkey".to_string()), + compression: Some("gzip".to_string()), + }; + + let engine2 = ClickhouseEngine::IcebergS3 { + path: "s3://test-bucket/warehouse/table/".to_string(), + format: "Parquet".to_string(), + aws_access_key_id: Some("AKIATEST".to_string()), + aws_secret_access_key: Some("secretkey".to_string()), + compression: Some("gzip".to_string()), + }; + + let hash1 = engine1.non_alterable_params_hash(); + let hash2 = engine2.non_alterable_params_hash(); + assert_eq!(hash1, hash2); + assert_eq!(hash1.len(), 64); // SHA256 hex string + + // Test that credential changes produce different hashes + let engine_diff_key = ClickhouseEngine::IcebergS3 { + path: "s3://test-bucket/warehouse/table/".to_string(), + format: "Parquet".to_string(), + aws_access_key_id: Some("AKIADIFFERENT".to_string()), + aws_secret_access_key: Some("secretkey".to_string()), + compression: Some("gzip".to_string()), + }; + let hash_diff_key = engine_diff_key.non_alterable_params_hash(); + assert_ne!( + hash1, hash_diff_key, + "Different access keys should produce different hashes" + ); + + // Test that path changes produce different hashes + let engine_diff_path = ClickhouseEngine::IcebergS3 { + path: "s3://different-bucket/table/".to_string(), + format: "Parquet".to_string(), + aws_access_key_id: Some("AKIATEST".to_string()), + aws_secret_access_key: Some("secretkey".to_string()), + compression: Some("gzip".to_string()), + }; + let hash_diff_path = engine_diff_path.non_alterable_params_hash(); + assert_ne!( + hash1, hash_diff_path, + "Different paths should produce different hashes" + ); + + // Test that compression changes produce different hashes + let engine_no_compression = ClickhouseEngine::IcebergS3 { + path: "s3://test-bucket/warehouse/table/".to_string(), + format: "Parquet".to_string(), + aws_access_key_id: Some("AKIATEST".to_string()), + aws_secret_access_key: Some("secretkey".to_string()), + compression: None, + }; + let hash_no_compression = engine_no_compression.non_alterable_params_hash(); + assert_ne!( + hash1, hash_no_compression, + "Different compression should produce different hashes" + ); + + // Test that IcebergS3 hash differs from other engines + let merge_tree = 
ClickhouseEngine::MergeTree; + assert_ne!(hash1, merge_tree.non_alterable_params_hash()); + } + + #[test] + fn test_icebergs3_display() { + // Test display with credentials + let engine_with_creds = ClickhouseEngine::IcebergS3 { + path: "s3://bucket/warehouse/table/".to_string(), + format: "Parquet".to_string(), + aws_access_key_id: Some("AKIATEST".to_string()), + aws_secret_access_key: Some("secretkey123".to_string()), + compression: Some("gzip".to_string()), + }; + + let display: String = engine_with_creds.clone().into(); + assert!(display.contains("Iceberg")); + assert!(display.contains("s3://bucket/warehouse/table/")); + assert!(display.contains("AKIATEST")); + assert!(display.contains("secr...y123")); // Masked secret (first 4 + ... + last 4) + assert!(display.contains("Parquet")); + assert!(display.contains("gzip")); + + // Test display with NOSIGN + let engine_nosign = ClickhouseEngine::IcebergS3 { + path: "s3://public-bucket/table/".to_string(), + format: "ORC".to_string(), + aws_access_key_id: None, + aws_secret_access_key: None, + compression: None, + }; + + let display_nosign: String = engine_nosign.into(); + assert!(display_nosign.contains("Iceberg")); + assert!(display_nosign.contains("NOSIGN")); + assert!(display_nosign.contains("ORC")); + } + + #[test] + fn test_icebergs3_protobuf_serialization() { + // Test with credentials (should be excluded from proto) + let engine_with_creds = ClickhouseEngine::IcebergS3 { + path: "s3://bucket/table/".to_string(), + format: "Parquet".to_string(), + aws_access_key_id: Some("key".to_string()), + aws_secret_access_key: Some("secret".to_string()), + compression: None, + }; + + let proto = engine_with_creds.to_proto_string(); + assert!(!proto.contains("key")); // Credentials excluded for security + assert!(!proto.contains("secret")); + assert!(proto.contains("s3://bucket/table/")); + assert!(proto.contains("Parquet")); + + // Test with compression (should be included in proto) + let engine_with_compression = ClickhouseEngine::IcebergS3 { + path: "s3://test-bucket/warehouse/events/".to_string(), + format: "ORC".to_string(), + aws_access_key_id: None, + aws_secret_access_key: None, + compression: Some("gzip".to_string()), + }; + + let proto_with_compression = engine_with_compression.to_proto_string(); + assert!(proto_with_compression.contains("s3://test-bucket/warehouse/events/")); + assert!(proto_with_compression.contains("ORC")); + assert!(proto_with_compression.contains("gzip")); // Compression IS included + } + + #[test] + fn test_icebergs3_parsing() { + // Test 1: Simple format without credentials or compression + let simple = "Iceberg('s3://bucket/table/', 'Parquet')"; + let engine = ClickhouseEngine::try_from(simple).unwrap(); + match engine { + ClickhouseEngine::IcebergS3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + } => { + assert_eq!(path, "s3://bucket/table/"); + assert_eq!(format, "Parquet"); + assert_eq!(aws_access_key_id, None); + assert_eq!(aws_secret_access_key, None); + assert_eq!(compression, None); + } + _ => panic!("Expected IcebergS3 engine"), + } + + // Test 2: With credentials (should be parsed now) + let with_creds = "Iceberg('s3://bucket/table/', 'AKIATEST', '[HIDDEN]', 'Parquet')"; + let engine = ClickhouseEngine::try_from(with_creds).unwrap(); + match engine { + ClickhouseEngine::IcebergS3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + } => { + assert_eq!(path, "s3://bucket/table/"); + assert_eq!(format, "Parquet"); + 
assert_eq!(aws_access_key_id, Some("AKIATEST".to_string())); + assert_eq!(aws_secret_access_key, Some("[HIDDEN]".to_string())); + assert_eq!(compression, None); + } + _ => panic!("Expected IcebergS3 engine"), + } + + // Test 3: With compression but no credentials - format at position 1 + let with_compression = "Iceberg('s3://bucket/table/', 'ORC', 'gzip')"; + let engine = ClickhouseEngine::try_from(with_compression).unwrap(); + match engine { + ClickhouseEngine::IcebergS3 { + path, + format, + compression, + aws_access_key_id, + aws_secret_access_key, + } => { + assert_eq!(path, "s3://bucket/table/"); + assert_eq!(format, "ORC"); + assert_eq!(compression, Some("gzip".to_string())); + assert_eq!(aws_access_key_id, None); + assert_eq!(aws_secret_access_key, None); + } + _ => panic!("Expected IcebergS3 engine"), + } + + // Test 4: Edge case - format name at position 1 with extra params (bug from bot review) + // This tests that we correctly identify format at position 1, not confuse it with credentials + let format_first = + "Iceberg('s3://bucket/table/', 'Parquet', 'extra_param', 'another_param')"; + let engine = ClickhouseEngine::try_from(format_first).unwrap(); + match engine { + ClickhouseEngine::IcebergS3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + } => { + assert_eq!(path, "s3://bucket/table/"); + assert_eq!(format, "Parquet"); + assert_eq!(aws_access_key_id, None); + assert_eq!(aws_secret_access_key, None); + // extra_param is treated as compression since it's at position 2 (extra_params_start) + assert_eq!(compression, Some("extra_param".to_string())); + } + _ => panic!("Expected IcebergS3 engine"), + } + + // Test 5: With NOSIGN + let with_nosign = "Iceberg('s3://public-bucket/table/', NOSIGN, 'Parquet')"; + let engine = ClickhouseEngine::try_from(with_nosign).unwrap(); + match engine { + ClickhouseEngine::IcebergS3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + .. 
+ } => { + assert_eq!(path, "s3://public-bucket/table/"); + assert_eq!(format, "Parquet"); + assert_eq!(aws_access_key_id, None); + assert_eq!(aws_secret_access_key, None); + } + _ => panic!("Expected IcebergS3 engine"), + } + + // Test 6: With credentials AND compression + let full_config = "Iceberg('s3://bucket/table/', 'AKIATEST', 'secret', 'ORC', 'zstd')"; + let engine = ClickhouseEngine::try_from(full_config).unwrap(); + match engine { + ClickhouseEngine::IcebergS3 { + path, + format, + aws_access_key_id, + aws_secret_access_key, + compression, + } => { + assert_eq!(path, "s3://bucket/table/"); + assert_eq!(format, "ORC"); + assert_eq!(aws_access_key_id, Some("AKIATEST".to_string())); + assert_eq!(aws_secret_access_key, Some("secret".to_string())); + assert_eq!(compression, Some("zstd".to_string())); + } + _ => panic!("Expected IcebergS3 engine"), + } + + // Test 7: Invalid format in ambiguous case - should return error + let invalid_format = "Iceberg('s3://bucket/table/', 'InvalidFormat', 'something', 'else')"; + let result = ClickhouseEngine::try_from(invalid_format); + assert!( + result.is_err(), + "Should reject invalid format 'InvalidFormat'" + ); + + // Test 8: Another invalid format edge case + let another_invalid = "Iceberg('s3://bucket/table/', 'BadFormat', 'test')"; + let result2 = ClickhouseEngine::try_from(another_invalid); + assert!(result2.is_err(), "Should reject invalid format 'BadFormat'"); + } } diff --git a/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx b/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx index b0f98c4c28..abd8c4df08 100644 --- a/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx +++ b/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx @@ -874,6 +874,49 @@ public_s3 = OlapTable[DataRecord]("public_s3", OlapConfig( Both engines support the same credential management and format options. 
+#### IcebergS3 +The `IcebergS3` engine provides read-only access to Iceberg tables stored in S3: + + + +```ts filename="IcebergTable.ts" copy +import { OlapTable, ClickHouseEngines, mooseRuntimeEnv } from '@514labs/moose-lib'; + +// Iceberg table with AWS credentials (recommended with mooseRuntimeEnv) +export const icebergEvents = new OlapTable("iceberg_events", { + engine: ClickHouseEngines.IcebergS3, + path: "s3://my-bucket/warehouse/db/table/", + format: "Parquet", // or "ORC" + awsAccessKeyId: mooseRuntimeEnv.get("AWS_ACCESS_KEY_ID"), + awsSecretAccessKey: mooseRuntimeEnv.get("AWS_SECRET_ACCESS_KEY"), +}); +``` + + +```py filename="IcebergTable.py" copy +from moose_lib import OlapTable, OlapConfig, moose_runtime_env +from moose_lib.blocks import IcebergS3Engine + +# Iceberg table with AWS credentials (recommended with moose_runtime_env) +iceberg_events = OlapTable[Event]("iceberg_events", OlapConfig( + engine=IcebergS3Engine( + path="s3://my-bucket/warehouse/db/table/", + format="Parquet", # or "ORC" + aws_access_key_id=moose_runtime_env.get("AWS_ACCESS_KEY_ID"), + aws_secret_access_key=moose_runtime_env.get("AWS_SECRET_ACCESS_KEY"), + ) +)) +``` + + + + +- IcebergS3 tables are **read-only** and provide access to the latest state of your Iceberg table +- `orderByFields`, `orderByExpression`, `partitionBy`, and `sampleByExpression` are not supported +- The table automatically reflects the current state of the Iceberg table in S3 +- Supported formats: **Parquet** and **ORC** only + + #### In-Memory Buffer (`Buffer`) The `Buffer` engine provides an in-memory buffer that flushes data to a destination table based on time, row count, or size thresholds: diff --git a/apps/framework-docs/src/pages/moose/olap/model-table.mdx b/apps/framework-docs/src/pages/moose/olap/model-table.mdx index a4082712f6..19b61e2eb4 100644 --- a/apps/framework-docs/src/pages/moose/olap/model-table.mdx +++ b/apps/framework-docs/src/pages/moose/olap/model-table.mdx @@ -857,6 +857,48 @@ public_s3 = OlapTable[DataRecord]("public_s3", OlapConfig( Both engines support the same credential management and format options. 
+#### Iceberg +The `IcebergS3` engine provides read-only access to Iceberg tables stored in S3: + + +```ts filename="IcebergTable.ts" copy +import { OlapTable, ClickHouseEngines, mooseRuntimeEnv } from '@514labs/moose-lib'; + +// Iceberg table with AWS credentials (recommended with mooseRuntimeEnv) +export const icebergEvents = new OlapTable("iceberg_events", { + engine: ClickHouseEngines.IcebergS3, + path: "s3://my-bucket/warehouse/db/table/", + format: "Parquet", // or "ORC" + awsAccessKeyId: mooseRuntimeEnv.get("AWS_ACCESS_KEY_ID"), + awsSecretAccessKey: mooseRuntimeEnv.get("AWS_SECRET_ACCESS_KEY"), +}); +``` + + + +```py filename="IcebergTable.py" copy +from moose_lib import OlapTable, OlapConfig, moose_runtime_env +from moose_lib.blocks import IcebergS3Engine + +# Iceberg table with AWS credentials (recommended with moose_runtime_env) +iceberg_events = OlapTable[Event]("iceberg_events", OlapConfig( + engine=IcebergS3Engine( + path="s3://my-bucket/warehouse/db/table/", + format="Parquet", # or "ORC" + aws_access_key_id=moose_runtime_env.get("AWS_ACCESS_KEY_ID"), + aws_secret_access_key=moose_runtime_env.get("AWS_SECRET_ACCESS_KEY"), + ) +)) +``` + + + +- IcebergS3 tables are **read-only** and provide access to the latest state of your Iceberg table +- `orderByFields`, `orderByExpression`, `partitionBy`, and `sampleByExpression` are not supported +- The table automatically reflects the current state of the Iceberg table in S3 +- Supported formats: **Parquet** and **ORC** only + + #### In-Memory Buffer (`Buffer`) The `Buffer` engine provides an in-memory buffer that flushes data to a destination table based on time, row count, or size thresholds: diff --git a/packages/py-moose-lib/moose_lib/__init__.py b/packages/py-moose-lib/moose_lib/__init__.py index ed33fe6a6e..1fa7d5ca76 100644 --- a/packages/py-moose-lib/moose_lib/__init__.py +++ b/packages/py-moose-lib/moose_lib/__init__.py @@ -26,6 +26,7 @@ ReplicatedAggregatingMergeTreeEngine, ReplicatedSummingMergeTreeEngine, S3QueueEngine, + IcebergS3Engine, EngineConfig, # Legacy enum (already exported via .blocks import, but explicit for clarity) ClickHouseEngines diff --git a/packages/py-moose-lib/moose_lib/blocks.py b/packages/py-moose-lib/moose_lib/blocks.py index 45f0b85dd6..7b3a69de57 100644 --- a/packages/py-moose-lib/moose_lib/blocks.py +++ b/packages/py-moose-lib/moose_lib/blocks.py @@ -17,6 +17,7 @@ class ClickHouseEngines(Enum): S3 = "S3" Buffer = "Buffer" Distributed = "Distributed" + IcebergS3 = "IcebergS3" ReplicatedMergeTree = "ReplicatedMergeTree" ReplicatedReplacingMergeTree = "ReplicatedReplacingMergeTree" ReplicatedAggregatingMergeTree = "ReplicatedAggregatingMergeTree" @@ -284,6 +285,60 @@ def __post_init__(self): if not self.target_table: raise ValueError("Distributed engine requires 'target_table'") +@dataclass +class IcebergS3Engine(EngineConfig): + """Configuration for IcebergS3 engine - read-only Iceberg table access. + + Provides direct querying of Apache Iceberg tables stored on S3. + Data is not copied; queries stream directly from Parquet/ORC files. 
+ + Args: + path: S3 path to Iceberg table root (e.g., 's3://bucket/warehouse/events/') + format: Data format - 'Parquet' or 'ORC' + aws_access_key_id: AWS access key ID (optional, omit for public buckets or IAM roles) + aws_secret_access_key: AWS secret access key (optional) + compression: Compression type (optional: 'gzip', 'zstd', 'auto') + + Example: + >>> from moose_lib import OlapTable, OlapConfig, moose_runtime_env + >>> from moose_lib.blocks import IcebergS3Engine + >>> + >>> lake_events = OlapTable[Event]( + ... "lake_events", + ... OlapConfig( + ... engine=IcebergS3Engine( + ... path="s3://datalake/events/", + ... format="Parquet", + ... aws_access_key_id=moose_runtime_env.get("AWS_ACCESS_KEY_ID"), + ... aws_secret_access_key=moose_runtime_env.get("AWS_SECRET_ACCESS_KEY") + ... ) + ... ) + ... ) + + Note: + - IcebergS3 engine is read-only + - Does not support ORDER BY, PARTITION BY, or SAMPLE BY clauses + - Queries always see the latest Iceberg snapshot (with metadata cache) + """ + + # Required fields + path: str + format: str + + # Optional fields + aws_access_key_id: Optional[str] = None + aws_secret_access_key: Optional[str] = None + compression: Optional[str] = None + + def __post_init__(self): + """Validate required fields""" + if not self.path: + raise ValueError("IcebergS3 engine requires 'path'") + if not self.format: + raise ValueError("IcebergS3 engine requires 'format'") + if self.format not in ['Parquet', 'ORC']: + raise ValueError(f"IcebergS3 format must be 'Parquet' or 'ORC', got '{self.format}'") + # ========================== # New Table Configuration (Recommended API) # ========================== diff --git a/packages/py-moose-lib/moose_lib/dmv2/olap_table.py b/packages/py-moose-lib/moose_lib/dmv2/olap_table.py index d8216fb9bd..c69622f645 100644 --- a/packages/py-moose-lib/moose_lib/dmv2/olap_table.py +++ b/packages/py-moose-lib/moose_lib/dmv2/olap_table.py @@ -185,11 +185,12 @@ def model_post_init(self, __context): S3QueueEngine, BufferEngine, DistributedEngine, + IcebergS3Engine, ) - # S3QueueEngine, BufferEngine, and DistributedEngine don't support ORDER BY + # S3QueueEngine, BufferEngine, DistributedEngine, and IcebergS3Engine don't support ORDER BY # Note: S3Engine DOES support ORDER BY (unlike S3Queue) - engines_without_order_by = (S3QueueEngine, BufferEngine, DistributedEngine) + engines_without_order_by = (S3QueueEngine, BufferEngine, DistributedEngine, IcebergS3Engine) if isinstance(self.engine, engines_without_order_by): engine_name = type(self.engine).__name__ @@ -205,6 +206,7 @@ def model_post_init(self, __context): S3QueueEngine, BufferEngine, DistributedEngine, + IcebergS3Engine, ) if isinstance(self.engine, engines_without_sample_by): engine_name = type(self.engine).__name__ @@ -215,12 +217,13 @@ def model_post_init(self, __context): f"Remove sample_by_expression from your configuration." 
) - # Only S3QueueEngine, BufferEngine, and DistributedEngine don't support PARTITION BY + # Only S3QueueEngine, BufferEngine, DistributedEngine, and IcebergS3Engine don't support PARTITION BY # S3Engine DOES support PARTITION BY engines_without_partition_by = ( S3QueueEngine, BufferEngine, DistributedEngine, + IcebergS3Engine, ) if isinstance(self.engine, engines_without_partition_by): engine_name = type(self.engine).__name__ diff --git a/packages/py-moose-lib/moose_lib/internal.py b/packages/py-moose-lib/moose_lib/internal.py index 1bd6ab7fba..46fee76921 100644 --- a/packages/py-moose-lib/moose_lib/internal.py +++ b/packages/py-moose-lib/moose_lib/internal.py @@ -167,6 +167,16 @@ class DistributedConfigDict(BaseEngineConfigDict): policy_name: Optional[str] = None +class IcebergS3ConfigDict(BaseEngineConfigDict): + """Configuration for IcebergS3 engine.""" + engine: Literal["IcebergS3"] = "IcebergS3" + path: str + format: str + aws_access_key_id: Optional[str] = None + aws_secret_access_key: Optional[str] = None + compression: Optional[str] = None + + # Discriminated union of all engine configurations EngineConfigDict = Union[ MergeTreeConfigDict, @@ -180,7 +190,8 @@ class DistributedConfigDict(BaseEngineConfigDict): S3QueueConfigDict, S3ConfigDict, BufferConfigDict, - DistributedConfigDict + DistributedConfigDict, + IcebergS3ConfigDict ] @@ -508,7 +519,7 @@ def _convert_engine_instance_to_config_dict(engine: "EngineConfig") -> EngineCon Returns: EngineConfigDict with engine-specific configuration """ - from moose_lib.blocks import S3QueueEngine, S3Engine, BufferEngine, DistributedEngine + from moose_lib.blocks import S3QueueEngine, S3Engine, BufferEngine, DistributedEngine, IcebergS3Engine # Try S3Queue first if isinstance(engine, S3QueueEngine): @@ -560,6 +571,16 @@ def _convert_engine_instance_to_config_dict(engine: "EngineConfig") -> EngineCon policy_name=engine.policy_name ) + # Try IcebergS3 + if isinstance(engine, IcebergS3Engine): + return IcebergS3ConfigDict( + path=engine.path, + format=engine.format, + aws_access_key_id=engine.aws_access_key_id, + aws_secret_access_key=engine.aws_secret_access_key, + compression=engine.compression + ) + # Try basic engines basic_config = _convert_basic_engine_instance(engine) if basic_config: diff --git a/packages/py-moose-lib/tests/test_iceberg_config.py b/packages/py-moose-lib/tests/test_iceberg_config.py new file mode 100644 index 0000000000..aeee4e289f --- /dev/null +++ b/packages/py-moose-lib/tests/test_iceberg_config.py @@ -0,0 +1,103 @@ +import pytest +from moose_lib import OlapTable, OlapConfig +from moose_lib.blocks import IcebergS3Engine +from pydantic import BaseModel + + +class SampleData(BaseModel): + id: str + name: str + value: int + + +def test_iceberg_engine_basic_creation(): + """Test basic IcebergS3Engine creation with required fields""" + engine = IcebergS3Engine( + path="s3://bucket/warehouse/table/", + format="Parquet" + ) + assert engine.path == "s3://bucket/warehouse/table/" + assert engine.format == "Parquet" + assert engine.aws_access_key_id is None + assert engine.aws_secret_access_key is None + assert engine.compression is None + + +def test_iceberg_engine_with_all_options(): + """Test IcebergS3Engine with all optional configuration""" + engine = IcebergS3Engine( + path="s3://bucket/table/", + format="ORC", + aws_access_key_id="AKIATEST", + aws_secret_access_key="secret123", + compression="zstd" + ) + assert engine.path == "s3://bucket/table/" + assert engine.format == "ORC" + assert engine.aws_access_key_id == 
"AKIATEST" + assert engine.aws_secret_access_key == "secret123" + assert engine.compression == "zstd" + + +def test_iceberg_engine_missing_path(): + """Test that missing path raises ValueError""" + with pytest.raises(ValueError, match="IcebergS3 engine requires 'path'"): + IcebergS3Engine(path="", format="Parquet") + + +def test_iceberg_engine_missing_format(): + """Test that missing format raises ValueError""" + with pytest.raises(ValueError, match="IcebergS3 engine requires 'format'"): + IcebergS3Engine(path="s3://bucket/table/", format="") + + +def test_iceberg_engine_invalid_format(): + """Test that invalid format raises ValueError (only Parquet and ORC supported)""" + with pytest.raises(ValueError, match="format must be 'Parquet' or 'ORC'"): + IcebergS3Engine(path="s3://bucket/table/", format="JSON") + + +def test_iceberg_rejects_order_by(): + """Test that IcebergS3 engine rejects ORDER BY clauses (read-only external table)""" + with pytest.raises(ValueError, match="IcebergS3Engine does not support ORDER BY clauses"): + OlapConfig( + engine=IcebergS3Engine(path="s3://bucket/table/", format="Parquet"), + order_by_fields=["id"] + ) + + +def test_iceberg_rejects_partition_by(): + """Test that IcebergS3 engine rejects PARTITION BY clauses (read-only external table)""" + with pytest.raises(ValueError, match="IcebergS3Engine does not support PARTITION BY clause"): + OlapConfig( + engine=IcebergS3Engine(path="s3://bucket/table/", format="Parquet"), + partition_by="toYYYYMM(timestamp)" + ) + + +def test_iceberg_rejects_sample_by(): + """Test that IcebergS3 engine rejects SAMPLE BY clauses (read-only external table)""" + with pytest.raises(ValueError, match="IcebergS3Engine does not support SAMPLE BY clause"): + OlapConfig( + engine=IcebergS3Engine(path="s3://bucket/table/", format="Parquet"), + sample_by_expression="cityHash64(id)" + ) + + +def test_iceberg_table_in_olap_table(): + """Test creating OlapTable with IcebergS3Engine and custom settings""" + table = OlapTable[SampleData]( + "lake_events", + OlapConfig( + engine=IcebergS3Engine( + path="s3://datalake/events/", + format="Parquet", + aws_access_key_id="AKIATEST", + aws_secret_access_key="secret123" + ) + ) + ) + assert table.name == "lake_events" + assert isinstance(table.config.engine, IcebergS3Engine) + assert table.config.engine.path == "s3://datalake/events/" + diff --git a/packages/ts-moose-lib/src/blocks/helpers.ts b/packages/ts-moose-lib/src/blocks/helpers.ts index cec7080544..dc2ffd563f 100644 --- a/packages/ts-moose-lib/src/blocks/helpers.ts +++ b/packages/ts-moose-lib/src/blocks/helpers.ts @@ -46,6 +46,7 @@ export enum ClickHouseEngines { S3 = "S3", Buffer = "Buffer", Distributed = "Distributed", + IcebergS3 = "IcebergS3", ReplicatedMergeTree = "ReplicatedMergeTree", ReplicatedReplacingMergeTree = "ReplicatedReplacingMergeTree", ReplicatedAggregatingMergeTree = "ReplicatedAggregatingMergeTree", diff --git a/packages/ts-moose-lib/src/dmv2/internal.ts b/packages/ts-moose-lib/src/dmv2/internal.ts index 75c67c52a4..7f003e79dd 100644 --- a/packages/ts-moose-lib/src/dmv2/internal.ts +++ b/packages/ts-moose-lib/src/dmv2/internal.ts @@ -157,6 +157,15 @@ interface DistributedEngineConfig { policyName?: string; } +interface IcebergS3EngineConfig { + engine: "IcebergS3"; + path: string; + format: string; + awsAccessKeyId?: string; + awsSecretAccessKey?: string; + compression?: string; +} + /** * Union type for all supported engine configurations */ @@ -172,7 +181,8 @@ type EngineConfig = | S3QueueEngineConfig | S3EngineConfig | 
BufferEngineConfig - | DistributedEngineConfig; + | DistributedEngineConfig + | IcebergS3EngineConfig; /** * JSON representation of an OLAP table configuration. @@ -587,6 +597,26 @@ function convertDistributedEngineConfig( }; } +/** + * Convert IcebergS3 engine config + */ +function convertIcebergS3EngineConfig( + config: OlapConfig, +): EngineConfig | undefined { + if (!("engine" in config) || config.engine !== ClickHouseEngines.IcebergS3) { + return undefined; + } + + return { + engine: "IcebergS3", + path: config.path, + format: config.format, + awsAccessKeyId: config.awsAccessKeyId, + awsSecretAccessKey: config.awsSecretAccessKey, + compression: config.compression, + }; +} + /** * Convert table configuration to engine config */ @@ -627,6 +657,11 @@ function convertTableConfigToEngineConfig( return convertDistributedEngineConfig(config); } + // Handle IcebergS3 + if (engine === ClickHouseEngines.IcebergS3) { + return convertIcebergS3EngineConfig(config); + } + return undefined; } diff --git a/packages/ts-moose-lib/src/dmv2/sdk/olapTable.ts b/packages/ts-moose-lib/src/dmv2/sdk/olapTable.ts index 2ed3871038..8e2b05da25 100644 --- a/packages/ts-moose-lib/src/dmv2/sdk/olapTable.ts +++ b/packages/ts-moose-lib/src/dmv2/sdk/olapTable.ts @@ -445,6 +445,47 @@ export type DistributedConfig = Omit< policyName?: string; }; +/** + * Configuration for IcebergS3 engine - read-only Iceberg table access + * + * Provides direct querying of Apache Iceberg tables stored on S3. + * Data is not copied; queries stream directly from Parquet/ORC files. + * + * @template T The data type of the records stored in the table. + * + * @example + * ```typescript + * const lakeEvents = new OlapTable("lake_events", { + * engine: ClickHouseEngines.IcebergS3, + * path: "s3://datalake/events/", + * format: "Parquet", + * awsAccessKeyId: mooseRuntimeEnv.get("AWS_ACCESS_KEY_ID"), + * awsSecretAccessKey: mooseRuntimeEnv.get("AWS_SECRET_ACCESS_KEY") + * }); + * ``` + * + * @remarks + * - IcebergS3 engine is read-only + * - Does not support ORDER BY, PARTITION BY, or SAMPLE BY clauses + * - Queries always see the latest Iceberg snapshot (with metadata cache) + */ +export type IcebergS3Config = Omit< + BaseOlapConfig, + "orderByFields" | "orderByExpression" | "partitionBy" | "sampleByExpression" +> & { + engine: ClickHouseEngines.IcebergS3; + /** S3 path to Iceberg table root (e.g., 's3://bucket/warehouse/events/') */ + path: string; + /** Data format - 'Parquet' or 'ORC' */ + format: "Parquet" | "ORC"; + /** AWS access key ID (optional, omit for NOSIGN/public buckets) */ + awsAccessKeyId?: string; + /** AWS secret access key (optional) */ + awsSecretAccessKey?: string; + /** Compression type (optional: 'gzip', 'zstd', 'auto') */ + compression?: string; +}; + /** * Legacy configuration (backward compatibility) - defaults to MergeTree engine * @template T The data type of the records stored in the table. 
@@ -463,7 +504,8 @@ type EngineConfig = | S3QueueConfig | S3Config | BufferConfig - | DistributedConfig; + | DistributedConfig + | IcebergS3Config; /** * Union of all engine-specific configurations (new API) diff --git a/packages/ts-moose-lib/tests/iceberg-config.test.ts b/packages/ts-moose-lib/tests/iceberg-config.test.ts new file mode 100644 index 0000000000..fc4e7c372a --- /dev/null +++ b/packages/ts-moose-lib/tests/iceberg-config.test.ts @@ -0,0 +1,56 @@ +import { expect } from "chai"; +import { OlapTable, ClickHouseEngines } from "../src"; + +interface TestData { + id: string; + name: string; + value: number; +} + +describe("IcebergS3 Engine Configuration", () => { + it("should create table with required fields only (Parquet)", () => { + const table = new OlapTable("test_iceberg", { + engine: ClickHouseEngines.IcebergS3, + path: "s3://bucket/warehouse/table/", + format: "Parquet", + }); + + expect(table.name).to.equal("test_iceberg"); + expect((table.config as any).engine).to.equal(ClickHouseEngines.IcebergS3); + expect((table.config as any).path).to.equal("s3://bucket/warehouse/table/"); + expect((table.config as any).format).to.equal("Parquet"); + expect((table.config as any).awsAccessKeyId).to.be.undefined; + expect((table.config as any).awsSecretAccessKey).to.be.undefined; + }); + + it("should create table with all configuration options (ORC with credentials)", () => { + const table = new OlapTable("full_config", { + engine: ClickHouseEngines.IcebergS3, + path: "s3://datalake/warehouse/events/", + format: "ORC", + awsAccessKeyId: "AKIATEST123", + awsSecretAccessKey: "secretkey456", + compression: "zstd", + }); + + expect(table.name).to.equal("full_config"); + expect((table.config as any).path).to.equal( + "s3://datalake/warehouse/events/", + ); + expect((table.config as any).format).to.equal("ORC"); + expect((table.config as any).awsAccessKeyId).to.equal("AKIATEST123"); + expect((table.config as any).awsSecretAccessKey).to.equal("secretkey456"); + expect((table.config as any).compression).to.equal("zstd"); + }); + + it("should work without credentials for public buckets (NOSIGN)", () => { + const table = new OlapTable("public_data", { + engine: ClickHouseEngines.IcebergS3, + path: "s3://public-bucket/data/", + format: "Parquet", + } as const); + + expect((table.config as any).awsAccessKeyId).to.be.undefined; + expect((table.config as any).awsSecretAccessKey).to.be.undefined; + }); +}); From b4ae5f28a23e347ab1a1bdf5b5ed2fa10b62b709 Mon Sep 17 00:00:00 2001 From: Lucio Franco Date: Mon, 24 Nov 2025 09:48:00 -0500 Subject: [PATCH 43/59] Fix nix flakes install path for templates (#3030) > [!NOTE] > Packages templates into the Nix output and relocates the moose-cli binary with a wrapper so the CLI can resolve templates at the expected path. > > - **Nix flake (`flake.nix`)**: > - **Template packaging**: > - Add `packages.template-packages` derivation to tar each template dir and generate `manifest.toml`. > - **moose-cli output layout**: > - Move real binary to `libexec/moose/moose-cli` and add a `bin/moose-cli` wrapper. > - Copy packaged templates into `$out/template-packages/` for runtime discovery. > - Retain build/linker tweaks (rdkafka dynamic-linking) and environment setup. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 7866acbbe928171ffed635792e01fd58ea06db3e. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). 
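
For reference, the installed layout is expected to look roughly like this (illustrative sketch based on the `postInstall` step below, not verified output of an actual build):

```
$out/
  bin/moose-cli               # wrapper script; execs the real binary
  libexec/moose/moose-cli     # real binary, 3 levels deep from $out
  template-packages/
    manifest.toml             # generated from each template.config.toml
    <name>.tgz                # one tarball per template directory
```

From the real binary, the CLI's `parent()/parent()/parent()/join("template-packages")` lookup resolves to `$out/template-packages`, which is why the binary is nested under `libexec/moose/` rather than `bin/`.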
--- flake.nix | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/flake.nix b/flake.nix index 13ca89b051..95ec173016 100644 --- a/flake.nix +++ b/flake.nix @@ -168,6 +168,45 @@ # Package outputs packages = { + # Template packages - package templates from templates/ directory + template-packages = pkgs.stdenv.mkDerivation { + pname = "moose-template-packages"; + version = "0.0.1"; + + src = ./templates; + + nativeBuildInputs = [ pkgs.gnutar pkgs.gzip ]; + + buildPhase = '' + # Create manifest header + cat > manifest.toml << 'EOF' + [templates] + EOF + + # Package each template directory + for template_dir in */; do + template_name="''${template_dir%/}" + + # Create tarball + tar -czf "$template_name.tgz" \ + --exclude="node_modules" \ + -C "$template_dir" . + + # Add to manifest if template.config.toml exists + if [ -f "$template_dir/template.config.toml" ]; then + echo "" >> manifest.toml + echo "[templates.$template_name]" >> manifest.toml + cat "$template_dir/template.config.toml" >> manifest.toml + fi + done + ''; + + installPhase = '' + mkdir -p $out + cp *.tgz manifest.toml $out/ + ''; + }; + # Rust CLI moose-cli = pkgs.rustPlatform.buildRustPackage { pname = "moose-cli"; @@ -232,6 +271,33 @@ export SHELL="${pkgs.bash}/bin/bash" ''; + # Restructure output to match expected template path + # Real binary at: $out/libexec/moose/moose-cli (3 levels deep) + # Templates at: $out/template-packages/ + # From binary: parent()/parent()/parent()/join("template-packages") = $out/template-packages ✓ + postInstall = '' + # Create nested directory for real binary (3 levels deep from $out) + mkdir -p $out/libexec/moose + mkdir -p $out/template-packages + + # Move binary to nested location + mv $out/bin/moose-cli $out/libexec/moose/moose-cli + + # Copy templates from the template-packages derivation + cp -r ${self'.packages.template-packages}/* $out/template-packages/ + + # Create wrapper script in standard bin location + mkdir -p $out/bin + cat > $out/bin/moose-cli << 'EOF' + #!/usr/bin/env bash + exec "$out/libexec/moose/moose-cli" "$@" + EOF + chmod +x $out/bin/moose-cli + + # Substitute $out with actual path + substituteInPlace $out/bin/moose-cli --replace-fail '$out' "$out" + ''; + meta = with lib; { description = "MooseStack CLI - Build tool for Moose apps"; homepage = "https://www.fiveonefour.com/moose"; From 769eaf7561400382ae3bd78940a4e31183655780 Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Mon, 24 Nov 2025 18:01:01 -0700 Subject: [PATCH 44/59] introspect views/mvs from ch (#3023) > [!NOTE] > Add end-to-end support for introspecting, diffing, reconciling, and executing changes for ClickHouse views/materialized views (SQL resources), including SQL normalization and dependency-aware DDL ordering. > > - **OLAP/ClickHouse**: > - Implement `list_sql_resources` to introspect views/MVs from `system.tables`; reconstruct lineage and setup/teardown. > - Extend SQL parser with normalization, source table extraction, and helpers; enable `sqlparser` visitor feature. > - Add TTL/SETTINGS parsing improvements and tests. > - **Core Planning & Reality Check**: > - Extend `InfraRealityChecker` with SQL resource discrepancies (`unmapped/missing/mismatched_sql_resources`). > - Update `reconcile_with_reality` to handle SQL resources and accept `target_sql_resource_ids`. > - Diff logic (`InfrastructureMap::diff_sql_resources`) and DDL ordering updated; include MV population and dependency-aware teardown/setup. 
> - **APIs/CLI**: > - Admin endpoints and serverless fetch paths pass table and SQL resource targets; reconciled inframap now includes SQL resources. > - Remote plan/migration routines updated to propagate SQL resource IDs. > - **Models/Proto**: > - Add `database` to `SqlResource`; implement ID, equality (SQL normalization), and (de)serialization. > - **Misc**: > - Python streaming runner: set Kafka consumer `auto_offset_reset='earliest'`. > - Broad unit test additions across modules. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 9b80f40dfe5279ab490ebd03bf9cf2b403ffe99c. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). --------- Co-authored-by: George Leung --- Cargo.lock | 12 + apps/framework-cli/Cargo.toml | 2 +- apps/framework-cli/src/cli/local_webserver.rs | 19 +- .../src/cli/routines/code_generation.rs | 2 + .../framework-cli/src/cli/routines/migrate.rs | 31 +- apps/framework-cli/src/cli/routines/mod.rs | 34 +- .../framework/core/infra_reality_checker.rs | 188 ++++++++++- .../core/infrastructure/sql_resource.rs | 306 ++++++++++++++++- .../src/framework/core/infrastructure_map.rs | 3 + apps/framework-cli/src/framework/core/plan.rs | 310 ++++++++++++++++-- .../src/infrastructure/olap/clickhouse/mod.rs | 283 +++++++++++++++- .../olap/clickhouse/sql_parser.rs | 266 ++++++++++++++- .../src/infrastructure/olap/ddl_ordering.rs | 11 +- .../src/infrastructure/olap/mod.rs | 25 ++ packages/protobuf/infrastructure_map.proto | 3 + .../streaming/streaming_function_runner.py | 1 + 16 files changed, 1430 insertions(+), 66 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index af9abd18f8..e573711f5b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5189,6 +5189,18 @@ checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" dependencies = [ "log", "recursive", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] diff --git a/apps/framework-cli/Cargo.toml b/apps/framework-cli/Cargo.toml index 065114d98d..f159eaa128 100644 --- a/apps/framework-cli/Cargo.toml +++ b/apps/framework-cli/Cargo.toml @@ -12,7 +12,7 @@ homepage = "https://www.fiveonefour.com/moose" [dependencies] posthog514client-rs = { path = "../../packages/posthog514client-rs" } -sqlparser = "0.59" +sqlparser = { version = "0.59", features = ["visitor"] } itertools = "0.13.0" openssl = { version = "0.10", features = ["vendored"] } clap = { version = "4.3.17", features = ["derive"] } diff --git a/apps/framework-cli/src/cli/local_webserver.rs b/apps/framework-cli/src/cli/local_webserver.rs index 5df1c173f8..840252b1a9 100644 --- a/apps/framework-cli/src/cli/local_webserver.rs +++ b/apps/framework-cli/src/cli/local_webserver.rs @@ -3301,21 +3301,25 @@ async fn get_admin_reconciled_inframap( return Ok(current_map); } - // For admin endpoints, reconcile all currently managed tables only - // Pass the managed table names as target_table_names - this ensures that - // reconcile_with_reality only operates on tables that are already managed by Moose + // For admin endpoints, reconcile all currently managed tables and SQL resources only + // Pass the managed table IDs as target_table_ids - this ensures that + // reconcile_with_reality only operates on resources that are already managed by Moose let 
target_table_ids: HashSet = current_map .tables .values() .map(|t| t.id(¤t_map.default_database)) .collect(); + let target_sql_resource_ids: HashSet = + current_map.sql_resources.keys().cloned().collect(); + let olap_client = clickhouse::create_client(project.clickhouse_config.clone()); crate::framework::core::plan::reconcile_with_reality( project, ¤t_map, &target_table_ids, + &target_sql_resource_ids, olap_client, ) .await @@ -3572,6 +3576,9 @@ mod tests { unmapped_tables: vec![table.clone()], missing_tables: vec![], mismatched_tables: vec![OlapChange::Table(TableChange::Added(table.clone()))], + unmapped_sql_resources: vec![], + missing_sql_resources: vec![], + mismatched_sql_resources: vec![], }; let result = find_table_definition("test_table", &discrepancies); @@ -3588,6 +3595,9 @@ mod tests { unmapped_tables: vec![test_table.clone()], missing_tables: vec![], mismatched_tables: vec![OlapChange::Table(TableChange::Added(test_table.clone()))], + unmapped_sql_resources: vec![], + missing_sql_resources: vec![], + mismatched_sql_resources: vec![], }; let mut infra_map = create_test_infra_map(); @@ -3620,6 +3630,9 @@ mod tests { unmapped_tables: vec![test_table.clone()], missing_tables: vec![], mismatched_tables: vec![OlapChange::Table(TableChange::Added(test_table.clone()))], + unmapped_sql_resources: vec![], + missing_sql_resources: vec![], + mismatched_sql_resources: vec![], }; let mut infra_map = create_test_infra_map(); diff --git a/apps/framework-cli/src/cli/routines/code_generation.rs b/apps/framework-cli/src/cli/routines/code_generation.rs index 0d0cbe9c3a..1cc563c03a 100644 --- a/apps/framework-cli/src/cli/routines/code_generation.rs +++ b/apps/framework-cli/src/cli/routines/code_generation.rs @@ -255,6 +255,8 @@ pub async fn db_to_dmv2(remote_url: &str, dir_path: &Path) -> Result<(), Routine e, ) })?; + // TODO: Also call list_sql_resources to fetch Views/MVs and generate code for them. + // Currently we only generate code for Tables. let (tables, unsupported) = client.list_tables(&db, &project).await.map_err(|e| { RoutineFailure::new( Message::new("Failure".to_string(), "listing tables".to_string()), diff --git a/apps/framework-cli/src/cli/routines/migrate.rs b/apps/framework-cli/src/cli/routines/migrate.rs index 0e9b4ae33a..6edc7a8bea 100644 --- a/apps/framework-cli/src/cli/routines/migrate.rs +++ b/apps/framework-cli/src/cli/routines/migrate.rs @@ -585,19 +585,28 @@ pub async fn execute_migration( let target_table_ids: HashSet = current_infra_map.tables.keys().cloned().collect(); + let target_sql_resource_ids: HashSet = + current_infra_map.sql_resources.keys().cloned().collect(); + let olap_client = create_client(clickhouse_config.clone()); - reconcile_with_reality(project, ¤t_infra_map, &target_table_ids, olap_client) - .await - .map_err(|e| { - RoutineFailure::new( - Message::new( - "Reconciliation".to_string(), - "Failed to reconcile state with ClickHouse reality".to_string(), - ), - anyhow::anyhow!("{:?}", e), - ) - })? + reconcile_with_reality( + project, + ¤t_infra_map, + &target_table_ids, + &target_sql_resource_ids, + olap_client, + ) + .await + .map_err(|e| { + RoutineFailure::new( + Message::new( + "Reconciliation".to_string(), + "Failed to reconcile state with ClickHouse reality".to_string(), + ), + anyhow::anyhow!("{:?}", e), + ) + })? 
} else { current_infra_map }; diff --git a/apps/framework-cli/src/cli/routines/mod.rs b/apps/framework-cli/src/cli/routines/mod.rs index 5ff8f8e115..4f875ee89d 100644 --- a/apps/framework-cli/src/cli/routines/mod.rs +++ b/apps/framework-cli/src/cli/routines/mod.rs @@ -1020,7 +1020,17 @@ pub async fn remote_plan( .map(|t| t.id(&local_infra_map.default_database)) .collect(); - get_remote_inframap_serverless(project, clickhouse_url, None, table_names).await? + let sql_resource_ids: HashSet = + local_infra_map.sql_resources.keys().cloned().collect(); + + get_remote_inframap_serverless( + project, + clickhouse_url, + None, + &table_names, + &sql_resource_ids, + ) + .await? } else { // Moose server flow display::show_message_wrapper( @@ -1165,13 +1175,23 @@ pub async fn remote_gen_migration( .to_string(), }, ); - let table_ids: HashSet = local_infra_map + + let target_table_ids: HashSet = local_infra_map .tables .values() .map(|t| t.id(&local_infra_map.default_database)) .collect(); - get_remote_inframap_serverless(project, clickhouse_url, redis_url.as_deref(), table_ids) - .await? + let target_sql_resource_ids: HashSet = + local_infra_map.sql_resources.keys().cloned().collect(); + + get_remote_inframap_serverless( + project, + clickhouse_url, + redis_url.as_deref(), + &target_table_ids, + &target_sql_resource_ids, + ) + .await? } }; @@ -1220,7 +1240,8 @@ async fn get_remote_inframap_serverless( project: &Project, clickhouse_url: &str, redis_url: Option<&str>, - target_table_ids: HashSet, + target_table_ids: &HashSet, + target_sql_resource_ids: &HashSet, ) -> anyhow::Result { use crate::framework::core::plan::reconcile_with_reality; use crate::infrastructure::olap::clickhouse::config::parse_clickhouse_connection_string; @@ -1247,7 +1268,8 @@ async fn get_remote_inframap_serverless( reconcile_with_reality( project, &remote_infra_map, - &target_table_ids, + target_table_ids, + target_sql_resource_ids, reconcile_client, ) .await? 
diff --git a/apps/framework-cli/src/framework/core/infra_reality_checker.rs b/apps/framework-cli/src/framework/core/infra_reality_checker.rs index 742d481385..17320e03ca 100644 --- a/apps/framework-cli/src/framework/core/infra_reality_checker.rs +++ b/apps/framework-cli/src/framework/core/infra_reality_checker.rs @@ -16,8 +16,9 @@ /// - Identifying structural differences in tables use crate::{ framework::core::{ + infrastructure::sql_resource::SqlResource, infrastructure::table::Table, - infrastructure_map::{InfrastructureMap, OlapChange, TableChange}, + infrastructure_map::{Change, InfrastructureMap, OlapChange, TableChange}, }, infrastructure::olap::{OlapChangesError, OlapOperations}, project::Project, @@ -56,6 +57,12 @@ pub struct InfraDiscrepancies { pub missing_tables: Vec, /// Tables that exist in both but have structural differences pub mismatched_tables: Vec, + /// SQL resources (views/MVs) that exist in reality but are not in the map + pub unmapped_sql_resources: Vec, + /// SQL resources that are in the map but don't exist in reality + pub missing_sql_resources: Vec, + /// SQL resources that exist in both but have differences + pub mismatched_sql_resources: Vec, } impl InfraDiscrepancies { @@ -64,6 +71,9 @@ impl InfraDiscrepancies { self.unmapped_tables.is_empty() && self.missing_tables.is_empty() && self.mismatched_tables.is_empty() + && self.unmapped_sql_resources.is_empty() + && self.missing_sql_resources.is_empty() + && self.mismatched_sql_resources.is_empty() } } @@ -292,17 +302,107 @@ impl InfraRealityChecker { } } + // Fetch and compare SQL resources (views and materialized views) + debug!("Fetching actual SQL resources from OLAP databases"); + + let mut actual_sql_resources = Vec::new(); + + // Query each database and merge results + for database in &all_databases { + debug!("Fetching SQL resources from database: {}", database); + let mut db_sql_resources = self + .olap_client + .list_sql_resources(database, &infra_map.default_database) + .await?; + actual_sql_resources.append(&mut db_sql_resources); + } + + debug!( + "Found {} SQL resources across all databases", + actual_sql_resources.len() + ); + + // Create a map of actual SQL resources by name + let actual_sql_resource_map: HashMap = actual_sql_resources + .into_iter() + .map(|r| (r.name.clone(), r)) + .collect(); + + debug!( + "Actual SQL resource IDs: {:?}", + actual_sql_resource_map.keys() + ); + debug!( + "Infrastructure map SQL resource IDs: {:?}", + infra_map.sql_resources.keys() + ); + + // Find unmapped SQL resources (exist in reality but not in map) + let unmapped_sql_resources: Vec<_> = actual_sql_resource_map + .values() + .filter(|resource| !infra_map.sql_resources.contains_key(&resource.name)) + .cloned() + .collect(); + + debug!( + "Found {} unmapped SQL resources: {:?}", + unmapped_sql_resources.len(), + unmapped_sql_resources + .iter() + .map(|r| &r.name) + .collect::>() + ); + + // Find missing SQL resources (in map but don't exist in reality) + let missing_sql_resources: Vec = infra_map + .sql_resources + .keys() + .filter(|id| !actual_sql_resource_map.contains_key(*id)) + .cloned() + .collect(); + + debug!( + "Found {} missing SQL resources: {:?}", + missing_sql_resources.len(), + missing_sql_resources + ); + + // Find mismatched SQL resources (exist in both but differ) + let mut mismatched_sql_resources = Vec::new(); + for (id, desired) in &infra_map.sql_resources { + if let Some(actual) = actual_sql_resource_map.get(id) { + if actual != desired { + debug!("Found mismatch in SQL resource: {}", id); 
+ mismatched_sql_resources.push(OlapChange::SqlResource(Change::Updated { + before: Box::new(actual.clone()), + after: Box::new(desired.clone()), + })); + } + } + } + + debug!( + "Found {} mismatched SQL resources", + mismatched_sql_resources.len() + ); + let discrepancies = InfraDiscrepancies { unmapped_tables, missing_tables, mismatched_tables, + unmapped_sql_resources, + missing_sql_resources, + mismatched_sql_resources, }; debug!( - "Reality check complete. Found {} unmapped, {} missing, and {} mismatched tables", + "Reality check complete. Found {} unmapped, {} missing, and {} mismatched tables, {} unmapped SQL resources, {} missing SQL resources, {} mismatched SQL resources", discrepancies.unmapped_tables.len(), discrepancies.missing_tables.len(), - discrepancies.mismatched_tables.len() + discrepancies.mismatched_tables.len(), + discrepancies.unmapped_sql_resources.len(), + discrepancies.missing_sql_resources.len(), + discrepancies.mismatched_sql_resources.len() ); if discrepancies.is_empty() { @@ -335,6 +435,7 @@ mod tests { // Mock OLAP client for testing struct MockOlapClient { tables: Vec

    , + sql_resources: Vec, } #[async_trait] @@ -346,6 +447,17 @@ mod tests { ) -> Result<(Vec
    , Vec), OlapChangesError> { Ok((self.tables.clone(), vec![])) } + + async fn list_sql_resources( + &self, + _db_name: &str, + _default_database: &str, + ) -> Result< + Vec, + OlapChangesError, + > { + Ok(self.sql_resources.clone()) + } } // Helper function to create a test project @@ -435,6 +547,7 @@ mod tests { database: Some(DEFAULT_DATABASE_NAME.to_string()), ..table.clone() }], + sql_resources: vec![], }; // Create empty infrastructure map @@ -504,6 +617,7 @@ mod tests { database: Some(DEFAULT_DATABASE_NAME.to_string()), ..actual_table.clone() }], + sql_resources: vec![], }; let mut infra_map = InfrastructureMap { @@ -578,6 +692,7 @@ mod tests { database: Some(DEFAULT_DATABASE_NAME.to_string()), ..actual_table.clone() }], + sql_resources: vec![], }; let mut infra_map = InfrastructureMap { @@ -645,6 +760,7 @@ mod tests { database: Some(DEFAULT_DATABASE_NAME.to_string()), ..actual_table.clone() }], + sql_resources: vec![], }; let mut infra_map = InfrastructureMap { @@ -689,4 +805,70 @@ mod tests { _ => panic!("Expected TableChange::Updated variant"), } } + + #[tokio::test] + async fn test_reality_checker_sql_resource_mismatch() { + let actual_resource = SqlResource { + name: "test_view".to_string(), + database: None, + setup: vec!["CREATE VIEW test_view AS SELECT 1".to_string()], + teardown: vec!["DROP VIEW test_view".to_string()], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + + let infra_resource = SqlResource { + name: "test_view".to_string(), + database: None, + setup: vec!["CREATE VIEW test_view AS SELECT 2".to_string()], // Difference here + teardown: vec!["DROP VIEW test_view".to_string()], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + + let mock_client = MockOlapClient { + tables: vec![], + sql_resources: vec![actual_resource.clone()], + }; + + let mut infra_map = InfrastructureMap { + default_database: DEFAULT_DATABASE_NAME.to_string(), + topics: HashMap::new(), + api_endpoints: HashMap::new(), + tables: HashMap::new(), + views: HashMap::new(), + topic_to_table_sync_processes: HashMap::new(), + topic_to_topic_sync_processes: HashMap::new(), + function_processes: HashMap::new(), + block_db_processes: OlapProcess {}, + consumption_api_web_server: ConsumptionApiWebServer {}, + orchestration_workers: HashMap::new(), + sql_resources: HashMap::new(), + workflows: HashMap::new(), + web_apps: HashMap::new(), + }; + + infra_map + .sql_resources + .insert(infra_resource.name.clone(), infra_resource.clone()); + + let checker = InfraRealityChecker::new(mock_client); + let project = create_test_project(); + + let discrepancies = checker.check_reality(&project, &infra_map).await.unwrap(); + + assert!(discrepancies.unmapped_sql_resources.is_empty()); + assert!(discrepancies.missing_sql_resources.is_empty()); + assert_eq!(discrepancies.mismatched_sql_resources.len(), 1); + + match &discrepancies.mismatched_sql_resources[0] { + OlapChange::SqlResource(Change::Updated { before, after }) => { + assert_eq!(before.name, "test_view"); + assert_eq!(after.name, "test_view"); + assert_eq!(before.setup[0], "CREATE VIEW test_view AS SELECT 1"); + assert_eq!(after.setup[0], "CREATE VIEW test_view AS SELECT 2"); + } + _ => panic!("Expected SqlResource Updated variant"), + } + } } diff --git a/apps/framework-cli/src/framework/core/infrastructure/sql_resource.rs b/apps/framework-cli/src/framework/core/infrastructure/sql_resource.rs index 5df774e6bf..f7c0b1cbd7 100644 --- a/apps/framework-cli/src/framework/core/infrastructure/sql_resource.rs +++ 
b/apps/framework-cli/src/framework/core/infrastructure/sql_resource.rs @@ -1,3 +1,4 @@ +use crate::infrastructure::olap::clickhouse::sql_parser::normalize_sql_for_comparison; use crate::proto::infrastructure_map::SqlResource as ProtoSqlResource; use serde::{Deserialize, Serialize}; @@ -9,11 +10,17 @@ use super::InfrastructureSignature; /// This struct holds information about a SQL resource, including its name, /// setup and teardown scripts, and its data lineage relationships with other /// infrastructure components. -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] +#[derive(Debug, Serialize, Deserialize, Clone, Eq)] pub struct SqlResource { /// The unique name identifier for the SQL resource. pub name: String, + /// The database where this SQL resource exists. + /// - None means use the default database + /// - Some(db) means the resource is in a specific database + #[serde(skip_serializing_if = "Option::is_none", default)] + pub database: Option, + /// A list of SQL commands or script paths executed during the setup phase. pub setup: Vec, /// A list of SQL commands or script paths executed during the teardown phase. @@ -28,10 +35,26 @@ pub struct SqlResource { } impl SqlResource { + /// Returns a unique identifier for this SQL resource. + /// + /// The ID format matches the table ID format: `{database}_{name}` + /// This ensures resources in different databases don't collide. + /// + /// # Arguments + /// * `default_database` - The default database name to use when `database` is None + /// + /// # Returns + /// A string in the format `{database}_{name}` + pub fn id(&self, default_database: &str) -> String { + let db = self.database.as_deref().unwrap_or(default_database); + format!("{}_{}", db, self.name) + } + /// Converts the `SqlResource` struct into its corresponding Protobuf representation. pub fn to_proto(&self) -> ProtoSqlResource { ProtoSqlResource { name: self.name.clone(), + database: self.database.clone(), setup: self.setup.clone(), teardown: self.teardown.clone(), special_fields: Default::default(), @@ -44,6 +67,7 @@ impl SqlResource { pub fn from_proto(proto: ProtoSqlResource) -> Self { Self { name: proto.name, + database: proto.database, setup: proto.setup, teardown: proto.teardown, pulls_data_from: proto @@ -74,3 +98,283 @@ impl DataLineage for SqlResource { self.pushes_data_to.clone() } } + +/// Custom PartialEq implementation that normalizes SQL statements before comparing. +/// This prevents false differences due to cosmetic formatting (whitespace, casing, backticks). 
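Before the `PartialEq` implementation itself, here is a minimal, self-contained sketch of the underlying idea, assuming only the `sqlparser` crate already used by this patch: parsing and re-serializing a statement erases most cosmetic differences on its own, and the visitor-based `Normalizer` added later in this patch handles the remainder (backticks, a small set of function names, and default-database prefixes). The SQL strings below are illustrative, not taken from the codebase.

```rust
use sqlparser::dialect::ClickHouseDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = ClickHouseDialect {};
    // Two cosmetically different spellings of the same statement.
    let messy = "select\n    count(id)  as total\nfrom users";
    let tidy = "SELECT count(id) AS total FROM users";
    // Parse and re-serialize: keyword casing and whitespace are canonicalized by Display.
    let canon = |sql: &str| Parser::parse_sql(&dialect, sql).unwrap()[0].to_string();
    assert_eq!(canon(messy), canon(tidy));
}
```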
+impl PartialEq for SqlResource { + fn eq(&self, other: &Self) -> bool { + // Name must match exactly + if self.name != other.name { + return false; + } + + // Database comparison: treat None as equivalent to any explicit database + // This allows resources from user code (database=None) to match introspected + // resources (database=Some("local")), since both resolve to the same ID + // We don't compare database here because the HashMap key already includes it + + // Data lineage must match exactly + if self.pulls_data_from != other.pulls_data_from + || self.pushes_data_to != other.pushes_data_to + { + return false; + } + + // Setup and teardown scripts must match after normalization + if self.setup.len() != other.setup.len() || self.teardown.len() != other.teardown.len() { + return false; + } + + for (self_sql, other_sql) in self.setup.iter().zip(other.setup.iter()) { + // Pass empty string for default_database since the comparison happens after HashMap + // lookup by ID (which includes database prefix). Both SQL statements are from the + // same database context, so we only need AST-based normalization (backticks, casing, + // whitespace) without database prefix stripping. User-defined SQL typically doesn't + // include explicit database prefixes (e.g., "FROM local.Table"). + let self_normalized = normalize_sql_for_comparison(self_sql, ""); + let other_normalized = normalize_sql_for_comparison(other_sql, ""); + if self_normalized != other_normalized { + return false; + } + } + + for (self_sql, other_sql) in self.teardown.iter().zip(other.teardown.iter()) { + let self_normalized = normalize_sql_for_comparison(self_sql, ""); + let other_normalized = normalize_sql_for_comparison(other_sql, ""); + if self_normalized != other_normalized { + return false; + } + } + + true + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn create_test_resource(name: &str, setup: Vec<&str>, teardown: Vec<&str>) -> SqlResource { + SqlResource { + name: name.to_string(), + database: None, + setup: setup.into_iter().map(String::from).collect(), + teardown: teardown.into_iter().map(String::from).collect(), + pulls_data_from: vec![], + pushes_data_to: vec![], + } + } + + #[test] + fn test_sql_resource_equality_exact_match() { + let resource1 = create_test_resource( + "TestMV", + vec!["CREATE MATERIALIZED VIEW TestMV AS SELECT * FROM source"], + vec!["DROP VIEW IF EXISTS TestMV"], + ); + let resource2 = create_test_resource( + "TestMV", + vec!["CREATE MATERIALIZED VIEW TestMV AS SELECT * FROM source"], + vec!["DROP VIEW IF EXISTS TestMV"], + ); + + assert_eq!(resource1, resource2); + } + + #[test] + fn test_sql_resource_equality_with_case_differences() { + let resource_lowercase = create_test_resource( + "TestMV", + vec!["create view TestMV as select count(id) from users"], + vec!["drop view if exists TestMV"], + ); + let resource_uppercase = create_test_resource( + "TestMV", + vec!["CREATE VIEW TestMV AS SELECT COUNT(id) FROM users"], + vec!["DROP VIEW IF EXISTS TestMV"], + ); + + assert_eq!(resource_lowercase, resource_uppercase); + } + + #[test] + fn test_sql_resource_equality_comprehensive() { + // User-defined (from TypeScript/Python with backticks and formatting) + let user_defined = create_test_resource( + "BarAggregated_MV", + vec![ + "CREATE MATERIALIZED VIEW IF NOT EXISTS `BarAggregated_MV`\n TO `BarAggregated`\n AS SELECT\n count(`primaryKey`) as totalRows\n FROM `Bar`" + ], + vec!["DROP VIEW IF EXISTS `BarAggregated_MV`"], + ); + + // Introspected from ClickHouse (no backticks, single line, 
uppercase keywords) + let introspected = create_test_resource( + "BarAggregated_MV", + vec![ + "CREATE MATERIALIZED VIEW IF NOT EXISTS BarAggregated_MV TO BarAggregated AS SELECT COUNT(primaryKey) AS totalRows FROM Bar" + ], + vec!["DROP VIEW IF EXISTS `BarAggregated_MV`"], + ); + + assert_eq!(user_defined, introspected); + } + + #[test] + fn test_sql_resource_inequality_different_names() { + let resource1 = create_test_resource( + "MV1", + vec!["CREATE VIEW MV1 AS SELECT * FROM source"], + vec!["DROP VIEW IF EXISTS MV1"], + ); + let resource2 = create_test_resource( + "MV2", + vec!["CREATE VIEW MV2 AS SELECT * FROM source"], + vec!["DROP VIEW IF EXISTS MV2"], + ); + + assert_ne!(resource1, resource2); + } + + #[test] + fn test_sql_resource_inequality_different_sql() { + let resource1 = create_test_resource( + "TestMV", + vec!["CREATE VIEW TestMV AS SELECT col1 FROM table"], + vec!["DROP VIEW IF EXISTS TestMV"], + ); + let resource2 = create_test_resource( + "TestMV", + vec!["CREATE VIEW TestMV AS SELECT col2 FROM table"], + vec!["DROP VIEW IF EXISTS TestMV"], + ); + + assert_ne!(resource1, resource2); + } + + #[test] + fn test_sql_resource_inequality_different_data_lineage() { + let mut resource1 = create_test_resource( + "TestMV", + vec!["CREATE VIEW TestMV AS SELECT * FROM source"], + vec!["DROP VIEW IF EXISTS TestMV"], + ); + resource1.pulls_data_from = vec![InfrastructureSignature::Table { + id: "Table1".to_string(), + }]; + + let mut resource2 = create_test_resource( + "TestMV", + vec!["CREATE VIEW TestMV AS SELECT * FROM source"], + vec!["DROP VIEW IF EXISTS TestMV"], + ); + resource2.pulls_data_from = vec![InfrastructureSignature::Table { + id: "Table2".to_string(), + }]; + + assert_ne!(resource1, resource2); + } + + #[test] + fn test_sql_resource_equality_multiple_statements() { + let resource1 = create_test_resource( + "TestMV", + vec![ + "CREATE VIEW TestMV AS SELECT * FROM source", + "CREATE INDEX idx ON TestMV (col1)", + ], + vec!["DROP VIEW IF EXISTS TestMV"], + ); + let resource2 = create_test_resource( + "TestMV", + vec![ + "create view TestMV as select * from source", + "create index idx on TestMV (col1)", + ], + vec!["drop view if exists TestMV"], + ); + + assert_eq!(resource1, resource2); + } + + #[test] + fn test_sql_resource_id_with_database() { + // Test with explicit database + let resource_with_db = SqlResource { + name: "MyView".to_string(), + database: Some("custom".to_string()), + setup: vec![], + teardown: vec![], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + assert_eq!(resource_with_db.id("default"), "custom_MyView"); + + // Test with None database (uses default) + let resource_no_db = SqlResource { + name: "MyView".to_string(), + database: None, + setup: vec![], + teardown: vec![], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + assert_eq!(resource_no_db.id("default"), "default_MyView"); + } + + #[test] + fn test_sql_resource_equality_ignores_database_field() { + // Resources with different database fields should be equal if they have the same name + // This is because the HashMap key already includes the database, so we don't need to + // compare it during equality checks + let resource_no_db = SqlResource { + name: "MyView".to_string(), + database: None, + setup: vec!["CREATE VIEW MyView AS SELECT * FROM table1".to_string()], + teardown: vec!["DROP VIEW IF EXISTS MyView".to_string()], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + + let resource_with_db = SqlResource { + name: "MyView".to_string(), + database: 
Some("local".to_string()), + setup: vec!["CREATE VIEW MyView AS SELECT * FROM table1".to_string()], + teardown: vec!["DROP VIEW IF EXISTS MyView".to_string()], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + + // These should be equal because database is not compared in PartialEq + assert_eq!(resource_no_db, resource_with_db); + } + + #[test] + fn test_sql_resource_equality_with_normalized_sql() { + // Test that SQL normalization handles whitespace and formatting differences + let resource_formatted = SqlResource { + name: "TestView".to_string(), + database: None, + setup: vec![ + "CREATE VIEW IF NOT EXISTS TestView \n AS SELECT\n `primaryKey`,\n `utcTimestamp`,\n `textLength`\n FROM `Bar`\n WHERE `hasText` = true".to_string() + ], + teardown: vec!["DROP VIEW IF EXISTS `TestView`".to_string()], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + + let resource_compact = SqlResource { + name: "TestView".to_string(), + database: None, + setup: vec![ + "CREATE VIEW IF NOT EXISTS TestView AS SELECT primaryKey, utcTimestamp, textLength FROM Bar WHERE hasText = true".to_string() + ], + teardown: vec!["DROP VIEW IF EXISTS `TestView`".to_string()], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + + // These should be equal after SQL normalization + assert_eq!(resource_formatted, resource_compact); + } +} diff --git a/apps/framework-cli/src/framework/core/infrastructure_map.rs b/apps/framework-cli/src/framework/core/infrastructure_map.rs index 10811884b9..95ef3f7465 100644 --- a/apps/framework-cli/src/framework/core/infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/infrastructure_map.rs @@ -4547,6 +4547,7 @@ mod diff_sql_resources_tests { fn create_sql_resource(name: &str, setup: Vec<&str>, teardown: Vec<&str>) -> SqlResource { SqlResource { name: name.to_string(), + database: None, setup: setup.iter().map(|s| s.to_string()).collect(), teardown: teardown.iter().map(|s| s.to_string()).collect(), pulls_data_from: vec![], @@ -4782,6 +4783,7 @@ mod diff_sql_resources_tests { let mv_before = SqlResource { name: "events_summary_mv".to_string(), + database: None, setup: vec!["CREATE MATERIALIZED VIEW events_summary_mv TO events_summary_table AS SELECT id, name FROM events".to_string()], teardown: vec!["DROP VIEW events_summary_mv".to_string()], pulls_data_from: vec![InfrastructureSignature::Table { @@ -4794,6 +4796,7 @@ mod diff_sql_resources_tests { let mv_after = SqlResource { name: "events_summary_mv".to_string(), + database: None, setup: vec!["CREATE MATERIALIZED VIEW events_summary_mv TO events_summary_table AS SELECT id, name, timestamp FROM events".to_string()], teardown: vec!["DROP VIEW events_summary_mv".to_string()], pulls_data_from: vec![InfrastructureSignature::Table { diff --git a/apps/framework-cli/src/framework/core/plan.rs b/apps/framework-cli/src/framework/core/plan.rs index 573941f10b..d02d7765df 100644 --- a/apps/framework-cli/src/framework/core/plan.rs +++ b/apps/framework-cli/src/framework/core/plan.rs @@ -14,7 +14,7 @@ /// The resulting plan is then used by the execution module to apply the changes. 
use crate::framework::core::infra_reality_checker::{InfraRealityChecker, RealityCheckError}; use crate::framework::core::infrastructure_map::{ - InfraChanges, InfrastructureMap, OlapChange, TableChange, + Change, InfraChanges, InfrastructureMap, OlapChange, TableChange, }; use crate::framework::core::primitive_map::PrimitiveMap; use crate::framework::core::state_storage::StateStorage; @@ -76,7 +76,8 @@ pub enum PlanningError { /// # Arguments /// * `project` - The project configuration /// * `infra_map` - The infrastructure map to update -/// * `target_table_names` - Names of tables to include from unmapped tables (tables in DB but not in current inframap). Only unmapped tables with names in this set will be added to the reconciled inframap. +/// * `target_table_ids` - Tables to include from unmapped tables (tables in DB but not in current inframap). Only unmapped tables with names in this set will be added to the reconciled inframap. +/// * `target_sql_resource_ids` - SQL resources to include from unmapped SQL resources. /// * `olap_client` - The OLAP client to use for checking reality /// /// # Returns @@ -85,6 +86,7 @@ pub async fn reconcile_with_reality( project: &Project, current_infra_map: &InfrastructureMap, target_table_ids: &HashSet, + target_sql_resource_ids: &HashSet, olap_client: T, ) -> Result { info!("Reconciling infrastructure map with actual database state"); @@ -258,6 +260,60 @@ pub async fn reconcile_with_reality( } } + // Handle SQL resources reconciliation + debug!("Reconciling SQL resources (views and materialized views)"); + + // Remove missing SQL resources (in map but don't exist in reality) + for missing_sql_resource_id in discrepancies.missing_sql_resources { + debug!( + "Removing missing SQL resource from infrastructure map: {}", + missing_sql_resource_id + ); + reconciled_map + .sql_resources + .remove(&missing_sql_resource_id); + } + + // Add unmapped SQL resources (exist in database but not in current infrastructure map) + // Only include resources whose names are in target_sql_resource_ids to avoid managing external resources + for unmapped_sql_resource in discrepancies.unmapped_sql_resources { + let name = &unmapped_sql_resource.name; + + if target_sql_resource_ids.contains(name) { + debug!( + "Adding unmapped SQL resource found in reality to infrastructure map: {}", + name + ); + reconciled_map + .sql_resources + .insert(name.clone(), unmapped_sql_resource); + } + } + + // Update mismatched SQL resources (exist in both but differ) + for change in discrepancies.mismatched_sql_resources { + match change { + OlapChange::SqlResource(Change::Updated { before, .. }) => { + // We use 'before' (the actual resource from reality) because we want the + // reconciled map to reflect the current state of the database. + // This ensures the subsequent diff against the target map will correctly + // identify that the current state differs from the desired state. + let name = &before.name; + debug!( + "Updating mismatched SQL resource in infrastructure map to match reality: {}", + name + ); + reconciled_map.sql_resources.insert(name.clone(), *before); + } + _ => { + log::warn!( + "Unexpected change type in mismatched_sql_resources: {:?}", + change + ); + } + } + } + info!("Infrastructure map successfully reconciled with actual database state"); Ok(reconciled_map) } @@ -342,6 +398,7 @@ pub async fn plan_changes( .values() .map(|t| t.id(&target_infra_map.default_database)) .collect(), + &target_infra_map.sql_resources.keys().cloned().collect(), olap_client, ) .await? 
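The three rules applied above (drop resources that no longer exist, adopt unmapped resources only when the target map manages them, and overwrite mismatched entries with the definition found in the database) can be restated as a small self-contained sketch; plain strings stand in for `SqlResource` values and every name below is illustrative.

```rust
use std::collections::{HashMap, HashSet};

// Illustrative only: plain strings stand in for SqlResource definitions.
fn reconcile_sql_resources(
    map: &mut HashMap<String, String>,       // reconciled map: name -> definition
    missing: &[String],                      // in the map, absent from the database
    unmapped: &[(String, String)],           // in the database, absent from the map
    mismatched_reality: &[(String, String)], // in both, as defined in the database
    targets: &HashSet<String>,               // resources the target map actually manages
) {
    for name in missing {
        map.remove(name);
    }
    for (name, def) in unmapped {
        if targets.contains(name) {
            map.insert(name.clone(), def.clone());
        }
    }
    for (name, reality_def) in mismatched_reality {
        map.insert(name.clone(), reality_def.clone());
    }
}

fn main() {
    let mut map = HashMap::from([("stale_view".to_string(), "OLD".to_string())]);
    let targets = HashSet::from(["managed_view".to_string()]);
    reconcile_sql_resources(
        &mut map,
        &["stale_view".to_string()],
        &[
            ("managed_view".to_string(), "CREATE VIEW ...".to_string()),
            ("external_view".to_string(), "CREATE VIEW ...".to_string()),
        ],
        &[],
        &targets,
    );
    // stale_view removed, managed_view adopted, external_view ignored.
    assert_eq!(map.len(), 1);
    assert!(map.contains_key("managed_view"));
}
```

The `targets` filter is what keeps externally managed views out of the reconciled map, mirroring the behaviour exercised by the reconciliation tests further down.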
@@ -403,6 +460,7 @@ pub async fn plan_changes( #[cfg(test)] mod tests { use super::*; + use crate::framework::core::infrastructure::sql_resource::SqlResource; use crate::framework::core::infrastructure::table::{ Column, ColumnType, IntType, OrderBy, Table, }; @@ -419,6 +477,7 @@ mod tests { // Mock OLAP client for testing struct MockOlapClient { tables: Vec
<Table>, + sql_resources: Vec<SqlResource>, } #[async_trait] @@ -430,6 +489,14 @@ mod tests { ) -> Result<(Vec<Table>
    , Vec), OlapChangesError> { Ok((self.tables.clone(), vec![])) } + + async fn list_sql_resources( + &self, + _db_name: &str, + _default_database: &str, + ) -> Result, OlapChangesError> { + Ok(self.sql_resources.clone()) + } } // Helper function to create a test table @@ -514,6 +581,7 @@ mod tests { // Create mock OLAP client with one table let mock_client = MockOlapClient { tables: vec![table.clone()], + sql_resources: vec![], }; // Create empty infrastructure map (no tables) @@ -537,36 +605,42 @@ mod tests { let mut target_ids = HashSet::new(); - // Reconcile the infrastructure map + // Test 1: Empty target_ids = no managed tables, so unmapped tables are filtered out + // External tables are not accidentally included let reconciled = reconcile_with_reality( &project, &infra_map, - &target_ids, + &HashSet::new(), + &HashSet::new(), MockOlapClient { tables: vec![table.clone()], + sql_resources: vec![], }, ) .await .unwrap(); - // The reconciled map should not contain the unmapped table (ignoring unmapped tables) + // With empty target_ids, the unmapped table should NOT be added (external table) assert_eq!(reconciled.tables.len(), 0); target_ids.insert("test_unmapped_table_1_0_0".to_string()); - // Reconcile the infrastructure map + // Test 2: Non-empty target_ids = only include if in set + // This is the behavior used by `moose dev`, `moose prod`, etc. let reconciled = reconcile_with_reality( &project, &infra_map, &target_ids, + &HashSet::new(), MockOlapClient { tables: vec![table.clone()], + sql_resources: vec![], }, ) .await .unwrap(); - // The reconciled map should not contain the unmapped table (ignoring unmapped tables) + // When target_ids contains the table ID, it's included assert_eq!(reconciled.tables.len(), 1); } @@ -576,7 +650,10 @@ mod tests { let table = create_test_table("missing_table"); // Create mock OLAP client with no tables - let mock_client = MockOlapClient { tables: vec![] }; + let mock_client = MockOlapClient { + tables: vec![], + sql_resources: vec![], + }; // Create infrastructure map with one table let mut infra_map = InfrastructureMap::default(); @@ -601,7 +678,10 @@ mod tests { assert_eq!(discrepancies.missing_tables[0], "missing_table"); // Create another mock client for the reconciliation - let reconcile_mock_client = MockOlapClient { tables: vec![] }; + let reconcile_mock_client = MockOlapClient { + tables: vec![], + sql_resources: vec![], + }; let target_table_ids = HashSet::new(); @@ -610,6 +690,7 @@ mod tests { &project, &infra_map, &target_table_ids, + &HashSet::new(), reconcile_mock_client, ) .await @@ -648,6 +729,7 @@ mod tests { database: Some(db_name.clone()), ..actual_table.clone() }], + sql_resources: vec![], }; // Create infrastructure map with the infra table (no extra column) @@ -678,6 +760,7 @@ mod tests { database: Some(db_name.clone()), ..actual_table.clone() }], + sql_resources: vec![], }; let target_table_ids = HashSet::new(); @@ -686,6 +769,7 @@ mod tests { &project, &infra_map, &target_table_ids, + &HashSet::new(), reconcile_mock_client, ) .await @@ -709,6 +793,7 @@ mod tests { // Create mock OLAP client with the table let mock_client = MockOlapClient { tables: vec![table.clone()], + sql_resources: vec![], }; // Create infrastructure map with the same table @@ -735,6 +820,7 @@ mod tests { // Create another mock client for reconciliation let reconcile_mock_client = MockOlapClient { tables: vec![table.clone()], + sql_resources: vec![], }; let target_table_ids = HashSet::new(); @@ -743,6 +829,7 @@ mod tests { &project, &infra_map, 
&target_table_ids, + &HashSet::new(), reconcile_mock_client, ) .await @@ -794,13 +881,20 @@ mod tests { ); // Also verify that reconciliation preserves the database name - let mock_client = MockOlapClient { tables: vec![] }; + let mock_client = MockOlapClient { + tables: vec![], + sql_resources: vec![], + }; - let target_table_ids = HashSet::new(); - let reconciled = - reconcile_with_reality(&project, &loaded_map, &target_table_ids, mock_client) - .await - .unwrap(); + let reconciled = reconcile_with_reality( + &project, + &loaded_map, + &HashSet::new(), + &HashSet::new(), + mock_client, + ) + .await + .unwrap(); assert_eq!( reconciled.default_database, CUSTOM_DB_NAME, @@ -847,13 +941,20 @@ mod tests { ); // Now test reconciliation - this is where the fix should be applied - let mock_client = MockOlapClient { tables: vec![] }; + let mock_client = MockOlapClient { + tables: vec![], + sql_resources: vec![], + }; - let target_table_ids = HashSet::new(); - let reconciled = - reconcile_with_reality(&project, &loaded_map, &target_table_ids, mock_client) - .await - .unwrap(); + let reconciled = reconcile_with_reality( + &project, + &loaded_map, + &HashSet::new(), + &HashSet::new(), + mock_client, + ) + .await + .unwrap(); // After reconciliation, the database name should be set from the project config assert_eq!( @@ -938,6 +1039,7 @@ mod tests { let mock_client = MockOlapClient { tables: vec![table_from_reality], + sql_resources: vec![], }; // Create infrastructure map with the table including cluster_name @@ -949,12 +1051,16 @@ mod tests { // Create test project let project = create_test_project(); - let target_table_names = HashSet::new(); // Reconcile the infrastructure map - let reconciled = - reconcile_with_reality(&project, &infra_map, &target_table_names, mock_client) - .await - .unwrap(); + let reconciled = reconcile_with_reality( + &project, + &infra_map, + &HashSet::new(), + &HashSet::new(), + mock_client, + ) + .await + .unwrap(); // The reconciled map should preserve cluster_name from the infra map assert_eq!(reconciled.tables.len(), 1); @@ -993,6 +1099,7 @@ mod tests { // Create mock OLAP client with the reality table let mock_client = MockOlapClient { tables: vec![reality_table.clone()], + sql_resources: vec![], }; // Create infrastructure map with the infra table @@ -1004,12 +1111,16 @@ mod tests { // Create test project let project = create_test_project(); - let target_table_names = HashSet::new(); // Reconcile the infrastructure map - let reconciled = - reconcile_with_reality(&project, &infra_map, &target_table_names, mock_client) - .await - .unwrap(); + let reconciled = reconcile_with_reality( + &project, + &infra_map, + &HashSet::new(), + &HashSet::new(), + mock_client, + ) + .await + .unwrap(); // The reconciled map should still have the table assert_eq!(reconciled.tables.len(), 1); @@ -1029,4 +1140,141 @@ mod tests { "columns should be updated from reality" ); } + + #[tokio::test] + async fn test_reconcile_sql_resources_with_empty_filter_ignores_external() { + // Create a SQL resource that exists in the database but not in the infra map + let sql_resource = SqlResource { + name: "unmapped_view".to_string(), + database: Some("test".to_string()), + setup: vec!["CREATE VIEW unmapped_view AS SELECT * FROM source".to_string()], + teardown: vec!["DROP VIEW IF EXISTS unmapped_view".to_string()], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + + let mock_client = MockOlapClient { + tables: vec![], + sql_resources: vec![sql_resource.clone()], + }; + + let infra_map = 
InfrastructureMap::default(); + let project = create_test_project(); + + // Empty target_sql_resource_ids means no managed resources - external resources are filtered out + let reconciled = reconcile_with_reality( + &project, + &infra_map, + &HashSet::new(), + &HashSet::new(), + mock_client, + ) + .await + .unwrap(); + + // Empty filter = no managed resources, so unmapped SQL resource is NOT included (external) + assert_eq!(reconciled.sql_resources.len(), 0); + } + + #[tokio::test] + async fn test_reconcile_sql_resources_with_specific_filter() { + // Create two SQL resources in the database + let view_a = SqlResource { + name: "view_a".to_string(), + database: Some("test".to_string()), + setup: vec!["CREATE VIEW view_a AS SELECT * FROM table_a".to_string()], + teardown: vec!["DROP VIEW IF EXISTS view_a".to_string()], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + + let view_b = SqlResource { + name: "view_b".to_string(), + database: Some("test".to_string()), + setup: vec!["CREATE VIEW view_b AS SELECT * FROM table_b".to_string()], + teardown: vec!["DROP VIEW IF EXISTS view_b".to_string()], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + + let mock_client = MockOlapClient { + tables: vec![], + sql_resources: vec![view_a.clone(), view_b.clone()], + }; + + let infra_map = InfrastructureMap::default(); + let project = create_test_project(); + + // Only include view_a in the filter + let mut target_sql_resource_ids = HashSet::new(); + target_sql_resource_ids.insert(view_a.name.clone()); + + let reconciled = reconcile_with_reality( + &project, + &infra_map, + &HashSet::new(), + &target_sql_resource_ids, + mock_client, + ) + .await + .unwrap(); + + // Only view_a should be included, view_b should be filtered out + assert_eq!(reconciled.sql_resources.len(), 1); + assert!(reconciled.sql_resources.contains_key(&view_a.name)); + assert!(!reconciled.sql_resources.contains_key(&view_b.name)); + } + + #[tokio::test] + async fn test_reconcile_sql_resources_missing_and_mismatched() { + // Create SQL resource that's in the infra map + let existing_view = SqlResource { + name: "existing_view".to_string(), + database: None, + setup: vec!["CREATE VIEW existing_view AS SELECT * FROM old_table".to_string()], + teardown: vec!["DROP VIEW IF EXISTS existing_view".to_string()], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + + // Reality has a different version (mismatched) + let reality_view = SqlResource { + name: "existing_view".to_string(), + database: Some("test".to_string()), + setup: vec!["CREATE VIEW existing_view AS SELECT * FROM new_table".to_string()], + teardown: vec!["DROP VIEW IF EXISTS existing_view".to_string()], + pulls_data_from: vec![], + pushes_data_to: vec![], + }; + + let mock_client = MockOlapClient { + tables: vec![], + sql_resources: vec![reality_view.clone()], + }; + + // Create infra map with the existing view + let mut infra_map = InfrastructureMap::default(); + infra_map + .sql_resources + .insert(existing_view.name.clone(), existing_view.clone()); + + let project = create_test_project(); + let mut target_sql_resource_ids = HashSet::new(); + target_sql_resource_ids.insert(existing_view.name.clone()); + + let reconciled = reconcile_with_reality( + &project, + &infra_map, + &HashSet::new(), + &target_sql_resource_ids, + mock_client, + ) + .await + .unwrap(); + + // The view should be updated to match reality + assert_eq!(reconciled.sql_resources.len(), 1); + let reconciled_view = reconciled.sql_resources.get(&reality_view.name).unwrap(); + 
assert_eq!(reconciled_view.setup, reality_view.setup); + } } diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs index 936f39e168..3d932f8a8d 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs @@ -46,17 +46,20 @@ use queries::{ use serde::{Deserialize, Serialize}; use sql_parser::{ extract_engine_from_create_table, extract_indexes_from_create_table, - extract_sample_by_from_create_table, extract_table_settings_from_create_table, + extract_sample_by_from_create_table, extract_source_tables_from_query, + extract_table_settings_from_create_table, normalize_sql_for_comparison, split_qualified_name, }; use std::collections::HashMap; use std::ops::Deref; use std::sync::LazyLock; use self::model::ClickHouseSystemTable; +use crate::framework::core::infrastructure::sql_resource::SqlResource; use crate::framework::core::infrastructure::table::{ Column, ColumnMetadata, ColumnType, DataEnum, EnumMember, EnumValue, EnumValueMetadata, OrderBy, Table, TableIndex, METADATA_PREFIX, }; +use crate::framework::core::infrastructure::InfrastructureSignature; use crate::framework::core::infrastructure_map::{PrimitiveSignature, PrimitiveTypes}; use crate::framework::core::partial_infrastructure_map::LifeCycle; use crate::framework::versions::Version; @@ -1964,6 +1967,284 @@ impl OlapOperations for ConfiguredDBClient { ); Ok((tables, unsupported_tables)) } + + /// Retrieves all SQL resources (views and materialized views) from the ClickHouse database + /// + /// # Arguments + /// * `db_name` - The name of the database to list SQL resources from + /// * `default_database` - The default database name for resolving unqualified table references + /// + /// # Returns + /// * `Result, OlapChangesError>` - A list of SqlResource objects + /// + /// # Details + /// This implementation: + /// 1. Queries system.tables for views and materialized views + /// 2. Parses the CREATE statements to extract dependencies + /// 3. Reconstructs SqlResource objects with setup and teardown scripts + /// 4. Extracts data lineage (pulls_data_from and pushes_data_to) + async fn list_sql_resources( + &self, + db_name: &str, + default_database: &str, + ) -> Result, OlapChangesError> { + debug!( + "Starting list_sql_resources operation for database: {}", + db_name + ); + + // We query `as_select` from system.tables to get the clean SELECT statement + // without the view's column definitions (e.g., `CREATE VIEW v (col1 Type) AS ...`). + // This avoids complex parsing logic to strip those columns manually. + let query = format!( + r#" + SELECT + name, + database, + engine, + create_table_query, + as_select + FROM system.tables + WHERE database = '{}' + AND engine IN ('View', 'MaterializedView') + AND NOT name LIKE '.%' + ORDER BY name + "#, + db_name + ); + debug!("Executing SQL resources query: {}", query); + + let mut cursor = self + .client + .query(&query) + .fetch::<(String, String, String, String, String)>() + .map_err(|e| { + debug!("Error fetching SQL resources: {}", e); + OlapChangesError::DatabaseError(e.to_string()) + })?; + + let mut sql_resources = Vec::new(); + + while let Some((name, database, engine, create_query, as_select)) = cursor + .next() + .await + .map_err(|e| OlapChangesError::DatabaseError(e.to_string()))? 
+ { + debug!("Processing SQL resource: {} (engine: {})", name, engine); + debug!("Create query: {}", create_query); + + // Reconstruct SqlResource based on engine type + let sql_resource = match engine.as_str() { + "MaterializedView" => reconstruct_sql_resource_from_mv( + name, + create_query, + as_select, + database, + default_database, + )?, + "View" => { + reconstruct_sql_resource_from_view(name, as_select, database, default_database)? + } + _ => { + warn!("Unexpected engine type for SQL resource: {}", engine); + continue; + } + }; + + sql_resources.push(sql_resource); + } + + debug!( + "Completed list_sql_resources operation, found {} SQL resources", + sql_resources.len() + ); + Ok(sql_resources) + } +} + +static MATERIALIZED_VIEW_TO_PATTERN: LazyLock = LazyLock::new(|| { + // Pattern to extract TO from CREATE MATERIALIZED VIEW + regex::Regex::new(r"(?i)\bTO\s+([a-zA-Z0-9_.`]+)") + .expect("MATERIALIZED_VIEW_TO_PATTERN regex should compile") +}); + +/// Reconstructs a SqlResource from a materialized view's CREATE statement +/// +/// # Arguments +/// * `name` - The name of the materialized view +/// * `create_query` - The CREATE MATERIALIZED VIEW statement from ClickHouse +/// * `as_select` - The SELECT part of the query (clean, from system.tables) +/// * `database` - The database where the view is located +/// * `default_database` - The default database for resolving unqualified table references +/// +/// # Returns +/// * `Result` - The reconstructed SqlResource +fn reconstruct_sql_resource_from_mv( + name: String, + create_query: String, + as_select: String, + _database: String, + default_database: &str, +) -> Result { + // Extract target table from create_query + // We use a regex on the raw create_query because it's simpler than full parsing and + // avoids issues with ClickHouse's specific column definition syntax in CREATE statements. + // as_select doesn't contain the TO clause, so we must check the original query. 
+ let target_table = MATERIALIZED_VIEW_TO_PATTERN + .captures(&create_query) + .and_then(|caps| caps.get(1)) + .map(|m| m.as_str().replace('`', "")) // Strip backticks from table name + .ok_or_else(|| { + OlapChangesError::DatabaseError(format!( + "Could not find TO target in materialized view definition: {}", + name + )) + })?; + + // Reconstruct the canonical CREATE statement + // CREATE MATERIALIZED VIEW IF NOT EXISTS name TO target AS select + let setup_raw = format!( + "CREATE MATERIALIZED VIEW IF NOT EXISTS {} TO {} AS {}", + name, target_table, as_select + ); + + // Normalize the SQL for consistent comparison with user-defined views + let setup = normalize_sql_for_comparison(&setup_raw, default_database); + + // Generate teardown script + let teardown = format!("DROP VIEW IF EXISTS `{}`", name); + + // Parse as_select to get source tables (lineage) + // as_select is clean SQL, so we can use standard parser logic + let source_tables = extract_source_tables_from_query(&as_select).map_err(|e| { + OlapChangesError::DatabaseError(format!( + "Failed to extract source tables from MV {}: {}", + name, e + )) + })?; + + // Extract pulls_data_from (source tables) + let pulls_data_from = source_tables + .into_iter() + .map(|table_ref| { + // Get the table name, strip version suffix if present + let table_name = table_ref.table; + let (base_name, _version) = extract_version_from_table_name(&table_name); + + // Use database from table reference if available, otherwise use default + let qualified_id = if let Some(db) = table_ref.database { + if db == default_database { + base_name + } else { + format!("{}_{}", db, base_name) + } + } else { + base_name + }; + + InfrastructureSignature::Table { id: qualified_id } + }) + .collect(); + + // Extract pushes_data_to (target table for MV) + let (target_base_name, _version) = extract_version_from_table_name(&target_table); + // target_table might be qualified (db.table) + let (target_db, target_name_only) = split_qualified_name(&target_base_name); + + let target_qualified_id = if let Some(target_db) = target_db { + if target_db == default_database { + target_name_only + } else { + format!("{}_{}", target_db, target_name_only) + } + } else { + target_name_only + }; + + let pushes_data_to = vec![InfrastructureSignature::Table { + id: target_qualified_id, + }]; + + Ok(SqlResource { + name, + database: Some(_database), + setup: vec![setup], + teardown: vec![teardown], + pulls_data_from, + pushes_data_to, + }) +} + +/// Reconstructs a SqlResource from a view's CREATE statement +/// +/// # Arguments +/// * `name` - The name of the view +/// * `as_select` - The SELECT part of the query (clean, from system.tables) +/// * `database` - The database where the view is located +/// * `default_database` - The default database for resolving unqualified table references +/// +/// # Returns +/// * `Result` - The reconstructed SqlResource +fn reconstruct_sql_resource_from_view( + name: String, + as_select: String, + _database: String, + default_database: &str, +) -> Result { + // Reconstruct the canonical CREATE statement using as_select. + // This ensures we have a clean definition without column types in the header, + // matching how Moose generates views (just `CREATE VIEW ... AS SELECT ...`). 
+ // CREATE VIEW IF NOT EXISTS name AS select + let setup_raw = format!("CREATE VIEW IF NOT EXISTS {} AS {}", name, as_select); + + // Normalize the SQL for consistent comparison with user-defined views + let setup = normalize_sql_for_comparison(&setup_raw, default_database); + + // Generate teardown script + let teardown = format!("DROP VIEW IF EXISTS `{}`", name); + + // Parse as_select to get source tables (lineage) + let source_tables = extract_source_tables_from_query(&as_select).map_err(|e| { + OlapChangesError::DatabaseError(format!( + "Failed to extract source tables from View {}: {}", + name, e + )) + })?; + + // Extract pulls_data_from (source tables) + let pulls_data_from = source_tables + .into_iter() + .map(|table_ref| { + // Get the table name, strip version suffix if present + let table_name = table_ref.table; + let (base_name, _version) = extract_version_from_table_name(&table_name); + + // Use database from table reference if available, otherwise use default + let qualified_id = if let Some(db) = table_ref.database { + if db == default_database { + base_name + } else { + format!("{}_{}", db, base_name) + } + } else { + base_name + }; + + InfrastructureSignature::Table { id: qualified_id } + }) + .collect(); + + // Regular views don't push data to tables + let pushes_data_to = vec![]; + + Ok(SqlResource { + name, + database: Some(_database), + setup: vec![setup], + teardown: vec![teardown], + pulls_data_from, + pushes_data_to, + }) } /// Regex pattern to find keywords that terminate an ORDER BY clause diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/sql_parser.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/sql_parser.rs index 873008a0b6..f0a37cd903 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/sql_parser.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/sql_parser.rs @@ -5,11 +5,13 @@ use crate::infrastructure::olap::clickhouse::model::ClickHouseIndex; use sqlparser::ast::{ - Expr, ObjectName, Query, Select, SelectItem, Statement, TableFactor, TableWithJoins, + Expr, ObjectName, ObjectNamePart, Query, Select, SelectItem, SetExpr, Statement, TableFactor, + TableWithJoins, VisitMut, VisitorMut, }; use sqlparser::dialect::ClickHouseDialect; use sqlparser::parser::Parser; use std::collections::HashSet; +use std::ops::ControlFlow; #[derive(Debug, Clone, PartialEq)] pub struct MaterializedViewStatement { @@ -406,6 +408,166 @@ pub fn extract_indexes_from_create_table(sql: &str) -> Result { + default_database: &'a str, +} + +impl<'a> VisitorMut for Normalizer<'a> { + type Break = (); + + fn pre_visit_table_factor( + &mut self, + table_factor: &mut TableFactor, + ) -> ControlFlow { + if let TableFactor::Table { name, .. } = table_factor { + // Strip default database prefix + if name.0.len() == 2 { + if let ObjectNamePart::Identifier(ident) = &name.0[0] { + if ident.value.eq_ignore_ascii_case(self.default_database) { + name.0.remove(0); + } + } + } + // Unquote table names + for part in &mut name.0 { + if let ObjectNamePart::Identifier(ident) = part { + ident.quote_style = None; + ident.value = ident.value.replace('`', ""); + } + } + } + ControlFlow::Continue(()) + } + + fn pre_visit_expr(&mut self, expr: &mut Expr) -> ControlFlow { + match expr { + Expr::Identifier(ident) => { + ident.quote_style = None; + ident.value = ident.value.replace('`', ""); + } + Expr::Function(func) => { + // Uppercase function names (e.g. 
count -> COUNT) + if let Some(ObjectNamePart::Identifier(ident)) = func.name.0.last_mut() { + let upper = ident.value.to_uppercase(); + if matches!( + upper.as_str(), + "COUNT" + | "SUM" + | "AVG" + | "MIN" + | "MAX" + | "ABS" + | "COALESCE" + | "IF" + | "DISTINCT" + ) { + ident.value = upper; + } + ident.quote_style = None; + ident.value = ident.value.replace('`', ""); + } + } + _ => {} + } + ControlFlow::Continue(()) + } + + fn pre_visit_statement(&mut self, statement: &mut Statement) -> ControlFlow { + if let Statement::CreateView { name, to, .. } = statement { + // Strip default database prefix from view name + if name.0.len() == 2 { + if let ObjectNamePart::Identifier(ident) = &name.0[0] { + if ident.value.eq_ignore_ascii_case(self.default_database) { + name.0.remove(0); + } + } + } + + for part in &mut name.0 { + if let ObjectNamePart::Identifier(ident) = part { + ident.quote_style = None; + ident.value = ident.value.replace('`', ""); + } + } + if let Some(to_name) = to { + // Strip default database prefix from TO table + if to_name.0.len() == 2 { + if let ObjectNamePart::Identifier(ident) = &to_name.0[0] { + if ident.value.eq_ignore_ascii_case(self.default_database) { + to_name.0.remove(0); + } + } + } + + for part in &mut to_name.0 { + if let ObjectNamePart::Identifier(ident) = part { + ident.quote_style = None; + ident.value = ident.value.replace('`', ""); + } + } + } + } + ControlFlow::Continue(()) + } + + fn pre_visit_query(&mut self, query: &mut Query) -> ControlFlow { + // Handle SELECT items (including aliases) + if let SetExpr::Select(select) = &mut *query.body { + for item in &mut select.projection { + if let SelectItem::ExprWithAlias { alias, .. } = item { + alias.quote_style = None; + alias.value = alias.value.replace('`', ""); + } + } + } + ControlFlow::Continue(()) + } +} + +pub fn normalize_sql_for_comparison(sql: &str, default_database: &str) -> String { + // 1. Parse with sqlparser (AST-based structural normalization) + // This handles stripping default database prefixes (e.g., `local.Table` -> `Table`) + // and basic unquoting where the parser understands the structure. + let dialect = ClickHouseDialect {}; + let intermediate = match Parser::parse_sql(&dialect, sql) { + Ok(mut ast) => { + if ast.is_empty() { + return sql.trim().to_string(); + } + + // 2. Walk AST to normalize (strip database prefixes, unquote) + let mut normalizer = Normalizer { default_database }; + for statement in &mut ast { + let _ = statement.visit(&mut normalizer); + } + + // 3. 
Convert back to string + ast[0].to_string() + } + Err(_e) => { + // Fallback if parsing fails: rudimentary string replacement + let mut result = sql.to_string(); + if !default_database.is_empty() { + let prefix_pattern = format!("{}.", default_database); + result = result.replace(&prefix_pattern, ""); + } + result + } + }; + + intermediate.trim().to_string() +} + pub fn parse_create_materialized_view( sql: &str, ) -> Result { @@ -447,7 +609,7 @@ pub fn parse_create_materialized_view( let select_statement = format!("{}", query); // Extract source tables from the query - let source_tables = extract_source_tables_from_query(query)?; + let source_tables = extract_source_tables_from_query_ast(query)?; Ok(MaterializedViewStatement { view_name, @@ -483,7 +645,7 @@ pub fn parse_insert_select(sql: &str) -> Result String { format!("{}", name).replace('`', "") } -fn split_qualified_name(name: &str) -> (Option, String) { +pub fn split_qualified_name(name: &str) -> (Option, String) { if let Some(dot_pos) = name.rfind('.') { let database = name[..dot_pos].to_string(); let table = name[dot_pos + 1..].to_string(); @@ -525,7 +687,25 @@ fn split_qualified_name(name: &str) -> (Option, String) { } } -fn extract_source_tables_from_query(query: &Query) -> Result, SqlParseError> { +pub fn extract_source_tables_from_query(sql: &str) -> Result, SqlParseError> { + let dialect = ClickHouseDialect {}; + let ast = Parser::parse_sql(&dialect, sql)?; + + if ast.len() != 1 { + // Should be exactly one query + return Err(SqlParseError::UnsupportedStatement); + } + + if let Statement::Query(query) = &ast[0] { + extract_source_tables_from_query_ast(query) + } else { + Err(SqlParseError::UnsupportedStatement) + } +} + +fn extract_source_tables_from_query_ast( + query: &Query, +) -> Result, SqlParseError> { let mut tables = HashSet::new(); extract_tables_from_query_recursive(query, &mut tables)?; Ok(tables.into_iter().collect()) @@ -1557,4 +1737,80 @@ pub mod tests { let indexes = extract_indexes_from_create_table(NESTED_OBJECTS_SQL).unwrap(); assert_eq!(indexes.len(), 0); } + + #[test] + fn test_normalize_sql_removes_backticks() { + let input = "SELECT `column1`, `column2` FROM `table_name`"; + let result = normalize_sql_for_comparison(input, ""); + assert!(!result.contains('`')); + assert!(result.contains("column1")); + assert!(result.contains("table_name")); + } + + #[test] + fn test_normalize_sql_uppercases_keywords() { + let input = "select count(id) as total from users where active = true"; + let result = normalize_sql_for_comparison(input, ""); + assert!(result.contains("SELECT")); + assert!(result.contains("COUNT")); + assert!(result.contains("AS")); + assert!(result.contains("FROM")); + assert!(result.contains("WHERE")); + } + + #[test] + fn test_normalize_sql_collapses_whitespace() { + let input = "SELECT\n col1,\n col2\n FROM\n my_table"; + let result = normalize_sql_for_comparison(input, ""); + assert!(!result.contains('\n')); + assert_eq!(result, "SELECT col1, col2 FROM my_table"); + } + + #[test] + fn test_normalize_sql_removes_database_prefix() { + let input = "SELECT * FROM mydb.table1 JOIN mydb.table2"; + let result = normalize_sql_for_comparison(input, "mydb"); + assert!(!result.contains("mydb.")); + assert!(result.contains("table1")); + assert!(result.contains("table2")); + } + + #[test] + fn test_normalize_sql_comprehensive() { + // Test with all differences at once + let user_sql = "CREATE MATERIALIZED VIEW IF NOT EXISTS `MV`\n TO `Target`\n AS SELECT\n count(`id`) as total\n FROM `Source`"; + let ch_sql 
= "CREATE MATERIALIZED VIEW IF NOT EXISTS MV TO Target AS SELECT COUNT(id) AS total FROM Source"; + + let normalized_user = normalize_sql_for_comparison(user_sql, ""); + let normalized_ch = normalize_sql_for_comparison(ch_sql, ""); + + assert_eq!(normalized_user, normalized_ch); + } + + #[test] + fn test_normalize_sql_with_database_prefix() { + let user_sql = "CREATE VIEW `MyView` AS SELECT `col` FROM `MyTable`"; + let ch_sql = "CREATE VIEW local.MyView AS SELECT col FROM local.MyTable"; + + let normalized_user = normalize_sql_for_comparison(user_sql, "local"); + let normalized_ch = normalize_sql_for_comparison(ch_sql, "local"); + + assert_eq!(normalized_user, normalized_ch); + } + + #[test] + fn test_normalize_sql_handles_backticks_on_reserved_keyword_aliases() { + // ClickHouse automatically adds backticks around reserved keywords like "table" + let ch_sql = "CREATE MATERIALIZED VIEW mv AS SELECT date, 'value' AS `table` FROM source"; + // User code typically doesn't have backticks + let user_sql = "CREATE MATERIALIZED VIEW mv AS SELECT date, 'value' AS table FROM source"; + + let normalized_ch = normalize_sql_for_comparison(ch_sql, ""); + let normalized_user = normalize_sql_for_comparison(user_sql, ""); + + assert_eq!(normalized_ch, normalized_user); + // Both should normalize to the version without backticks + assert!(normalized_ch.contains("AS table")); + assert!(!normalized_ch.contains("AS `table`")); + } } diff --git a/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs b/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs index d3f5644820..ca9589426b 100644 --- a/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs +++ b/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs @@ -2215,6 +2215,7 @@ mod tests { // Create SQL resource for a materialized view let mv_sql_resource = SqlResource { name: "mv_a_to_b".to_string(), + database: None, setup: vec![ "CREATE MATERIALIZED VIEW mv_a_to_b TO table_b AS SELECT * FROM table_a" .to_string(), @@ -2360,6 +2361,7 @@ mod tests { // Create SQL resource for a materialized view let mv_sql_resource = SqlResource { name: "mv_a_to_b".to_string(), + database: None, setup: vec![ "CREATE MATERIALIZED VIEW mv_a_to_b TO table_b AS SELECT * FROM table_a" .to_string(), @@ -2388,7 +2390,7 @@ mod tests { dependency_info: DependencyInfo { // For teardown: Table A depends on MV being gone first pulls_data_from: vec![InfrastructureSignature::SqlResource { - id: "mv_a_to_b".to_string(), + id: mv_sql_resource.name.clone(), }], pushes_data_to: vec![], }, @@ -2400,7 +2402,7 @@ mod tests { dependency_info: DependencyInfo { // For teardown: Table B depends on MV being gone first pulls_data_from: vec![InfrastructureSignature::SqlResource { - id: "mv_a_to_b".to_string(), + id: mv_sql_resource.name.clone(), }], pushes_data_to: vec![], }, @@ -2509,6 +2511,7 @@ mod tests { // Create SQL resource for materialized view let resource = SqlResource { name: "mv_a_to_b".to_string(), + database: None, setup: vec![ "CREATE MATERIALIZED VIEW mv_a_to_b TO table_b AS SELECT * FROM table_a" .to_string(), @@ -2615,7 +2618,7 @@ mod tests { dependency_info: DependencyInfo { // For teardown: Table A depends on MV being gone first pulls_data_from: vec![InfrastructureSignature::SqlResource { - id: "mv_a_to_b".to_string(), + id: resource.name.clone(), }], pushes_data_to: vec![], }, @@ -2626,7 +2629,7 @@ mod tests { dependency_info: DependencyInfo { // For teardown: Table B depends on MV being gone first pulls_data_from: vec![InfrastructureSignature::SqlResource { - 
id: "mv_a_to_b".to_string(), + id: resource.name.clone(), }], pushes_data_to: vec![], }, diff --git a/apps/framework-cli/src/infrastructure/olap/mod.rs b/apps/framework-cli/src/infrastructure/olap/mod.rs index 9df6920774..ac80ec4d9e 100644 --- a/apps/framework-cli/src/infrastructure/olap/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/mod.rs @@ -1,5 +1,6 @@ use clickhouse::ClickhouseChangesError; +use crate::framework::core::infrastructure::sql_resource::SqlResource; use crate::infrastructure::olap::clickhouse::TableWithUnsupportedType; use crate::{ framework::core::infrastructure::table::Table, framework::core::infrastructure_map::OlapChange, @@ -52,6 +53,30 @@ pub trait OlapOperations { db_name: &str, project: &Project, ) -> Result<(Vec
    , Vec), OlapChangesError>; + + /// Retrieves all SQL resources (views and materialized views) from the database + /// + /// # Arguments + /// + /// * `db_name` - The name of the database to list SQL resources from + /// * `default_database` - The default database name for resolving unqualified table references + /// + /// # Returns + /// + /// * `Result, OlapChangesError>` - A list of SqlResource objects + /// + /// # Errors + /// + /// Returns `OlapChangesError` if: + /// - The database connection fails + /// - The database doesn't exist + /// - The query execution fails + /// - SQL parsing fails + async fn list_sql_resources( + &self, + db_name: &str, + default_database: &str, + ) -> Result, OlapChangesError>; } /// This method dispatches the execution of the changes to the right olap storage. diff --git a/packages/protobuf/infrastructure_map.proto b/packages/protobuf/infrastructure_map.proto index 36cdaa2920..96887f6167 100644 --- a/packages/protobuf/infrastructure_map.proto +++ b/packages/protobuf/infrastructure_map.proto @@ -412,6 +412,9 @@ message SqlResource { repeated string teardown = 3; repeated InfrastructureSignature pulls_data_from = 5; repeated InfrastructureSignature pushes_data_to = 6; + // Optional database name for multi-database support + // When not specified, uses the global ClickHouse config database + optional string database = 7; } message InfrastructureSignature { diff --git a/packages/py-moose-lib/moose_lib/streaming/streaming_function_runner.py b/packages/py-moose-lib/moose_lib/streaming/streaming_function_runner.py index 87075aa027..fd3ff066ff 100644 --- a/packages/py-moose-lib/moose_lib/streaming/streaming_function_runner.py +++ b/packages/py-moose-lib/moose_lib/streaming/streaming_function_runner.py @@ -318,6 +318,7 @@ def _sr_json_deserializer(m: bytes): sasl_mechanism=sasl_config.get("mechanism"), security_protocol=args.security_protocol, enable_auto_commit=False, # Disable auto-commit for at-least-once semantics + auto_offset_reset='earliest', ) consumer = get_kafka_consumer(**kwargs) return consumer From bf7abcb12f699b9220af4607bad4ec444c62834d Mon Sep 17 00:00:00 2001 From: Lucio Franco Date: Mon, 24 Nov 2025 21:45:07 -0500 Subject: [PATCH 45/59] Add safe-chain to nix infra (#3032) Wrote this today https://github.com/LucioFranco/safe-chain-nix > [!NOTE] > [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) is generating a summary for commit 5ecd96328100b71fb2946a886cf33ba57c582a34. Configure [here](https://cursor.com/dashboard?tab=bugbot). 
--- flake.lock | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++- flake.nix | 22 ++++++++++++++------- 2 files changed, 71 insertions(+), 8 deletions(-) diff --git a/flake.lock b/flake.lock index 5058b35559..5c221858bd 100644 --- a/flake.lock +++ b/flake.lock @@ -18,6 +18,24 @@ "type": "github" } }, + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, "nixpkgs": { "locked": { "lastModified": 1761373498, @@ -53,7 +71,8 @@ "inputs": { "flake-parts": "flake-parts", "nixpkgs": "nixpkgs", - "rust-overlay": "rust-overlay" + "rust-overlay": "rust-overlay", + "safe-chain-nix": "safe-chain-nix" } }, "rust-overlay": { @@ -75,6 +94,42 @@ "repo": "rust-overlay", "type": "github" } + }, + "safe-chain-nix": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1764035635, + "narHash": "sha256-BQXF3dmdSY9k6fS82HdmYjAsrU+Y3o7rDK8rh9oYY6g=", + "owner": "LucioFranco", + "repo": "safe-chain-nix", + "rev": "b286fdec32d947cd1f25c37b4b3c4ecca6f796b2", + "type": "github" + }, + "original": { + "owner": "LucioFranco", + "repo": "safe-chain-nix", + "type": "github" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } } }, "root": "root", diff --git a/flake.nix b/flake.nix index 95ec173016..f2617ad586 100644 --- a/flake.nix +++ b/flake.nix @@ -8,6 +8,10 @@ url = "github:oxalica/rust-overlay"; inputs.nixpkgs.follows = "nixpkgs"; }; + safe-chain-nix = { + url = "github:LucioFranco/safe-chain-nix"; + inputs.nixpkgs.follows = "nixpkgs"; + }; }; outputs = @@ -36,6 +40,9 @@ overlays = [ (import inputs.rust-overlay) ]; }; + # Safe-chain wrapper for malware protection + safeChain = inputs.safe-chain-nix.lib.${system}.safeChain; + # Rust toolchain rustToolchain = pkgs.rust-bin.stable.latest.default.override { extensions = [ @@ -45,19 +52,20 @@ ]; }; - # Node.js with PNPM - nodejs = pkgs.nodejs_20; + # Node.js with PNPM (wrapped with safe-chain for malware protection) + nodejs = safeChain.wrapNode pkgs.nodejs_20; pnpm = pkgs.pnpm; - # Python with required packages + # Python with required packages (wrapped with safe-chain for malware protection) python = pkgs.python312; - pythonEnv = python.withPackages ( + pythonEnv = (python.withPackages ( ps: with ps; [ pip setuptools wheel ] - ); + )); + wrappedPython = safeChain.wrapPython pythonEnv; # Common build inputs commonBuildInputs = @@ -137,11 +145,11 @@ name = "moose"; buildInputs = [ - # Languages + # Languages (with safe-chain malware protection) rustToolchain nodejs pnpm - pythonEnv + wrappedPython # Development tools pkgs.git From afe842dd9b9ac339be690fdb15c66082e666c5d3 Mon Sep 17 00:00:00 2001 From: Dave Seleno <958603+onelesd@users.noreply.github.com> Date: Mon, 24 Nov 2025 20:53:55 -0800 Subject: [PATCH 46/59] fix unhandled LowCardinality(String) column types in clickhouse_rs client (#3025) [clickhouse_rs](https://docs.rs/clickhouse-rs/latest/clickhouse_rs/) doesn't support selecting 
LowCardinality(String) but the http client does. this refactors moose peek and moose query to use the official [clickhouse](https://docs.rs/clickhouse/latest/clickhouse/) http client which is better supported. --- > [!NOTE] > Switches moose peek/query to the HTTP-based ClickHouse client with JSON output and removes the deprecated clickhouse-rs client and related code. > > - **CLI (ClickHouse query path)**: > - Replace native `clickhouse-rs` usage with HTTP `clickhouse` client in `cli/routines/{peek,query}.rs`. > - Add `infrastructure/olap/clickhouse_http_client.rs` with JSONEachRow HTTP querying via `reqwest`. > - Remove legacy `clickhouse_alt_client.rs`; update call sites and error handling. > - **OLAP ClickHouse module**: > - Clean up `mod.rs` by dropping `clickhouse_rs`-specific code (e.g., `check_table_size`). > - **Core/Planning**: > - Remove `PlanningError::Clickhouse` variant tied to `clickhouse_rs`. > - **Dependencies**: > - Remove `clickhouse-rs` from `apps/framework-cli/Cargo.toml` (keep `clickhouse` crate); lockfile updates accordingly. > - **Docs**: > - Add Rust CLI debug invocation instructions in `AGENTS.md`. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit dddd524023be049c3a4f33f65282d2b0c20558e5. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). --- AGENTS.md | 5 + Cargo.lock | 123 ------ apps/framework-cli/Cargo.toml | 1 - apps/framework-cli/src/cli/routines/peek.rs | 83 ++-- apps/framework-cli/src/cli/routines/query.rs | 75 ++-- apps/framework-cli/src/framework/core/plan.rs | 4 - .../src/infrastructure/olap/clickhouse/mod.rs | 47 +-- .../olap/clickhouse_alt_client.rs | 360 ------------------ .../olap/clickhouse_http_client.rs | 108 ++++++ .../src/infrastructure/olap/mod.rs | 2 +- 10 files changed, 198 insertions(+), 610 deletions(-) delete mode 100644 apps/framework-cli/src/infrastructure/olap/clickhouse_alt_client.rs create mode 100644 apps/framework-cli/src/infrastructure/olap/clickhouse_http_client.rs diff --git a/AGENTS.md b/AGENTS.md index 142f137fbd..1e4c0fa1ff 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -15,6 +15,11 @@ Multi-language monorepo (Rust CLI + TypeScript/Python libraries) using PNPM work ### Rust - **Build**: `cargo build` +- **Debug CLI**: Use debug build with verbose logging for ALL moose CLI commands: + ```bash + RUST_LOG=debug RUST_BACKTRACE=1 MOOSE_LOGGER__LEVEL=Debug ~/repos/moosestack/target/debug/moose-cli + ``` + Example: `RUST_LOG=debug RUST_BACKTRACE=1 MOOSE_LOGGER__LEVEL=Debug ~/repos/moosestack/target/debug/moose-cli init my-app typescript` - **Test all**: `cargo test` - **Test single**: `cargo test ` or `cargo test --package --test ` - **Lint**: `cargo clippy --all-targets -- -D warnings` (REQUIRED pre-commit, no warnings allowed) diff --git a/Cargo.lock b/Cargo.lock index e573711f5b..efb795dffe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -556,28 +556,6 @@ dependencies = [ "windows-link 0.2.1", ] -[[package]] -name = "chrono-tz" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59ae0466b83e838b81a54256c39d5d7c20b9d7daa10510a242d9b75abd5936e" -dependencies = [ - "chrono", - "chrono-tz-build", - "phf", -] - -[[package]] -name = "chrono-tz-build" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f" -dependencies = [ - "parse-zoneinfo", - "phf", - "phf_codegen", -] - [[package]] name = "cityhash-rs" 
version = "1.0.1" @@ -664,45 +642,6 @@ dependencies = [ "syn", ] -[[package]] -name = "clickhouse-rs" -version = "1.1.0-alpha.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "802fe62a5480415bcdbb5217b3ea029d748c9a3ce3b884767cf58888e33e7f65" -dependencies = [ - "byteorder", - "chrono", - "chrono-tz", - "clickhouse-rs-cityhash-sys", - "combine", - "crossbeam", - "either", - "futures-core", - "futures-sink", - "futures-util", - "hostname", - "lazy_static", - "log", - "lz4", - "native-tls", - "percent-encoding", - "pin-project", - "thiserror 1.0.69", - "tokio", - "tokio-native-tls", - "url", - "uuid", -] - -[[package]] -name = "clickhouse-rs-cityhash-sys" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4baf9d4700a28d6cb600e17ed6ae2b43298a5245f1f76b4eab63027ebfd592b9" -dependencies = [ - "cc", -] - [[package]] name = "clickhouse-types" version = "0.1.0" @@ -844,19 +783,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "crossbeam" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" -dependencies = [ - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-epoch", - "crossbeam-queue", - "crossbeam-utils", -] - [[package]] name = "crossbeam-channel" version = "0.5.15" @@ -1787,17 +1713,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "hostname" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c731c3e10504cc8ed35cfe2f1db4c9274c3d35fa486e3b31df46f068ef3e867" -dependencies = [ - "libc", - "match_cfg", - "winapi", -] - [[package]] name = "http" version = "0.2.12" @@ -2572,9 +2487,6 @@ name = "log" version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" -dependencies = [ - "serde", -] [[package]] name = "logos" @@ -2619,25 +2531,6 @@ dependencies = [ "hashbrown 0.15.5", ] -[[package]] -name = "lz4" -version = "1.28.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a20b523e860d03443e98350ceaac5e71c6ba89aea7d960769ec3ce37f4de5af4" -dependencies = [ - "lz4-sys", -] - -[[package]] -name = "lz4-sys" -version = "1.11.1+lz4-1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6" -dependencies = [ - "cc", - "libc", -] - [[package]] name = "lz4_flex" version = "0.11.5" @@ -2702,12 +2595,6 @@ dependencies = [ "malachite-nz", ] -[[package]] -name = "match_cfg" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4" - [[package]] name = "matchers" version = "0.2.0" @@ -2870,7 +2757,6 @@ dependencies = [ "chrono", "clap", "clickhouse", - "clickhouse-rs", "comfy-table", "config", "constant_time_eq", @@ -3463,15 +3349,6 @@ dependencies = [ "windows-link 0.2.1", ] -[[package]] -name = "parse-zoneinfo" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" -dependencies = [ - "regex", -] - [[package]] name = "password-hash" version = "0.5.0" diff --git a/apps/framework-cli/Cargo.toml b/apps/framework-cli/Cargo.toml index f159eaa128..1af2883114 100644 --- a/apps/framework-cli/Cargo.toml +++ 
b/apps/framework-cli/Cargo.toml @@ -28,7 +28,6 @@ serde = { version = "1.0", features = ["derive"] } config = { version = "0.13.1", features = ["toml"] } home = "0.5.5" clickhouse = { version = "0.14.0", features = ["uuid", "native-tls"] } -clickhouse-rs = { version = "1.1.0-alpha.1", features = ["tls"] } handlebars = "5.1" rdkafka = { version = "0.38", features = ["ssl"] } rdkafka-sys = "4.7" # Needed for rd_kafka_wait_destroyed diff --git a/apps/framework-cli/src/cli/routines/peek.rs b/apps/framework-cli/src/cli/routines/peek.rs index 7e8db9bdd1..affadb731c 100644 --- a/apps/framework-cli/src/cli/routines/peek.rs +++ b/apps/framework-cli/src/cli/routines/peek.rs @@ -6,7 +6,7 @@ use crate::cli::display::Message; use crate::framework::core::infrastructure_map::InfrastructureMap; use crate::infrastructure::olap::clickhouse::mapper::std_table_to_clickhouse_table; -use crate::infrastructure::olap::clickhouse_alt_client::{get_pool, select_some_as_json}; +use crate::infrastructure::olap::clickhouse_http_client::create_query_client; use crate::project::Project; use super::{setup_redis_client, RoutineFailure, RoutineSuccess}; @@ -49,13 +49,8 @@ pub async fn peek( file: Option, is_stream: bool, ) -> Result { - let pool = get_pool(&project.clickhouse_config); - let mut client = pool.get_handle().await.map_err(|_| { - RoutineFailure::error(Message::new( - "Failed".to_string(), - "Error connecting to storage".to_string(), - )) - })?; + // Get HTTP-based ClickHouse client + let client = create_query_client(&project.clickhouse_config); let redis_client = setup_redis_client(project.clone()).await.map_err(|e| { RoutineFailure::error(Message { @@ -163,22 +158,64 @@ pub async fn peek( )) })?; - Box::pin( - select_some_as_json( - &project.clickhouse_config.db_name, - &table_ref, - &mut client, - limit as i64, - ) - .await - .map_err(|_| { - RoutineFailure::error(Message::new( - "Failed".to_string(), - "Error selecting data".to_string(), - )) - })? 
- .map(|result| anyhow::Ok(result?)), + // Build the SELECT query + let order_by = match &table_ref.order_by { + crate::framework::core::infrastructure::table::OrderBy::Fields(fields) + if !fields.is_empty() => + { + format!( + "ORDER BY {}", + crate::infrastructure::olap::clickhouse::model::wrap_and_join_column_names( + fields, ", " + ) + ) + } + crate::framework::core::infrastructure::table::OrderBy::SingleExpr(expr) => { + format!("ORDER BY {expr}") + } + _ => { + // Fall back to primary key + let key_columns: Vec = table_ref + .primary_key_columns() + .iter() + .map(|s| s.to_string()) + .collect(); + + if key_columns.is_empty() { + "".to_string() + } else { + format!( + "ORDER BY {}", + crate::infrastructure::olap::clickhouse::model::wrap_and_join_column_names( + &key_columns, + ", " + ) + ) + } + } + }; + + let query = format!( + "SELECT * FROM \"{}\".\"{}\" {} LIMIT {}", + project.clickhouse_config.db_name, table_ref.name, order_by, limit + ); + + info!("Peek query: {}", query); + + // Execute query + let rows = crate::infrastructure::olap::clickhouse_http_client::query_as_json_stream( + &client, &query, ) + .await + .map_err(|e| { + RoutineFailure::error(Message::new( + "Peek".to_string(), + format!("ClickHouse query error: {}", e), + )) + })?; + + // Convert Vec to stream + Box::pin(tokio_stream::iter(rows.into_iter().map(anyhow::Ok))) }; let mut success_count = 0; diff --git a/apps/framework-cli/src/cli/routines/query.rs b/apps/framework-cli/src/cli/routines/query.rs index bb060310ff..44405801c4 100644 --- a/apps/framework-cli/src/cli/routines/query.rs +++ b/apps/framework-cli/src/cli/routines/query.rs @@ -6,10 +6,9 @@ use crate::cli::display::Message; use crate::cli::routines::{setup_redis_client, RoutineFailure, RoutineSuccess}; use crate::framework::core::infrastructure_map::InfrastructureMap; -use crate::infrastructure::olap::clickhouse_alt_client::{get_pool, row_to_json}; +use crate::infrastructure::olap::clickhouse_http_client::create_query_client; use crate::project::Project; -use futures::StreamExt; use log::info; use std::io::Read; use std::path::PathBuf; @@ -113,15 +112,8 @@ pub async fn query( info!("Executing SQL: {}", sql_query); - // Get ClickHouse connection pool - let pool = get_pool(&project.clickhouse_config); - - let mut client = pool.get_handle().await.map_err(|_| { - RoutineFailure::error(Message::new( - "Failed".to_string(), - "Error connecting to storage".to_string(), - )) - })?; + // Get HTTP-based ClickHouse client + let client = create_query_client(&project.clickhouse_config); let redis_client = setup_redis_client(project.clone()).await.map_err(|e| { RoutineFailure::error(Message { @@ -131,7 +123,6 @@ pub async fn query( })?; // Validate that infrastructure state exists and is accessible. - // The value is not used further, but we fail early if it cannot be loaded. 
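An aside for readers following the peek refactor above: the HTTP client returns a fully materialized Vec of serde_json::Value rows, and the patch wraps that Vec back into a pinned stream so the existing row-printing loop stays unchanged. A minimal, self-contained sketch of that wrap-and-drain pattern (crate names as used in the patch; the sample rows are made up):

```rust
use futures::StreamExt;
use serde_json::{json, Value};

#[tokio::main]
async fn main() {
    // Stand-in for the rows returned by query_as_json_stream (one JSON object per row).
    let rows: Vec<Value> = vec![json!({"id": 1}), json!({"id": 2})];

    // Same shape as the patch: convert the Vec into a pinned stream of anyhow::Result<Value>.
    let mut stream = Box::pin(tokio_stream::iter(rows.into_iter().map(anyhow::Ok)));

    // Downstream consumers keep using a stream-based loop.
    while let Some(row) = stream.next().await {
        println!("{}", row.expect("rows are already materialized"));
    }
}
```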
let _infra = InfrastructureMap::load_from_redis(&redis_client) .await .map_err(|_| { @@ -147,40 +138,27 @@ pub async fn query( )) })?; - // Execute query and stream results - let mut stream = client.query(&sql_query).stream(); - - let mut success_count = 0; - let mut enum_mappings: Vec>> = Vec::new(); - - while let Some(row_result) = stream.next().await { - let row = match row_result { - Ok(row) => row, - Err(e) => { - return Err(RoutineFailure::new( - Message::new("Query".to_string(), "ClickHouse query error".to_string()), - e, - )); - } - }; - - // Create enum mappings on first row (one None entry per column) - if enum_mappings.is_empty() { - enum_mappings = vec![None; row.len()]; - } + // Execute query and get results + let rows = crate::infrastructure::olap::clickhouse_http_client::query_as_json_stream( + &client, &sql_query, + ) + .await + .map_err(|e| { + RoutineFailure::error(Message::new( + "Query".to_string(), + format!("ClickHouse query error: {}", e), + )) + })?; - // Reuse peek's row_to_json with enum mappings - let value = row_to_json(&row, &enum_mappings).map_err(|e| { - RoutineFailure::new( - Message::new( - "Query".to_string(), - "Failed to convert row to JSON".to_string(), - ), - e, - ) - })?; + // Stream results to stdout + let success_count = rows.len().min(limit as usize); + for (idx, row) in rows.iter().enumerate() { + if idx >= limit as usize { + info!("Reached limit of {} rows", limit); + break; + } - let json = serde_json::to_string(&value).map_err(|e| { + let json = serde_json::to_string(row).map_err(|e| { RoutineFailure::new( Message::new( "Query".to_string(), @@ -192,16 +170,9 @@ pub async fn query( println!("{}", json); info!("{}", json); - success_count += 1; - - // Check limit to avoid unbounded queries - if success_count >= limit { - info!("Reached limit of {} rows", limit); - break; - } } - // Add newline for output cleanliness (like peek does) + // Add newline for output cleanliness println!(); Ok(RoutineSuccess::success(Message::new( diff --git a/apps/framework-cli/src/framework/core/plan.rs b/apps/framework-cli/src/framework/core/plan.rs index d02d7765df..c3978121c2 100644 --- a/apps/framework-cli/src/framework/core/plan.rs +++ b/apps/framework-cli/src/framework/core/plan.rs @@ -38,10 +38,6 @@ pub enum PlanningError { #[error("Failed to load primitive map")] PrimitiveMapLoading(#[from] crate::framework::core::primitive_map::PrimitiveMapLoadingError), - /// Error occurred while connecting to the Clickhouse database - #[error("Failed to connect to state storage")] - Clickhouse(#[from] clickhouse_rs::errors::Error), - /// Error occurred while connecting to Kafka #[error("Failed to connect to streaming engine")] Kafka(#[from] KafkaError), diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs index 3d932f8a8d..87a820c531 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs @@ -14,7 +14,6 @@ //! //! ## Dependencies //! - clickhouse: Client library for ClickHouse database -//! - clickhouse-rs: Alternative ClickHouse client //! - Framework core types and infrastructure //! //! ## Version Support @@ -32,9 +31,8 @@ //! 
``` use clickhouse::Client; -use clickhouse_rs::ClientHandle; + use errors::ClickhouseError; -use itertools::Itertools; use log::{debug, info, warn}; use mapper::{std_column_to_clickhouse_column, std_table_to_clickhouse_table}; use model::ClickHouseColumn; @@ -1499,49 +1497,6 @@ pub async fn fetch_tables_with_version( Ok(tables) } -/// Gets the number of rows in a table -/// -/// # Arguments -/// * `table_name` - Name of the table to check -/// * `config` - ClickHouse configuration -/// * `clickhouse` - Client handle for database operations -/// -/// # Returns -/// * `Result` - Number of rows in the table -/// -/// # Details -/// - Uses COUNT(*) for accurate row count -/// - Properly escapes table and database names -/// - Handles empty tables correctly -/// -/// # Example -/// ```rust -/// let size = check_table_size("users_1_0_0", &config, &mut client).await?; -/// println!("Table has {} rows", size); -/// ``` -pub async fn check_table_size( - table_name: &str, - config: &ClickHouseConfig, - clickhouse: &mut ClientHandle, -) -> Result { - info!("Checking size of {} table", table_name); - let result = clickhouse - .query(&format!( - "select count(*) from \"{}\".\"{}\"", - config.db_name.clone(), - table_name - )) - .fetch_all() - .await?; - let rows = result.rows().collect_vec(); - - let result: u64 = match rows.len() { - 1 => rows[0].get(0)?, - _ => panic!("Expected 1 result, got {:?}", rows.len()), - }; - Ok(result as i64) -} - pub struct TableWithUnsupportedType { pub database: String, pub name: String, diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse_alt_client.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse_alt_client.rs deleted file mode 100644 index 6924750754..0000000000 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse_alt_client.rs +++ /dev/null @@ -1,360 +0,0 @@ -/// # ClickHouse Alternative Client Module -/// -/// This module provides an alternative client implementation for interacting with ClickHouse. -/// It focuses on JSON serialization of query results. -/// -/// The module includes functionality for: -/// - Converting ClickHouse data types to JSON -/// - Querying tables and returning results as JSON -/// -/// This client is used primarily for data exploration (e.g., the peek command). -use std::num::TryFromIntError; -use std::str::FromStr; -use std::time::Duration; - -use chrono::{DateTime, Days, NaiveDate}; -use clickhouse_rs::errors::FromSqlError; -use clickhouse_rs::types::{ColumnType, Row}; -use clickhouse_rs::types::{FromSql, FromSqlResult, Options, ValueRef}; -use clickhouse_rs::ClientHandle; -use futures::stream::BoxStream; -use futures::StreamExt; -use itertools::{Either, Itertools}; -use log::{info, warn}; -use serde::Serialize; -use serde_json::{json, Map, Value}; - -use crate::framework::core::infrastructure::table::{EnumValue, OrderBy}; -use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; -use crate::infrastructure::olap::clickhouse::model::{ - wrap_and_join_column_names, ClickHouseColumnType, ClickHouseTable, -}; - -/// Creates a ClickHouse connection pool with the provided configuration. 
-/// -/// # Arguments -/// * `click_house_config` - ClickHouse configuration -/// -/// # Returns -/// * `clickhouse_rs::Pool` - Connection pool for ClickHouse -pub fn get_pool(click_house_config: &ClickHouseConfig) -> clickhouse_rs::Pool { - let address = format!( - "tcp://{}:{}", - click_house_config.host, click_house_config.native_port - ); - - if click_house_config.use_ssl && click_house_config.native_port == 9000 { - warn!( - "The default secure native port is 9440 instead of 9000. You may get a timeout error." - ) - } - - clickhouse_rs::Pool::new( - Options::from_str(&address) - .unwrap() - .secure(click_house_config.use_ssl) - .connection_timeout(Duration::from_secs(20)) - .database(&click_house_config.db_name) - .username(&click_house_config.user) - .password(&click_house_config.password), - ) -} - -/// Wrapper for ValueRef to implement FromSql trait. -struct ValueRefWrapper<'a>(ValueRef<'a>); -impl<'a> FromSql<'a> for ValueRefWrapper<'a> { - fn from_sql(value: ValueRef<'a>) -> FromSqlResult> { - Ok(ValueRefWrapper(value)) - } -} - -/// Converts a ClickHouse ValueRef to a JSON Value. -/// -/// This function handles all ClickHouse data types and converts them to appropriate -/// JSON representations. It also handles enum mappings for string enums. -/// -/// # Arguments -/// * `value_ref` - ClickHouse value reference -/// * `enum_mapping` - Optional mapping for enum values -/// -/// # Returns -/// * `Result` - JSON value or error -fn value_to_json( - value_ref: &ValueRef, - enum_mapping: &Option>, -) -> Result { - let result = match value_ref { - ValueRef::Bool(v) => json!(v), - ValueRef::UInt8(v) => json!(v), - ValueRef::UInt16(v) => json!(v), - ValueRef::UInt32(v) => json!(v), - ValueRef::UInt64(v) => json!(v), - ValueRef::Int8(v) => json!(v), - ValueRef::Int16(v) => json!(v), - ValueRef::Int32(v) => json!(v), - ValueRef::Int64(v) => json!(v), - // TODO: base64 encode if type is Bytes (probably Uint8Array in TS) - // In clickhouse the String type means arbitrary bytes - ValueRef::String(v) => json!(String::from_utf8_lossy(v)), - ValueRef::Float32(v) => json!(v), - ValueRef::Float64(v) => json!(v), - ValueRef::Date(v) => { - let unix_epoch = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); - let naive_date = unix_epoch.checked_add_days(Days::new((*v).into())).ok_or( - clickhouse_rs::errors::Error::FromSql(FromSqlError::OutOfRange), - )?; - json!(naive_date.to_string()) - } - - // in the following two cases the timezones are dropped - ValueRef::DateTime(t, _tz) => { - json!(DateTime::from_timestamp((*t).into(), 0) - .ok_or(clickhouse_rs::errors::Error::FromSql( - FromSqlError::OutOfRange - ))? - .to_rfc3339()) - } - ValueRef::DateTime64(value, (precision, _tz)) => { - // See to_datetime_opt in clickhouse-rs - let base10: i64 = 10; - - let nano = if *precision < 19 { - value * base10.pow(9 - precision) - } else { - 0_i64 - }; - - let sec = nano / 1_000_000_000; - let nsec: u32 = (nano - sec * 1_000_000_000).try_into().unwrap(); // always in range - - json!(DateTime::from_timestamp(sec, nsec).ok_or( - clickhouse_rs::errors::Error::FromSql(FromSqlError::OutOfRange) - )?) 
- } - - ValueRef::Nullable(Either::Left(_)) => Value::Null, - ValueRef::Nullable(Either::Right(v)) => value_to_json(v.as_ref(), enum_mapping)?, - ValueRef::Array(_t, values) => json!(values - .iter() - .map(|v| value_to_json(v, enum_mapping)) - .collect::, clickhouse_rs::errors::Error>>()?), - ValueRef::Decimal(d) => json!(f64::from(d.clone())), // consider using arbitrary_precision in serde_json - ValueRef::Uuid(_) => json!(value_ref.to_string()), - ValueRef::Enum16(_mapping, i) => convert_enum(i.internal(), enum_mapping), - ValueRef::Enum8(_mapping, i) => convert_enum(i.internal(), enum_mapping), - ValueRef::Ipv4(ip) => { - let ip_str = format!("{}.{}.{}.{}", ip[0], ip[1], ip[2], ip[3]); - json!(ip_str) - } - ValueRef::Ipv6(ip) => { - json!(ip - .chunks(2) - .map(|chunk| { format!("{:02x}{:02x}", chunk[0], chunk[1]) }) - .join(":")) - } - ValueRef::Map(_, _, m) => Value::Object( - m.iter() - .map(|(k, v)| { - Ok::<_, clickhouse_rs::errors::Error>(( - k.to_string(), - value_to_json(v, enum_mapping)?, - )) - }) - .collect::>()?, - ), - }; - Ok(result) -} - -/// Converts an enum value to a JSON value. -/// -/// This function handles both integer and string enums. For string enums, -/// it uses the provided mapping to convert the integer value to a string. -/// -/// # Arguments -/// * `i` - Enum integer value -/// * `enum_mapping` - Optional mapping for enum values -/// -/// # Returns -/// * `Value` - JSON value for the enum -fn convert_enum(i: I, enum_mapping: &Option>) -> Value -where - I: Serialize, - usize: TryFrom, -{ - match enum_mapping { - None => json!(i), - // unwrap is safe because of the invariant - - // enum_mapping is Some only when the TS enum has string values - Some(values) => json!(values[usize::try_from(i).unwrap() - 1]), - } -} - -/// Converts a ClickHouse row to a JSON object. -/// -/// This function converts each column in the row to a JSON value and -/// combines them into a JSON object. -/// -/// # Arguments -/// * `row` - ClickHouse row -/// * `enum_mappings` - Enum mappings for each column -/// -/// # Returns -/// * `Result` - JSON object or error -pub fn row_to_json( - row: &Row<'_, C>, - enum_mappings: &[Option>], -) -> Result -where - C: ColumnType, -{ - // can we use visitors to construct the JSON string directly, - // without constructing the Value::Object first - let mut result = Map::with_capacity(row.len()); - - for (i, enum_mapping) in enum_mappings.iter().enumerate() { - let value = value_to_json(&row.get::(i).unwrap().0, enum_mapping); - result.insert(row.name(i)?.into(), value?); - } - Ok(Value::Object(result)) -} - -/// Converts a ClickHouse column type to an enum mapping. -/// -/// This function extracts the enum mapping from a ClickHouse column type -/// if it's an enum with string values. -/// -/// # Arguments -/// * `t` - ClickHouse column type -/// -/// # Returns -/// * `Option>` - Enum mapping or None -fn column_type_to_enum_mapping(t: &ClickHouseColumnType) -> Option> { - match t { - ClickHouseColumnType::String - | ClickHouseColumnType::FixedString(_) - | ClickHouseColumnType::Boolean - | ClickHouseColumnType::ClickhouseInt(_) - | ClickHouseColumnType::ClickhouseFloat(_) - | ClickHouseColumnType::Decimal { .. } - | ClickHouseColumnType::DateTime - | ClickHouseColumnType::Date32 - | ClickHouseColumnType::Date - | ClickHouseColumnType::Map(_, _) - | ClickHouseColumnType::DateTime64 { .. 
} - | ClickHouseColumnType::IpV4 - | ClickHouseColumnType::IpV6 - | ClickHouseColumnType::Json(_) - | ClickHouseColumnType::Uuid - | ClickHouseColumnType::AggregateFunction { .. } - | ClickHouseColumnType::SimpleAggregateFunction { .. } - | ClickHouseColumnType::Bytes => None, - ClickHouseColumnType::Array(t) => column_type_to_enum_mapping(t.as_ref()), - ClickHouseColumnType::NamedTuple(_) | ClickHouseColumnType::Nested(_) => { - // Not entire sure I understand what this method does... do we just ignore the nested type? - todo!("Implement the nested type mapper") - } - // Geometry types have no enum mapping - ClickHouseColumnType::Point - | ClickHouseColumnType::Ring - | ClickHouseColumnType::LineString - | ClickHouseColumnType::MultiLineString - | ClickHouseColumnType::Polygon - | ClickHouseColumnType::MultiPolygon => None, - ClickHouseColumnType::Enum(values) => values.values.first().and_then(|m| match m.value { - EnumValue::Int(_) => None, - EnumValue::String(_) => Some( - values - .values - .iter() - .map(|member| match &member.value { - EnumValue::Int(_) => panic!("Mixed enum values."), - EnumValue::String(s) => s.as_str(), - }) - .collect::>(), - ), - }), - ClickHouseColumnType::Nullable(inner) => column_type_to_enum_mapping(inner), - ClickHouseColumnType::LowCardinality(inner) => column_type_to_enum_mapping(inner), - } -} - -/// Executes a SELECT query and returns the results as a stream of JSON objects. -/// -/// # Arguments -/// * `db_name` - Database name -/// * `table` - Table to query -/// * `client` - ClickHouse client -/// * `limit_offset_clause` - LIMIT/OFFSET clause for the query -/// -/// # Returns -/// * `Result>, clickhouse_rs::errors::Error>` - Stream of JSON objects or error -async fn select_as_json<'a>( - db_name: &str, - table: &'a ClickHouseTable, - client: &'a mut ClientHandle, - limit_offset_clause: &str, -) -> Result>, clickhouse_rs::errors::Error> -{ - let enum_mapping: Vec>> = table - .columns - .iter() - .map(|c| column_type_to_enum_mapping(&c.column_type)) - .collect(); - - let order_by = match &table.order_by { - OrderBy::Fields(v) if !v.is_empty() => { - format!("ORDER BY {}", wrap_and_join_column_names(v, ", ")) - } - OrderBy::SingleExpr(expr) => format!("ORDER BY {expr}"), - _ => { - // Fall back to primary key columns only if no explicit order_by is specified - let key_columns: Vec = table - .primary_key_columns() - .iter() - .map(|s| s.to_string()) - .collect(); - - if key_columns.is_empty() { - "".to_string() - } else { - format!( - "ORDER BY {}", - wrap_and_join_column_names(&key_columns, ", ") - ) - } - } - }; - - let query = &format!( - "select * from \"{}\".\"{}\" {} {}", - db_name, table.name, order_by, limit_offset_clause - ); - info!("select_as_json query: {}", query); - let stream = client - .query(query) - .stream() - .map(move |row| row_to_json(&row?, &enum_mapping)); - info!("select_as_json got data load stream."); - Ok(Box::pin(stream)) -} - -/// Executes a SELECT query with a LIMIT clause and returns the results as a stream of JSON objects. 
-/// -/// # Arguments -/// * `db_name` - Database name -/// * `table` - Table to query -/// * `client` - ClickHouse client -/// * `limit` - Limit for the query -/// -/// # Returns -/// * `Result>, clickhouse_rs::errors::Error>` - Stream of JSON objects or error -pub async fn select_some_as_json<'a>( - db_name: &str, - table: &'a ClickHouseTable, - client: &'a mut ClientHandle, - limit: i64, -) -> Result>, clickhouse_rs::errors::Error> -{ - select_as_json(db_name, table, client, &format!("limit {limit}")).await -} diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse_http_client.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse_http_client.rs new file mode 100644 index 0000000000..a02afdce0e --- /dev/null +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse_http_client.rs @@ -0,0 +1,108 @@ +//! HTTP-based ClickHouse client for query operations +//! +//! This module provides query functionality using the HTTP-based `clickhouse` crate. +//! Unlike the native protocol client (clickhouse-rs), this client: +//! - Supports all ClickHouse types including LowCardinality +//! - Uses JSON format for data serialization +//! - Is actively maintained +//! - Aligns with how consumption APIs access ClickHouse + +use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; +use crate::infrastructure::olap::clickhouse::{create_client, ConfiguredDBClient}; +use log::debug; +use serde_json::Value; + +/// Create a configured HTTP client for query operations +/// +/// # Arguments +/// * `clickhouse_config` - ClickHouse configuration +/// +/// # Returns +/// * `ConfiguredDBClient` - Configured client ready for queries +pub fn create_query_client(clickhouse_config: &ClickHouseConfig) -> ConfiguredDBClient { + create_client(clickhouse_config.clone()) +} + +/// Execute a SELECT query and return results as JSON +/// +/// # Arguments +/// * `client` - Configured ClickHouse client +/// * `query` - SQL query string +/// +/// # Returns +/// * Vec of JSON objects (one per row) +/// +/// # Implementation Note +/// Uses direct HTTP request to ClickHouse with JSONEachRow format since the +/// clickhouse crate doesn't natively support serde_json::Value deserialization. 
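As a hedged aside on the implementation note above: JSONEachRow is ClickHouse's newline-delimited JSON output format, one object per row, which is why the response body can be parsed line by line. A standalone sketch of that parse (the helper name and sample payload are illustrative, not part of the patch):

```rust
use serde_json::Value;

// JSONEachRow: each non-empty line of the HTTP response body is one JSON row.
fn parse_json_each_row(body: &str) -> Result<Vec<Value>, serde_json::Error> {
    body.lines()
        .filter(|line| !line.trim().is_empty())
        .map(serde_json::from_str)
        .collect()
}

fn main() {
    let body = "{\"num\":1,\"text\":\"test\"}\n{\"num\":2,\"text\":\"more\"}\n";
    let rows = parse_json_each_row(body).expect("valid JSONEachRow payload");
    assert_eq!(rows.len(), 2);
    assert_eq!(rows[0]["num"], 1);
    assert_eq!(rows[1]["text"], "more");
}
```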
+pub async fn query_as_json_stream( + client: &ConfiguredDBClient, + query: &str, +) -> Result, Box> { + debug!("Executing HTTP query: {}", query); + + let config = &client.config; + let protocol = if config.use_ssl { "https" } else { "http" }; + let url = format!("{}://{}:{}", protocol, config.host, config.host_port); + + // Use reqwest to make a raw HTTP request with JSONEachRow format + let http_client = reqwest::Client::new(); + let response = http_client + .post(&url) + .query(&[("database", &config.db_name)]) + .query(&[("default_format", "JSONEachRow")]) + .basic_auth(&config.user, Some(&config.password)) + .body(query.to_string()) + .send() + .await?; + + if !response.status().is_success() { + let status = response.status(); + let error_text = response.text().await.unwrap_or_default(); + return Err(format!("ClickHouse query failed ({}): {}", status, error_text).into()); + } + + let text = response.text().await?; + + // Parse each line as a separate JSON object (JSONEachRow format) + let mut results = Vec::new(); + for line in text.lines() { + if !line.trim().is_empty() { + let value: Value = serde_json::from_str(line)?; + results.push(value); + } + } + + Ok(results) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + #[ignore] // Requires running ClickHouse instance + async fn test_query_as_json_stream() { + let config = ClickHouseConfig { + db_name: "default".to_string(), + host: "localhost".to_string(), + host_port: 8123, + native_port: 9000, + user: "default".to_string(), + password: "".to_string(), + use_ssl: false, + host_data_path: None, + additional_databases: vec![], + clusters: None, + }; + + let client = create_query_client(&config); + let rows = query_as_json_stream(&client, "SELECT 1 as num, 'test' as text") + .await + .expect("Query should succeed"); + + assert_eq!(rows.len(), 1); + assert_eq!(rows[0]["num"], 1); + assert_eq!(rows[0]["text"], "test"); + } +} diff --git a/apps/framework-cli/src/infrastructure/olap/mod.rs b/apps/framework-cli/src/infrastructure/olap/mod.rs index ac80ec4d9e..fe056314c5 100644 --- a/apps/framework-cli/src/infrastructure/olap/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/mod.rs @@ -8,7 +8,7 @@ use crate::{ }; pub mod clickhouse; -pub mod clickhouse_alt_client; +pub mod clickhouse_http_client; pub mod ddl_ordering; #[derive(Debug, thiserror::Error)] From 7864be5b333a45b3e4b381b30b23295556c6391d Mon Sep 17 00:00:00 2001 From: Jonathan Widjaja Date: Tue, 25 Nov 2025 12:02:57 -0700 Subject: [PATCH 47/59] fix array literals in extract_source_tables_from_query (#3034) > [!NOTE] > Adds regex-based fallback to extract source tables (handling ClickHouse array literals), refactors MV/view reconstruction into shared logic, and strips backticks from MV TO targets with comprehensive tests. > > - **Parser (sql_parser.rs)**: > - Add `extract_source_tables_from_query_regex` regex fallback for `FROM`/`JOIN` to handle ClickHouse-specific array literal syntax. > - Expose and use `LazyLock` regex `FROM_JOIN_TABLE_PATTERN`. > - Keep existing AST-based `extract_source_tables_from_query` and tests. > - **View/MV Reconstruction (mod.rs)**: > - Refactor into `reconstruct_sql_resource_common` shared logic for views and materialized views. > - Integrate parser fallback: try AST parser, then regex fallback with `default_database`. > - Strip backticks from MV `TO` target; preserve/qualify target IDs correctly. > - Pass actual `database` (remove unused `_database`). 
> - **Tests**: > - Add tests covering regex fallback with array literals, MV target backtick stripping, standard SQL paths, and default DB application. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit b6be521cb4d405f87651343e78d69198295ef1bc. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). --- .../src/infrastructure/olap/clickhouse/mod.rs | 241 ++++++++++++------ .../olap/clickhouse/sql_parser.rs | 102 ++++++++ 2 files changed, 268 insertions(+), 75 deletions(-) diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs index 87a820c531..757bf54b4d 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs @@ -45,7 +45,8 @@ use serde::{Deserialize, Serialize}; use sql_parser::{ extract_engine_from_create_table, extract_indexes_from_create_table, extract_sample_by_from_create_table, extract_source_tables_from_query, - extract_table_settings_from_create_table, normalize_sql_for_comparison, split_qualified_name, + extract_source_tables_from_query_regex, extract_table_settings_from_create_table, + normalize_sql_for_comparison, split_qualified_name, }; use std::collections::HashMap; use std::ops::Deref; @@ -2038,17 +2039,14 @@ fn reconstruct_sql_resource_from_mv( name: String, create_query: String, as_select: String, - _database: String, + database: String, default_database: &str, ) -> Result { - // Extract target table from create_query - // We use a regex on the raw create_query because it's simpler than full parsing and - // avoids issues with ClickHouse's specific column definition syntax in CREATE statements. - // as_select doesn't contain the TO clause, so we must check the original query. 
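To make the backtick-stripping change easier to follow, here is a hedged, self-contained sketch of pulling a materialized view's TO target out of its CREATE statement and normalizing it; the regex below is only a stand-in for the module's MATERIALIZED_VIEW_TO_PATTERN:

```rust
use regex::Regex;

fn main() {
    // Stand-in pattern: capture whatever follows the TO keyword in a CREATE MATERIALIZED VIEW.
    let to_pattern = Regex::new(r"(?i)\bTO\s+([`\w.]+)").unwrap();

    let create_query =
        "CREATE MATERIALIZED VIEW mv TO `my_db`.`my_target` AS SELECT * FROM src";

    let target = to_pattern
        .captures(create_query)
        .and_then(|caps| caps.get(1))
        .map(|m| m.as_str().replace('`', "")) // strip backticks, as the patch now does
        .expect("TO clause present");

    // The normalized name can then be matched against table ids in the infra map.
    assert_eq!(target, "my_db.my_target");
}
```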
+ // Extract target table from create_query for MV let target_table = MATERIALIZED_VIEW_TO_PATTERN .captures(&create_query) .and_then(|caps| caps.get(1)) - .map(|m| m.as_str().replace('`', "")) // Strip backticks from table name + .map(|m| m.as_str().replace('`', "")) .ok_or_else(|| { OlapChangesError::DatabaseError(format!( "Could not find TO target in materialized view definition: {}", @@ -2056,54 +2054,8 @@ fn reconstruct_sql_resource_from_mv( )) })?; - // Reconstruct the canonical CREATE statement - // CREATE MATERIALIZED VIEW IF NOT EXISTS name TO target AS select - let setup_raw = format!( - "CREATE MATERIALIZED VIEW IF NOT EXISTS {} TO {} AS {}", - name, target_table, as_select - ); - - // Normalize the SQL for consistent comparison with user-defined views - let setup = normalize_sql_for_comparison(&setup_raw, default_database); - - // Generate teardown script - let teardown = format!("DROP VIEW IF EXISTS `{}`", name); - - // Parse as_select to get source tables (lineage) - // as_select is clean SQL, so we can use standard parser logic - let source_tables = extract_source_tables_from_query(&as_select).map_err(|e| { - OlapChangesError::DatabaseError(format!( - "Failed to extract source tables from MV {}: {}", - name, e - )) - })?; - - // Extract pulls_data_from (source tables) - let pulls_data_from = source_tables - .into_iter() - .map(|table_ref| { - // Get the table name, strip version suffix if present - let table_name = table_ref.table; - let (base_name, _version) = extract_version_from_table_name(&table_name); - - // Use database from table reference if available, otherwise use default - let qualified_id = if let Some(db) = table_ref.database { - if db == default_database { - base_name - } else { - format!("{}_{}", db, base_name) - } - } else { - base_name - }; - - InfrastructureSignature::Table { id: qualified_id } - }) - .collect(); - // Extract pushes_data_to (target table for MV) let (target_base_name, _version) = extract_version_from_table_name(&target_table); - // target_table might be qualified (db.table) let (target_db, target_name_only) = split_qualified_name(&target_base_name); let target_qualified_id = if let Some(target_db) = target_db { @@ -2120,14 +2072,20 @@ fn reconstruct_sql_resource_from_mv( id: target_qualified_id, }]; - Ok(SqlResource { + // Reconstruct with MV-specific CREATE statement + let setup_raw = format!( + "CREATE MATERIALIZED VIEW IF NOT EXISTS {} TO {} AS {}", + name, target_table, as_select + ); + + reconstruct_sql_resource_common( name, - database: Some(_database), - setup: vec![setup], - teardown: vec![teardown], - pulls_data_from, + setup_raw, + as_select, + database, + default_database, pushes_data_to, - }) + ) } /// Reconstructs a SqlResource from a view's CREATE statement @@ -2143,28 +2101,57 @@ fn reconstruct_sql_resource_from_mv( fn reconstruct_sql_resource_from_view( name: String, as_select: String, - _database: String, + database: String, default_database: &str, ) -> Result { - // Reconstruct the canonical CREATE statement using as_select. - // This ensures we have a clean definition without column types in the header, - // matching how Moose generates views (just `CREATE VIEW ... AS SELECT ...`). 
- // CREATE VIEW IF NOT EXISTS name AS select + // Views don't push data to tables + let pushes_data_to = vec![]; + + // Reconstruct with view-specific CREATE statement let setup_raw = format!("CREATE VIEW IF NOT EXISTS {} AS {}", name, as_select); - // Normalize the SQL for consistent comparison with user-defined views + reconstruct_sql_resource_common( + name, + setup_raw, + as_select, + database, + default_database, + pushes_data_to, + ) +} + +/// Common logic for reconstructing SqlResource from MV or View +fn reconstruct_sql_resource_common( + name: String, + setup_raw: String, + as_select: String, + database: String, + default_database: &str, + pushes_data_to: Vec, +) -> Result { + // Normalize the SQL for consistent comparison let setup = normalize_sql_for_comparison(&setup_raw, default_database); // Generate teardown script let teardown = format!("DROP VIEW IF EXISTS `{}`", name); // Parse as_select to get source tables (lineage) - let source_tables = extract_source_tables_from_query(&as_select).map_err(|e| { - OlapChangesError::DatabaseError(format!( - "Failed to extract source tables from View {}: {}", - name, e - )) - })?; + // Try standard SQL parser first, but fall back to regex if it fails + let source_tables = match extract_source_tables_from_query(&as_select) { + Ok(tables) => tables, + Err(e) => { + warn!( + "Could not parse {} query with standard SQL parser ({}), using regex fallback", + name, e + ); + extract_source_tables_from_query_regex(&as_select, default_database).map_err(|e| { + OlapChangesError::DatabaseError(format!( + "Failed to extract source tables from {} using regex fallback: {}", + name, e + )) + })? + } + }; // Extract pulls_data_from (source tables) let pulls_data_from = source_tables @@ -2189,12 +2176,9 @@ fn reconstruct_sql_resource_from_view( }) .collect(); - // Regular views don't push data to tables - let pushes_data_to = vec![]; - Ok(SqlResource { name, - database: Some(_database), + database: Some(database), setup: vec![setup], teardown: vec![teardown], pulls_data_from, @@ -3259,4 +3243,111 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra "Column TTL should remain unchanged" ); } + + #[test] + fn test_reconstruct_sql_resource_from_mv_with_standard_sql() { + let create_query = + "CREATE MATERIALIZED VIEW test_mv TO target_table AS SELECT id FROM source".to_string(); + let as_select = "SELECT id FROM source".to_string(); + + let result = reconstruct_sql_resource_from_mv( + "test_mv".to_string(), + create_query, + as_select, + "mydb".to_string(), + "mydb", + ) + .unwrap(); + + assert_eq!(result.name, "test_mv"); + assert_eq!(result.pulls_data_from.len(), 1); + assert_eq!(result.pushes_data_to.len(), 1); + match &result.pushes_data_to[0] { + InfrastructureSignature::Table { id } => assert_eq!(id, "target_table"), + _ => panic!("Expected Table signature"), + } + } + + #[test] + fn test_reconstruct_sql_resource_from_mv_with_clickhouse_array_syntax() { + // Reproduces customer issue: MV with ClickHouse array literals + let create_query = + "CREATE MATERIALIZED VIEW test_mv TO target AS SELECT * FROM source".to_string(); + let as_select = r#" + SELECT name, count() as total + FROM mydb.source_table + WHERE arrayExists(x -> (lower(name) LIKE x), ['pattern1', 'pattern2']) + AND status NOT IN ['active', 'pending'] + GROUP BY name + "# + .to_string(); + + // Should not panic, should use regex fallback + let result = reconstruct_sql_resource_from_mv( + "test_mv".to_string(), + create_query, + as_select, + "mydb".to_string(), + 
"mydb", + ) + .unwrap(); + + assert_eq!(result.name, "test_mv"); + // Regex fallback should extract source_table + assert_eq!(result.pulls_data_from.len(), 1); + match &result.pulls_data_from[0] { + InfrastructureSignature::Table { id } => assert_eq!(id, "source_table"), + _ => panic!("Expected Table signature"), + } + } + + #[test] + fn test_reconstruct_sql_resource_from_view_with_clickhouse_array_syntax() { + let as_select = r#" + SELECT id, name + FROM db1.table1 + WHERE status IN ['active', 'pending'] + "# + .to_string(); + + // Should not panic, should use regex fallback + let result = reconstruct_sql_resource_from_view( + "test_view".to_string(), + as_select, + "db1".to_string(), + "db1", + ) + .unwrap(); + + assert_eq!(result.name, "test_view"); + assert_eq!(result.pulls_data_from.len(), 1); + match &result.pulls_data_from[0] { + InfrastructureSignature::Table { id } => assert_eq!(id, "table1"), + _ => panic!("Expected Table signature"), + } + assert_eq!(result.pushes_data_to.len(), 0); + } + + #[test] + fn test_reconstruct_sql_resource_from_mv_strips_backticks_from_target() { + // Tests the backtick stripping fix in target table extraction + let create_query = + "CREATE MATERIALIZED VIEW mv TO `my_db`.`my_target` AS SELECT * FROM src".to_string(); + let as_select = "SELECT * FROM src".to_string(); + + let result = reconstruct_sql_resource_from_mv( + "mv".to_string(), + create_query, + as_select, + "my_db".to_string(), + "my_db", + ) + .unwrap(); + + // Target table name should have backticks stripped + match &result.pushes_data_to[0] { + InfrastructureSignature::Table { id } => assert_eq!(id, "my_target"), + _ => panic!("Expected Table signature"), + } + } } diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/sql_parser.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/sql_parser.rs index f0a37cd903..fd67a4c273 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/sql_parser.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/sql_parser.rs @@ -12,6 +12,7 @@ use sqlparser::dialect::ClickHouseDialect; use sqlparser::parser::Parser; use std::collections::HashSet; use std::ops::ControlFlow; +use std::sync::LazyLock; #[derive(Debug, Clone, PartialEq)] pub struct MaterializedViewStatement { @@ -703,6 +704,48 @@ pub fn extract_source_tables_from_query(sql: &str) -> Result } } +static FROM_JOIN_TABLE_PATTERN: LazyLock = LazyLock::new(|| { + // Pattern to extract table names from FROM and JOIN clauses + // Matches: FROM schema.table, JOIN schema.table, FROM table, etc. + // Captures optional schema and required table name + regex::Regex::new(r"(?i)\b(?:FROM|JOIN)\s+(?:([a-zA-Z0-9_`]+)\.)?([a-zA-Z0-9_`]+)") + .expect("FROM_JOIN_TABLE_PATTERN regex should compile") +}); + +/// Extracts table names from a SQL query using regex fallback. +/// Used when the standard SQL parser fails (e.g., ClickHouse-specific syntax like array literals). +/// +/// This is a simplified fallback that pattern-matches FROM/JOIN clauses rather than +/// parsing the full AST. It won't catch tables in subqueries, but it's sufficient for +/// basic dependency tracking when full parsing isn't possible. 
+pub fn extract_source_tables_from_query_regex( + sql: &str, + default_database: &str, +) -> Result, SqlParseError> { + let mut tables = Vec::new(); + + for captures in FROM_JOIN_TABLE_PATTERN.captures_iter(sql) { + let database = captures.get(1).map(|m| m.as_str().replace('`', "")); + let table = captures + .get(2) + .map(|m| m.as_str().replace('`', "")) + .ok_or(SqlParseError::UnsupportedStatement)?; + + tables.push(TableReference { + database: database.or_else(|| Some(default_database.to_string())), + table, + alias: None, + }); + } + + if tables.is_empty() { + // No tables found - this might be a problem, but don't fail hard + // The view might have tables in subqueries that regex can't catch + } + + Ok(tables) +} + fn extract_source_tables_from_query_ast( query: &Query, ) -> Result, SqlParseError> { @@ -1813,4 +1856,63 @@ pub mod tests { assert!(normalized_ch.contains("AS table")); assert!(!normalized_ch.contains("AS `table`")); } + + #[test] + fn test_extract_source_tables_with_standard_sql() { + let sql = "SELECT a.id, b.name FROM users a JOIN orders b ON a.id = b.user_id"; + let result = extract_source_tables_from_query(sql).unwrap(); + + assert_eq!(result.len(), 2); + let table_names: Vec<&str> = result.iter().map(|t| t.table.as_str()).collect(); + assert!(table_names.contains(&"users")); + assert!(table_names.contains(&"orders")); + } + + #[test] + fn test_extract_source_tables_regex_fallback_with_clickhouse_array_literals() { + // Reproduces customer bug: ClickHouse array literal syntax ['item1', 'item2'] + // causes standard SQL parser to fail at the '[' character. + // This tests the regex fallback successfully extracts tables despite parse failure. + let sql = r#" + SELECT name, count() as total + FROM mydb.endpoint_process + WHERE arrayExists(x -> (lower(name) LIKE x), ['pattern1', 'pattern2']) + AND status NOT IN ['completed', 'failed'] + GROUP BY name + "#; + + // Standard parser should fail on '[' in array literals + let parse_result = extract_source_tables_from_query(sql); + assert!( + parse_result.is_err(), + "Expected parser to fail on ClickHouse array syntax" + ); + + // Regex fallback should succeed and extract the correct table with schema + let result = extract_source_tables_from_query_regex(sql, "default").unwrap(); + + assert_eq!(result.len(), 1); + assert_eq!(result[0].table, "endpoint_process"); + assert_eq!(result[0].database, Some("mydb".to_string())); + } + + #[test] + fn test_extract_source_tables_regex_handles_joins_and_defaults() { + // Tests regex fallback extracts FROM/JOIN tables, handles backticks, + // and applies default database to unqualified names + let sql = "SELECT * FROM `schema1`.`table1` JOIN table2 ON table1.id = table2.id"; + + let result = extract_source_tables_from_query_regex(sql, "default_db").unwrap(); + + assert_eq!(result.len(), 2); + + let tables: Vec<(Option, String)> = result + .iter() + .map(|t| (t.database.clone(), t.table.clone())) + .collect(); + + // table1 has schema, table2 gets default_db + assert!(tables.contains(&(Some("schema1".to_string()), "table1".to_string()))); + assert!(tables.contains(&(Some("default_db".to_string()), "table2".to_string()))); + } } From 88cb2f6ef8f419aeec88be71229564319a30bfb3 Mon Sep 17 00:00:00 2001 From: George Leung Date: Tue, 25 Nov 2025 11:54:17 -0800 Subject: [PATCH 48/59] use --clickhouse-url and parse_clickhouse_connection_string (#3022) > [!NOTE] > Switches CLI flags to --clickhouse-url, adds robust ClickHouse URL parsing/conversion, refactors codegen/packaging to use 
project.source_dir, and updates docs accordingly. > > - **CLI** > - Rename flags to `--clickhouse-url` for `seed clickhouse` and `db pull` (keep alias `--connection-string`). > - Update prompts, logs, errors, and success messages to reference ClickHouse URL. > - Add helpers to resolve/override serverless URLs from flags/env. > - **ClickHouse URL Parsing** > - Introduce `parse_clickhouse_connection_string_with_metadata` with percent-decoding, native-to-HTTP(S) conversion notice, display-safe URL, and database detection (queries `database()` when not explicit). > - **Code Generation / DB Pull** > - Use new parsing + `create_client` in `create_client_and_db`. > - External models writing generalized to accept `source_dir`; path handling switched from `APP_DIR` to `project.source_dir`. > - **Build & Docker Packager** > - Package copy lists replace `APP_DIR` with `project.source_dir`. > - **Docs** > - Update examples and references to `--clickhouse-url` across CLI, local dev, getting started, and db-pull guides. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 65a29a3084cebe56712dd03969e99b101b4a2606. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). --- apps/framework-cli/src/cli.rs | 14 +- apps/framework-cli/src/cli/commands.rs | 12 +- apps/framework-cli/src/cli/routines/build.rs | 16 +- .../src/cli/routines/code_generation.rs | 147 +++++++----------- .../src/cli/routines/docker_packager.rs | 6 +- .../src/cli/routines/seed_data.rs | 22 +-- .../infrastructure/olap/clickhouse/config.rs | 94 +++++++++-- .../moose/getting-started/from-clickhouse.mdx | 6 +- .../src/pages/moose/local-dev.mdx | 2 +- .../src/pages/moose/moose-cli.mdx | 8 +- .../src/pages/moose/olap/db-pull.mdx | 22 +-- .../src/pages/moose/olap/external-tables.mdx | 2 +- 12 files changed, 200 insertions(+), 151 deletions(-) diff --git a/apps/framework-cli/src/cli.rs b/apps/framework-cli/src/cli.rs index a93717a5ab..87a2de1ad6 100644 --- a/apps/framework-cli/src/cli.rs +++ b/apps/framework-cli/src/cli.rs @@ -401,7 +401,7 @@ pub async fn top_command_handler( let save_choice = prompt_user( "\n Would you like to save this connection string to your system keychain for easy `moose db pull` later? 
[Y/n]", Some("Y"), - Some("You can always pass --connection-string explicitly to override."), + Some("You can always pass --clickhouse-url explicitly to override."), )?; let save = save_choice.trim().is_empty() @@ -428,7 +428,7 @@ pub async fn top_command_handler( let success_message = if let Some(connection_string) = normalized_url { format!( - "\n\n{post_install_message}\n\n🔗 Your ClickHouse connection string:\n{}\n\n📋 After setting up your development environment, open a new terminal and seed your local database:\n moose seed clickhouse --connection-string \"{}\" --limit 1000\n\n💡 Tip: Save the connection string as an environment variable for future use:\n export MOOSE_REMOTE_CLICKHOUSE_URL=\"{}\"\n", + "\n\n{post_install_message}\n\n🔗 Your ClickHouse connection string:\n{}\n\n📋 After setting up your development environment, open a new terminal and seed your local database:\n moose seed clickhouse --clickhouse-url \"{}\" --limit 1000\n\n💡 Tip: Save the connection string as an environment variable for future use:\n export MOOSE_REMOTE_CLICKHOUSE_URL=\"{}\"\n", connection_string, connection_string, connection_string @@ -1263,7 +1263,7 @@ pub async fn top_command_handler( Commands::Db(DbArgs { command: DbCommands::Pull { - connection_string, + clickhouse_url, file_path, }, }) => { @@ -1277,7 +1277,7 @@ pub async fn top_command_handler( machine_id.clone(), HashMap::new(), ); - let resolved_connection_string: String = match connection_string { + let resolved_clickhouse_url: String = match clickhouse_url { Some(s) => s.clone(), None => { let repo = KeyringSecretRepository; @@ -1285,13 +1285,13 @@ pub async fn top_command_handler( Ok(Some(s)) => s, Ok(None) => return Err(RoutineFailure::error(Message { action: "DB Pull".to_string(), - details: "No connection string provided and none saved. Pass --connection-string or save one during `moose init --from-remote`.".to_string(), + details: "No ClickHouse URL provided and none saved. Pass --clickhouse-url or save one during `moose init --from-remote`.".to_string(), })), Err(e) => { return Err(RoutineFailure::error(Message { action: "DB Pull".to_string(), details: format!( - "Failed to read saved connection string from keychain: {e:?}" + "Failed to read saved ClickHouse URL from keychain: {e:?}" ), })); } @@ -1299,7 +1299,7 @@ pub async fn top_command_handler( } }; - db_pull(&resolved_connection_string, &project, file_path.as_deref()) + db_pull(&resolved_clickhouse_url, &project, file_path.as_deref()) .await .map_err(|e| { RoutineFailure::new( diff --git a/apps/framework-cli/src/cli/commands.rs b/apps/framework-cli/src/cli/commands.rs index 5c62990cc2..02b5247fee 100644 --- a/apps/framework-cli/src/cli/commands.rs +++ b/apps/framework-cli/src/cli/commands.rs @@ -362,9 +362,9 @@ pub struct SeedCommands { pub enum SeedSubcommands { /// Seed ClickHouse tables with data Clickhouse { - /// ClickHouse connection string (e.g. 'clickhouse://explorer@play.clickhouse.com:9440/default') - #[arg(long, value_name = "CONNECTION_STRING")] - connection_string: Option, + /// ClickHouse connection URL (e.g. 'clickhouse://explorer@play.clickhouse.com:9440/default') + #[arg(long, alias = "connection-string")] + clickhouse_url: Option, /// Limit the number of rows to copy per table (default: 1000) #[arg( long, @@ -396,9 +396,9 @@ pub struct DbArgs { pub enum DbCommands { /// Update DB schema for EXTERNALLY_MANAGED tables Pull { - /// ClickHouse connection string (e.g. 'E.g. 
https://play.clickhouse.com/?user=explorer') - #[arg(long, value_name = "CONNECTION_STRING")] - connection_string: Option, + /// ClickHouse connection URL (e.g. 'https://play.clickhouse.com/?user=explorer') + #[arg(long, alias = "connection-string")] + clickhouse_url: Option, /// File storing the EXTERNALLY_MANAGED table definitions, defaults to app/external_models.py or app/externalModels.ts #[arg(long)] file_path: Option, diff --git a/apps/framework-cli/src/cli/routines/build.rs b/apps/framework-cli/src/cli/routines/build.rs index 2d94270740..441425f1f8 100644 --- a/apps/framework-cli/src/cli/routines/build.rs +++ b/apps/framework-cli/src/cli/routines/build.rs @@ -37,10 +37,10 @@ use crate::project::Project; use crate::project::ProjectFileError; use crate::utilities::constants::LIB_DIR; use crate::utilities::constants::PACKAGE_JSON; +use crate::utilities::constants::PROJECT_CONFIG_FILE; use crate::utilities::constants::REQUIREMENTS_TXT; use crate::utilities::constants::SETUP_PY; use crate::utilities::constants::TSCONFIG_JSON; -use crate::utilities::constants::{APP_DIR, PROJECT_CONFIG_FILE}; use crate::utilities::package_managers::{detect_package_manager, get_lock_file_path}; use crate::utilities::system; use crate::utilities::system::copy_directory; @@ -192,10 +192,20 @@ pub fn build_package(project: &Project) -> Result { // Files to include in the package let files_to_copy = match project.language { SupportedLanguages::Typescript => { - vec![APP_DIR, PROJECT_CONFIG_FILE, PACKAGE_JSON, TSCONFIG_JSON] + vec![ + &project.source_dir, + PROJECT_CONFIG_FILE, + PACKAGE_JSON, + TSCONFIG_JSON, + ] } SupportedLanguages::Python => { - vec![APP_DIR, PROJECT_CONFIG_FILE, REQUIREMENTS_TXT, SETUP_PY] + vec![ + &project.source_dir, + PROJECT_CONFIG_FILE, + REQUIREMENTS_TXT, + SETUP_PY, + ] } }; diff --git a/apps/framework-cli/src/cli/routines/code_generation.rs b/apps/framework-cli/src/cli/routines/code_generation.rs index 1cc563c03a..80b8baf3d6 100644 --- a/apps/framework-cli/src/cli/routines/code_generation.rs +++ b/apps/framework-cli/src/cli/routines/code_generation.rs @@ -7,15 +7,15 @@ use crate::framework::core::partial_infrastructure_map::LifeCycle; use crate::framework::languages::SupportedLanguages; use crate::framework::python::generate::tables_to_python; use crate::framework::typescript::generate::tables_to_typescript; -use crate::infrastructure::olap::clickhouse::ConfiguredDBClient; +use crate::infrastructure::olap::clickhouse::{create_client, ConfiguredDBClient}; use crate::infrastructure::olap::OlapOperations; use crate::project::Project; use crate::utilities::constants::{ - APP_DIR, PYTHON_EXTERNAL_FILE, PYTHON_MAIN_FILE, TYPESCRIPT_EXTERNAL_FILE, TYPESCRIPT_MAIN_FILE, + PYTHON_EXTERNAL_FILE, PYTHON_MAIN_FILE, TYPESCRIPT_EXTERNAL_FILE, TYPESCRIPT_MAIN_FILE, }; use crate::utilities::git::create_code_generation_commit; +use clickhouse::Client; use log::debug; -use reqwest::Url; use std::borrow::Cow; use std::env; use std::io::Write; @@ -59,90 +59,51 @@ fn should_be_externally_managed(table: &Table) -> bool { pub async fn create_client_and_db( remote_url: &str, ) -> Result<(ConfiguredDBClient, String), RoutineFailure> { - let mut url = Url::parse(remote_url).map_err(|e| { - RoutineFailure::error(Message::new( - "Invalid URL".to_string(), - format!("Failed to parse remote_url '{remote_url}': {e}"), - )) + use crate::infrastructure::olap::clickhouse::config::parse_clickhouse_connection_string_with_metadata; + + // Parse the connection string with metadata + let parsed = 
parse_clickhouse_connection_string_with_metadata(remote_url).map_err(|e| { + RoutineFailure::new( + Message::new( + "Invalid URL".to_string(), + format!("Failed to parse ClickHouse URL '{remote_url}'"), + ), + e, + ) })?; - if url.scheme() == "clickhouse" { + // Show user-facing message if native protocol was converted + if parsed.was_native_protocol { debug!("Only HTTP(s) supported. Transforming native protocol connection string."); - let is_secure = match (url.host_str(), url.port()) { - (_, Some(9000)) => false, - (_, Some(9440)) => true, - (Some(host), _) if host == "localhost" || host == "127.0.0.1" => false, - _ => true, - }; - let (new_port, new_scheme) = if is_secure { - (8443, "https") - } else { - (8123, "http") - }; - url = Url::parse(&remote_url.replacen("clickhouse", new_scheme, 1)).unwrap(); - url.set_port(Some(new_port)).unwrap(); - - let path_segments = url.path().split('/').collect::>(); - if path_segments.len() == 2 && path_segments[0].is_empty() { - let database = path_segments[1].to_string(); - url.set_path(""); - url.query_pairs_mut().append_pair("database", &database); - }; - - let display_url = if url.password().is_some() { - let mut cloned = url.clone(); - cloned.set_password(Some("******")).unwrap(); - Cow::Owned(cloned) - } else { - Cow::Borrowed(&url) - }; show_message!( MessageType::Highlight, Message { action: "Protocol".to_string(), - details: format!("native protocol detected. Converting to HTTP(s): {display_url}"), + details: format!( + "native protocol detected. Converting to HTTP(s): {}", + parsed.display_url + ), } ); } - let mut client = clickhouse::Client::default().with_url(remote_url); - let url_username = url.username(); - let url_username = if !url_username.is_empty() { - url_username.to_string() - } else { - match url.query_pairs().find(|(key, _)| key == "user") { - None => String::new(), - Some((_, v)) => v.to_string(), - } - }; - if !url_username.is_empty() { - client = client - .with_user(percent_encoding::percent_decode_str(&url_username).decode_utf8_lossy()) - } - if let Some(password) = url.password() { - client = client - .with_password(percent_encoding::percent_decode_str(password).decode_utf8_lossy()); - } - - let url_db = url - .query_pairs() - .filter_map(|(k, v)| { - if k == "database" { - Some(v.to_string()) - } else { - None - } - }) - .last(); - - let client = ConfiguredDBClient { - client, - config: Default::default(), - }; + let mut config = parsed.config; + + // If database wasn't explicitly specified in URL, query the server for the current database + let db_name = if !parsed.database_was_explicit { + // create_client(config) calls `with_database(config.database)` when we're not sure which DB is the real default + let client = Client::default() + .with_url(format!( + "{}://{}:{}", + if config.use_ssl { "https" } else { "http" }, + config.host, + config.host_port + )) + .with_user(config.user.to_string()) + .with_password(config.password.to_string()); - let db = match url_db { - None => client - .client + // No database was specified in URL, query the server + client .query("select database()") .fetch_one::() .await @@ -151,25 +112,32 @@ pub async fn create_client_and_db( Message::new("Failure".to_string(), "fetching database".to_string()), e, ) - })?, - Some(db) => db, + })? 
+ } else { + config.db_name.clone() }; - Ok((client, db)) + // Update config with detected database name if it changed + if db_name != config.db_name { + config.db_name = db_name.clone(); + } + + Ok((create_client(config), db_name)) } fn write_external_models_file( language: SupportedLanguages, tables: &[Table], file_path: Option<&str>, + source_dir: &str, ) -> Result<(), RoutineFailure> { let file = match (language, file_path) { (_, Some(path)) => Cow::Borrowed(path), (SupportedLanguages::Typescript, None) => { - Cow::Owned(format!("{APP_DIR}/{TYPESCRIPT_EXTERNAL_FILE}")) + Cow::Owned(format!("{source_dir}/{TYPESCRIPT_EXTERNAL_FILE}")) } (SupportedLanguages::Python, None) => { - Cow::Owned(format!("{APP_DIR}/{PYTHON_EXTERNAL_FILE}")) + Cow::Owned(format!("{source_dir}/{PYTHON_EXTERNAL_FILE}")) } }; match language { @@ -294,7 +262,7 @@ pub async fn db_to_dmv2(remote_url: &str, dir_path: &Path) -> Result<(), Routine .create(true) .write(true) .truncate(true) - .open(format!("{APP_DIR}/{TYPESCRIPT_EXTERNAL_FILE}")) + .open(format!("{}/{TYPESCRIPT_EXTERNAL_FILE}", project.source_dir)) .map_err(|e| { RoutineFailure::new( Message::new( @@ -313,7 +281,7 @@ pub async fn db_to_dmv2(remote_url: &str, dir_path: &Path) -> Result<(), Routine e, ) })?; - let main_path = format!("{APP_DIR}/{TYPESCRIPT_MAIN_FILE}"); + let main_path = format!("{}/{TYPESCRIPT_MAIN_FILE}", project.source_dir); let import_stmt = "import \"./externalModels\";"; let needs_import = match std::fs::read_to_string(&main_path) { Ok(contents) => !contents.contains(import_stmt), @@ -348,7 +316,7 @@ pub async fn db_to_dmv2(remote_url: &str, dir_path: &Path) -> Result<(), Routine let table_definitions = tables_to_typescript(&managed, None); let mut file = std::fs::OpenOptions::new() .append(true) - .open(format!("{APP_DIR}/{TYPESCRIPT_MAIN_FILE}")) + .open(format!("{}/{TYPESCRIPT_MAIN_FILE}", project.source_dir)) .map_err(|e| { RoutineFailure::new( Message::new( @@ -378,7 +346,7 @@ pub async fn db_to_dmv2(remote_url: &str, dir_path: &Path) -> Result<(), Routine .create(true) .write(true) .truncate(true) - .open(format!("{APP_DIR}/{PYTHON_EXTERNAL_FILE}")) + .open(format!("{}/{PYTHON_EXTERNAL_FILE}", project.source_dir)) .map_err(|e| { RoutineFailure::new( Message::new( @@ -397,7 +365,7 @@ pub async fn db_to_dmv2(remote_url: &str, dir_path: &Path) -> Result<(), Routine e, ) })?; - let main_path = format!("{APP_DIR}/{PYTHON_MAIN_FILE}"); + let main_path = format!("{}/{PYTHON_MAIN_FILE}", project.source_dir); let import_stmt = "from .external_models import *"; let needs_import = match std::fs::read_to_string(&main_path) { Ok(contents) => !contents.contains(import_stmt), @@ -431,7 +399,7 @@ pub async fn db_to_dmv2(remote_url: &str, dir_path: &Path) -> Result<(), Routine let table_definitions = tables_to_python(&managed, None); let mut file = std::fs::OpenOptions::new() .append(true) - .open(format!("{APP_DIR}/{PYTHON_MAIN_FILE}")) + .open(format!("{}/{PYTHON_MAIN_FILE}", project.source_dir)) .map_err(|e| { RoutineFailure::new( Message::new( @@ -536,7 +504,12 @@ pub async fn db_pull( // Keep a stable ordering for deterministic output tables_for_external_file.sort_by(|a, b| a.name.cmp(&b.name)); - write_external_models_file(project.language, &tables_for_external_file, file_path)?; + write_external_models_file( + project.language, + &tables_for_external_file, + file_path, + &project.source_dir, + )?; match create_code_generation_commit( ".".as_ref(), diff --git a/apps/framework-cli/src/cli/routines/docker_packager.rs 
b/apps/framework-cli/src/cli/routines/docker_packager.rs index 075c80121c..262724ddef 100644 --- a/apps/framework-cli/src/cli/routines/docker_packager.rs +++ b/apps/framework-cli/src/cli/routines/docker_packager.rs @@ -3,8 +3,8 @@ use crate::cli::display::with_spinner_completion; use crate::cli::routines::util::ensure_docker_running; use crate::framework::languages::SupportedLanguages; use crate::utilities::constants::{ - APP_DIR, OLD_PROJECT_CONFIG_FILE, PACKAGE_JSON, PROJECT_CONFIG_FILE, REQUIREMENTS_TXT, - SETUP_PY, TSCONFIG_JSON, + OLD_PROJECT_CONFIG_FILE, PACKAGE_JSON, PROJECT_CONFIG_FILE, REQUIREMENTS_TXT, SETUP_PY, + TSCONFIG_JSON, }; use crate::utilities::docker::DockerClient; use crate::utilities::nodejs_version::determine_node_version_from_package_json; @@ -588,7 +588,7 @@ pub fn build_dockerfile( // Copy app & etc to packager directory let project_root_path = project.project_location.clone(); let items_to_copy = vec![ - APP_DIR, + &project.source_dir, PACKAGE_JSON, SETUP_PY, REQUIREMENTS_TXT, diff --git a/apps/framework-cli/src/cli/routines/seed_data.rs b/apps/framework-cli/src/cli/routines/seed_data.rs index a222a2ccb6..b827a5b617 100644 --- a/apps/framework-cli/src/cli/routines/seed_data.rs +++ b/apps/framework-cli/src/cli/routines/seed_data.rs @@ -23,7 +23,7 @@ fn validate_database_name(db_name: &str) -> Result<(), RoutineFailure> { if db_name.is_empty() { Err(RoutineFailure::error(Message::new( "SeedClickhouse".to_string(), - "No database specified in connection string and unable to determine current database" + "No database specified in ClickHouse URL and unable to determine current database" .to_string(), ))) } else { @@ -343,7 +343,7 @@ fn get_tables_to_seed(infra_map: &InfrastructureMap, table_name: Option) /// table validation, and data copying async fn seed_clickhouse_operation( project: &Project, - connection_string: &str, + clickhouse_url: &str, table: Option, limit: Option, order_by: Option<&str>, @@ -351,11 +351,11 @@ async fn seed_clickhouse_operation( // Load infrastructure map let infra_map = load_infrastructure_map(project).await?; - // Parse connection string - let remote_config = parse_clickhouse_connection_string(connection_string).map_err(|e| { + // Parse ClickHouse URL + let remote_config = parse_clickhouse_connection_string(clickhouse_url).map_err(|e| { RoutineFailure::error(Message::new( "SeedClickhouse".to_string(), - format!("Invalid connection string: {e}"), + format!("Invalid ClickHouse URL: {e}"), )) })?; @@ -432,13 +432,13 @@ pub async fn handle_seed_command( ) -> Result { match &seed_args.command { Some(SeedSubcommands::Clickhouse { - connection_string, + clickhouse_url, limit, all, table, order_by, }) => { - let resolved_connection_string = match connection_string { + let resolved_clickhouse_url = match clickhouse_url { Some(s) => s.clone(), None => { let repo = KeyringSecretRepository; @@ -447,27 +447,27 @@ pub async fn handle_seed_command( Ok(None) => { return Err(RoutineFailure::error(Message::new( "SeedClickhouse".to_string(), - "No connection string provided and none saved. Pass --connection-string or save one via `moose init --from-remote`.".to_string(), + "No ClickHouse URL provided and none saved. 
Pass --clickhouse-url or save one via `moose init --from-remote`.".to_string(), ))) } Err(e) => { return Err(RoutineFailure::error(Message::new( "SeedClickhouse".to_string(), - format!("Failed to read saved connection string from keychain: {e:?}"), + format!("Failed to read saved ClickHouse URL from keychain: {e:?}"), ))) } } } }; - info!("Running seed clickhouse command with connection string: {resolved_connection_string}"); + info!("Running seed clickhouse command with ClickHouse URL: {resolved_clickhouse_url}"); let (local_db_name, remote_db_name, summary) = with_spinner_completion_async( "Initializing database seeding operation...", "Database seeding completed", seed_clickhouse_operation( project, - &resolved_connection_string, + &resolved_clickhouse_url, table.clone(), if *all { None } else { Some(*limit) }, order_by.as_deref(), diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/config.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/config.rs index 521902cc01..2e97df1daf 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/config.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/config.rs @@ -64,15 +64,46 @@ impl Default for ClickHouseConfig { } } +/// Result of parsing a ClickHouse connection string, including conversion metadata +#[derive(Debug, Clone)] +pub struct ParsedConnectionString { + pub config: ClickHouseConfig, + pub was_native_protocol: bool, + pub display_url: String, + pub database_was_explicit: bool, +} + /// Parses a ClickHouse connection string (URL) into a ClickHouseConfig /// /// Supports multiple URL schemes (https, clickhouse) and extracts database name from path or query parameter. /// Automatically determines SSL usage based on scheme and port. +/// Percent-decodes username and password for proper handling of special characters. pub fn parse_clickhouse_connection_string(conn_str: &str) -> anyhow::Result { + parse_clickhouse_connection_string_with_metadata(conn_str).map(|parsed| parsed.config) +} + +/// Parses a ClickHouse connection string with metadata about conversions performed +/// +/// Returns additional information useful for displaying user-facing messages, +/// such as whether native protocol conversion occurred and a display-safe URL. 
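+///
+/// Illustrative sketch (marked `ignore`; exact values depend on the port-mapping
+/// rules implemented below):
+///
+/// ```ignore
+/// let parsed = parse_clickhouse_connection_string_with_metadata(
+///     "clickhouse://user:secret@host:9440/analytics",
+/// )?;
+/// assert!(parsed.was_native_protocol);   // `clickhouse://` scheme was detected
+/// assert!(parsed.database_was_explicit); // database came from the URL path
+/// assert!(parsed.config.use_ssl);        // secure native port is mapped to HTTPS
+/// ```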
+pub fn parse_clickhouse_connection_string_with_metadata( + conn_str: &str, +) -> anyhow::Result { let url = Url::parse(conn_str)?; + let was_native_protocol = url.scheme() == "clickhouse"; - let user = url.username().to_string(); - let password = url.password().unwrap_or("").to_string(); + // Percent-decode username and password to handle special characters + let user = percent_encoding::percent_decode_str(url.username()) + .decode_utf8_lossy() + .to_string(); + let password = url + .password() + .map(|p| { + percent_encoding::percent_decode_str(p) + .decode_utf8_lossy() + .to_string() + }) + .unwrap_or_default(); let host = url.host_str().unwrap_or("localhost").to_string(); let mut http_port: Option = None; @@ -99,23 +130,39 @@ pub fn parse_clickhouse_connection_string(conn_str: &str) -> anyhow::Result (db, true), + None => ("default".to_string(), false), + } + }; + let config = ClickHouseConfig { - db_name, - user, - password, + db_name: db_name.clone(), + user: user.clone(), + password: password.clone(), use_ssl, - host, + host: host.clone(), host_port: http_port, native_port, host_data_path: None, @@ -123,7 +170,26 @@ pub fn parse_clickhouse_connection_string(conn_str: &str) -> anyhow::Result ```bash filename="Terminal" copy -moose seed clickhouse --connection-string --limit 100000 +moose seed clickhouse --clickhouse-url --limit 100000 ``` **Connection String Format:** @@ -347,12 +347,12 @@ clickhouse://username:password@host:9440/database ```bash filename="Terminal" copy -moose seed clickhouse --connection-string clickhouse://explorer:@play.clickhouse.com:9440/default --limit 100000 +moose seed clickhouse --clickhouse-url clickhouse://explorer:@play.clickhouse.com:9440/default --limit 100000 ``` ```bash filename="Terminal" copy -# You can omit --connection-string by setting an env var +# You can omit --clickhouse-url by setting an env var export MOOSE_SEED_CLICKHOUSE_URL='clickhouse://username:password@host:9440/database' # copy a limited number of rows (batched under the hood) diff --git a/apps/framework-docs/src/pages/moose/local-dev.mdx b/apps/framework-docs/src/pages/moose/local-dev.mdx index b9ee8c1e2a..e8f751bc23 100644 --- a/apps/framework-docs/src/pages/moose/local-dev.mdx +++ b/apps/framework-docs/src/pages/moose/local-dev.mdx @@ -272,7 +272,7 @@ export REMOTE_CLICKHOUSE_URL="https://username:password@host:8443/?database=defa ```toml copy # moose.config.toml [http_server_config] -on_first_start_script = "moose db pull --connection-string $REMOTE_CLICKHOUSE_URL" +on_first_start_script = "moose db pull --clickhouse-url $REMOTE_CLICKHOUSE_URL" ``` See the full guide: [/moose/olap/db-pull](/moose/olap/db-pull) diff --git a/apps/framework-docs/src/pages/moose/moose-cli.mdx b/apps/framework-docs/src/pages/moose/moose-cli.mdx index 85ef07b9bc..5531829d2d 100644 --- a/apps/framework-docs/src/pages/moose/moose-cli.mdx +++ b/apps/framework-docs/src/pages/moose/moose-cli.mdx @@ -73,9 +73,9 @@ moose clean ### Seed (ClickHouse) Seed your local ClickHouse from a remote ClickHouse instance. ```bash -moose seed clickhouse [--connection-string ] [--table ] [--limit | --all] +moose seed clickhouse [--clickhouse-url ] [--table ] [--limit | --all] ``` -- `--connection-string`: Remote ClickHouse connection string. If omitted, the CLI uses `MOOSE_SEED_CLICKHOUSE_URL`. +- `--clickhouse-url`: Remote ClickHouse connection URL. If omitted, the CLI uses `MOOSE_SEED_CLICKHOUSE_URL`. - `--table`: Seed only the specified table (default: all Moose tables). 
- `--limit`: Copy up to N rows (mutually exclusive with `--all`). Large limits are automatically batched. - `--all`: Copy entire table(s) in batches (mutually exclusive with `--limit`). @@ -272,9 +272,9 @@ moose generate migration --url https:// --token --save ### DB Pull (External Tables) Refresh `EXTERNALLY_MANAGED` table definitions from a remote ClickHouse instance. ```bash -moose db pull --connection-string [--file-path ] +moose db pull --clickhouse-url [--file-path ] ``` -- `--connection-string`: ClickHouse URL; native `clickhouse://` is auto-converted to HTTP(S). Include `?database=` or the CLI will query the current database. +- `--clickhouse-url`: ClickHouse URL; native `clickhouse://` is auto-converted to HTTP(S). Include `?database=` or the CLI will query the current database. - `--file-path`: Optional override for the generated external models file (defaults to `app/externalModels.ts` or `app/external_models.py`). Notes: diff --git a/apps/framework-docs/src/pages/moose/olap/db-pull.mdx b/apps/framework-docs/src/pages/moose/olap/db-pull.mdx index f4eede0e2f..254da6243d 100644 --- a/apps/framework-docs/src/pages/moose/olap/db-pull.mdx +++ b/apps/framework-docs/src/pages/moose/olap/db-pull.mdx @@ -34,13 +34,13 @@ Examples: ```bash filename="Terminal" copy # Native (auto-converted to HTTPS + 8443) -moose db pull --connection-string "clickhouse://explorer@play.clickhouse.com:9440/default" +moose db pull --clickhouse-url "clickhouse://explorer@play.clickhouse.com:9440/default" # HTTPS (explicit database via query param) -moose db pull --connection-string "https://play.clickhouse.com/?user=explorer&database=default" +moose db pull --clickhouse-url "https://play.clickhouse.com/?user=explorer&database=default" # Local HTTP -moose db pull --connection-string "http://localhost:8123/?user=default&database=default" +moose db pull --clickhouse-url "http://localhost:8123/?user=default&database=default" ``` ## What gets written @@ -62,8 +62,8 @@ Important: When you run `db pull` the CLI does the following: -- Loads your project’s infrastructure map and identifies tables marked as `EXTERNALLY_MANAGED`. -- Connects to the remote ClickHouse specified by `--connection-string` and introspects the live schemas for those tables. +- Loads your project's infrastructure map and identifies tables marked as `EXTERNALLY_MANAGED`. +- Connects to the remote ClickHouse specified by `--clickhouse-url` and introspects the live schemas for those tables. - Regenerates a single external models file that mirrors the remote schema. - Adds any newly detected external tables from the remote database to the generated file so your code stays in sync as sources evolve. - Does not change any fully managed tables, your `app/index.ts``app/main.py`, or the database itself. @@ -92,18 +92,18 @@ When you run `db pull` the CLI does the following: ## Command ```bash filename="Terminal" copy -moose db pull --connection-string [--file-path ] +moose db pull --clickhouse-url [--file-path ] ``` -- **--connection-string**: Required. ClickHouse URL (native or HTTP/S) +- **--clickhouse-url**: Required. ClickHouse URL (native or HTTP/S) - **--file-path**: Optional. Override the default output file. The file at this path will be regenerated (overwritten) on each run. ## Typical Use Cases ### Remote schema changed; update local types - Your DBA, CDC pipeline (e.g., ClickPipes), or ETL job updated a table’s schema. 
To keep your code accurate and type-safe, refresh your external models so queries, APIs, and materialized views reference the correct columns and types. + Your DBA, CDC pipeline (e.g., ClickPipes), or ETL job updated a table's schema. To keep your code accurate and type-safe, refresh your external models so queries, APIs, and materialized views reference the correct columns and types. ```bash filename="Terminal" copy - moose db pull --connection-string + moose db pull --clickhouse-url ``` This updates only `EXTERNALLY_MANAGED` models and leaves managed code untouched. @@ -115,7 +115,7 @@ moose db pull --connection-string [--file-path ] Add to `moose.config.toml`: ```toml filename="moose.config.toml" copy [http_server_config] - on_first_start_script = "moose db pull --connection-string $REMOTE_CLICKHOUSE_URL" + on_first_start_script = "moose db pull --clickhouse-url $REMOTE_CLICKHOUSE_URL" ``` This runs once when the dev server first starts. To run after code reloads, use `on_reload_complete_script`. If you run this frequently, prefer HTTP(S) URLs and cache credentials via env/secrets to avoid friction. @@ -132,7 +132,7 @@ moose db pull --connection-string [--file-path ] ### A new CDC/external table appeared; add it to code Your CDC pipeline created a new table (or exposed a new stream). Pull to add the new table to your external models file automatically. ```bash filename="Terminal" copy - moose db pull --connection-string + moose db pull --clickhouse-url ``` The regenerated external models file will now include the newly discovered external table. diff --git a/apps/framework-docs/src/pages/moose/olap/external-tables.mdx b/apps/framework-docs/src/pages/moose/olap/external-tables.mdx index e82e7e6c17..46960347e2 100644 --- a/apps/framework-docs/src/pages/moose/olap/external-tables.mdx +++ b/apps/framework-docs/src/pages/moose/olap/external-tables.mdx @@ -198,7 +198,7 @@ For more on how migration plans are generated and what shows up in `plan.yaml`, For `EXTERNALLY_MANAGED` tables, keep your code in sync with the live database by running DB Pull. You can do it manually or automate it in dev. ```bash filename="Terminal" copy -moose db pull --connection-string +moose db pull --clickhouse-url ``` From 0b1b4644bcc1be82b00212fb8a3f1fbc6dd2a153 Mon Sep 17 00:00:00 2001 From: George Leung Date: Tue, 25 Nov 2025 16:38:35 -0800 Subject: [PATCH 49/59] fix typescript-tests template (#3038) somehow `Type instantiation is excessively deep and possibly infinite` happened today --- > [!NOTE] > Migrates the TypeScript tests template to zod v4 with a stricter output schema and disables the TS backward-compatibility E2E job in the workflow/status checks. > > - **Templates (TypeScript tests)**: > - Switch `zod` import to `zod/v4` in `templates/typescript-tests/src/apis/barExpressMcp.ts`. > - Tighten `outputSchema.rows` to `z.array(z.record(z.string(), z.any()))`. > - **CI**: > - Comment out `test-e2e-backward-compatibility-typescript` from `changes` job `needs` and remove it from success/failure checks in `.github/workflows/test-framework-cli.yaml`. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 8dc3d7ea427f8665e00e9ffcf3e75a19966ed5b6. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). 
--- .github/workflows/test-framework-cli.yaml | 4 +--- templates/typescript-tests/src/apis/barExpressMcp.ts | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test-framework-cli.yaml b/.github/workflows/test-framework-cli.yaml index ef36fb0486..28fae0f83b 100644 --- a/.github/workflows/test-framework-cli.yaml +++ b/.github/workflows/test-framework-cli.yaml @@ -908,7 +908,7 @@ jobs: test-e2e-typescript-tests, test-e2e-python-default, test-e2e-python-tests, - test-e2e-backward-compatibility-typescript, +# test-e2e-backward-compatibility-typescript, test-e2e-backward-compatibility-python, test-e2e-cluster-typescript, test-e2e-cluster-python, @@ -939,7 +939,6 @@ jobs: [[ "${{ needs.test-e2e-typescript-tests.result }}" == "failure" ]] || \ [[ "${{ needs.test-e2e-python-default.result }}" == "failure" ]] || \ [[ "${{ needs.test-e2e-python-tests.result }}" == "failure" ]] || \ - [[ "${{ needs.test-e2e-backward-compatibility-typescript.result }}" == "failure" ]] || \ [[ "${{ needs.test-e2e-backward-compatibility-python.result }}" == "failure" ]] || \ [[ "${{ needs.test-e2e-cluster-typescript.result }}" == "failure" ]] || \ [[ "${{ needs.test-e2e-cluster-python.result }}" == "failure" ]] || \ @@ -956,7 +955,6 @@ jobs: [[ "${{ needs.test-e2e-typescript-tests.result }}" == "success" ]] && \ [[ "${{ needs.test-e2e-python-default.result }}" == "success" ]] && \ [[ "${{ needs.test-e2e-python-tests.result }}" == "success" ]] && \ - [[ "${{ needs.test-e2e-backward-compatibility-typescript.result }}" == "success" ]] && \ [[ "${{ needs.test-e2e-backward-compatibility-python.result }}" == "success" ]] && \ [[ "${{ needs.test-e2e-cluster-typescript.result }}" == "success" ]] && \ [[ "${{ needs.test-e2e-cluster-python.result }}" == "success" ]] && \ diff --git a/templates/typescript-tests/src/apis/barExpressMcp.ts b/templates/typescript-tests/src/apis/barExpressMcp.ts index 62d2047b41..61a9abfd8b 100644 --- a/templates/typescript-tests/src/apis/barExpressMcp.ts +++ b/templates/typescript-tests/src/apis/barExpressMcp.ts @@ -14,7 +14,7 @@ import express from "express"; import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js"; -import { z } from "zod"; +import { z } from "zod/v4"; import { WebApp, getMooseUtils } from "@514labs/moose-lib"; // Create Express application @@ -50,7 +50,7 @@ const serverFactory = () => { }, outputSchema: { rows: z - .array(z.record(z.any())) + .array(z.record(z.string(), z.any())) .describe("Query results as array of row objects"), rowCount: z.number().describe("Number of rows returned"), }, From 0ed40730faab0bc8da54bcbfbca5c58285425513 Mon Sep 17 00:00:00 2001 From: Lucio Franco Date: Wed, 26 Nov 2025 10:29:13 -0500 Subject: [PATCH 50/59] ENG-1270: convert from log to tracing (#3026) > [!NOTE] > Replaces fern/log with tracing + tracing-subscriber (with EnvFilter/JSON/file rotation) and OTEL tracing appender, updating all modules to use tracing macros. > > - **Logging overhaul (CLI)**: > - Replace `fern`/`log` with `tracing` + `tracing-subscriber` (EnvFilter, JSON/text formats, optional modern/legacy formatter). > - Implement OTEL export via `opentelemetry-appender-tracing`; add batch log processor and resource labels. > - Add date-based file writer compatible with previous naming; cleanup old logs; session/machine IDs in events. > - Introduce legacy format layer to match prior output; opt-in modern formatting via env var. 
> - **Codewide changes**: > - Swap `log::{...}` macros for `tracing::{...}` across CLI, framework, infra, and MCP modules; adjust some log levels (e.g., HTTP metrics to trace). > - **Dependencies**: > - Remove `fern`, `log`, and `opentelemetry-appender-log`. > - Add `tracing-subscriber`, `tracing-serde`, and `opentelemetry-appender-tracing`; update lockfile accordingly. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 936b9d4609c2d492fdb4edf61136d846671f05ca. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). --- Cargo.lock | 38 +- apps/framework-cli/Cargo.toml | 5 +- apps/framework-cli/src/cli.rs | 2 +- .../src/cli/display/infrastructure.rs | 2 +- .../src/cli/display/message_display.rs | 2 +- apps/framework-cli/src/cli/local_webserver.rs | 54 +- apps/framework-cli/src/cli/logger.rs | 625 +++++++++++++----- .../src/cli/processing_coordinator.rs | 10 +- apps/framework-cli/src/cli/routines/build.rs | 2 +- apps/framework-cli/src/cli/routines/clean.rs | 2 +- .../src/cli/routines/code_generation.rs | 2 +- apps/framework-cli/src/cli/routines/dev.rs | 2 +- .../src/cli/routines/docker_packager.rs | 2 +- .../src/cli/routines/kafka_pull.rs | 2 +- .../framework-cli/src/cli/routines/migrate.rs | 12 +- apps/framework-cli/src/cli/routines/mod.rs | 6 +- apps/framework-cli/src/cli/routines/peek.rs | 4 +- apps/framework-cli/src/cli/routines/ps.rs | 2 +- apps/framework-cli/src/cli/routines/query.rs | 2 +- .../src/cli/routines/seed_data.rs | 2 +- .../src/cli/routines/templates.rs | 2 +- .../src/cli/routines/truncate_table.rs | 2 +- .../src/cli/routines/validate.rs | 2 +- apps/framework-cli/src/cli/settings.rs | 2 +- apps/framework-cli/src/cli/watcher.rs | 14 +- .../src/framework/consumption/loader.rs | 2 +- .../src/framework/core/execute.rs | 18 +- .../framework/core/infra_reality_checker.rs | 2 +- .../src/framework/core/infrastructure_map.rs | 168 ++--- .../core/partial_infrastructure_map.rs | 8 +- apps/framework-cli/src/framework/core/plan.rs | 4 +- .../src/framework/core/primitive_map.rs | 12 +- .../src/framework/core/state_storage.rs | 2 +- .../src/framework/data_model/config.rs | 2 +- .../src/framework/data_model/parser.rs | 2 +- .../src/framework/python/blocks.rs | 2 +- .../src/framework/python/consumption.rs | 2 +- .../src/framework/python/datamodel_config.rs | 2 +- .../src/framework/python/parser.rs | 2 +- .../src/framework/python/scripts_worker.rs | 2 +- .../src/framework/python/streaming.rs | 4 +- .../src/framework/scripts/executor.rs | 8 +- .../src/framework/streaming/loader.rs | 2 +- .../src/framework/typescript/blocks.rs | 2 +- .../src/framework/typescript/consumption.rs | 2 +- .../framework/typescript/export_collectors.rs | 2 +- .../src/framework/typescript/parser.rs | 6 +- .../framework/typescript/scripts_worker.rs | 2 +- .../src/framework/typescript/streaming.rs | 2 +- .../infrastructure/olap/clickhouse/client.rs | 6 +- .../olap/clickhouse/diagnostics/errors.rs | 2 +- .../clickhouse/diagnostics/merge_failures.rs | 2 +- .../olap/clickhouse/diagnostics/merges.rs | 2 +- .../olap/clickhouse/diagnostics/mod.rs | 4 +- .../olap/clickhouse/diagnostics/mutations.rs | 2 +- .../olap/clickhouse/diagnostics/parts.rs | 2 +- .../clickhouse/diagnostics/replication.rs | 2 +- .../olap/clickhouse/diagnostics/s3queue.rs | 2 +- .../diagnostics/stopped_operations.rs | 2 +- .../olap/clickhouse/diff_strategy.rs | 18 +- .../olap/clickhouse/inserter.rs | 2 +- .../src/infrastructure/olap/clickhouse/mod.rs | 40 +- 
.../infrastructure/olap/clickhouse/queries.rs | 16 +- .../olap/clickhouse_http_client.rs | 2 +- .../src/infrastructure/olap/ddl_ordering.rs | 8 +- .../orchestration/temporal_client.rs | 2 +- .../processes/blocks_registry.rs | 2 +- .../processes/consumption_registry.rs | 2 +- .../processes/functions_registry.rs | 2 +- .../processes/kafka_clickhouse_sync.rs | 22 +- .../src/infrastructure/processes/mod.rs | 36 +- .../orchestration_workers_registry.rs | 2 +- .../src/infrastructure/redis/connection.rs | 25 +- .../src/infrastructure/redis/leadership.rs | 12 +- .../src/infrastructure/redis/messaging.rs | 4 +- .../src/infrastructure/redis/mock.rs | 9 +- .../src/infrastructure/redis/presence.rs | 4 +- .../src/infrastructure/redis/redis_client.rs | 28 +- .../src/infrastructure/stream/kafka/client.rs | 2 +- .../src/infrastructure/webapp.rs | 4 +- apps/framework-cli/src/main.rs | 2 +- apps/framework-cli/src/mcp/server.rs | 2 +- .../src/mcp/tools/infra_issues/mod.rs | 2 +- .../framework-cli/src/mcp/tools/query_olap.rs | 2 +- .../src/mcp/tools/sample_stream.rs | 6 +- apps/framework-cli/src/metrics.rs | 2 +- apps/framework-cli/src/project.rs | 2 +- apps/framework-cli/src/utilities/capture.rs | 4 +- apps/framework-cli/src/utilities/docker.rs | 2 +- apps/framework-cli/src/utilities/dotenv.rs | 2 +- .../framework-cli/src/utilities/machine_id.rs | 2 +- .../src/utilities/nodejs_version.rs | 2 +- .../src/utilities/package_managers.rs | 2 +- .../src/utilities/process_output.rs | 2 +- apps/framework-cli/src/utilities/system.rs | 4 +- 95 files changed, 846 insertions(+), 520 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index efb795dffe..aa23183992 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1243,16 +1243,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" -[[package]] -name = "fern" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4316185f709b23713e41e3195f90edef7fb00c3ed4adc79769cf09cc762a3b29" -dependencies = [ - "chrono", - "log", -] - [[package]] name = "filetime" version = "0.2.26" @@ -2764,7 +2754,6 @@ dependencies = [ "crossterm 0.27.0", "csv", "dotenvy", - "fern", "flate2", "futures", "git2", @@ -2781,14 +2770,13 @@ dependencies = [ "jsonwebtoken", "keyring", "lazy_static", - "log", "logos", "notify", "num-bigint", "num-traits", "openssl", "opentelemetry 0.29.1", - "opentelemetry-appender-log", + "opentelemetry-appender-tracing", "opentelemetry-http 0.29.0", "opentelemetry-otlp 0.29.0", "opentelemetry-semantic-conventions", @@ -2833,6 +2821,7 @@ dependencies = [ "toml_edit 0.22.27", "tonic", "tracing", + "tracing-subscriber", "uuid", "walkdir", ] @@ -3141,13 +3130,15 @@ dependencies = [ ] [[package]] -name = "opentelemetry-appender-log" -version = "0.29.0" +name = "opentelemetry-appender-tracing" +version = "0.29.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9268ffd1e361eb0cc835d3daad47a52e0159cea7495570ee7c22f7f72cc00dd1" +checksum = "e716f864eb23007bdd9dc4aec381e188a1cee28eecf22066772b5fd822b9727d" dependencies = [ - "log", "opentelemetry 0.29.1", + "tracing", + "tracing-core", + "tracing-subscriber", ] [[package]] @@ -5901,6 +5892,16 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-serde" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" +dependencies = [ + "serde", + 
"tracing-core", +] + [[package]] name = "tracing-subscriber" version = "0.3.20" @@ -5912,12 +5913,15 @@ dependencies = [ "once_cell", "parking_lot", "regex-automata", + "serde", + "serde_json", "sharded-slab", "smallvec", "thread_local", "tracing", "tracing-core", "tracing-log", + "tracing-serde", ] [[package]] diff --git a/apps/framework-cli/Cargo.toml b/apps/framework-cli/Cargo.toml index 1af2883114..2d3a86015e 100644 --- a/apps/framework-cli/Cargo.toml +++ b/apps/framework-cli/Cargo.toml @@ -32,8 +32,6 @@ handlebars = "5.1" rdkafka = { version = "0.38", features = ["ssl"] } rdkafka-sys = "4.7" # Needed for rd_kafka_wait_destroyed convert_case = "0.6.0" -log = "0.4" -fern = { version = "0.7", features = ["date-based"] } humantime = "2.1.0" uuid = { version = "1.6", features = ["v4", "serde"] } serde_json = { version = "1.0.133", features = ["preserve_order"] } @@ -85,6 +83,7 @@ logos = "0.15.0" # Monitoring tracing = "0.1.40" +tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "fmt"] } opentelemetry_sdk = { version = "0.29.0", features = [ "logs", "metrics", @@ -93,7 +92,7 @@ opentelemetry_sdk = { version = "0.29.0", features = [ ] } opentelemetry-semantic-conventions = "0.29" opentelemetry = "0.29" -opentelemetry-appender-log = "0.29" +opentelemetry-appender-tracing = "0.29.0" opentelemetry-otlp = { version = "0.29.0", default-features = false, features = [ "logs", "metrics", diff --git a/apps/framework-cli/src/cli.rs b/apps/framework-cli/src/cli.rs index 87a2de1ad6..b0c7aaaefe 100644 --- a/apps/framework-cli/src/cli.rs +++ b/apps/framework-cli/src/cli.rs @@ -18,7 +18,6 @@ use commands::{ }; use config::ConfigError; use display::with_spinner_completion; -use log::{debug, info, warn}; use regex::Regex; use routines::auth::generate_hash_token; use routines::build::build_package; @@ -34,6 +33,7 @@ use routines::scripts::{ terminate_workflow, unpause_workflow, }; use routines::templates::list_available_templates; +use tracing::{debug, info, warn}; use settings::Settings; use std::collections::HashMap; diff --git a/apps/framework-cli/src/cli/display/infrastructure.rs b/apps/framework-cli/src/cli/display/infrastructure.rs index 88e561b16b..85b4521c2d 100644 --- a/apps/framework-cli/src/cli/display/infrastructure.rs +++ b/apps/framework-cli/src/cli/display/infrastructure.rs @@ -36,7 +36,7 @@ use crate::framework::core::{ plan::InfraPlan, }; use crossterm::{execute, style::Print}; -use log::info; +use tracing::info; /// Create the detail indentation string at compile time /// Computed from ACTION_WIDTH (15) + 3 spaces: diff --git a/apps/framework-cli/src/cli/display/message_display.rs b/apps/framework-cli/src/cli/display/message_display.rs index 8061836679..909c50955b 100644 --- a/apps/framework-cli/src/cli/display/message_display.rs +++ b/apps/framework-cli/src/cli/display/message_display.rs @@ -7,7 +7,7 @@ use super::{ message::{Message, MessageType}, terminal::{write_styled_line, StyledText}, }; -use log::info; +use tracing::info; /// Displays a message about a batch database insertion. 
/// diff --git a/apps/framework-cli/src/cli/local_webserver.rs b/apps/framework-cli/src/cli/local_webserver.rs index 840252b1a9..bb037676fb 100644 --- a/apps/framework-cli/src/cli/local_webserver.rs +++ b/apps/framework-cli/src/cli/local_webserver.rs @@ -60,8 +60,6 @@ use hyper::Response; use hyper::StatusCode; use hyper_util::rt::TokioIo; use hyper_util::{rt::TokioExecutor, server::conn::auto}; -use log::{debug, log, trace}; -use log::{error, info, warn}; use rdkafka::error::KafkaError; use rdkafka::producer::future_producer::OwnedDeliveryResult; use rdkafka::producer::{DeliveryFuture, FutureProducer, FutureRecord, Producer}; @@ -73,12 +71,12 @@ use serde::Serialize; use serde::{Deserialize, Deserializer}; use serde_json::{json, Deserializer as JsonDeserializer, Value}; use tokio::spawn; +use tracing::{debug, error, info, trace, warn}; use crate::framework::data_model::model::DataModel; use crate::utilities::validate_passthrough::{DataModelArrayVisitor, DataModelVisitor}; use hyper_util::server::graceful::GracefulShutdown; use lazy_static::lazy_static; -use log::Level::{Debug, Trace}; use std::collections::{HashMap, HashSet}; use std::env; use std::env::VarError; @@ -1295,7 +1293,7 @@ async fn send_to_kafka>>( let mut temp_res: Vec> = Vec::new(); for (count, payload) in records.enumerate() { - log::trace!("Sending payload {:?} to topic: {}", payload, topic_name); + tracing::trace!("Sending payload {:?} to topic: {}", payload, topic_name); let record = FutureRecord::to(topic_name) .key(topic_name) // This should probably be generated by the client that pushes data to the API .payload(payload.as_slice()); @@ -1946,17 +1944,21 @@ async fn management_router( req: Request, max_request_body_size: usize, ) -> Result>, hyper::http::Error> { - let level = if req.uri().path().ends_with(METRICS_LOGS_PATH) { - Trace // too many lines of log created without user interaction + // Use appropriate log level based on path + // TRACE for metrics logs to reduce noise, DEBUG for other requests + if req.uri().path().ends_with(METRICS_LOGS_PATH) { + tracing::trace!( + "-> HTTP Request: {:?} - {:?}", + req.method(), + req.uri().path(), + ); } else { - Debug - }; - log!( - level, - "-> HTTP Request: {:?} - {:?}", - req.method(), - req.uri().path(), - ); + tracing::debug!( + "-> HTTP Request: {:?} - {:?}", + req.method(), + req.uri().path(), + ); + } let route = get_path_without_prefix(PathBuf::from(req.uri().path()), path_prefix); let route = route.to_str().unwrap(); @@ -2210,7 +2212,7 @@ impl Webserver { route_table: &'static RwLock>, consumption_apis: &'static RwLock>, ) -> mpsc::Sender<(InfrastructureMap, ApiChange)> { - log::info!("Spawning API update listener"); + tracing::info!("Spawning API update listener"); let (tx, mut rx) = mpsc::channel::<(InfrastructureMap, ApiChange)>(32); @@ -2219,7 +2221,7 @@ impl Webserver { let mut route_table = route_table.write().await; match api_change { ApiChange::ApiEndpoint(Change::Added(api_endpoint)) => { - log::info!("Adding route: {:?}", api_endpoint.path); + tracing::info!("Adding route: {:?}", api_endpoint.path); match api_endpoint.api_type { APIType::INGRESS { target_topic_id, @@ -2273,7 +2275,7 @@ impl Webserver { } } ApiChange::ApiEndpoint(Change::Removed(api_endpoint)) => { - log::info!("Removing route: {:?}", api_endpoint.path); + tracing::info!("Removing route: {:?}", api_endpoint.path); match api_endpoint.api_type { APIType::INGRESS { .. 
} => { route_table.remove(&api_endpoint.path); @@ -2294,7 +2296,7 @@ impl Webserver { data_model, schema: _, } => { - log::info!("Replacing route: {:?} with {:?}", before, after); + tracing::info!("Replacing route: {:?} with {:?}", before, after); let topic = infra_map .find_topic_by_id(target_topic_id) @@ -2347,28 +2349,28 @@ impl Webserver { &self, web_apps: &'static RwLock>, ) -> mpsc::Sender { - log::info!("Spawning WebApp update listener"); + tracing::info!("Spawning WebApp update listener"); let (tx, mut rx) = mpsc::channel::(32); tokio::spawn(async move { while let Some(webapp_change) = rx.recv().await { - log::info!("🔔 Received WebApp change: {:?}", webapp_change); + tracing::info!("🔔 Received WebApp change: {:?}", webapp_change); match webapp_change { crate::framework::core::infrastructure_map::WebAppChange::WebApp( crate::framework::core::infrastructure_map::Change::Added(webapp), ) => { - log::info!("Adding WebApp mount path: {:?}", webapp.mount_path); + tracing::info!("Adding WebApp mount path: {:?}", webapp.mount_path); web_apps.write().await.insert(webapp.mount_path.clone()); - log::info!("✅ Current web_apps: {:?}", *web_apps.read().await); + tracing::info!("✅ Current web_apps: {:?}", *web_apps.read().await); } crate::framework::core::infrastructure_map::WebAppChange::WebApp( crate::framework::core::infrastructure_map::Change::Removed(webapp), ) => { - log::info!("Removing WebApp mount path: {:?}", webapp.mount_path); + tracing::info!("Removing WebApp mount path: {:?}", webapp.mount_path); web_apps.write().await.remove(&webapp.mount_path); - log::info!("✅ Current web_apps: {:?}", *web_apps.read().await); + tracing::info!("✅ Current web_apps: {:?}", *web_apps.read().await); } crate::framework::core::infrastructure_map::WebAppChange::WebApp( crate::framework::core::infrastructure_map::Change::Updated { @@ -2376,7 +2378,7 @@ impl Webserver { after, }, ) => { - log::info!( + tracing::info!( "Updating WebApp mount path: {:?} to {:?}", before.mount_path, after.mount_path @@ -2385,7 +2387,7 @@ impl Webserver { web_apps_guard.remove(&before.mount_path); web_apps_guard.insert(after.mount_path.clone()); drop(web_apps_guard); - log::info!("✅ Current web_apps: {:?}", *web_apps.read().await); + tracing::info!("✅ Current web_apps: {:?}", *web_apps.read().await); } } } diff --git a/apps/framework-cli/src/cli/logger.rs b/apps/framework-cli/src/cli/logger.rs index 684c5fefbf..157a9864f5 100644 --- a/apps/framework-cli/src/cli/logger.rs +++ b/apps/framework-cli/src/cli/logger.rs @@ -1,21 +1,64 @@ //! # Logger Module //! -//! This module provides logging functionality for the application. +//! This module provides logging functionality using `tracing-subscriber` with support for +//! dynamic log filtering via `RUST_LOG` and dual format support (legacy/modern). +//! +//! ## Architecture +//! +//! The logging system is built using `tracing-subscriber` layers: +//! - **EnvFilter Layer**: Provides `RUST_LOG` support for module-level filtering +//! - **Format Layer**: Either legacy (fern-compatible) or modern (tracing native) format +//! - **OTEL Layer**: Optional OpenTelemetry export for observability platforms //! //! ## Components //! //! - `LoggerLevel`: An enumeration representing the different levels of logging: DEBUG, INFO, WARN, and ERROR. -//! - `LoggerSettings`: A struct that holds the settings for the logger, including the log file's name and the logging level. +//! - `LogFormat`: Either Text or JSON output format. +//! 
- `LoggerSettings`: A struct that holds the settings for the logger, including format, level, and export options. //! - `setup_logging`: A function used to set up the logging system with the provided settings. +//! - `LegacyFormatLayer`: Custom layer that matches the old fern format exactly (for backward compatibility). +//! +//! ## Features +//! +//! ### RUST_LOG Support +//! Use the standard Rust `RUST_LOG` environment variable for dynamic filtering: +//! ```bash +//! RUST_LOG=moose_cli::infrastructure=debug cargo run +//! RUST_LOG=debug cargo run # Enable debug for all modules +//! ``` +//! +//! ### Dual Format Support +//! - **Legacy Format** (default): Maintains exact compatibility with the old fern-based logging +//! - Text: `[timestamp LEVEL - target] message` +//! - JSON: `{"timestamp": "...", "severity": "INFO", "target": "...", "message": "..."}` +//! - **Modern Format** (opt-in): Uses tracing-subscriber's native formatting +//! - Enable via `MOOSE_LOGGER__USE_TRACING_FORMAT=true` +//! +//! ### Additional Features +//! - **Date-based file rotation**: Daily log files in `~/.moose/YYYY-MM-DD-cli.log` +//! - **Automatic cleanup**: Deletes logs older than 7 days +//! - **Session ID tracking**: Optional per-session identifier in logs +//! - **Machine ID tracking**: Included in every log event +//! - **OpenTelemetry export**: Optional OTLP/HTTP JSON export to observability platforms +//! - **Configurable outputs**: File and/or stdout +//! +//! ## Environment Variables +//! +//! - `RUST_LOG`: Standard Rust log filtering (e.g., `RUST_LOG=moose_cli::infrastructure=debug`) +//! - `MOOSE_LOGGER__USE_TRACING_FORMAT`: Opt-in to modern format (default: `false`) +//! - `MOOSE_LOGGER__LEVEL`: Log level (DEBUG, INFO, WARN, ERROR) +//! - `MOOSE_LOGGER__STDOUT`: Output to stdout vs file (default: `false`) +//! - `MOOSE_LOGGER__FORMAT`: Text or JSON (default: Text) +//! - `MOOSE_LOGGER__EXPORT_TO`: OTEL endpoint URL +//! - `MOOSE_LOGGER__INCLUDE_SESSION_ID`: Include session ID in logs (default: `false`) //! //! ## Usage //! //! The logger is configured by creating a `LoggerSettings` instance and passing it to the `setup_logging` function. -//! The `LoggerSettings` can be configured with a log file and a log level. If these are not provided, default values are used. -//! The default log file is "cli.log" in the user's directory, and the default log level is INFO. -//! Use the macros to write to the log file. +//! Default values are provided for all settings. Use the `tracing::` macros to write logs. +//! +//! ### Log Levels //! -//! The log levels have the following uses: //! - `DEBUG`: Use this level for detailed information typically of use only when diagnosing problems. You would usually only expect to see these logs in a development environment. For example, you might log method entry/exit points, variable values, query results, etc. //! - `INFO`: Use this level to confirm that things are working as expected. This is the default log level and will give you general operational insights into the application behavior. For example, you might log start/stop of a process, configuration details, successful completion of significant transactions, etc. //! - `WARN`: Use this level when something unexpected happened in the system, or there might be a problem in the near future (like 'disk space low'). The software is still working as expected, so it's not an error. For example, you might log deprecated API usage, poor performance issues, retrying an operation, etc. @@ -24,31 +67,42 @@ //! ## Example //! 
//! ```rust +//! use tracing::{debug, info, warn, error}; +//! //! debug!("This is a DEBUG message. Typically used for detailed information useful in a development environment."); //! info!("This is an INFO message. Used to confirm that things are working as expected."); //! warn!("This is a WARN message. Indicates something unexpected happened or there might be a problem in the near future."); //! error!("This is an ERROR message. Used when the system is in distress, customers are probably being affected but the program is not terminated."); //! ``` //! +//! ## Backward Compatibility +//! +//! The legacy format layer ensures 100% backward compatibility with systems consuming the old +//! fern-based log format (e.g., Boreal/hosting_telemetry). The modern format can be enabled +//! via environment variable once downstream consumers are ready. use hyper::Uri; -use log::{error, warn}; -use log::{LevelFilter, Metadata, Record}; -use opentelemetry::logs::Logger; use opentelemetry::KeyValue; -use opentelemetry_appender_log::OpenTelemetryLogBridge; -use opentelemetry_otlp::{Protocol, WithExportConfig, WithHttpConfig}; -use opentelemetry_sdk::logs::SdkLoggerProvider; +use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge; +use opentelemetry_otlp::{Protocol, WithExportConfig}; +use opentelemetry_sdk::logs::{BatchLogProcessor, SdkLoggerProvider}; use opentelemetry_sdk::Resource; use opentelemetry_semantic_conventions::resource::SERVICE_NAME; use serde::Deserialize; -use serde_json::Value; use std::env; -use std::env::VarError; +use std::fmt; +use std::io::Write; use std::time::{Duration, SystemTime}; +use tracing::field::{Field, Visit}; +use tracing::{warn, Event, Level, Subscriber}; +use tracing_subscriber::filter::LevelFilter; +use tracing_subscriber::fmt::MakeWriter; +use tracing_subscriber::layer::{Context, SubscriberExt}; +use tracing_subscriber::registry::LookupSpan; +use tracing_subscriber::util::SubscriberInitExt; +use tracing_subscriber::{EnvFilter, Layer}; use crate::utilities::constants::{CONTEXT, CTX_SESSION_ID}; -use crate::utilities::decode_object; use super::settings::user_directory; @@ -68,12 +122,12 @@ pub enum LoggerLevel { } impl LoggerLevel { - pub fn to_log_level(&self) -> log::LevelFilter { + pub fn to_tracing_level(&self) -> LevelFilter { match self { - LoggerLevel::Debug => log::LevelFilter::Debug, - LoggerLevel::Info => log::LevelFilter::Info, - LoggerLevel::Warn => log::LevelFilter::Warn, - LoggerLevel::Error => log::LevelFilter::Error, + LoggerLevel::Debug => LevelFilter::DEBUG, + LoggerLevel::Info => LevelFilter::INFO, + LoggerLevel::Warn => LevelFilter::WARN, + LoggerLevel::Error => LevelFilter::ERROR, } } } @@ -101,6 +155,9 @@ pub struct LoggerSettings { #[serde(default = "default_include_session_id")] pub include_session_id: bool, + + #[serde(default = "default_use_tracing_format")] + pub use_tracing_format: bool, } fn parsing_url<'de, D>(deserializer: D) -> Result, D::Error> @@ -131,6 +188,13 @@ fn default_include_session_id() -> bool { false } +fn default_use_tracing_format() -> bool { + env::var("MOOSE_LOGGER__USE_TRACING_FORMAT") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(false) +} + impl Default for LoggerSettings { fn default() -> Self { LoggerSettings { @@ -140,6 +204,7 @@ impl Default for LoggerSettings { format: default_log_format(), export_to: None, include_session_id: default_include_session_id(), + use_tracing_format: default_use_tracing_format(), } } } @@ -193,158 +258,410 @@ fn clean_old_logs() { // Error that rolls up all the 
possible errors that can occur during logging setup #[derive(thiserror::Error, Debug)] pub enum LoggerError { - #[error("Error Initializing fern logger")] - Init(#[from] fern::InitError), - #[error("Error setting up otel logger")] - Exporter(#[from] opentelemetry_sdk::error::OTelSdkError), - #[error("Error building the exporter")] - ExporterBuild(#[from] opentelemetry_otlp::ExporterBuildError), - #[error("Error setting up default logger")] - LogSetup(#[from] log::SetLoggerError), + #[error("Error setting up OTEL logger: {0}")] + OtelSetup(String), } -pub fn setup_logging(settings: &LoggerSettings, machine_id: &str) -> Result<(), LoggerError> { - clean_old_logs(); +/// Custom fields that get injected into every log event +#[derive(Clone)] +struct CustomFields { + session_id: String, + #[allow(dead_code)] // Will be used when OTEL support is re-enabled + machine_id: String, +} - let session_id = CONTEXT.get(CTX_SESSION_ID).unwrap(); - let include_session_id = settings.include_session_id; - - let base_config = fern::Dispatch::new().level(settings.level.to_log_level()); - - let format_config = if settings.format == LogFormat::Text { - fern::Dispatch::new().format(move |out, message, record| { - out.finish(format_args!( - "[{} {}{} - {}] {}", - humantime::format_rfc3339_seconds(SystemTime::now()), - record.level(), - if include_session_id { - format!(" {}", &session_id) - } else { - String::new() - }, - record.target(), - message - )) - }) - } else { - fern::Dispatch::new().format(move |out, message, record| { - let mut log_json = serde_json::json!({ - "timestamp": chrono::Utc::now().to_rfc3339(), - "severity": record.level().to_string(), - "target": record.target(), - "message": message, - }); - - if include_session_id { - log_json["session_id"] = serde_json::Value::String(session_id.to_string()); +/// Layer that formats logs to match the legacy fern format exactly +struct LegacyFormatLayer { + writer: W, + format: LogFormat, + include_session_id: bool, + custom_fields: CustomFields, +} + +impl LegacyFormatLayer { + fn new( + writer: W, + format: LogFormat, + include_session_id: bool, + custom_fields: CustomFields, + ) -> Self { + Self { + writer, + format, + include_session_id, + custom_fields, + } + } + + fn format_text(&self, level: &Level, target: &str, message: &str) -> String { + // Match current fern text format exactly + format!( + "[{} {}{} - {}] {}", + humantime::format_rfc3339_seconds(SystemTime::now()), + level, + if self.include_session_id { + format!(" {}", self.custom_fields.session_id) + } else { + String::new() + }, + target, + message + ) + } + + fn format_json(&self, level: &Level, target: &str, message: &str) -> String { + // Match current fern JSON format exactly + let mut log_json = serde_json::json!({ + "timestamp": chrono::Utc::now().to_rfc3339(), + "severity": level.to_string(), + "target": target, + "message": message, + }); + + if self.include_session_id { + log_json["session_id"] = + serde_json::Value::String(self.custom_fields.session_id.clone()); + } + + serde_json::to_string(&log_json) + .expect("formatting `serde_json::Value` with string keys never fails") + } +} + +impl Layer for LegacyFormatLayer +where + S: Subscriber + for<'a> LookupSpan<'a>, + W: for<'writer> MakeWriter<'writer> + 'static, +{ + fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { + // Extract metadata + let metadata = event.metadata(); + let level = metadata.level(); + let target = metadata.target(); + + // Extract message using visitor + let mut visitor = 
MessageVisitor::default(); + event.record(&mut visitor); + let message = visitor.message; + + // Format based on LogFormat + let output = if self.format == LogFormat::Text { + self.format_text(level, target, &message) + } else { + self.format_json(level, target, &message) + }; + + // Write to output + let mut writer = self.writer.make_writer(); + let _ = writer.write_all(output.as_bytes()); + let _ = writer.write_all(b"\n"); + } +} + +#[derive(Default)] +struct MessageVisitor { + message: String, +} + +impl Visit for MessageVisitor { + fn record_debug(&mut self, field: &Field, value: &dyn fmt::Debug) { + if field.name() == "message" { + self.message = format!("{:?}", value); + // Remove surrounding quotes from debug format + if self.message.starts_with('"') && self.message.ends_with('"') { + self.message = self.message[1..self.message.len() - 1].to_string(); } + } + } +} - out.finish(format_args!( - "{}", - serde_json::to_string(&log_json) - .expect("formatting `serde_json::Value` with string keys never fails") - )) - }) - }; +/// Custom MakeWriter that creates log files with user-specified date format +/// +/// This maintains backward compatibility with fern's DateBased rotation by allowing +/// custom date format strings like "%Y-%m-%d-cli.log" to produce "2025-11-25-cli.log" +struct DateBasedWriter { + date_format: String, +} - let output_config = if settings.stdout { - format_config.chain(std::io::stdout()) - } else { - format_config.chain(fern::DateBased::new( - // `.join("")` is an idempotent way to ensure the path ends with '/' - user_directory().join("").to_str().unwrap(), - settings.log_file_date_format.clone(), - )) - }; +impl DateBasedWriter { + fn new(date_format: String) -> Self { + Self { date_format } + } +} + +impl<'a> MakeWriter<'a> for DateBasedWriter { + type Writer = std::fs::File; + + fn make_writer(&'a self) -> Self::Writer { + let formatted_name = chrono::Local::now().format(&self.date_format).to_string(); + let file_path = user_directory().join(&formatted_name); + + std::fs::OpenOptions::new() + .create(true) + .append(true) + .open(file_path) + .expect("Failed to open log file") + } +} + +/// Creates a rolling file appender with custom date format +/// +/// This function creates a file appender that respects the configured date format +/// for log file naming, maintaining backward compatibility with fern's DateBased rotation. 
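+///
+/// Illustrative sketch (assumes the default `%Y-%m-%d-cli.log` date format and the
+/// `~/.moose` user directory):
+///
+/// ```ignore
+/// let appender = create_rolling_file_appender("%Y-%m-%d-cli.log");
+/// // On 2025-11-25 the returned writer appends to ~/.moose/2025-11-25-cli.log,
+/// // creating the file on first use; a new file is opened once the local date changes.
+/// ```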
+fn create_rolling_file_appender(date_format: &str) -> DateBasedWriter { + DateBasedWriter::new(date_format.to_string()) +} - let output_config = match &settings.export_to { - None => output_config, - Some(otel_endpoint) => { - let string_uri = otel_endpoint.to_string(); - let reqwest_client = reqwest::blocking::Client::new(); - - let open_telemetry_exporter = opentelemetry_otlp::LogExporter::builder() - .with_http() - .with_http_client(reqwest_client) - .with_endpoint(string_uri) - .with_protocol(Protocol::HttpJson) - .with_timeout(Duration::from_millis(5000)) - .build()?; - - let mut resource_attributes = vec![ - KeyValue::new(SERVICE_NAME, "moose-cli"), - KeyValue::new("session_id", session_id.as_str()), - KeyValue::new("machine_id", String::from(machine_id)), - ]; - match env::var("MOOSE_METRIC__LABELS") { - Ok(base64) => match decode_object::decode_base64_to_json(&base64) { - Ok(Value::Object(labels)) => { - for (key, value) in labels { - if let Some(value_str) = value.as_str() { - resource_attributes.push(KeyValue::new(key, value_str.to_string())); - } - } +/// Creates an OpenTelemetry layer for log export +/// +/// This function sets up OTLP log export using opentelemetry-appender-tracing. +/// It creates a LoggerProvider with a batch processor and OTLP exporter. +fn create_otel_layer( + endpoint: &Uri, + session_id: &str, + machine_id: &str, +) -> Result, LoggerError> { + use crate::utilities::decode_object; + use serde_json::Value; + use std::env::VarError; + + // Create base resource attributes + let mut resource_attributes = vec![ + KeyValue::new(SERVICE_NAME, "moose-cli"), + KeyValue::new("session_id", session_id.to_string()), + KeyValue::new("machine_id", machine_id.to_string()), + ]; + + // Add labels from MOOSE_METRIC__LABELS environment variable + match env::var("MOOSE_METRIC__LABELS") { + Ok(base64) => match decode_object::decode_base64_to_json(&base64) { + Ok(Value::Object(labels)) => { + for (key, value) in labels { + if let Some(value_str) = value.as_str() { + resource_attributes.push(KeyValue::new(key, value_str.to_string())); } - Ok(_) => warn!("Unexpected value for MOOSE_METRIC_LABELS"), - Err(e) => error!("Error decoding MOOSE_METRIC_LABELS: {}", e), - }, - Err(VarError::NotPresent) => {} - Err(VarError::NotUnicode(e)) => { - error!("MOOSE_METRIC__LABELS is not unicode: {:?}", e); } } - - let resource = Resource::builder() - .with_attributes(resource_attributes) - .build(); - let logger_provider = SdkLoggerProvider::builder() - .with_resource(resource) - .with_batch_exporter(open_telemetry_exporter) - .build(); - - let logger: Box = Box::new(TargetToKvLogger { - inner: OpenTelemetryLogBridge::new(&logger_provider), - }); - - fern::Dispatch::new().chain(output_config).chain( - fern::Dispatch::new() - // to prevent exporter recursively calls logging and thus itself - .level(LevelFilter::Off) - .level_for("moose_cli", settings.level.to_log_level()) - .chain(logger), - ) + Ok(_) => warn!("Unexpected value for MOOSE_METRIC_LABELS"), + Err(e) => { + warn!("Error decoding MOOSE_METRIC_LABELS: {}", e); + } + }, + Err(VarError::NotPresent) => {} + Err(VarError::NotUnicode(e)) => { + warn!("MOOSE_METRIC__LABELS is not unicode: {:?}", e); } - }; - base_config.chain(output_config).apply()?; + } - Ok(()) + // Create resource with all attributes + let resource = Resource::builder() + .with_attributes(resource_attributes) + .build(); + + // Build OTLP log exporter + let exporter = opentelemetry_otlp::LogExporter::builder() + .with_http() + .with_protocol(Protocol::HttpJson) + 
.with_endpoint(endpoint.to_string()) + .build() + .map_err(|e| LoggerError::OtelSetup(format!("Failed to build OTLP exporter: {}", e)))?; + + // Create logger provider with batch processor + let provider = SdkLoggerProvider::builder() + .with_resource(resource) + .with_log_processor(BatchLogProcessor::builder(exporter).build()) + .build(); + + // Create the tracing bridge layer + Ok(OpenTelemetryTracingBridge::new(&provider)) } -struct TargetToKvLogger -where - P: opentelemetry::logs::LoggerProvider + Send + Sync, - L: Logger + Send + Sync, -{ - inner: OpenTelemetryLogBridge, -} +pub fn setup_logging(settings: &LoggerSettings, machine_id: &str) -> Result<(), LoggerError> { + clean_old_logs(); -impl log::Log for TargetToKvLogger -where - P: opentelemetry::logs::LoggerProvider + Send + Sync, - L: Logger + Send + Sync, -{ - fn enabled(&self, metadata: &Metadata) -> bool { - self.inner.enabled(metadata) + let session_id = CONTEXT.get(CTX_SESSION_ID).unwrap(); + + // Create custom fields for use in formatters + let custom_fields = CustomFields { + session_id: session_id.to_string(), + machine_id: machine_id.to_string(), + }; + + // Setup logging based on format type + if settings.use_tracing_format { + // Modern format using tracing built-ins + setup_modern_format(settings, session_id, machine_id) + } else { + // Legacy format matching fern exactly + setup_legacy_format(settings, session_id, machine_id, custom_fields) } +} - fn log(&self, record: &Record) { - let mut with_target = record.to_builder(); - let kvs: &dyn log::kv::Source = &("target", record.target()); - with_target.key_values(kvs); - self.inner.log(&with_target.build()); +fn setup_modern_format( + settings: &LoggerSettings, + session_id: &str, + machine_id: &str, +) -> Result<(), LoggerError> { + let env_filter = EnvFilter::try_from_default_env() + .unwrap_or_else(|_| EnvFilter::new(settings.level.to_tracing_level().to_string())); + + // Setup with or without OTEL based on configuration + if let Some(endpoint) = &settings.export_to { + let otel_layer = create_otel_layer(endpoint, session_id, machine_id)?; + + if settings.stdout { + let format_layer = tracing_subscriber::fmt::layer() + .with_writer(std::io::stdout) + .with_target(true) + .with_level(true); + + if settings.format == LogFormat::Json { + tracing_subscriber::registry() + .with(otel_layer) + .with(env_filter) + .with(format_layer.json()) + .init(); + } else { + tracing_subscriber::registry() + .with(otel_layer) + .with(env_filter) + .with(format_layer.compact()) + .init(); + } + } else { + let file_appender = create_rolling_file_appender(&settings.log_file_date_format); + let format_layer = tracing_subscriber::fmt::layer() + .with_writer(file_appender) + .with_target(true) + .with_level(true); + + if settings.format == LogFormat::Json { + tracing_subscriber::registry() + .with(otel_layer) + .with(env_filter) + .with(format_layer.json()) + .init(); + } else { + tracing_subscriber::registry() + .with(otel_layer) + .with(env_filter) + .with(format_layer.compact()) + .init(); + } + } + } else { + // No OTEL export + if settings.stdout { + let format_layer = tracing_subscriber::fmt::layer() + .with_writer(std::io::stdout) + .with_target(true) + .with_level(true); + + if settings.format == LogFormat::Json { + tracing_subscriber::registry() + .with(env_filter) + .with(format_layer.json()) + .init(); + } else { + tracing_subscriber::registry() + .with(env_filter) + .with(format_layer.compact()) + .init(); + } + } else { + let file_appender = 
create_rolling_file_appender(&settings.log_file_date_format); + let format_layer = tracing_subscriber::fmt::layer() + .with_writer(file_appender) + .with_target(true) + .with_level(true); + + if settings.format == LogFormat::Json { + tracing_subscriber::registry() + .with(env_filter) + .with(format_layer.json()) + .init(); + } else { + tracing_subscriber::registry() + .with(env_filter) + .with(format_layer.compact()) + .init(); + } + } } - fn flush(&self) { - self.inner.flush() + Ok(()) +} + +fn setup_legacy_format( + settings: &LoggerSettings, + session_id: &str, + machine_id: &str, + custom_fields: CustomFields, +) -> Result<(), LoggerError> { + let env_filter = EnvFilter::try_from_default_env() + .unwrap_or_else(|_| EnvFilter::new(settings.level.to_tracing_level().to_string())); + + // Setup with or without OTEL based on configuration + if let Some(endpoint) = &settings.export_to { + let otel_layer = create_otel_layer(endpoint, session_id, machine_id)?; + + if settings.stdout { + let legacy_layer = LegacyFormatLayer::new( + std::io::stdout, + settings.format.clone(), + settings.include_session_id, + custom_fields, + ); + + tracing_subscriber::registry() + .with(otel_layer) + .with(env_filter) + .with(legacy_layer) + .init(); + } else { + let file_appender = create_rolling_file_appender(&settings.log_file_date_format); + let legacy_layer = LegacyFormatLayer::new( + file_appender, + settings.format.clone(), + settings.include_session_id, + custom_fields, + ); + + tracing_subscriber::registry() + .with(otel_layer) + .with(env_filter) + .with(legacy_layer) + .init(); + } + } else { + // No OTEL export + if settings.stdout { + let legacy_layer = LegacyFormatLayer::new( + std::io::stdout, + settings.format.clone(), + settings.include_session_id, + custom_fields.clone(), + ); + + tracing_subscriber::registry() + .with(env_filter) + .with(legacy_layer) + .init(); + } else { + let file_appender = create_rolling_file_appender(&settings.log_file_date_format); + let legacy_layer = LegacyFormatLayer::new( + file_appender, + settings.format.clone(), + settings.include_session_id, + custom_fields, + ); + + tracing_subscriber::registry() + .with(env_filter) + .with(legacy_layer) + .init(); + } } + + Ok(()) } diff --git a/apps/framework-cli/src/cli/processing_coordinator.rs b/apps/framework-cli/src/cli/processing_coordinator.rs index e865eef358..195ba7e8e5 100644 --- a/apps/framework-cli/src/cli/processing_coordinator.rs +++ b/apps/framework-cli/src/cli/processing_coordinator.rs @@ -63,9 +63,9 @@ impl ProcessingCoordinator { /// // Guard drops here, releasing write lock /// ``` pub async fn begin_processing(&self) -> ProcessingGuard { - log::debug!("[ProcessingCoordinator] Acquiring write lock for processing"); + tracing::debug!("[ProcessingCoordinator] Acquiring write lock for processing"); let write_guard = self.lock.clone().write_owned().await; - log::debug!("[ProcessingCoordinator] Write lock acquired, processing started"); + tracing::debug!("[ProcessingCoordinator] Write lock acquired, processing started"); ProcessingGuard { _write_guard: write_guard, @@ -86,9 +86,9 @@ impl ProcessingCoordinator { /// // Now safe to read from Redis, ClickHouse, etc. 
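The logger rewrite above repeats the same `tracing_subscriber::registry().with(...)` composition in every branch because `.json()` and `.compact()` produce different concrete layer types, so the branches cannot easily share one binding. A minimal sketch of that composition, with the OTEL layer left out and assuming the `env-filter`, `fmt`, and `json` features of `tracing-subscriber` (a simplified illustration, not the patch's exact code):

```rust
use tracing_subscriber::{fmt, layer::SubscriberExt, util::SubscriberInitExt, EnvFilter};

fn init_logging(json: bool) {
    // Fall back to "info" when RUST_LOG is not set.
    let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
    let fmt_layer = fmt::layer().with_target(true).with_level(true);

    if json {
        tracing_subscriber::registry()
            .with(filter)
            .with(fmt_layer.json())
            .init();
    } else {
        tracing_subscriber::registry()
            .with(filter)
            .with(fmt_layer.compact())
            .init();
    }
}
```

When OTLP export is configured, the patch adds the `OpenTelemetryTracingBridge` layer to the same stack ahead of the filter and formatter.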
/// ``` pub async fn wait_for_stable_state(&self) { - log::trace!("[ProcessingCoordinator] Waiting for stable state (acquiring read lock)"); + tracing::trace!("[ProcessingCoordinator] Waiting for stable state (acquiring read lock)"); let _read_guard = self.lock.read().await; - log::trace!("[ProcessingCoordinator] State is stable (read lock acquired)"); + tracing::trace!("[ProcessingCoordinator] State is stable (read lock acquired)"); // Read lock is dropped here, allowing processing to proceed if needed } } @@ -109,7 +109,7 @@ pub struct ProcessingGuard { impl Drop for ProcessingGuard { fn drop(&mut self) { - log::debug!("[ProcessingCoordinator] Processing complete, releasing write lock"); + tracing::debug!("[ProcessingCoordinator] Processing complete, releasing write lock"); // Write guard drops automatically, releasing the lock } } diff --git a/apps/framework-cli/src/cli/routines/build.rs b/apps/framework-cli/src/cli/routines/build.rs index 441425f1f8..4109d9ea06 100644 --- a/apps/framework-cli/src/cli/routines/build.rs +++ b/apps/framework-cli/src/cli/routines/build.rs @@ -26,11 +26,11 @@ /// } /// ``` use chrono::Local; -use log::{debug, error, info}; use std::fs; use std::path::Path; use std::path::PathBuf; use std::process::Command; +use tracing::{debug, error, info}; use crate::framework::languages::SupportedLanguages; use crate::project::Project; diff --git a/apps/framework-cli/src/cli/routines/clean.rs b/apps/framework-cli/src/cli/routines/clean.rs index 540c676568..3bfdd472c6 100644 --- a/apps/framework-cli/src/cli/routines/clean.rs +++ b/apps/framework-cli/src/cli/routines/clean.rs @@ -1,6 +1,6 @@ use crate::utilities::docker::DockerClient; use crate::{cli::display::Message, cli::settings::Settings, project::Project}; -use log::info; +use tracing::info; use super::util::ensure_docker_running; use super::{RoutineFailure, RoutineSuccess}; diff --git a/apps/framework-cli/src/cli/routines/code_generation.rs b/apps/framework-cli/src/cli/routines/code_generation.rs index 80b8baf3d6..e5f4e0a8d5 100644 --- a/apps/framework-cli/src/cli/routines/code_generation.rs +++ b/apps/framework-cli/src/cli/routines/code_generation.rs @@ -15,11 +15,11 @@ use crate::utilities::constants::{ }; use crate::utilities::git::create_code_generation_commit; use clickhouse::Client; -use log::debug; use std::borrow::Cow; use std::env; use std::io::Write; use std::path::Path; +use tracing::debug; pub fn prompt_user_for_remote_ch_http() -> Result { let base = prompt_user( diff --git a/apps/framework-cli/src/cli/routines/dev.rs b/apps/framework-cli/src/cli/routines/dev.rs index f602a35057..c62b271155 100644 --- a/apps/framework-cli/src/cli/routines/dev.rs +++ b/apps/framework-cli/src/cli/routines/dev.rs @@ -17,7 +17,7 @@ pub fn run_local_infrastructure( docker_client: &DockerClient, ) -> anyhow::Result<()> { // Debug log to check load_infra value at runtime - log::info!( + tracing::info!( "[moose] DEBUG: load_infra from config: {:?}, should_load_infra(): {}", project.load_infra, project.should_load_infra() diff --git a/apps/framework-cli/src/cli/routines/docker_packager.rs b/apps/framework-cli/src/cli/routines/docker_packager.rs index 262724ddef..7ecbda7c1d 100644 --- a/apps/framework-cli/src/cli/routines/docker_packager.rs +++ b/apps/framework-cli/src/cli/routines/docker_packager.rs @@ -12,10 +12,10 @@ use crate::utilities::package_managers::get_lock_file_path; use crate::utilities::{constants, system}; use crate::{cli::display::Message, project::Project}; -use log::{debug, error, info}; use serde_json::Value as 
JsonValue; use std::fs; use std::path::{Path, PathBuf}; +use tracing::{debug, error, info}; #[derive(Debug, Clone)] struct PackageInfo { diff --git a/apps/framework-cli/src/cli/routines/kafka_pull.rs b/apps/framework-cli/src/cli/routines/kafka_pull.rs index 535f8f947a..c7e9c863bb 100644 --- a/apps/framework-cli/src/cli/routines/kafka_pull.rs +++ b/apps/framework-cli/src/cli/routines/kafka_pull.rs @@ -10,7 +10,6 @@ use crate::framework::typescript::generate::sanitize_typescript_identifier; use crate::infrastructure::stream::kafka::client::fetch_topics; use crate::project::Project; use globset::{Glob, GlobMatcher}; -use log::{info, warn}; use schema_registry_client::rest::apis::Error as SchemaRegistryError; use schema_registry_client::rest::schema_registry_client::{ Client as SrClientTrait, SchemaRegistryClient, @@ -19,6 +18,7 @@ use serde_json::Value; use std::fs; use std::path::Path; use std::str::FromStr; +use tracing::{info, warn}; fn build_matcher(s: &str) -> Result { let matcher = Glob::new(s) diff --git a/apps/framework-cli/src/cli/routines/migrate.rs b/apps/framework-cli/src/cli/routines/migrate.rs index 6edc7a8bea..1256a302e5 100644 --- a/apps/framework-cli/src/cli/routines/migrate.rs +++ b/apps/framework-cli/src/cli/routines/migrate.rs @@ -212,11 +212,11 @@ fn validate_table_databases_and_clusters( .map(|cs| cs.iter().map(|c| c.name.clone()).collect()) .unwrap_or_default(); - log::info!("Configured cluster names: {:?}", cluster_names); + tracing::info!("Configured cluster names: {:?}", cluster_names); // Helper to validate database and cluster options let mut validate = |db_opt: &Option, cluster_opt: &Option, table_name: &str| { - log::info!( + tracing::info!( "Validating table '{}' with cluster: {:?}", table_name, cluster_opt @@ -229,14 +229,14 @@ fn validate_table_databases_and_clusters( } // Validate cluster if let Some(cluster) = cluster_opt { - log::info!( + tracing::info!( "Checking if cluster '{}' is in {:?}", cluster, cluster_names ); // Fail if cluster is not in the configured list (or if list is empty) if cluster_names.is_empty() || !cluster_names.contains(cluster) { - log::info!("Cluster '{}' not found in configured clusters!", cluster); + tracing::info!("Cluster '{}' not found in configured clusters!", cluster); invalid_clusters.push((table_name.to_string(), cluster.clone())); } } @@ -448,7 +448,7 @@ async fn execute_operations( ); // Validate that all table databases and clusters are configured - log::info!( + tracing::info!( "Validating operations against config. 
Clusters: {:?}", project.clickhouse_config.clusters ); @@ -650,7 +650,7 @@ pub async fn execute_migration( // Always release lock explicitly before returning // This ensures cleanup happens even if any operation above failed if let Err(e) = state_storage.release_migration_lock().await { - log::warn!("Failed to release migration lock: {}", e); + tracing::warn!("Failed to release migration lock: {}", e); } result diff --git a/apps/framework-cli/src/cli/routines/mod.rs b/apps/framework-cli/src/cli/routines/mod.rs index 4f875ee89d..c25017e6c8 100644 --- a/apps/framework-cli/src/cli/routines/mod.rs +++ b/apps/framework-cli/src/cli/routines/mod.rs @@ -98,7 +98,6 @@ use crate::framework::core::migration_plan::{MigrationPlan, MigrationPlanWithBef use crate::framework::core::plan_validator; use crate::infrastructure::redis::redis_client::RedisClient; use crate::project::Project; -use log::{debug, error, info, warn}; use serde::Deserialize; use std::collections::{HashMap, HashSet}; use std::path::PathBuf; @@ -106,6 +105,7 @@ use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; use tokio::sync::RwLock; use tokio::time::{interval, Duration}; +use tracing::{debug, error, info, warn}; use super::super::metrics::Metrics; use super::local_webserver::{PlanRequest, PlanResponse, Webserver}; @@ -487,7 +487,9 @@ pub async fn start_development_mode( .to_string(), } ); - log::warn!("Failed to write suppression flag to config: {e:?}"); + tracing::warn!( + "Failed to write suppression flag to config: {e:?}" + ); } } None diff --git a/apps/framework-cli/src/cli/routines/peek.rs b/apps/framework-cli/src/cli/routines/peek.rs index affadb731c..8fc031e921 100644 --- a/apps/framework-cli/src/cli/routines/peek.rs +++ b/apps/framework-cli/src/cli/routines/peek.rs @@ -14,7 +14,6 @@ use super::{setup_redis_client, RoutineFailure, RoutineSuccess}; use crate::infrastructure::olap::clickhouse::model::ClickHouseTable; use crate::infrastructure::stream::kafka::client::create_consumer; use futures::stream::BoxStream; -use log::info; use rdkafka::consumer::{Consumer, StreamConsumer}; use rdkafka::{Message as KafkaMessage, Offset, TopicPartitionList}; use serde_json::Value; @@ -24,6 +23,7 @@ use std::time::Duration; use tokio::fs::File; use tokio::io::AsyncWriteExt; use tokio_stream::StreamExt; +use tracing::info; /// Retrieves and displays a sample of data from either a database table or streaming topic. /// @@ -267,7 +267,7 @@ pub async fn peek( success_count += 1; } Err(e) => { - log::error!("Failed to read row {}", e); + tracing::error!("Failed to read row {}", e); } } } diff --git a/apps/framework-cli/src/cli/routines/ps.rs b/apps/framework-cli/src/cli/routines/ps.rs index f584c33adf..512f8c4ddb 100644 --- a/apps/framework-cli/src/cli/routines/ps.rs +++ b/apps/framework-cli/src/cli/routines/ps.rs @@ -3,7 +3,7 @@ use std::{ sync::Arc, }; -use log::error; +use tracing::error; use crate::{ cli::display::{show_table, Message}, diff --git a/apps/framework-cli/src/cli/routines/query.rs b/apps/framework-cli/src/cli/routines/query.rs index 44405801c4..fbbc9ddd57 100644 --- a/apps/framework-cli/src/cli/routines/query.rs +++ b/apps/framework-cli/src/cli/routines/query.rs @@ -9,10 +9,10 @@ use crate::framework::core::infrastructure_map::InfrastructureMap; use crate::infrastructure::olap::clickhouse_http_client::create_query_client; use crate::project::Project; -use log::info; use std::io::Read; use std::path::PathBuf; use std::sync::Arc; +use tracing::info; /// Reads SQL query from argument, file, or stdin. 
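Most of the remaining hunks in this patch are a one-for-one swap of `log::` macros for their `tracing::` equivalents; the format-argument syntax carries over unchanged, and `tracing` additionally accepts structured key-value fields. A hedged illustration (the function and identifiers below are invented for the example, not code from this repository):

```rust
use tracing::{debug, info, warn};

fn report_validation(table: &str, cluster: Option<&str>) {
    // Same format-args style the old log:: macros used.
    info!("Validating table '{}' with cluster: {:?}", table, cluster);

    // tracing also supports structured fields; `?` captures with Debug.
    debug!(table, ?cluster, "validation finished");

    if cluster.is_none() {
        warn!("no cluster configured for table '{}'", table);
    }
}
```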
/// diff --git a/apps/framework-cli/src/cli/routines/seed_data.rs b/apps/framework-cli/src/cli/routines/seed_data.rs index b827a5b617..979841d028 100644 --- a/apps/framework-cli/src/cli/routines/seed_data.rs +++ b/apps/framework-cli/src/cli/routines/seed_data.rs @@ -14,9 +14,9 @@ use crate::utilities::constants::KEY_REMOTE_CLICKHOUSE_URL; use crate::utilities::keyring::{KeyringSecretRepository, SecretRepository}; use crate::framework::core::infrastructure::table::Table; -use log::{debug, info, warn}; use std::cmp::min; use std::collections::HashSet; +use tracing::{debug, info, warn}; /// Validates that a database name is not empty fn validate_database_name(db_name: &str) -> Result<(), RoutineFailure> { diff --git a/apps/framework-cli/src/cli/routines/templates.rs b/apps/framework-cli/src/cli/routines/templates.rs index b6e7b80609..cfc79035b3 100644 --- a/apps/framework-cli/src/cli/routines/templates.rs +++ b/apps/framework-cli/src/cli/routines/templates.rs @@ -1,7 +1,6 @@ use flate2::read::GzDecoder; use futures::StreamExt; use home::home_dir; -use log::warn; use regex::Regex; use std::fs::File; use std::io::Write; @@ -10,6 +9,7 @@ use std::path::PathBuf; use std::sync::Arc; use tar::Archive; use toml::Value; +use tracing::warn; use super::RoutineFailure; use super::RoutineSuccess; diff --git a/apps/framework-cli/src/cli/routines/truncate_table.rs b/apps/framework-cli/src/cli/routines/truncate_table.rs index 3c35e1f416..29abe3f70b 100644 --- a/apps/framework-cli/src/cli/routines/truncate_table.rs +++ b/apps/framework-cli/src/cli/routines/truncate_table.rs @@ -4,7 +4,7 @@ use crate::infrastructure::olap::clickhouse::{ check_ready, create_client, extract_order_by_from_create_query, run_query, }; use crate::project::Project; -use log::{info, warn}; +use tracing::{info, warn}; fn escape_ident(ident: &str) -> String { ident.replace('`', "``") diff --git a/apps/framework-cli/src/cli/routines/validate.rs b/apps/framework-cli/src/cli/routines/validate.rs index fd433ac784..a3266ce8bb 100644 --- a/apps/framework-cli/src/cli/routines/validate.rs +++ b/apps/framework-cli/src/cli/routines/validate.rs @@ -1,4 +1,4 @@ -use log::debug; +use tracing::debug; use super::{RoutineFailure, RoutineSuccess}; use crate::cli::display::Message; diff --git a/apps/framework-cli/src/cli/settings.rs b/apps/framework-cli/src/cli/settings.rs index 3db16de5c3..e4a5773fc1 100644 --- a/apps/framework-cli/src/cli/settings.rs +++ b/apps/framework-cli/src/cli/settings.rs @@ -31,10 +31,10 @@ use config::{Config, ConfigError, Environment, File}; use home::home_dir; -use log::warn; use serde::Deserialize; use std::path::PathBuf; use toml_edit::{table, value, DocumentMut, Entry, Item}; +use tracing::warn; use super::display::{Message, MessageType}; use super::logger::LoggerSettings; diff --git a/apps/framework-cli/src/cli/watcher.rs b/apps/framework-cli/src/cli/watcher.rs index 398317557e..4c899f3555 100644 --- a/apps/framework-cli/src/cli/watcher.rs +++ b/apps/framework-cli/src/cli/watcher.rs @@ -18,7 +18,6 @@ /// 3. After a short delay (debouncing), changes are processed to update the infrastructure /// 4. 
The updated infrastructure is applied to the system use crate::framework; -use log::info; use notify::event::ModifyKind; use notify::{Event, EventHandler, EventKind, RecommendedWatcher, RecursiveMode, Watcher}; use std::collections::HashSet; @@ -26,6 +25,7 @@ use std::sync::Arc; use std::time::Duration; use std::{io::Error, path::PathBuf}; use tokio::sync::RwLock; +use tracing::info; use crate::framework::core::infrastructure_map::{ApiChange, InfrastructureMap}; @@ -48,7 +48,7 @@ struct EventListener { impl EventHandler for EventListener { fn handle_event(&mut self, event: notify::Result) { - log::debug!("Received Watcher event: {:?}", event); + tracing::debug!("Received Watcher event: {:?}", event); match event { Ok(event) => { self.tx.send_if_modified(|events| { @@ -57,7 +57,7 @@ impl EventHandler for EventListener { }); } Err(e) => { - log::error!("Watcher Error: {:?}", e); + tracing::error!("Watcher Error: {:?}", e); } } } @@ -133,7 +133,7 @@ async fn watch( processing_coordinator: ProcessingCoordinator, mut shutdown_rx: tokio::sync::watch::Receiver, ) -> Result<(), anyhow::Error> { - log::debug!( + tracing::debug!( "Starting file watcher for project: {:?}", project.app_dir().display() ); @@ -148,7 +148,7 @@ async fn watch( .watch(project.app_dir().as_ref(), RecursiveMode::Recursive) .map_err(|e| Error::other(format!("Failed to watch file: {e}")))?; - log::debug!("Watcher setup complete, entering main loop"); + tracing::debug!("Watcher setup complete, entering main loop"); loop { tokio::select! { @@ -157,7 +157,7 @@ async fn watch( return Ok(()); } Ok(()) = rx.changed() => { - log::debug!("Received change notification, current changes: {:?}", rx.borrow()); + tracing::debug!("Received change notification, current changes: {:?}", rx.borrow()); } _ = tokio::time::sleep(Duration::from_secs(1)) => { let should_process = { @@ -166,7 +166,7 @@ async fn watch( }; if should_process { - log::debug!("Debounce period elapsed, processing changes"); + tracing::debug!("Debounce period elapsed, processing changes"); receiver_ack.send_replace(EventBuckets::default()); rx.mark_unchanged(); diff --git a/apps/framework-cli/src/framework/consumption/loader.rs b/apps/framework-cli/src/framework/consumption/loader.rs index ac4fc6adf1..eeac713a83 100644 --- a/apps/framework-cli/src/framework/consumption/loader.rs +++ b/apps/framework-cli/src/framework/consumption/loader.rs @@ -35,7 +35,7 @@ pub async fn load_consumption(project: &Project) -> Result log::info!("Terminated running workflows: {:?}", success), - Err(e) => log::warn!("Failed to terminate running workflows: {:?}", e), + Ok(success) => tracing::info!("Terminated running workflows: {:?}", success), + Err(e) => tracing::warn!("Failed to terminate running workflows: {:?}", e), } execute_scheduled_workflows(project, &plan.target_infra_map.workflows).await; diff --git a/apps/framework-cli/src/framework/core/infra_reality_checker.rs b/apps/framework-cli/src/framework/core/infra_reality_checker.rs index 17320e03ca..e78c6de5ec 100644 --- a/apps/framework-cli/src/framework/core/infra_reality_checker.rs +++ b/apps/framework-cli/src/framework/core/infra_reality_checker.rs @@ -23,10 +23,10 @@ use crate::{ infrastructure::olap::{OlapChangesError, OlapOperations}, project::Project, }; -use log::debug; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use thiserror::Error; +use tracing::debug; /// Represents errors that can occur during infrastructure reality checking. 
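The watcher changes above keep the existing debounce structure: file events flip a notification, and a periodic tick decides when the accumulated changes get processed. A simplified sketch of that idea using a `tokio::sync::watch` channel (an illustration with assumed types, not the watcher's actual `EventBuckets` plumbing):

```rust
use std::time::Duration;
use tokio::sync::watch;

async fn debounce_loop(mut events_rx: watch::Receiver<()>) {
    let mut dirty = false;
    loop {
        tokio::select! {
            Ok(()) = events_rx.changed() => {
                tracing::debug!("Received change notification");
                dirty = true;
            }
            _ = tokio::time::sleep(Duration::from_secs(1)) => {
                if dirty {
                    tracing::debug!("Debounce period elapsed, processing changes");
                    // ... process the accumulated changes here ...
                    dirty = false;
                }
            }
        }
    }
}
```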
#[derive(Debug, Error)] diff --git a/apps/framework-cli/src/framework/core/infrastructure_map.rs b/apps/framework-cli/src/framework/core/infrastructure_map.rs index 95ef3f7465..e33d7c23eb 100644 --- a/apps/framework-cli/src/framework/core/infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/infrastructure_map.rs @@ -684,18 +684,18 @@ impl InfrastructureMap { } } None => { - log::error!( + tracing::error!( "Could not find previous version with no change for data model: {} {}", data_model.name, data_model.version ); - log::debug!("Data Models Dump: {:?}", primitive_map.datamodels); + tracing::debug!("Data Models Dump: {:?}", primitive_map.datamodels); } } } if !project.features.streaming_engine && !primitive_map.functions.is_empty() { - log::error!("Streaming disabled. Functions are disabled."); + tracing::error!("Streaming disabled. Functions are disabled."); show_message_wrapper( MessageType::Error, Message { @@ -723,7 +723,7 @@ impl InfrastructureMap { // consumption api endpoints let consumption_api_web_server = ConsumptionApiWebServer {}; if !project.features.apis && !primitive_map.consumption.endpoint_files.is_empty() { - log::error!("Analytics APIs disabled. API endpoints will not be available."); + tracing::error!("Analytics APIs disabled. API endpoints will not be available."); show_message_wrapper( MessageType::Error, Message { @@ -969,7 +969,7 @@ impl InfrastructureMap { ); // Tables (using custom strategy) - log::info!("Analyzing changes in Tables..."); + tracing::info!("Analyzing changes in Tables..."); let olap_changes_len_before = changes.olap_changes.len(); Self::diff_tables_with_strategy( &self.tables, @@ -981,13 +981,13 @@ impl InfrastructureMap { ignore_ops, ); let table_changes = changes.olap_changes.len() - olap_changes_len_before; - log::info!("Table changes detected: {}", table_changes); + tracing::info!("Table changes detected: {}", table_changes); // Views Self::diff_views(&self.views, &target_map.views, &mut changes.olap_changes); // SQL Resources (needs tables context for MV population detection) - log::info!("Analyzing changes in SQL Resources..."); + tracing::info!("Analyzing changes in SQL Resources..."); let olap_changes_len_before = changes.olap_changes.len(); Self::diff_sql_resources( &self.sql_resources, @@ -997,13 +997,13 @@ impl InfrastructureMap { &mut changes.olap_changes, ); let sql_resource_changes = changes.olap_changes.len() - olap_changes_len_before; - log::info!("SQL Resource changes detected: {}", sql_resource_changes); + tracing::info!("SQL Resource changes detected: {}", sql_resource_changes); // All process types self.diff_all_processes(target_map, &mut changes.processes_changes); // Summary - log::info!( + tracing::info!( "Total changes detected - OLAP: {}, Processes: {}, API: {}, WebApps: {}, Streaming: {}", changes.olap_changes.len(), changes.processes_changes.len(), @@ -1033,7 +1033,7 @@ impl InfrastructureMap { streaming_changes: &mut Vec, respect_life_cycle: bool, ) -> (usize, usize, usize) { - log::info!("Analyzing changes in Topics..."); + tracing::info!("Analyzing changes in Topics..."); let mut topic_updates = 0; let mut topic_removals = 0; let mut topic_additions = 0; @@ -1044,12 +1044,12 @@ impl InfrastructureMap { // Respect lifecycle: ExternallyManaged topics are never modified if target_topic.life_cycle == LifeCycle::ExternallyManaged && respect_life_cycle { - log::debug!( + tracing::debug!( "Topic '{}' has changes but is externally managed - skipping update", topic.name ); } else { - log::debug!("Topic updated: {} 
({})", topic.name, id); + tracing::debug!("Topic updated: {} ({})", topic.name, id); topic_updates += 1; streaming_changes.push(StreamingChange::Topic(Change::::Updated { before: Box::new(topic.clone()), @@ -1061,20 +1061,20 @@ impl InfrastructureMap { // Respect lifecycle: DeletionProtected and ExternallyManaged topics are never removed match (topic.life_cycle, respect_life_cycle) { (LifeCycle::FullyManaged, _) | (_, false) => { - log::debug!("Topic removed: {} ({})", topic.name, id); + tracing::debug!("Topic removed: {} ({})", topic.name, id); topic_removals += 1; streaming_changes.push(StreamingChange::Topic(Change::::Removed( Box::new(topic.clone()), ))); } (LifeCycle::DeletionProtected, true) => { - log::debug!( + tracing::debug!( "Topic '{}' marked for removal but is deletion-protected - skipping removal", topic.name ); } (LifeCycle::ExternallyManaged, true) => { - log::debug!( + tracing::debug!( "Topic '{}' marked for removal but is externally managed - skipping removal", topic.name ); @@ -1087,12 +1087,12 @@ impl InfrastructureMap { if !self_topics.contains_key(id) { // Respect lifecycle: ExternallyManaged topics are never added automatically if topic.life_cycle == LifeCycle::ExternallyManaged && respect_life_cycle { - log::debug!( + tracing::debug!( "Topic '{}' marked for addition but is externally managed - skipping addition", topic.name ); } else { - log::debug!("Topic added: {} ({})", topic.name, id); + tracing::debug!("Topic added: {} ({})", topic.name, id); topic_additions += 1; streaming_changes.push(StreamingChange::Topic(Change::::Added( Box::new(topic.clone()), @@ -1101,7 +1101,7 @@ impl InfrastructureMap { } } - log::info!( + tracing::info!( "Topic changes: {} added, {} removed, {} updated", topic_additions, topic_removals, @@ -1128,7 +1128,7 @@ impl InfrastructureMap { target_endpoints: &HashMap, api_changes: &mut Vec, ) -> (usize, usize, usize) { - log::info!("Analyzing changes in API Endpoints..."); + tracing::info!("Analyzing changes in API Endpoints..."); let mut endpoint_updates = 0; let mut endpoint_removals = 0; let mut endpoint_additions = 0; @@ -1136,7 +1136,7 @@ impl InfrastructureMap { for (id, endpoint) in self_endpoints { if let Some(target_endpoint) = target_endpoints.get(id) { if !api_endpoints_equal_ignore_metadata(endpoint, target_endpoint) { - log::debug!("API Endpoint updated: {}", id); + tracing::debug!("API Endpoint updated: {}", id); endpoint_updates += 1; api_changes.push(ApiChange::ApiEndpoint(Change::::Updated { before: Box::new(endpoint.clone()), @@ -1144,7 +1144,7 @@ impl InfrastructureMap { })); } } else { - log::debug!("API Endpoint removed: {}", id); + tracing::debug!("API Endpoint removed: {}", id); endpoint_removals += 1; api_changes.push(ApiChange::ApiEndpoint(Change::::Removed( Box::new(endpoint.clone()), @@ -1154,7 +1154,7 @@ impl InfrastructureMap { for (id, endpoint) in target_endpoints { if !self_endpoints.contains_key(id) { - log::debug!("API Endpoint added: {}", id); + tracing::debug!("API Endpoint added: {}", id); endpoint_additions += 1; api_changes.push(ApiChange::ApiEndpoint(Change::::Added( Box::new(endpoint.clone()), @@ -1162,7 +1162,7 @@ impl InfrastructureMap { } } - log::info!( + tracing::info!( "API Endpoint changes: {} added, {} removed, {} updated", endpoint_additions, endpoint_removals, @@ -1189,7 +1189,7 @@ impl InfrastructureMap { target_web_apps: &HashMap, web_app_changes: &mut Vec, ) -> (usize, usize, usize) { - log::info!("Analyzing changes in WebApps..."); + tracing::info!("Analyzing changes in WebApps..."); let 
mut webapp_updates = 0; let mut webapp_removals = 0; let mut webapp_additions = 0; @@ -1197,7 +1197,7 @@ impl InfrastructureMap { for (id, webapp) in self_web_apps { if let Some(target_webapp) = target_web_apps.get(id) { if webapp != target_webapp { - log::debug!("WebApp updated: {}", id); + tracing::debug!("WebApp updated: {}", id); webapp_updates += 1; web_app_changes.push(WebAppChange::WebApp(Change::Updated { before: Box::new(webapp.clone()), @@ -1205,7 +1205,7 @@ impl InfrastructureMap { })); } } else { - log::debug!("WebApp removed: {}", id); + tracing::debug!("WebApp removed: {}", id); webapp_removals += 1; web_app_changes.push(WebAppChange::WebApp(Change::Removed(Box::new( webapp.clone(), @@ -1215,7 +1215,7 @@ impl InfrastructureMap { for (id, webapp) in target_web_apps { if !self_web_apps.contains_key(id) { - log::debug!("WebApp added: {}", id); + tracing::debug!("WebApp added: {}", id); webapp_additions += 1; web_app_changes.push(WebAppChange::WebApp(Change::Added(Box::new( webapp.clone(), @@ -1223,7 +1223,7 @@ impl InfrastructureMap { } } - log::info!( + tracing::info!( "WebApp changes: {} added, {} removed, {} updated", webapp_additions, webapp_removals, @@ -1250,7 +1250,7 @@ impl InfrastructureMap { target_views: &HashMap, olap_changes: &mut Vec, ) -> (usize, usize, usize) { - log::info!("Analyzing changes in Views..."); + tracing::info!("Analyzing changes in Views..."); let mut view_updates = 0; let mut view_removals = 0; let mut view_additions = 0; @@ -1259,7 +1259,7 @@ impl InfrastructureMap { for (id, view) in self_views { if let Some(target_view) = target_views.get(id) { if view != target_view { - log::debug!("View updated: {} ({})", view.name, id); + tracing::debug!("View updated: {} ({})", view.name, id); view_updates += 1; olap_changes.push(OlapChange::View(Change::Updated { before: Box::new(view.clone()), @@ -1267,7 +1267,7 @@ impl InfrastructureMap { })); } } else { - log::debug!("View removed: {} ({})", view.name, id); + tracing::debug!("View removed: {} ({})", view.name, id); view_removals += 1; olap_changes.push(OlapChange::View(Change::Removed(Box::new(view.clone())))); } @@ -1276,13 +1276,13 @@ impl InfrastructureMap { // Check for additions for (id, view) in target_views { if !self_views.contains_key(id) { - log::debug!("View added: {} ({})", view.name, id); + tracing::debug!("View added: {} ({})", view.name, id); view_additions += 1; olap_changes.push(OlapChange::View(Change::Added(Box::new(view.clone())))); } } - log::info!( + tracing::info!( "View changes: {} added, {} removed, {} updated", view_additions, view_removals, @@ -1343,7 +1343,7 @@ impl InfrastructureMap { target_processes: &HashMap, process_changes: &mut Vec, ) -> (usize, usize, usize) { - log::info!("Analyzing changes in Topic-to-Table Sync Processes..."); + tracing::info!("Analyzing changes in Topic-to-Table Sync Processes..."); let mut process_updates = 0; let mut process_removals = 0; let mut process_additions = 0; @@ -1351,7 +1351,7 @@ impl InfrastructureMap { for (id, process) in self_processes { if let Some(target_process) = target_processes.get(id) { if process != target_process { - log::debug!("TopicToTableSyncProcess updated: {}", id); + tracing::debug!("TopicToTableSyncProcess updated: {}", id); process_updates += 1; process_changes.push(ProcessChange::TopicToTableSyncProcess(Change::< TopicToTableSyncProcess, @@ -1361,7 +1361,7 @@ impl InfrastructureMap { })); } } else { - log::debug!("TopicToTableSyncProcess removed: {}", id); + tracing::debug!("TopicToTableSyncProcess removed: {}", 
id); process_removals += 1; process_changes.push(ProcessChange::TopicToTableSyncProcess(Change::< TopicToTableSyncProcess, @@ -1373,7 +1373,7 @@ impl InfrastructureMap { for (id, process) in target_processes { if !self_processes.contains_key(id) { - log::debug!("TopicToTableSyncProcess added: {}", id); + tracing::debug!("TopicToTableSyncProcess added: {}", id); process_additions += 1; process_changes.push(ProcessChange::TopicToTableSyncProcess(Change::< TopicToTableSyncProcess, @@ -1383,7 +1383,7 @@ impl InfrastructureMap { } } - log::info!( + tracing::info!( "Topic-to-Table Sync Process changes: {} added, {} removed, {} updated", process_additions, process_removals, @@ -1399,7 +1399,7 @@ impl InfrastructureMap { target_processes: &HashMap, process_changes: &mut Vec, ) -> (usize, usize, usize) { - log::info!("Analyzing changes in Topic-to-Topic Sync Processes..."); + tracing::info!("Analyzing changes in Topic-to-Topic Sync Processes..."); let mut process_updates = 0; let mut process_removals = 0; let mut process_additions = 0; @@ -1407,7 +1407,7 @@ impl InfrastructureMap { for (id, process) in self_processes { if let Some(target_process) = target_processes.get(id) { if process != target_process { - log::debug!("TopicToTopicSyncProcess updated: {}", id); + tracing::debug!("TopicToTopicSyncProcess updated: {}", id); process_updates += 1; process_changes.push(ProcessChange::TopicToTopicSyncProcess(Change::< TopicToTopicSyncProcess, @@ -1417,7 +1417,7 @@ impl InfrastructureMap { })); } } else { - log::debug!("TopicToTopicSyncProcess removed: {}", id); + tracing::debug!("TopicToTopicSyncProcess removed: {}", id); process_removals += 1; process_changes.push(ProcessChange::TopicToTopicSyncProcess(Change::< TopicToTopicSyncProcess, @@ -1429,7 +1429,7 @@ impl InfrastructureMap { for (id, process) in target_processes { if !self_processes.contains_key(id) { - log::debug!("TopicToTopicSyncProcess added: {}", id); + tracing::debug!("TopicToTopicSyncProcess added: {}", id); process_additions += 1; process_changes.push(ProcessChange::TopicToTopicSyncProcess(Change::< TopicToTopicSyncProcess, @@ -1439,7 +1439,7 @@ impl InfrastructureMap { } } - log::info!( + tracing::info!( "Topic-to-Topic Sync Process changes: {} added, {} removed, {} updated", process_additions, process_removals, @@ -1455,7 +1455,7 @@ impl InfrastructureMap { target_processes: &HashMap, process_changes: &mut Vec, ) -> (usize, usize, usize) { - log::info!("Analyzing changes in Function Processes..."); + tracing::info!("Analyzing changes in Function Processes..."); let mut process_updates = 0; let mut process_removals = 0; let mut process_additions = 0; @@ -1464,7 +1464,7 @@ impl InfrastructureMap { if let Some(target_process) = target_processes.get(id) { // Always treat function processes as updated if they exist in both maps // This ensures function code changes are always redeployed - log::debug!("FunctionProcess updated (forced): {}", id); + tracing::debug!("FunctionProcess updated (forced): {}", id); process_updates += 1; process_changes.push(ProcessChange::FunctionProcess( Change::::Updated { @@ -1473,7 +1473,7 @@ impl InfrastructureMap { }, )); } else { - log::debug!("FunctionProcess removed: {}", id); + tracing::debug!("FunctionProcess removed: {}", id); process_removals += 1; process_changes.push(ProcessChange::FunctionProcess( Change::::Removed(Box::new(process.clone())), @@ -1483,7 +1483,7 @@ impl InfrastructureMap { for (id, process) in target_processes { if !self_processes.contains_key(id) { - log::debug!("FunctionProcess 
added: {}", id); + tracing::debug!("FunctionProcess added: {}", id); process_additions += 1; process_changes.push(ProcessChange::FunctionProcess( Change::::Added(Box::new(process.clone())), @@ -1491,7 +1491,7 @@ impl InfrastructureMap { } } - log::info!( + tracing::info!( "Function Process changes: {} added, {} removed, {} updated", process_additions, process_removals, @@ -1507,11 +1507,11 @@ impl InfrastructureMap { target_process: &OlapProcess, process_changes: &mut Vec, ) { - log::info!("Analyzing changes in OLAP processes..."); + tracing::info!("Analyzing changes in OLAP processes..."); // Currently we assume there is always a change and restart the processes // TODO: Once we refactor to have multiple processes, we should compare actual changes - log::debug!("OLAP Process updated (assumed for now)"); + tracing::debug!("OLAP Process updated (assumed for now)"); process_changes.push(ProcessChange::OlapProcess(Change::::Updated { before: Box::new(self_process.clone()), after: Box::new(target_process.clone()), @@ -1524,11 +1524,11 @@ impl InfrastructureMap { target_process: &ConsumptionApiWebServer, process_changes: &mut Vec, ) { - log::info!("Analyzing changes in Analytics API processes..."); + tracing::info!("Analyzing changes in Analytics API processes..."); // We are currently not tracking individual consumption endpoints, so we will just restart // the consumption web server when something changed - log::debug!("Analytics API Web Server updated (assumed for now)"); + tracing::debug!("Analytics API Web Server updated (assumed for now)"); process_changes.push(ProcessChange::ConsumptionApiWebServer(Change::< ConsumptionApiWebServer, >::Updated { @@ -1543,7 +1543,7 @@ impl InfrastructureMap { target_workers: &HashMap, process_changes: &mut Vec, ) -> (usize, usize, usize) { - log::info!("Analyzing changes in Orchestration Workers..."); + tracing::info!("Analyzing changes in Orchestration Workers..."); let mut worker_updates = 0; let mut worker_removals = 0; let mut worker_additions = 0; @@ -1551,7 +1551,7 @@ impl InfrastructureMap { for (id, worker) in self_workers { if let Some(target_worker) = target_workers.get(id) { // Always treat workers as updated to ensure redeployment - log::debug!( + tracing::debug!( "OrchestrationWorker updated (forced): {} ({})", id, worker.supported_language @@ -1564,7 +1564,7 @@ impl InfrastructureMap { after: Box::new(target_worker.clone()), })); } else { - log::debug!( + tracing::debug!( "OrchestrationWorker removed: {} ({})", id, worker.supported_language @@ -1580,7 +1580,7 @@ impl InfrastructureMap { for (id, worker) in target_workers { if !self_workers.contains_key(id) { - log::debug!( + tracing::debug!( "OrchestrationWorker added: {} ({})", id, worker.supported_language @@ -1594,7 +1594,7 @@ impl InfrastructureMap { } } - log::info!( + tracing::info!( "Orchestration Worker changes: {} added, {} removed, {} updated", worker_additions, worker_removals, @@ -1625,7 +1625,7 @@ impl InfrastructureMap { is_production: bool, olap_changes: &mut Vec, ) { - log::info!( + tracing::info!( "Analyzing SQL resource differences between {} source resources and {} target resources", self_sql_resources.len(), target_sql_resources.len() @@ -1639,7 +1639,7 @@ impl InfrastructureMap { if let Some(target_sql_resource) = target_sql_resources.get(id) { if sql_resource != target_sql_resource { // TODO: if only the teardown code changed, we should not need to execute any changes - log::debug!("SQL resource '{}' has differences", id); + tracing::debug!("SQL resource '{}' has 
differences", id); sql_resource_updates += 1; olap_changes.push(OlapChange::SqlResource(Change::Updated { before: Box::new(sql_resource.clone()), @@ -1657,7 +1657,7 @@ impl InfrastructureMap { ); } } else { - log::debug!("SQL resource '{}' removed", id); + tracing::debug!("SQL resource '{}' removed", id); sql_resource_removals += 1; olap_changes.push(OlapChange::SqlResource(Change::Removed(Box::new( sql_resource.clone(), @@ -1667,7 +1667,7 @@ impl InfrastructureMap { for (id, sql_resource) in target_sql_resources { if !self_sql_resources.contains_key(id) { - log::debug!("SQL resource '{}' added", id); + tracing::debug!("SQL resource '{}' added", id); sql_resource_additions += 1; olap_changes.push(OlapChange::SqlResource(Change::Added(Box::new( sql_resource.clone(), @@ -1685,7 +1685,7 @@ impl InfrastructureMap { } } - log::info!( + tracing::info!( "SQL resource changes: {} added, {} removed, {} updated", sql_resource_additions, sql_resource_removals, @@ -1719,7 +1719,7 @@ impl InfrastructureMap { default_database: &str, ignore_ops: &[crate::infrastructure::olap::clickhouse::IgnorableOperation], ) { - log::info!( + tracing::info!( "Analyzing table differences between {} source tables and {} target tables", self_tables.len(), target_tables.len() @@ -1727,7 +1727,7 @@ impl InfrastructureMap { // Normalize tables for comparison if ignore_ops is provided let (normalized_self, normalized_target) = if !ignore_ops.is_empty() { - log::info!( + tracing::info!( "Normalizing tables before comparison. Ignore list: {:?}", ignore_ops ); @@ -1783,7 +1783,7 @@ impl InfrastructureMap { // Respect lifecycle: ExternallyManaged tables are never modified if target_table.life_cycle == LifeCycle::ExternallyManaged && respect_life_cycle { - log::debug!( + tracing::debug!( "Table '{}' has changes but is externally managed - skipping update", table.name ); @@ -1798,7 +1798,7 @@ impl InfrastructureMap { let original_len = column_changes.len(); column_changes.retain(|change| match change { ColumnChange::Removed(_) => { - log::debug!( + tracing::debug!( "Filtering out column removal for deletion-protected table '{}'", table.name ); @@ -1809,7 +1809,7 @@ impl InfrastructureMap { }); if original_len != column_changes.len() { - log::info!( + tracing::info!( "Filtered {} destructive column changes for deletion-protected table '{}'", original_len - column_changes.len(), table.name @@ -1857,7 +1857,7 @@ impl InfrastructureMap { &table.table_ttl_setting, &target_table.table_ttl_setting, ) { - log::debug!( + tracing::debug!( "Table '{}' has table-level TTL change: {:?} -> {:?}", table.name, table.table_ttl_setting, @@ -1910,18 +1910,18 @@ impl InfrastructureMap { // Respect lifecycle: DeletionProtected and ExternallyManaged tables are never removed match (table.life_cycle, respect_life_cycle) { (LifeCycle::FullyManaged, _) | (_, false) => { - log::debug!("Table '{}' removed", table.name); + tracing::debug!("Table '{}' removed", table.name); table_removals += 1; olap_changes.push(OlapChange::Table(TableChange::Removed(table.clone()))); } (LifeCycle::DeletionProtected, true) => { - log::debug!( + tracing::debug!( "Table '{}' marked for removal but is deletion-protected - skipping removal", table.name ); } (LifeCycle::ExternallyManaged, true) => { - log::debug!( + tracing::debug!( "Table '{}' marked for removal but is externally managed - skipping removal", table.name ); @@ -1935,18 +1935,18 @@ impl InfrastructureMap { if find_table_from_infra_map(table, &normalized_self, default_database).is_none() { // Respect lifecycle: 
ExternallyManaged tables are never added automatically if table.life_cycle == LifeCycle::ExternallyManaged && respect_life_cycle { - log::debug!( + tracing::debug!( "Table '{}' marked for addition but is externally managed - skipping addition", table.name ); } else { - log::debug!( + tracing::debug!( "Table '{}' added with {} columns", table.name, table.columns.len() ); for col in &table.columns { - log::trace!(" - Column: {} ({})", col.name, col.data_type); + tracing::trace!(" - Column: {} ({})", col.name, col.data_type); } table_additions += 1; olap_changes.push(OlapChange::Table(TableChange::Added(table.clone()))); @@ -1954,7 +1954,7 @@ impl InfrastructureMap { } } - log::info!( + tracing::info!( "Table changes: {} added, {} removed, {} updated", table_additions, table_removals, @@ -2084,7 +2084,7 @@ impl InfrastructureMap { let original_len = column_changes.len(); column_changes.retain(|change| match change { ColumnChange::Removed(_) => { - log::debug!( + tracing::debug!( "Filtering out column removal for deletion-protected table '{}'", table.name ); @@ -2095,7 +2095,7 @@ impl InfrastructureMap { }); if original_len != column_changes.len() { - log::info!( + tracing::info!( "Filtered {} destructive column changes for deletion-protected table '{}'", original_len - column_changes.len(), table.name @@ -2221,7 +2221,7 @@ impl InfrastructureMap { *access_key = resolved_access_key; *secret_key = resolved_secret_key; - log::debug!( + tracing::debug!( "Resolved {} credentials for table '{}' at runtime", engine_name, table.name @@ -2263,7 +2263,7 @@ impl InfrastructureMap { // Recalculate engine_params_hash after resolving credentials if should_recalc_hash { table.engine_params_hash = Some(table.engine.non_alterable_params_hash()); - log::debug!( + tracing::debug!( "Recalculated engine_params_hash for table '{}' after credential resolution", table.name ); @@ -2324,7 +2324,7 @@ impl InfrastructureMap { pub async fn load_from_last_redis_prefix(redis_client: &RedisClient) -> Result> { let last_prefix = &redis_client.config.last_key_prefix; - log::info!( + tracing::info!( "Loading InfrastructureMap from last Redis prefix: {}", last_prefix ); @@ -2335,7 +2335,7 @@ impl InfrastructureMap { .context("Failed to get InfrastructureMap from Redis using LAST_KEY_PREFIX"); if let Err(e) = encoded { - log::error!("{}", e); + tracing::error!("{}", e); return Ok(None); } @@ -2873,7 +2873,7 @@ pub fn compute_table_columns_diff(before: &Table, after: &Table) -> Vec Vec Vec Created topic_to_table_sync_processes {}", sync_id); + tracing::info!(" Created topic_to_table_sync_processes {}", sync_id); } else { - log::info!( + tracing::info!( " Topic {} has no target_table specified, skipping sync process creation", partial_topic.name ); diff --git a/apps/framework-cli/src/framework/core/plan.rs b/apps/framework-cli/src/framework/core/plan.rs index c3978121c2..69b062e549 100644 --- a/apps/framework-cli/src/framework/core/plan.rs +++ b/apps/framework-cli/src/framework/core/plan.rs @@ -24,12 +24,12 @@ use crate::infrastructure::olap::clickhouse::config::DEFAULT_DATABASE_NAME; use crate::infrastructure::olap::clickhouse::diff_strategy::ClickHouseTableDiffStrategy; use crate::infrastructure::olap::OlapOperations; use crate::project::Project; -use log::{debug, error, info}; use rdkafka::error::KafkaError; use serde::{Deserialize, Serialize}; use std::collections::HashSet; use std::mem; use std::path::Path; +use tracing::{debug, error, info}; /// Errors that can occur during the planning process. 
#[derive(Debug, thiserror::Error)] @@ -302,7 +302,7 @@ pub async fn reconcile_with_reality( reconciled_map.sql_resources.insert(name.clone(), *before); } _ => { - log::warn!( + tracing::warn!( "Unexpected change type in mismatched_sql_resources: {:?}", change ); diff --git a/apps/framework-cli/src/framework/core/primitive_map.rs b/apps/framework-cli/src/framework/core/primitive_map.rs index 4675988ae6..8f5245e1a6 100644 --- a/apps/framework-cli/src/framework/core/primitive_map.rs +++ b/apps/framework-cli/src/framework/core/primitive_map.rs @@ -1,8 +1,8 @@ -use log::warn; use std::{ collections::{HashMap, HashSet}, path::Path, }; +use tracing::warn; use walkdir::WalkDir; use crate::framework::data_model::config::DataModelConfig; @@ -135,7 +135,7 @@ impl PrimitiveMap { let mut primitive_map = PrimitiveMap::default(); for version in project.versions() { - log::debug!("Loading Versioned primitive map for version: {}", version); + tracing::debug!("Loading Versioned primitive map for version: {}", version); PrimitiveMap::load_versioned(project, &version, &mut primitive_map).await?; } @@ -150,7 +150,7 @@ impl PrimitiveMap { primitive_map.consumption = load_consumption(project).await?; - log::debug!("Loaded Versioned primitive map: {:?}", primitive_map); + tracing::debug!("Loaded Versioned primitive map: {:?}", primitive_map); primitive_map.validate()?; @@ -167,7 +167,7 @@ impl PrimitiveMap { primitive_map: &mut PrimitiveMap, ) -> Result<(), PrimitiveMapLoadingError> { let data_models_root = project.versioned_data_model_dir(version)?; - log::debug!("Loading data models from: {:?}", data_models_root); + tracing::debug!("Loading data models from: {:?}", data_models_root); for res_entry in WalkDir::new(data_models_root) { let entry = res_entry?; @@ -199,7 +199,7 @@ impl PrimitiveMap { ) -> Result, DataModelError> { let file_objects = data_model::parser::parse_data_model_file(file_path, version, project).await?; - log::debug!( + tracing::debug!( "Found the following data models: {:?} in path {:?}", file_objects.models, file_path @@ -232,7 +232,7 @@ impl PrimitiveMap { } } - log::debug!( + tracing::debug!( "Data Models matched with configuration: {:?} from file: {:?}", indexed_models, file_path diff --git a/apps/framework-cli/src/framework/core/state_storage.rs b/apps/framework-cli/src/framework/core/state_storage.rs index 794f14f5cf..077bcb518a 100644 --- a/apps/framework-cli/src/framework/core/state_storage.rs +++ b/apps/framework-cli/src/framework/core/state_storage.rs @@ -13,10 +13,10 @@ use crate::utilities::machine_id::get_or_create_machine_id; use anyhow::{Context, Result}; use async_trait::async_trait; use chrono::{DateTime, Duration, Utc}; -use log::{debug, info, warn}; use protobuf::Message; use serde::{Deserialize, Serialize}; use std::sync::Arc; +use tracing::{debug, info, warn}; /// Lock data for migration coordination #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/apps/framework-cli/src/framework/data_model/config.rs b/apps/framework-cli/src/framework/data_model/config.rs index bd222ecc16..07f6f38d86 100644 --- a/apps/framework-cli/src/framework/data_model/config.rs +++ b/apps/framework-cli/src/framework/data_model/config.rs @@ -6,10 +6,10 @@ use crate::framework::python::datamodel_config::execute_python_model_file_for_co use crate::framework::typescript::export_collectors::get_data_model_configs; use crate::project::Project; use crate::utilities::_true; -use log::info; use serde::Deserialize; use serde::Serialize; use std::ffi::OsStr; +use tracing::info; pub type 
ConfigIdentifier = String; diff --git a/apps/framework-cli/src/framework/data_model/parser.rs b/apps/framework-cli/src/framework/data_model/parser.rs index 9d89291b2c..77a34c4601 100644 --- a/apps/framework-cli/src/framework/data_model/parser.rs +++ b/apps/framework-cli/src/framework/data_model/parser.rs @@ -4,9 +4,9 @@ use crate::{ framework::{core::infrastructure::table::DataEnum, python, typescript}, project::Project, }; -use log::info; use serde::Deserialize; use std::path::Path; +use tracing::info; #[derive(Debug, thiserror::Error)] #[error("Failed to parse the data model file")] diff --git a/apps/framework-cli/src/framework/python/blocks.rs b/apps/framework-cli/src/framework/python/blocks.rs index d5a9f499fa..daec94cdbf 100644 --- a/apps/framework-cli/src/framework/python/blocks.rs +++ b/apps/framework-cli/src/framework/python/blocks.rs @@ -1,7 +1,7 @@ -use log::{error, info}; use std::path::Path; use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::process::Child; +use tracing::{error, info}; use super::executor; use crate::framework::blocks::model::BlocksError; diff --git a/apps/framework-cli/src/framework/python/consumption.rs b/apps/framework-cli/src/framework/python/consumption.rs index 5f84b1615a..4a31bedc6b 100644 --- a/apps/framework-cli/src/framework/python/consumption.rs +++ b/apps/framework-cli/src/framework/python/consumption.rs @@ -5,11 +5,11 @@ use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; use crate::infrastructure::processes::consumption_registry::ConsumptionError; use crate::project::{JwtConfig, Project}; use crate::utilities::constants::{CONSUMPTION_WRAPPER_PACKAGE_NAME, UTILS_WRAPPER_PACKAGE_NAME}; -use log::{error, info}; use std::fs; use std::path::Path; use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::process::Child; +use tracing::{error, info}; use super::executor; diff --git a/apps/framework-cli/src/framework/python/datamodel_config.rs b/apps/framework-cli/src/framework/python/datamodel_config.rs index 8207f7d097..b540dddeee 100644 --- a/apps/framework-cli/src/framework/python/datamodel_config.rs +++ b/apps/framework-cli/src/framework/python/datamodel_config.rs @@ -1,10 +1,10 @@ -use log::info; use serde::Deserialize; use serde::Serialize; use std::{ collections::HashMap, path::{absolute, Path}, }; +use tracing::info; use crate::framework::core::partial_infrastructure_map::{ DmV2LoadingError, PartialInfrastructureMap, diff --git a/apps/framework-cli/src/framework/python/parser.rs b/apps/framework-cli/src/framework/python/parser.rs index feec1e9fd1..5d0b23b2e8 100644 --- a/apps/framework-cli/src/framework/python/parser.rs +++ b/apps/framework-cli/src/framework/python/parser.rs @@ -24,13 +24,13 @@ use crate::{ project::python_project::PythonProject, utilities::constants::REQUIREMENTS_TXT, }; -use log::warn; use rustpython_parser::{ ast::{self, Constant, Expr, ExprName, Identifier, Keyword, Stmt, StmtClassDef}, Parse, }; use std::fmt::Debug; use std::path::{Path, PathBuf}; +use tracing::warn; use crate::framework::core::infrastructure::table::{ Column, ColumnType, DataEnum as FrameworkEnum, FloatType, IntType, Nested, diff --git a/apps/framework-cli/src/framework/python/scripts_worker.rs b/apps/framework-cli/src/framework/python/scripts_worker.rs index 2472d094c7..b21d11c5f1 100644 --- a/apps/framework-cli/src/framework/python/scripts_worker.rs +++ b/apps/framework-cli/src/framework/python/scripts_worker.rs @@ -1,7 +1,7 @@ -use log::{error, info, warn}; use std::fs; use tokio::io::{AsyncBufReadExt, BufReader}; use 
tokio::process::Child; +use tracing::{error, info, warn}; use crate::cli::display::{show_message_wrapper, Message, MessageType}; use crate::project::{Project, ProjectFileError}; diff --git a/apps/framework-cli/src/framework/python/streaming.rs b/apps/framework-cli/src/framework/python/streaming.rs index bf662da1d2..c54608f175 100644 --- a/apps/framework-cli/src/framework/python/streaming.rs +++ b/apps/framework-cli/src/framework/python/streaming.rs @@ -75,13 +75,13 @@ pub fn run( tokio::spawn(async move { while let Ok(Some(line)) = stdout_reader.next_line().await { - log::info!("{}", line); + tracing::info!("{}", line); } }); tokio::spawn(async move { while let Ok(Some(line)) = stderr_reader.next_line().await { - log::error!("{}", line); + tracing::error!("{}", line); } }); diff --git a/apps/framework-cli/src/framework/scripts/executor.rs b/apps/framework-cli/src/framework/scripts/executor.rs index 3e713d485e..23557cb8b8 100644 --- a/apps/framework-cli/src/framework/scripts/executor.rs +++ b/apps/framework-cli/src/framework/scripts/executor.rs @@ -1,8 +1,8 @@ use anyhow::Result; -use log::info; use serde::Serialize; use sha2::{Digest, Sha256}; use std::collections::{HashMap, HashSet}; +use tracing::info; use super::{config::WorkflowConfig, Workflow}; use crate::framework::{ @@ -156,7 +156,7 @@ pub(crate) async fn execute_scheduled_workflows( match workflow.start(&project.temporal_config, None).await { Ok(_) => info!("Auto-started workflow: {}", workflow.name), - Err(e) => log::error!("Failed to auto-start workflow {}: {}", workflow.name, e), + Err(e) => tracing::error!("Failed to auto-start workflow {}: {}", workflow.name, e), } } } @@ -165,7 +165,7 @@ async fn list_running_workflows(project: &Project) -> HashSet { let client_manager = match TemporalClientManager::new_validate(&project.temporal_config, true) { Ok(manager) => manager, Err(e) => { - log::error!("Failed to create Temporal client manager: {}", e); + tracing::error!("Failed to create Temporal client manager: {}", e); return HashSet::new(); } }; @@ -195,7 +195,7 @@ async fn list_running_workflows(project: &Project) -> HashSet { .map(|execution_info| execution_info.workflow_id) .collect(), Err(e) => { - log::error!("Failed to list running workflows: {}", e); + tracing::error!("Failed to list running workflows: {}", e); HashSet::new() } } diff --git a/apps/framework-cli/src/framework/streaming/loader.rs b/apps/framework-cli/src/framework/streaming/loader.rs index 2ad560abe1..6d31bfe306 100644 --- a/apps/framework-cli/src/framework/streaming/loader.rs +++ b/apps/framework-cli/src/framework/streaming/loader.rs @@ -7,10 +7,10 @@ use crate::{ project::Project, utilities::constants::{PY_FLOW_FILE, TS_FLOW_FILE}, }; -use log::{debug, info, warn}; use regex::{Captures, Regex}; use std::ffi::OsStr; use std::{fs, path::Path}; +use tracing::{debug, info, warn}; const MIGRATION_REGEX: &str = r"^([a-zA-Z0-9_]+)_migrate__([0-9_]+)__(([a-zA-Z0-9_]+)__)?([0-9_]+)$"; diff --git a/apps/framework-cli/src/framework/typescript/blocks.rs b/apps/framework-cli/src/framework/typescript/blocks.rs index 5d3fcc84af..c1434afaaa 100644 --- a/apps/framework-cli/src/framework/typescript/blocks.rs +++ b/apps/framework-cli/src/framework/typescript/blocks.rs @@ -1,7 +1,7 @@ -use log::{error, info}; use std::path::Path; use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::process::Child; +use tracing::{error, info}; use super::bin; use crate::framework::blocks::model::BlocksError; diff --git a/apps/framework-cli/src/framework/typescript/consumption.rs 
b/apps/framework-cli/src/framework/typescript/consumption.rs index bb64f23df6..24df3a763f 100644 --- a/apps/framework-cli/src/framework/typescript/consumption.rs +++ b/apps/framework-cli/src/framework/typescript/consumption.rs @@ -5,11 +5,11 @@ use crate::framework::typescript::export_collectors::ExportCollectorError; use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; use crate::infrastructure::processes::consumption_registry::ConsumptionError; use crate::project::{JwtConfig, Project}; -use log::{debug, error, info}; use serde_json::{Map, Value}; use std::path::Path; use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::process::Child; +use tracing::{debug, error, info}; use super::bin; diff --git a/apps/framework-cli/src/framework/typescript/export_collectors.rs b/apps/framework-cli/src/framework/typescript/export_collectors.rs index 12a7e8bcdc..9b5a3898f2 100644 --- a/apps/framework-cli/src/framework/typescript/export_collectors.rs +++ b/apps/framework-cli/src/framework/typescript/export_collectors.rs @@ -3,12 +3,12 @@ use crate::framework::consumption::model::ConsumptionQueryParam; use crate::framework::data_model::config::{ConfigIdentifier, DataModelConfig}; use crate::framework::typescript::consumption::{extract_intput_param, extract_schema}; use crate::project::Project; -use log::debug; use serde_json::Value; use std::collections::{HashMap, HashSet}; use std::path::Path; use tokio::io::AsyncReadExt; use tokio::process::Child; +use tracing::debug; const EXPORT_SERIALIZER_BIN: &str = "export-serializer"; const EXPORT_FUNC_TYPE_BIN: &str = "consumption-type-serializer"; diff --git a/apps/framework-cli/src/framework/typescript/parser.rs b/apps/framework-cli/src/framework/typescript/parser.rs index d6ec488171..16bc7495f8 100644 --- a/apps/framework-cli/src/framework/typescript/parser.rs +++ b/apps/framework-cli/src/framework/typescript/parser.rs @@ -38,7 +38,7 @@ pub async fn extract_data_model_from_file( let internal = project.internal_dir().unwrap(); let output_dir = internal.join("serialized_datamodels"); - log::info!("Extracting data model from file: {:?}", path); + tracing::info!("Extracting data model from file: {:?}", path); fs::write( internal.join(TSCONFIG_JSON), @@ -85,12 +85,12 @@ pub async fn extract_data_model_from_file( run_command_with_output_proxy(command, "TypeScript Compiler") .await .map_err(|err| { - log::error!("Error while running moose-tspc: {}", err); + tracing::error!("Error while running moose-tspc: {}", err); TypescriptParsingError::TypescriptCompilerError(Some(err)) })? 
}; - log::info!("Typescript compiler return code: {:?}", ts_return_code); + tracing::info!("Typescript compiler return code: {:?}", ts_return_code); if !ts_return_code.success() { return Err(TypescriptParsingError::TypescriptCompilerError(None)); diff --git a/apps/framework-cli/src/framework/typescript/scripts_worker.rs b/apps/framework-cli/src/framework/typescript/scripts_worker.rs index 9fcd4eed47..6a4c206e3e 100644 --- a/apps/framework-cli/src/framework/typescript/scripts_worker.rs +++ b/apps/framework-cli/src/framework/typescript/scripts_worker.rs @@ -1,9 +1,9 @@ use crate::cli::display::{show_message_wrapper, Message, MessageType}; use crate::project::{Project, ProjectFileError}; -use log::{debug, error, info, warn}; use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::process::Child; +use tracing::{debug, error, info, warn}; use super::bin; diff --git a/apps/framework-cli/src/framework/typescript/streaming.rs b/apps/framework-cli/src/framework/typescript/streaming.rs index d76559a4cd..d8d7869551 100644 --- a/apps/framework-cli/src/framework/typescript/streaming.rs +++ b/apps/framework-cli/src/framework/typescript/streaming.rs @@ -1,7 +1,7 @@ -use log::{error, info}; use std::path::Path; use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::process::Child; +use tracing::{error, info}; use super::bin; use crate::infrastructure::stream::kafka::models::KafkaConfig; diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/client.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/client.rs index 10d3fff6a5..e6b270fcd1 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/client.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/client.rs @@ -7,13 +7,13 @@ use hyper::body::Bytes; use hyper::{Request, Response, Uri}; use hyper_tls::HttpsConnector; use hyper_util::client::legacy::{connect::HttpConnector, Client}; -use log::debug; use tokio::time::{sleep, Duration}; +use tracing::debug; use super::config::ClickHouseConfig; use super::model::{wrap_and_join_column_names, ClickHouseRecord}; -use log::error; +use tracing::error; use async_trait::async_trait; @@ -152,7 +152,7 @@ impl ClickHouseClient { let body = Self::build_body(columns, records); - log::trace!("Inserting into clickhouse with values: {}", body); + tracing::trace!("Inserting into clickhouse with values: {}", body); let bytes = Bytes::from(body); diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/errors.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/errors.rs index 1bfc932f23..668ac49f59 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/errors.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/errors.rs @@ -1,7 +1,7 @@ //! Diagnostic provider for checking system-wide errors -use log::debug; use serde_json::{json, Map, Value}; +use tracing::debug; use super::{Component, DiagnosticError, DiagnosticProvider, Issue, Severity}; use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/merge_failures.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/merge_failures.rs index 79db903db6..79cda395c0 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/merge_failures.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/merge_failures.rs @@ -1,7 +1,7 @@ //! 
Diagnostic provider for checking merge failures from system.metrics -use log::debug; use serde_json::{json, Map, Value}; +use tracing::debug; use super::{Component, DiagnosticError, DiagnosticProvider, Issue, Severity}; use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/merges.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/merges.rs index 62aec66941..b5b9cf503c 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/merges.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/merges.rs @@ -1,7 +1,7 @@ //! Diagnostic provider for checking stuck background merges -use log::debug; use serde_json::{json, Map, Value}; +use tracing::debug; use super::{Component, DiagnosticError, DiagnosticProvider, Issue, Severity}; use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mod.rs index 66ea7561a6..b76f364d4b 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mod.rs @@ -420,12 +420,12 @@ pub async fn run_diagnostics( Ok(issues) => all_issues.extend(issues), Err(e) => { // Log error but continue with other providers - log::warn!("Provider {} failed: {}", provider_name, e); + tracing::warn!("Provider {} failed: {}", provider_name, e); } }, Err(e) => { // Task panicked or was cancelled - log::error!("Diagnostic task failed: {}", e); + tracing::error!("Diagnostic task failed: {}", e); } } } diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mutations.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mutations.rs index bce0079981..d567c1cfee 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mutations.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/mutations.rs @@ -1,7 +1,7 @@ //! Diagnostic provider for checking stuck or failed mutations -use log::debug; use serde_json::{json, Map, Value}; +use tracing::debug; use super::{Component, DiagnosticError, DiagnosticProvider, Issue, Severity}; use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/parts.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/parts.rs index 926209e881..2193363443 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/parts.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/parts.rs @@ -1,7 +1,7 @@ //! Diagnostic provider for checking data parts issues -use log::debug; use serde_json::{json, Map, Value}; +use tracing::debug; use super::{Component, DiagnosticError, DiagnosticProvider, Issue, Severity}; use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/replication.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/replication.rs index 79020926d0..0d42c10f49 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/replication.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/replication.rs @@ -1,7 +1,7 @@ //! 
Diagnostic provider for checking replication health -use log::debug; use serde_json::{json, Map, Value}; +use tracing::debug; use super::{Component, DiagnosticError, DiagnosticProvider, Issue, Severity}; use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/s3queue.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/s3queue.rs index 37ad704857..bdc267c27b 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/s3queue.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/s3queue.rs @@ -1,7 +1,7 @@ //! Diagnostic provider for checking S3Queue ingestion -use log::debug; use serde_json::{json, Map, Value}; +use tracing::debug; use super::{Component, DiagnosticError, DiagnosticProvider, Issue, Severity}; use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/stopped_operations.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/stopped_operations.rs index 397e44b907..6523be76d9 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/stopped_operations.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diagnostics/stopped_operations.rs @@ -1,7 +1,7 @@ //! Diagnostic provider for checking stopped operations (merges, replication) -use log::debug; use serde_json::{json, Map, Value}; +use tracing::debug; use super::{Component, DiagnosticError, DiagnosticProvider, Issue, Severity}; use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs index 46168bcc69..e765fbd72b 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs @@ -393,7 +393,7 @@ impl ClickHouseTableDiffStrategy { // Skip population in production (user must handle manually) // Only populate in dev for new MVs with non-S3Queue sources if is_new && !has_s3queue_source && !is_production { - log::info!( + tracing::info!( "Adding population operation for materialized view '{}'", sql_resource.name ); @@ -436,7 +436,7 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { // Check if ORDER BY has changed let order_by_changed = order_by_change.before != order_by_change.after; if order_by_changed { - log::warn!( + tracing::warn!( "ClickHouse: ORDER BY changed for table '{}', requiring drop+create", before.name ); @@ -463,7 +463,7 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { let error_message = format_database_change_error(&before.name, before_db, after_db); - log::error!("{}", error_message); + tracing::error!("{}", error_message); return vec![OlapChange::Table(TableChange::ValidationError { table_name: before.name.clone(), @@ -482,7 +482,7 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { // Check if PARTITION BY has changed let partition_by_changed = partition_by_change.before != partition_by_change.after; if partition_by_changed { - log::warn!( + tracing::warn!( "ClickHouse: PARTITION BY changed for table '{}', requiring drop+create", before.name ); @@ -501,7 +501,7 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { // S3 allows specifying PK, but that information is not in system.columns && after.engine.is_merge_tree_family() { - 
log::warn!( + tracing::warn!( "ClickHouse: Primary key structure changed for table '{}', requiring drop+create", before.name ); @@ -530,7 +530,7 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { // Check if engine has changed (using hash comparison when available) if engine_changed { - log::warn!( + tracing::warn!( "ClickHouse: engine changed for table '{}', requiring drop+create", before.name ); @@ -567,7 +567,7 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { .map_or(*default, |v| v); if before_value != after_value { - log::warn!( + tracing::warn!( "ClickHouse: Readonly setting '{}' changed for table '{}' (from {:?} to {:?}), requiring drop+create", readonly_setting, before.name, @@ -581,7 +581,7 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { } } - log::debug!( + tracing::debug!( "ClickHouse: Only modifiable table settings changed for table '{}', can use ALTER TABLE MODIFY SETTING", before.name ); @@ -598,7 +598,7 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { // S3Queue only supports MODIFY/RESET SETTING, not column operations if !column_changes.is_empty() && matches!(&before.engine, ClickhouseEngine::S3Queue { .. }) { - log::warn!( + tracing::warn!( "ClickHouse: S3Queue table '{}' has column changes, requiring drop+create (S3Queue doesn't support ALTER TABLE for columns)", before.name ); diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/inserter.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/inserter.rs index a776c40503..da12c6148d 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/inserter.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/inserter.rs @@ -47,8 +47,8 @@ use crate::infrastructure::olap::clickhouse::client::ClickHouseClientTrait; use crate::infrastructure::olap::clickhouse::model::ClickHouseRecord; use std::collections::{HashMap, VecDeque}; -use log::{info, warn}; use rdkafka::error::KafkaError; +use tracing::{info, warn}; /// Represents a Kafka partition identifier type Partition = i32; diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs index 757bf54b4d..7cd6e2b274 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mod.rs @@ -33,7 +33,6 @@ use clickhouse::Client; use errors::ClickhouseError; -use log::{debug, info, warn}; use mapper::{std_column_to_clickhouse_column, std_table_to_clickhouse_table}; use model::ClickHouseColumn; use queries::ClickhouseEngine; @@ -51,6 +50,7 @@ use sql_parser::{ use std::collections::HashMap; use std::ops::Deref; use std::sync::LazyLock; +use tracing::{debug, info, warn}; use self::model::ClickHouseSystemTable; use crate::framework::core::infrastructure::sql_resource::SqlResource; @@ -657,7 +657,7 @@ async fn execute_create_table( client: &ConfiguredDBClient, is_dev: bool, ) -> Result<(), ClickhouseChangesError> { - log::info!("Executing CreateTable: {:?}", table.id(db_name)); + tracing::info!("Executing CreateTable: {:?}", table.id(db_name)); let clickhouse_table = std_table_to_clickhouse_table(table)?; // Use table's database if specified, otherwise use global database let target_database = table.database.as_deref().unwrap_or(db_name); @@ -777,7 +777,7 @@ async fn execute_drop_table( cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { - log::info!("Executing DropTable: {:?}", table_name); + tracing::info!("Executing 
DropTable: {:?}", table_name); // Use table's database if specified, otherwise use global database let target_database = table_database.unwrap_or(db_name); let drop_query = drop_table_query(target_database, table_name, cluster_name)?; @@ -803,7 +803,7 @@ async fn execute_add_table_column( cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { - log::info!( + tracing::info!( "Executing AddTableColumn for table: {}, column: {}, after: {:?}", table_name, column.name, @@ -836,7 +836,7 @@ async fn execute_add_table_column( Some(after_col) => format!("AFTER `{after_col}`"), } ); - log::debug!("Adding column: {}", add_column_query); + tracing::debug!("Adding column: {}", add_column_query); run_query(&add_column_query, client).await.map_err(|e| { ClickhouseChangesError::ClickhouseClient { error: e, @@ -853,7 +853,7 @@ async fn execute_drop_table_column( cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { - log::info!( + tracing::info!( "Executing DropTableColumn for table: {}, column: {}", table_name, column_name @@ -865,7 +865,7 @@ async fn execute_drop_table_column( "ALTER TABLE `{}`.`{}`{} DROP COLUMN IF EXISTS `{}`", db_name, table_name, cluster_clause, column_name ); - log::debug!("Dropping column: {}", drop_column_query); + tracing::debug!("Dropping column: {}", drop_column_query); run_query(&drop_column_query, client).await.map_err(|e| { ClickhouseChangesError::ClickhouseClient { error: e, @@ -904,7 +904,7 @@ async fn execute_modify_table_column( && !ttl_changed && comment_changed { - log::info!( + tracing::info!( "Executing comment-only modification for table: {}, column: {}", table_name, after_column.name @@ -938,7 +938,7 @@ async fn execute_modify_table_column( return Ok(()); } - log::info!( + tracing::info!( "Executing ModifyTableColumn for table: {}, column: {} ({}→{})\ data_type_changed: {data_type_changed}, default_changed: {default_changed}, required_changed: {required_changed}, comment_changed: {comment_changed}, ttl_changed: {ttl_changed}", table_name, @@ -964,7 +964,7 @@ data_type_changed: {data_type_changed}, default_changed: {default_changed}, requ // Execute all statements in order for query in queries { - log::debug!("Modifying column: {}", query); + tracing::debug!("Modifying column: {}", query); run_query(&query, client) .await .map_err(|e| ClickhouseChangesError::ClickhouseClient { @@ -988,7 +988,7 @@ async fn execute_modify_column_comment( cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { - log::info!( + tracing::info!( "Executing ModifyColumnComment for table: {}, column: {}", table_name, column.name @@ -997,7 +997,7 @@ async fn execute_modify_column_comment( let modify_comment_query = build_modify_column_comment_sql(db_name, table_name, &column.name, comment, cluster_name)?; - log::debug!("Modifying column comment: {}", modify_comment_query); + tracing::debug!("Modifying column comment: {}", modify_comment_query); run_query(&modify_comment_query, client) .await .map_err(|e| ClickhouseChangesError::ClickhouseClient { @@ -1139,7 +1139,7 @@ async fn execute_modify_table_settings( } } - log::info!( + tracing::info!( "Executing ModifyTableSettings for table: {} - modifying {} settings, resetting {} settings", table_name, settings_to_modify.len(), @@ -1154,7 +1154,7 @@ async fn execute_modify_table_settings( &settings_to_modify, cluster_name, )?; - log::debug!("Modifying table settings: {}", alter_settings_query); + 
tracing::debug!("Modifying table settings: {}", alter_settings_query); run_query(&alter_settings_query, client) .await @@ -1172,7 +1172,7 @@ async fn execute_modify_table_settings( &settings_to_reset, cluster_name, )?; - log::debug!("Resetting table settings: {}", reset_settings_query); + tracing::debug!("Resetting table settings: {}", reset_settings_query); run_query(&reset_settings_query, client) .await @@ -1194,7 +1194,7 @@ async fn execute_rename_table_column( cluster_name: Option<&str>, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { - log::info!( + tracing::info!( "Executing RenameTableColumn for table: {}, column: {} → {}", table_name, before_column_name, @@ -1206,7 +1206,7 @@ async fn execute_rename_table_column( let rename_column_query = format!( "ALTER TABLE `{db_name}`.`{table_name}`{cluster_clause} RENAME COLUMN `{before_column_name}` TO `{after_column_name}`" ); - log::debug!("Renaming column: {}", rename_column_query); + tracing::debug!("Renaming column: {}", rename_column_query); run_query(&rename_column_query, client).await.map_err(|e| { ClickhouseChangesError::ClickhouseClient { error: e, @@ -1222,14 +1222,14 @@ async fn execute_raw_sql( description: &str, client: &ConfiguredDBClient, ) -> Result<(), ClickhouseChangesError> { - log::info!( + tracing::info!( "Executing {} raw SQL statements. {}", sql_statements.len(), description ); for (i, sql) in sql_statements.iter().enumerate() { if !sql.trim().is_empty() { - log::debug!("Executing SQL statement {}: {}", i + 1, sql); + tracing::debug!("Executing SQL statement {}: {}", i + 1, sql); run_query(sql, client) .await .map_err(|e| ClickhouseChangesError::ClickhouseClient { @@ -1520,7 +1520,7 @@ fn parse_column_metadata(comment: &str) -> Option { match serde_json::from_str::(json_str) { Ok(metadata) => Some(metadata), Err(e) => { - log::warn!("Failed to parse column metadata JSON: {}", e); + tracing::warn!("Failed to parse column metadata JSON: {}", e); None } } diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs index 154fc53b57..4b3042ed03 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs @@ -1,8 +1,8 @@ use handlebars::{no_escape, Handlebars}; -use log::info; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use sha2::{Digest, Sha256}; +use tracing::info; use super::errors::ClickhouseError; use super::model::ClickHouseColumn; @@ -174,13 +174,13 @@ impl BufferEngine { ) -> String { // Warn about invalid combinations (but serialize what we can) if flush_rows.is_some() && flush_time.is_none() { - log::warn!( + tracing::warn!( "Buffer engine has flush_rows but no flush_time - flush_rows will be ignored. \ This violates ClickHouse nested optional constraint." ); } if flush_bytes.is_some() && (flush_time.is_none() || flush_rows.is_none()) { - log::warn!( + tracing::warn!( "Buffer engine has flush_bytes but missing flush_time or flush_rows - flush_bytes will be ignored. \ This violates ClickHouse nested optional constraint." 
); @@ -1267,7 +1267,7 @@ impl ClickhouseEngine { if ver.is_some() { params.push(format!("'{}'", d)); } else { - log::warn!("is_deleted requires ver to be specified, this was not caught by the validation"); + tracing::warn!("is_deleted requires ver to be specified, this was not caught by the validation"); } } if !params.is_empty() { @@ -1400,7 +1400,7 @@ impl ClickhouseEngine { ) -> String { // Warn about invalid combination if policy_name.is_some() && sharding_key.is_none() { - log::warn!( + tracing::warn!( "Distributed engine has policy_name but no sharding_key - policy_name will be ignored. \ This violates ClickHouse nested optional constraint." ); @@ -2592,13 +2592,13 @@ pub fn create_table_query( }) => { // Warn about invalid combinations if flush_rows.is_some() && flush_time.is_none() { - log::warn!( + tracing::warn!( "Buffer engine has flush_rows but no flush_time - flush_rows will be ignored. \ This violates ClickHouse nested optional constraint." ); } if flush_bytes.is_some() && (flush_time.is_none() || flush_rows.is_none()) { - log::warn!( + tracing::warn!( "Buffer engine has flush_bytes but missing flush_time or flush_rows - flush_bytes will be ignored. \ This violates ClickHouse nested optional constraint." ); @@ -2640,7 +2640,7 @@ pub fn create_table_query( } => { // Warn about invalid combination if policy_name.is_some() && sharding_key.is_none() { - log::warn!( + tracing::warn!( "Distributed engine has policy_name but no sharding_key - policy_name will be ignored. \ This violates ClickHouse nested optional constraint." ); diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse_http_client.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse_http_client.rs index a02afdce0e..c62a09aea6 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse_http_client.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse_http_client.rs @@ -9,8 +9,8 @@ use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; use crate::infrastructure::olap::clickhouse::{create_client, ConfiguredDBClient}; -use log::debug; use serde_json::Value; +use tracing::debug; /// Create a configured HTTP client for query operations /// diff --git a/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs b/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs index ca9589426b..2f7f375397 100644 --- a/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs +++ b/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs @@ -1238,7 +1238,7 @@ fn order_operations_by_dependencies( // Check if adding this edge created a cycle if petgraph::algo::is_cyclic_directed(&graph) { - log::debug!("Cycle detected while adding edge"); + tracing::debug!("Cycle detected while adding edge"); return Err(PlanOrderingError::CyclicDependency); } } @@ -1247,14 +1247,14 @@ fn order_operations_by_dependencies( // Also check for cycles after all edges are added if petgraph::algo::is_cyclic_directed(&graph) { - log::debug!("Cycle detected after adding all edges"); + tracing::debug!("Cycle detected after adding all edges"); return Err(PlanOrderingError::CyclicDependency); } // If no edges were added, just return operations in original order // This handles cases where signatures were invalid or not found if edge_count == 0 && operations.len() > 1 { - log::debug!("No edges were added to the graph"); + tracing::debug!("No edges were added to the graph"); return Ok(operations.to_vec()); } @@ -1262,7 +1262,7 @@ fn order_operations_by_dependencies( let sorted_indices = match toposort(&graph, 
None) { Ok(indices) => indices, Err(err) => { - log::debug!( + tracing::debug!( "Cycle detected during topological sort: {:?}", err.node_id() ); diff --git a/apps/framework-cli/src/infrastructure/orchestration/temporal_client.rs b/apps/framework-cli/src/infrastructure/orchestration/temporal_client.rs index fc44d7db49..dae0523565 100644 --- a/apps/framework-cli/src/infrastructure/orchestration/temporal_client.rs +++ b/apps/framework-cli/src/infrastructure/orchestration/temporal_client.rs @@ -1,5 +1,4 @@ use anyhow::{Error, Result}; -use log::info; use temporal_sdk_core_protos::temporal::api::workflowservice::v1::workflow_service_client::WorkflowServiceClient; use temporal_sdk_core_protos::temporal::api::workflowservice::v1::{ DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeWorkflowExecutionRequest, @@ -12,6 +11,7 @@ use temporal_sdk_core_protos::temporal::api::workflowservice::v1::{ }; use tonic::service::interceptor::InterceptedService; use tonic::transport::{Channel, Uri}; +use tracing::info; use crate::infrastructure::orchestration::temporal::{InvalidTemporalSchemeError, TemporalConfig}; use crate::project::Project; diff --git a/apps/framework-cli/src/infrastructure/processes/blocks_registry.rs b/apps/framework-cli/src/infrastructure/processes/blocks_registry.rs index bad89a00f6..ab6879a559 100644 --- a/apps/framework-cli/src/infrastructure/processes/blocks_registry.rs +++ b/apps/framework-cli/src/infrastructure/processes/blocks_registry.rs @@ -1,7 +1,7 @@ -use log::info; use std::sync::Arc; use std::{collections::HashMap, path::PathBuf}; use tokio::process::Child; +use tracing::info; use crate::project::Project; use crate::{ diff --git a/apps/framework-cli/src/infrastructure/processes/consumption_registry.rs b/apps/framework-cli/src/infrastructure/processes/consumption_registry.rs index df66237972..4271780e16 100644 --- a/apps/framework-cli/src/infrastructure/processes/consumption_registry.rs +++ b/apps/framework-cli/src/infrastructure/processes/consumption_registry.rs @@ -1,6 +1,6 @@ use std::path::PathBuf; -use log::info; +use tracing::info; use crate::utilities::system::{RestartingProcess, StartChildFn}; use crate::{ diff --git a/apps/framework-cli/src/infrastructure/processes/functions_registry.rs b/apps/framework-cli/src/infrastructure/processes/functions_registry.rs index 05148864fd..3f0f16597b 100644 --- a/apps/framework-cli/src/infrastructure/processes/functions_registry.rs +++ b/apps/framework-cli/src/infrastructure/processes/functions_registry.rs @@ -8,9 +8,9 @@ use crate::{ project::Project, utilities::system::KillProcessError, }; -use log::{error, info}; use std::collections::HashMap; use std::sync::Arc; +use tracing::{error, info}; #[derive(Debug, thiserror::Error)] pub enum FunctionRegistryError { diff --git a/apps/framework-cli/src/infrastructure/processes/kafka_clickhouse_sync.rs b/apps/framework-cli/src/infrastructure/processes/kafka_clickhouse_sync.rs index bd7ccf8cca..115e63a59d 100644 --- a/apps/framework-cli/src/infrastructure/processes/kafka_clickhouse_sync.rs +++ b/apps/framework-cli/src/infrastructure/processes/kafka_clickhouse_sync.rs @@ -9,9 +9,6 @@ //! batching, back pressure, and error handling mechanisms. 
use futures::TryFutureExt; -use log::error; -use log::info; -use log::{debug, warn}; use rdkafka::consumer::{Consumer, StreamConsumer}; use rdkafka::producer::{DeliveryFuture, Producer}; use rdkafka::Message; @@ -19,6 +16,9 @@ use serde_json::Value; use std::collections::{HashMap, VecDeque}; use std::sync::{Arc, LazyLock}; use tokio::task::JoinHandle; +use tracing::error; +use tracing::info; +use tracing::{debug, warn}; use crate::framework::core::infrastructure::table::Column; use crate::framework::core::infrastructure::table::ColumnType; @@ -485,7 +485,7 @@ async fn sync_kafka_to_kafka( Ok(message) => match message.payload() { Some(payload) => match std::str::from_utf8(payload) { Ok(payload_str) => { - log::trace!( + tracing::trace!( "Received message from {}: {}", source_topic_name, payload_str @@ -631,7 +631,7 @@ async fn sync_kafka_to_clickhouse( let payload = if payload.len() >= 5 && payload[0] == 0x00 { &payload[5..] } else { payload }; match std::str::from_utf8(payload) { Ok(payload_str) => { - log::trace!( + tracing::trace!( "Received message from {}: {}", source_topic_name, payload_str ); @@ -699,17 +699,17 @@ fn mapper_json_to_clickhouse_record( let key = column.name.clone(); let value = map.get(&key); - log::trace!( + tracing::trace!( "Looking to map column {:?} to values in map: {:?}", column, map ); - log::trace!("Value found for key {}: {:?}", key, value); + tracing::trace!("Value found for key {}: {:?}", key, value); match value { Some(Value::Null) => { if column.required { - log::error!("Required column {} has a null value", key); + tracing::error!("Required column {} has a null value", key); } else { record.insert(key, ClickHouseValue::new_null()); } @@ -722,7 +722,7 @@ fn mapper_json_to_clickhouse_record( Err(e) => { // Promote mapping failures to `warn!` so we don't silently skip // individual records when their schema/value deviates. 
- log::warn!("For column {} with type {}, Error mapping JSON value to ClickHouse value: {}", column.name, &column.data_type, e) + tracing::warn!("For column {} with type {}, Error mapping JSON value to ClickHouse value: {}", column.name, &column.data_type, e) } }; } @@ -878,7 +878,7 @@ fn map_json_value_to_clickhouse_value( for value in arr.iter() { if *value == Value::Null { if !element_nullable { - log::error!("Array of non nullable elements has a null value"); + tracing::error!("Array of non nullable elements has a null value"); } // We are adding the value anyway to match the number of arguments that clickhouse expects array_values.push(ClickHouseValue::new_null()); @@ -925,7 +925,7 @@ fn map_json_value_to_clickhouse_value( match val { Some(Value::Null) => { if col.required { - log::error!("Required column {} has a null value", col_name); + tracing::error!("Required column {} has a null value", col_name); } // We are adding the value anyway to match the number of arguments that clickhouse expects values.push(ClickHouseValue::new_null()); diff --git a/apps/framework-cli/src/infrastructure/processes/mod.rs b/apps/framework-cli/src/infrastructure/processes/mod.rs index a7ddd5b3f8..f16fe598ec 100644 --- a/apps/framework-cli/src/infrastructure/processes/mod.rs +++ b/apps/framework-cli/src/infrastructure/processes/mod.rs @@ -57,7 +57,7 @@ pub async fn execute_changes( for change in changes.iter() { match change { ProcessChange::TopicToTableSyncProcess(Change::Added(sync)) => { - log::info!("Starting sync process: {:?}", sync.id()); + tracing::info!("Starting sync process: {:?}", sync.id()); let target_table_columns = std_columns_to_clickhouse_columns(&sync.columns)?; // Topic doesn't contain the namespace, so we need to build the full topic name @@ -76,11 +76,11 @@ pub async fn execute_changes( ); } ProcessChange::TopicToTableSyncProcess(Change::Removed(sync)) => { - log::info!("Stopping sync process: {:?}", sync.id()); + tracing::info!("Stopping sync process: {:?}", sync.id()); process_registry.syncing.stop_topic_to_table(&sync.id()) } ProcessChange::TopicToTableSyncProcess(Change::Updated { before, after }) => { - log::info!("Replacing Sync process: {:?} by {:?}", before, after); + tracing::info!("Replacing Sync process: {:?} by {:?}", before, after); // Topic doesn't contain the namespace, so we need to build the full topic name let after_source_topic = infra_map.get_topic(&after.source_topic_id)?; @@ -102,7 +102,7 @@ pub async fn execute_changes( ); } ProcessChange::TopicToTopicSyncProcess(Change::Added(sync)) => { - log::info!("Starting sync process: {:?}", sync.id()); + tracing::info!("Starting sync process: {:?}", sync.id()); // Topic doesn't contain the namespace, so we need to build the full topic name let source_topic = infra_map.get_topic(&sync.source_topic_id)?; @@ -118,7 +118,7 @@ pub async fn execute_changes( ); } ProcessChange::TopicToTopicSyncProcess(Change::Removed(sync)) => { - log::info!("Stopping sync process: {:?}", sync.id()); + tracing::info!("Stopping sync process: {:?}", sync.id()); // Topic doesn't contain the namespace, so we need to build the full topic name let target_topic = infra_map.get_topic(&sync.target_topic_id)?; @@ -130,7 +130,7 @@ pub async fn execute_changes( } // TopicToTopicSyncProcess Updated seems impossible ProcessChange::TopicToTopicSyncProcess(Change::Updated { before, after }) => { - log::info!("Replacing Sync process: {:?} by {:?}", before, after); + tracing::info!("Replacing Sync process: {:?} by {:?}", before, after); // Topic doesn't contain 
the namespace, so we need to build the full topic name let before_target_topic = infra_map.get_topic(&before.target_topic_id)?; @@ -155,17 +155,17 @@ pub async fn execute_changes( ); } ProcessChange::FunctionProcess(Change::Added(function_process)) => { - log::info!("Starting Function process: {:?}", function_process.id()); + tracing::info!("Starting Function process: {:?}", function_process.id()); process_registry .functions .start(infra_map, function_process)?; } ProcessChange::FunctionProcess(Change::Removed(function_process)) => { - log::info!("Stopping Function process: {:?}", function_process.id()); + tracing::info!("Stopping Function process: {:?}", function_process.id()); process_registry.functions.stop(function_process).await; } ProcessChange::FunctionProcess(Change::Updated { before, after }) => { - log::info!("Updating Function process: {:?}", before.id()); + tracing::info!("Updating Function process: {:?}", before.id()); process_registry.functions.stop(before).await; process_registry.functions.start(infra_map, after)?; } @@ -177,37 +177,37 @@ pub async fn execute_changes( after: _, }) => {} ProcessChange::ConsumptionApiWebServer(Change::Added(_)) => { - log::info!("Starting analytics api webserver process"); + tracing::info!("Starting analytics api webserver process"); process_registry.consumption.start()?; } ProcessChange::ConsumptionApiWebServer(Change::Removed(_)) => { - log::info!("Stopping analytics api webserver process"); + tracing::info!("Stopping analytics api webserver process"); process_registry.consumption.stop().await?; } ProcessChange::ConsumptionApiWebServer(Change::Updated { before: _, after: _, }) => { - log::info!("Re-Starting analytics api webserver process"); + tracing::info!("Re-Starting analytics api webserver process"); process_registry.consumption.stop().await?; process_registry.consumption.start()?; } ProcessChange::OrchestrationWorker(Change::Added(new_orchestration_worker)) => { - log::info!("Starting Orchestration worker process"); + tracing::info!("Starting Orchestration worker process"); process_registry .orchestration_workers .start(new_orchestration_worker) .await?; } ProcessChange::OrchestrationWorker(Change::Removed(old_orchestration_worker)) => { - log::info!("Stopping Orchestration worker process"); + tracing::info!("Stopping Orchestration worker process"); process_registry .orchestration_workers .stop(old_orchestration_worker) .await?; } ProcessChange::OrchestrationWorker(Change::Updated { before, after }) => { - log::info!("Restarting Orchestration worker process: {:?}", before.id()); + tracing::info!("Restarting Orchestration worker process: {:?}", before.id()); process_registry.orchestration_workers.stop(before).await?; process_registry.orchestration_workers.start(after).await?; } @@ -225,15 +225,15 @@ pub async fn execute_leader_changes( for change in changes.iter() { match (change, &mut process_registry.blocks) { (ProcessChange::OlapProcess(Change::Added(olap_process)), Some(blocks)) => { - log::info!("Starting Blocks process: {:?}", olap_process.id()); + tracing::info!("Starting Blocks process: {:?}", olap_process.id()); blocks.start(olap_process)?; } (ProcessChange::OlapProcess(Change::Removed(olap_process)), Some(blocks)) => { - log::info!("Stopping Blocks process: {:?}", olap_process.id()); + tracing::info!("Stopping Blocks process: {:?}", olap_process.id()); blocks.stop(olap_process).await?; } (ProcessChange::OlapProcess(Change::Updated { before, after }), Some(blocks)) => { - log::info!("Updating Blocks process: {:?}", 
before.id()); + tracing::info!("Updating Blocks process: {:?}", before.id()); blocks.stop(before).await?; blocks.start(after)?; } diff --git a/apps/framework-cli/src/infrastructure/processes/orchestration_workers_registry.rs b/apps/framework-cli/src/infrastructure/processes/orchestration_workers_registry.rs index 5a7236934e..0ed3950959 100644 --- a/apps/framework-cli/src/infrastructure/processes/orchestration_workers_registry.rs +++ b/apps/framework-cli/src/infrastructure/processes/orchestration_workers_registry.rs @@ -1,5 +1,5 @@ -use log::info; use std::collections::HashMap; +use tracing::info; use crate::{ cli::settings::Settings, diff --git a/apps/framework-cli/src/infrastructure/redis/connection.rs b/apps/framework-cli/src/infrastructure/redis/connection.rs index f7c4dcc573..1cbde06ed7 100644 --- a/apps/framework-cli/src/infrastructure/redis/connection.rs +++ b/apps/framework-cli/src/infrastructure/redis/connection.rs @@ -98,7 +98,7 @@ impl ConnectionManagerWrapper { match time::timeout(Duration::from_secs(5), client.get_connection_manager()).await { Ok(Ok(conn)) => return Ok(conn), Ok(Err(e)) => { - log::warn!( + tracing::warn!( " Failed to create Redis connection (attempt {}/{}): {}", attempts + 1, max_attempts, @@ -107,7 +107,7 @@ impl ConnectionManagerWrapper { last_error = Some(e); } Err(_) => { - log::warn!( + tracing::warn!( " Timeout creating Redis connection (attempt {}/{})", attempts + 1, max_attempts @@ -186,12 +186,12 @@ impl ConnectionManagerWrapper { match timeout_future.await { Ok(Ok(_response)) => true, Ok(Err(e)) => { - log::warn!(" Redis ping failed: {:?}", e); + tracing::warn!(" Redis ping failed: {:?}", e); self.state.store(false, Ordering::SeqCst); false } Err(e) => { - log::warn!(" Redis ping timed out: {:?}", e); + tracing::warn!(" Redis ping timed out: {:?}", e); self.state.store(false, Ordering::SeqCst); false } @@ -217,7 +217,7 @@ impl ConnectionManagerWrapper { pub async fn attempt_reconnection(&mut self, config: &RedisConfig) { let mut backoff = 5; while !self.state.load(Ordering::SeqCst) { - log::info!( + tracing::info!( " Attempting to reconnect to Redis at {} (backoff: {}s)", config.effective_url(), backoff @@ -239,10 +239,10 @@ impl ConnectionManagerWrapper { self.pub_sub = new_pubsub; // Store the new client for future connection creation self.client = Arc::new(client); - log::info!(" Successfully reconnected both Redis connections"); + tracing::info!(" Successfully reconnected both Redis connections"); } Err(e) => { - log::warn!(" Reconnected main connection but failed to reconnect pub_sub: {}", e); + tracing::warn!(" Reconnected main connection but failed to reconnect pub_sub: {}", e); // Still mark as reconnected since the main connection succeeded } } @@ -251,13 +251,16 @@ impl ConnectionManagerWrapper { break; } Err(err) => { - log::warn!(" Failed to reconnect to Redis: {}", err); + tracing::warn!( + " Failed to reconnect to Redis: {}", + err + ); backoff = std::cmp::min(backoff * 2, 60); } } } Err(err) => { - log::warn!( + tracing::warn!( " Failed to create Redis client for reconnection: {}", err ); @@ -272,7 +275,7 @@ impl ConnectionManagerWrapper { /// This method should be called as part of the application shutdown sequence /// to ensure Redis connections are properly terminated. 
pub async fn shutdown(&self) { - log::info!(" Shutting down Redis connections"); + tracing::info!(" Shutting down Redis connections"); // Send QUIT command to both connection managers let mut conn = self.connection.clone(); @@ -284,6 +287,6 @@ impl ConnectionManagerWrapper { // Mark the connection as disconnected self.state.store(false, Ordering::SeqCst); - log::info!(" Redis connections shutdown complete"); + tracing::info!(" Redis connections shutdown complete"); } } diff --git a/apps/framework-cli/src/infrastructure/redis/leadership.rs b/apps/framework-cli/src/infrastructure/redis/leadership.rs index 2671b59575..2abfb62b59 100644 --- a/apps/framework-cli/src/infrastructure/redis/leadership.rs +++ b/apps/framework-cli/src/infrastructure/redis/leadership.rs @@ -83,7 +83,7 @@ impl LeadershipManager { .await { Ok(2) => { - log::debug!( + tracing::debug!( " Lock acquired: {} by instance {}", lock_key, instance_id @@ -98,7 +98,7 @@ impl LeadershipManager { (false, false) // doesn't have lock and not new acquisition } Err(e) => { - log::error!(" Error acquiring lock {}: {}", lock_key, e); + tracing::error!(" Error acquiring lock {}: {}", lock_key, e); (false, false) // doesn't have lock and not new acquisition } } @@ -152,7 +152,7 @@ impl LeadershipManager { .await { Ok(1) => { - log::trace!( + tracing::trace!( " Lock renewed: {} for instance {}", lock_key, instance_id @@ -160,7 +160,7 @@ impl LeadershipManager { Ok(true) } Ok(0) => { - log::warn!( + tracing::warn!( " Cannot renew lock {} - not owned by instance {}", lock_key, instance_id @@ -168,7 +168,7 @@ impl LeadershipManager { Ok(false) } Ok(_) => { - log::warn!( + tracing::warn!( " Unexpected result while renewing lock {} for instance {}", lock_key, instance_id @@ -176,7 +176,7 @@ impl LeadershipManager { Ok(false) } Err(e) => { - log::error!(" Error renewing lock {}: {}", lock_key, e); + tracing::error!(" Error renewing lock {}: {}", lock_key, e); Err(anyhow::anyhow!("Error renewing lock: {}", e)) } } diff --git a/apps/framework-cli/src/infrastructure/redis/messaging.rs b/apps/framework-cli/src/infrastructure/redis/messaging.rs index 8b81a9e447..2eb5f0482f 100644 --- a/apps/framework-cli/src/infrastructure/redis/messaging.rs +++ b/apps/framework-cli/src/infrastructure/redis/messaging.rs @@ -76,11 +76,11 @@ impl MessagingManager { match conn.publish::<_, _, ()>(&channel, message).await { Ok(_) => { - log::debug!(" Message published to channel {}", channel); + tracing::debug!(" Message published to channel {}", channel); Ok(()) } Err(e) => { - log::error!( + tracing::error!( " Failed to publish message to channel {}: {}", channel, e diff --git a/apps/framework-cli/src/infrastructure/redis/mock.rs b/apps/framework-cli/src/infrastructure/redis/mock.rs index 90464d04f8..c5dfe4f51c 100644 --- a/apps/framework-cli/src/infrastructure/redis/mock.rs +++ b/apps/framework-cli/src/infrastructure/redis/mock.rs @@ -1,5 +1,4 @@ use anyhow::Result; -use log; use std::collections::HashMap; use std::sync::Arc; use tokio::sync::RwLock; @@ -140,13 +139,13 @@ impl MockRedisClient { let mut queues_map = self.queues.write().await; if !queues_map.contains_key(queue) { - log::debug!(" Creating new queue: {}", queue); + tracing::debug!(" Creating new queue: {}", queue); queues_map.insert(queue.to_string(), Vec::new()); } if let Some(queue_vec) = queues_map.get_mut(queue) { queue_vec.push(message.to_string()); - log::debug!( + tracing::debug!( " Added message to queue {}, length now: {}", queue, queue_vec.len() @@ -171,7 +170,7 @@ impl MockRedisClient { if let 
Some(queue_vec) = queues_map.get_mut(queue) { if !queue_vec.is_empty() { let message = queue_vec.remove(0); - log::debug!( + tracing::debug!( " Retrieved message from queue {}, length now: {}", queue, queue_vec.len() @@ -179,7 +178,7 @@ impl MockRedisClient { return Ok(Some(message)); } } else { - log::debug!(" Queue {} does not exist", queue); + tracing::debug!(" Queue {} does not exist", queue); } Ok(None) diff --git a/apps/framework-cli/src/infrastructure/redis/presence.rs b/apps/framework-cli/src/infrastructure/redis/presence.rs index cf02f4b3ce..4fd65218ca 100644 --- a/apps/framework-cli/src/infrastructure/redis/presence.rs +++ b/apps/framework-cli/src/infrastructure/redis/presence.rs @@ -54,14 +54,14 @@ impl PresenceManager { match conn.set_ex::<_, _, ()>(&key, now, 3).await { Ok(_) => { - log::debug!( + tracing::debug!( " Updated presence for instance {}", self.instance_id ); Ok(()) } Err(e) => { - log::error!( + tracing::error!( " Failed to update presence for instance {}: {}", self.instance_id, e diff --git a/apps/framework-cli/src/infrastructure/redis/redis_client.rs b/apps/framework-cli/src/infrastructure/redis/redis_client.rs index 5ffee8ca3a..c81a963aa8 100644 --- a/apps/framework-cli/src/infrastructure/redis/redis_client.rs +++ b/apps/framework-cli/src/infrastructure/redis/redis_client.rs @@ -431,7 +431,7 @@ impl RedisClient { ); let broadcast_channel = format!("{}::msgchannel", self.config.key_prefix); - log::info!( + tracing::info!( " Starting message listener on channels: {} and {}", instance_channel, broadcast_channel @@ -449,7 +449,7 @@ impl RedisClient { let mut pubsub = match client.get_async_pubsub().await { Ok(pubsub) => pubsub, Err(e) => { - log::error!(" Failed to get pubsub connection: {}", e); + tracing::error!(" Failed to get pubsub connection: {}", e); return; } }; @@ -458,7 +458,7 @@ impl RedisClient { .subscribe(&[&instance_channel_clone, &broadcast_channel_clone]) .await { - log::error!(" Failed to subscribe to channels: {}", e); + tracing::error!(" Failed to subscribe to channels: {}", e); return; } @@ -466,13 +466,13 @@ impl RedisClient { let msg = pubsub.on_message().next().await; if let Some(msg) = msg { if let Ok(payload) = msg.get_payload::() { - log::info!(" Received message: {}", payload); + tracing::info!(" Received message: {}", payload); let handlers = callbacks.read().await.clone(); for handler in handlers.iter() { handler(payload.clone()); } } else { - log::warn!(" Failed to decode message payload"); + tracing::warn!(" Failed to decode message payload"); } } else { tokio::time::sleep(tokio::time::Duration::from_secs(1)).await; @@ -683,11 +683,11 @@ impl RedisClient { { Ok(true) => { // Lock successfully renewed - log::debug!(" Lock '{}' renewed successfully", &lock_key); + tracing::debug!(" Lock '{}' renewed successfully", &lock_key); } Ok(false) => { // Lock not owned by this instance anymore - log::warn!( + tracing::warn!( " Failed to renew lock '{}': not owned by this instance", &lock_key ); @@ -695,7 +695,7 @@ impl RedisClient { } Err(e) => { // Error occurred while renewing - log::error!(" Error renewing lock '{}': {}", &lock_key, e); + tracing::error!(" Error renewing lock '{}': {}", &lock_key, e); // Continue trying to renew } } @@ -733,7 +733,7 @@ impl RedisClient { { Ok(_) => {} Err(e) => { - log::error!(" Failed to update presence: {}", e); + tracing::error!(" Failed to update presence: {}", e); } } } @@ -761,7 +761,7 @@ impl RedisClient { .update_presence(connection_manager.connection.clone()) .await { - log::error!(" Error updating 
presence: {}", e); + tracing::error!(" Error updating presence: {}", e); } } }); @@ -786,7 +786,7 @@ impl RedisClient { loop { tokio::time::sleep(Duration::from_secs(5)).await; if !connection_manager.ping().await { - log::warn!(" Redis connection lost, attempting reconnection"); + tracing::warn!(" Redis connection lost, attempting reconnection"); connection_manager.attempt_reconnection(&config).await; } } @@ -921,7 +921,7 @@ impl RedisClient { { Ok(_) => Ok(()), Err(e) => { - log::warn!( + tracing::warn!( " Failed to publish message (Redis may be unavailable): {}", e ); @@ -959,7 +959,7 @@ impl RedisClient { { Ok(_) => Ok(()), Err(e) => { - log::warn!( + tracing::warn!( " Failed to broadcast message (Redis may be unavailable): {}", e ); @@ -986,7 +986,7 @@ impl RedisClient { impl Drop for RedisClient { fn drop(&mut self) { - log::info!(" RedisClient is being dropped. Cleaning up tasks."); + tracing::info!(" RedisClient is being dropped. Cleaning up tasks."); // First, abort any running tasks to prevent them from using the connection if let Ok(mut guard) = self.listener_task.try_write() { diff --git a/apps/framework-cli/src/infrastructure/stream/kafka/client.rs b/apps/framework-cli/src/infrastructure/stream/kafka/client.rs index d24407b2ff..bb972dce47 100644 --- a/apps/framework-cli/src/infrastructure/stream/kafka/client.rs +++ b/apps/framework-cli/src/infrastructure/stream/kafka/client.rs @@ -13,7 +13,6 @@ use crate::infrastructure::stream::kafka::constants::{ DEFAULT_MAX_MESSAGE_BYTES, KAFKA_MAX_MESSAGE_BYTES_CONFIG_KEY, KAFKA_RETENTION_CONFIG_KEY, }; use crate::project::Project; -use log::{error, info, warn}; use rdkafka::admin::{AlterConfig, NewPartitions, ResourceSpecifier}; use rdkafka::config::RDKafkaLogLevel; use rdkafka::consumer::stream_consumer::StreamConsumer; @@ -28,6 +27,7 @@ use rdkafka::{ }; use std::collections::{HashMap, VecDeque}; use std::time::Duration; +use tracing::{error, info, warn}; use super::constants::{ DEFAULT_RETENTION_MS, KAFKA_ACKS_CONFIG_KEY, KAFKA_AUTO_COMMIT_INTERVAL_MS_CONFIG_KEY, diff --git a/apps/framework-cli/src/infrastructure/webapp.rs b/apps/framework-cli/src/infrastructure/webapp.rs index 47b98ba786..5ae64081fd 100644 --- a/apps/framework-cli/src/infrastructure/webapp.rs +++ b/apps/framework-cli/src/infrastructure/webapp.rs @@ -12,9 +12,9 @@ pub async fn execute_changes( web_app_changes: &[WebAppChange], webapp_changes_channel: Sender, ) -> Result<(), WebAppChangeError> { - log::info!("📤 Sending {} WebApp changes", web_app_changes.len()); + tracing::info!("📤 Sending {} WebApp changes", web_app_changes.len()); for webapp_change in web_app_changes.iter() { - log::info!("📤 Sending WebApp change: {:?}", webapp_change); + tracing::info!("📤 Sending WebApp change: {:?}", webapp_change); webapp_changes_channel.send(webapp_change.clone()).await?; } diff --git a/apps/framework-cli/src/main.rs b/apps/framework-cli/src/main.rs index 60cf9edeb0..66aada6f27 100644 --- a/apps/framework-cli/src/main.rs +++ b/apps/framework-cli/src/main.rs @@ -29,7 +29,7 @@ fn ensure_terminal_cleanup() { let _ = disable_raw_mode(); let _ = stdout.flush(); - log::info!("Terminal cleanup complete via crossterm"); + tracing::info!("Terminal cleanup complete via crossterm"); } // Entry point for the CLI application diff --git a/apps/framework-cli/src/mcp/server.rs b/apps/framework-cli/src/mcp/server.rs index 57f4e726ab..9932adc149 100644 --- a/apps/framework-cli/src/mcp/server.rs +++ b/apps/framework-cli/src/mcp/server.rs @@ -1,4 +1,3 @@ -use log::info; use rmcp::{ model::{ 
CallToolRequestParam, CallToolResult, ErrorCode, Implementation, ListResourcesResult, @@ -12,6 +11,7 @@ use rmcp::{ ErrorData, RoleServer, ServerHandler, }; use std::sync::Arc; +use tracing::info; use super::embedded_docs; use super::tools::{ diff --git a/apps/framework-cli/src/mcp/tools/infra_issues/mod.rs b/apps/framework-cli/src/mcp/tools/infra_issues/mod.rs index 49fdbcabc6..b12823c5fb 100644 --- a/apps/framework-cli/src/mcp/tools/infra_issues/mod.rs +++ b/apps/framework-cli/src/mcp/tools/infra_issues/mod.rs @@ -10,12 +10,12 @@ //! See the shared `crate::infrastructure::olap::clickhouse::diagnostics` module for //! detailed documentation on each diagnostic provider. -use log::{debug, info}; use regex::Regex; use rmcp::model::{CallToolResult, Tool}; use serde_json::{json, Map, Value}; use std::collections::HashMap; use std::sync::Arc; +use tracing::{debug, info}; use super::{create_error_result, create_success_result}; use crate::framework::core::infrastructure_map::InfrastructureMap; diff --git a/apps/framework-cli/src/mcp/tools/query_olap.rs b/apps/framework-cli/src/mcp/tools/query_olap.rs index 517f2a702f..203389a151 100644 --- a/apps/framework-cli/src/mcp/tools/query_olap.rs +++ b/apps/framework-cli/src/mcp/tools/query_olap.rs @@ -3,13 +3,13 @@ //! This module implements the MCP tool for executing read-only SQL queries //! against the ClickHouse OLAP database for data exploration and debugging. -use log::{debug, info}; use rmcp::model::{Annotated, CallToolResult, RawContent, RawTextContent, Tool}; use serde_json::{json, Map, Value}; use sqlparser::ast::Statement; use sqlparser::dialect::ClickHouseDialect; use sqlparser::parser::Parser; use std::sync::Arc; +use tracing::{debug, info}; use crate::infrastructure::olap::clickhouse::client::ClickHouseClient; use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig; diff --git a/apps/framework-cli/src/mcp/tools/sample_stream.rs b/apps/framework-cli/src/mcp/tools/sample_stream.rs index 0ebf6f9b91..79f36cb2e2 100644 --- a/apps/framework-cli/src/mcp/tools/sample_stream.rs +++ b/apps/framework-cli/src/mcp/tools/sample_stream.rs @@ -4,7 +4,6 @@ //! It provides functionality to retrieve recent messages from topics for debugging and exploration. use futures::stream::BoxStream; -use log::info; use rdkafka::consumer::Consumer; use rdkafka::{Message as KafkaMessage, Offset, TopicPartitionList}; use rmcp::model::{CallToolResult, Tool}; @@ -12,6 +11,7 @@ use serde_json::{json, Map, Value}; use std::sync::Arc; use std::time::Duration; use tokio_stream::StreamExt; +use tracing::info; use super::{create_error_result, create_success_result}; use crate::framework::core::infrastructure_map::InfrastructureMap; @@ -248,7 +248,7 @@ async fn collect_messages_from_stream( match result { Ok(Ok(value)) => messages.push(value), Ok(Err(e)) => { - log::warn!( + tracing::warn!( "Error deserializing message from stream '{}': {}", stream_name, e @@ -256,7 +256,7 @@ async fn collect_messages_from_stream( error_count += 1; } Err(_elapsed) => { - log::info!( + tracing::info!( "Timeout waiting for messages from stream '{}' after {} seconds. 
Retrieved {} messages.", stream_name, SAMPLE_TIMEOUT_SECS, diff --git a/apps/framework-cli/src/metrics.rs b/apps/framework-cli/src/metrics.rs index e1d57dddad..aea7869ead 100644 --- a/apps/framework-cli/src/metrics.rs +++ b/apps/framework-cli/src/metrics.rs @@ -20,7 +20,7 @@ use crate::metrics_inserter::MetricsInserter; use crate::utilities::constants::{CLI_VERSION, CONTEXT, CTX_SESSION_ID}; use crate::utilities::decode_object; use chrono::{DateTime, Utc}; -use log::{trace, warn}; +use tracing::{trace, warn}; const DEFAULT_ANONYMOUS_METRICS_URL: &str = "https://moosefood.514.dev/ingest/MooseSessionTelemetry/0.6"; diff --git a/apps/framework-cli/src/project.rs b/apps/framework-cli/src/project.rs index 5579373c43..54796a7852 100644 --- a/apps/framework-cli/src/project.rs +++ b/apps/framework-cli/src/project.rs @@ -80,10 +80,10 @@ use crate::utilities::git::GitConfig; use crate::utilities::PathExt; use crate::utilities::_true; use config::{Config, ConfigError, Environment, File}; -use log::{debug, error}; use python_project::PythonProject; use serde::Deserialize; use serde::Serialize; +use tracing::{debug, error}; /// Represents errors that can occur during project file operations #[derive(Debug, thiserror::Error)] diff --git a/apps/framework-cli/src/utilities/capture.rs b/apps/framework-cli/src/utilities/capture.rs index 106f3a16de..5743795a1a 100644 --- a/apps/framework-cli/src/utilities/capture.rs +++ b/apps/framework-cli/src/utilities/capture.rs @@ -132,7 +132,7 @@ pub fn capture_usage( let client = match PostHog514Client::from_env(machine_id) { Some(client) => client, None => { - log::warn!("PostHog client not configured - missing POSTHOG_API_KEY"); + tracing::warn!("PostHog client not configured - missing POSTHOG_API_KEY"); return; } }; @@ -147,7 +147,7 @@ pub fn capture_usage( ) .await { - log::warn!("Failed to send telemetry to PostHog: {:?}", e); + tracing::warn!("Failed to send telemetry to PostHog: {:?}", e); } })) } diff --git a/apps/framework-cli/src/utilities/docker.rs b/apps/framework-cli/src/utilities/docker.rs index fc792c46de..b22f0c7966 100644 --- a/apps/framework-cli/src/utilities/docker.rs +++ b/apps/framework-cli/src/utilities/docker.rs @@ -1,12 +1,12 @@ use handlebars::Handlebars; use lazy_static::lazy_static; -use log::{error, info, warn}; use regex::Regex; use serde::Deserialize; use serde_json::json; use std::path::PathBuf; use std::process::{Command, Stdio}; use tokio::io::{AsyncBufReadExt, BufReader}; +use tracing::{error, info, warn}; use crate::cli::settings::Settings; use crate::project::Project; diff --git a/apps/framework-cli/src/utilities/dotenv.rs b/apps/framework-cli/src/utilities/dotenv.rs index e43b69be69..a788af905f 100644 --- a/apps/framework-cli/src/utilities/dotenv.rs +++ b/apps/framework-cli/src/utilities/dotenv.rs @@ -34,8 +34,8 @@ //! MOOSE_CLICKHOUSE_CONFIG__PASSWORD=my-secret //! 
``` -use log::{debug, info}; use std::path::Path; +use tracing::{debug, info}; /// Represents the runtime environment for the Moose project #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/apps/framework-cli/src/utilities/machine_id.rs b/apps/framework-cli/src/utilities/machine_id.rs index cfc68ec3fb..a7e091d364 100644 --- a/apps/framework-cli/src/utilities/machine_id.rs +++ b/apps/framework-cli/src/utilities/machine_id.rs @@ -1,7 +1,7 @@ use home::home_dir; -use log::warn; use std::fs; use std::path::PathBuf; +use tracing::warn; use uuid::Uuid; const MACHINE_ID_FILE: &str = ".fiveonefour/machine_id"; diff --git a/apps/framework-cli/src/utilities/nodejs_version.rs b/apps/framework-cli/src/utilities/nodejs_version.rs index fd7be0e37f..c1824a3802 100644 --- a/apps/framework-cli/src/utilities/nodejs_version.rs +++ b/apps/framework-cli/src/utilities/nodejs_version.rs @@ -1,8 +1,8 @@ -use log::{debug, info, warn}; use semver::{Version, VersionReq}; use serde_json::Value as JsonValue; use std::fs; use std::path::Path; +use tracing::{debug, info, warn}; #[derive(Debug, Clone)] pub struct NodeVersion { diff --git a/apps/framework-cli/src/utilities/package_managers.rs b/apps/framework-cli/src/utilities/package_managers.rs index fa2787face..9d1e87b210 100644 --- a/apps/framework-cli/src/utilities/package_managers.rs +++ b/apps/framework-cli/src/utilities/package_managers.rs @@ -3,7 +3,7 @@ use std::{fmt, path::PathBuf, process::Command}; use home::home_dir; -use log::{debug, error}; +use tracing::{debug, error}; use crate::utilities::constants::{PACKAGE_LOCK_JSON, PNPM_LOCK, YARN_LOCK}; diff --git a/apps/framework-cli/src/utilities/process_output.rs b/apps/framework-cli/src/utilities/process_output.rs index 7e66ea4df2..38661c5d8e 100644 --- a/apps/framework-cli/src/utilities/process_output.rs +++ b/apps/framework-cli/src/utilities/process_output.rs @@ -1,8 +1,8 @@ -use log::{error, info, warn}; use std::io::{BufRead, BufReader as StdBufReader}; use std::process::{Command, Stdio}; use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::process::{ChildStderr, ChildStdout}; +use tracing::{error, info, warn}; /// Utility for safely managing subprocess output while preventing terminal corruption. /// diff --git a/apps/framework-cli/src/utilities/system.rs b/apps/framework-cli/src/utilities/system.rs index 2fd86998b5..2aebeb6477 100644 --- a/apps/framework-cli/src/utilities/system.rs +++ b/apps/framework-cli/src/utilities/system.rs @@ -1,5 +1,4 @@ //! System utilities -use log::{debug, error, info, warn}; use std::fmt::Debug; use std::time::Duration; use std::{ @@ -10,6 +9,7 @@ use tokio::process::Child; use tokio::select; use tokio::task::JoinHandle; use tokio::time::{sleep, Instant}; +use tracing::{debug, error, info, warn}; #[derive(Debug, thiserror::Error)] #[non_exhaustive] @@ -57,7 +57,7 @@ pub async fn kill_child(child: &mut Child) -> Result<(), KillProcessError> { let status = kill.wait().await?; if !status.success() { - log::warn!("Failed to send SIGTERM to process {}", id); + tracing::warn!("Failed to send SIGTERM to process {}", id); } // Wait for the child process to exit with a timeout (10 seconds) From 5f1923e842a4acace75bd22037e10df030841029 Mon Sep 17 00:00:00 2001 From: Lucio Franco Date: Wed, 26 Nov 2025 11:03:00 -0500 Subject: [PATCH 51/59] Update safe-chain-nix in flake.nix (#3037) > [!NOTE] > Bumps `safe-chain-nix` in `flake.lock` to `c931beaa` (updated rev, narHash, and timestamp). 
> > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 0171a301bb797514d57893a0faffdba1c503337e. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). --- flake.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flake.lock b/flake.lock index 5c221858bd..b95a977117 100644 --- a/flake.lock +++ b/flake.lock @@ -103,11 +103,11 @@ ] }, "locked": { - "lastModified": 1764035635, - "narHash": "sha256-BQXF3dmdSY9k6fS82HdmYjAsrU+Y3o7rDK8rh9oYY6g=", + "lastModified": 1764107305, + "narHash": "sha256-WvQKJS+eizwFPJeTom9B2H2OVt/cneStYNvnTkXADL8=", "owner": "LucioFranco", "repo": "safe-chain-nix", - "rev": "b286fdec32d947cd1f25c37b4b3c4ecca6f796b2", + "rev": "c931beaa029241bf9010d76d68e47c20f4d3bcf9", "type": "github" }, "original": { From 20e8d2d99dd4b367ef9e6995fdbbcda207be35da Mon Sep 17 00:00:00 2001 From: Lucio Franco Date: Wed, 26 Nov 2025 15:32:48 -0500 Subject: [PATCH 52/59] Disable otel (#3043) > [!NOTE] > Disables OTEL integration for the legacy logging format and marks session/machine IDs unused. > > - **Logger (`apps/framework-cli/src/cli/logger.rs`)**: > - **Legacy format**: > - Remove OTEL layer wiring even when `settings.export_to` is set (no `otel_layer` used). > - Mark `session_id` and `machine_id` parameters as unused (`_session_id`, `_machine_id`). > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit c39bd4df84b14d9e407dbf6cf51e606a37b78bd7. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). --------- Co-authored-by: George Anderson --- apps/framework-cli/src/cli/logger.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/apps/framework-cli/src/cli/logger.rs b/apps/framework-cli/src/cli/logger.rs index 157a9864f5..f5e60520a9 100644 --- a/apps/framework-cli/src/cli/logger.rs +++ b/apps/framework-cli/src/cli/logger.rs @@ -594,17 +594,15 @@ fn setup_modern_format( fn setup_legacy_format( settings: &LoggerSettings, - session_id: &str, - machine_id: &str, + _session_id: &str, + _machine_id: &str, custom_fields: CustomFields, ) -> Result<(), LoggerError> { let env_filter = EnvFilter::try_from_default_env() .unwrap_or_else(|_| EnvFilter::new(settings.level.to_tracing_level().to_string())); // Setup with or without OTEL based on configuration - if let Some(endpoint) = &settings.export_to { - let otel_layer = create_otel_layer(endpoint, session_id, machine_id)?; - + if let Some(_endpoint) = &settings.export_to { if settings.stdout { let legacy_layer = LegacyFormatLayer::new( std::io::stdout, @@ -614,7 +612,6 @@ fn setup_legacy_format( ); tracing_subscriber::registry() - .with(otel_layer) .with(env_filter) .with(legacy_layer) .init(); @@ -628,7 +625,6 @@ fn setup_legacy_format( ); tracing_subscriber::registry() - .with(otel_layer) .with(env_filter) .with(legacy_layer) .init(); From 23da2516327422b8b58bbd124df7844499e2cf39 Mon Sep 17 00:00:00 2001 From: George Leung Date: Wed, 26 Nov 2025 12:54:55 -0800 Subject: [PATCH 53/59] logs latency in TS API (#3042) > [!NOTE] > Add elapsed-time logging by timing requests and passing start time to `httpLogger` across all response paths. > > - **Runtime/Logging**: > - Update `httpLogger` to accept `startMs` and log request latency in ms. > - Capture `start = Date.now()` in `apiHandler` and `createMainRouter` request handlers. 
> - Invoke `httpLogger(req, res, start)` on all exits (auth failures, success responses, errors, 404) in `packages/ts-moose-lib/src/consumption-apis/runner.ts`. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit d8061a593ea6b6355a1c967b4fe088997eff44e8. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). --- .../src/consumption-apis/runner.ts | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/packages/ts-moose-lib/src/consumption-apis/runner.ts b/packages/ts-moose-lib/src/consumption-apis/runner.ts index fa8ea462c9..4fc67cd1b0 100755 --- a/packages/ts-moose-lib/src/consumption-apis/runner.ts +++ b/packages/ts-moose-lib/src/consumption-apis/runner.ts @@ -51,8 +51,14 @@ const toClientConfig = (config: ClickhouseConfig) => ({ const createPath = (apisDir: string, path: string) => `${apisDir}${path}.ts`; -const httpLogger = (req: http.IncomingMessage, res: http.ServerResponse) => { - console.log(`${req.method} ${req.url} ${res.statusCode}`); +const httpLogger = ( + req: http.IncomingMessage, + res: http.ServerResponse, + startMs: number, +) => { + console.log( + `${req.method} ${req.url} ${res.statusCode} ${Date.now() - startMs}ms`, + ); }; const modulesCache = new Map(); @@ -82,6 +88,8 @@ const apiHandler = async ( ) => { const apis = isDmv2 ? await getApis() : new Map(); return async (req: http.IncomingMessage, res: http.ServerResponse) => { + const start = Date.now(); + try { const url = new URL(req.url || "", "http://localhost"); const fileName = url.pathname; @@ -101,20 +109,20 @@ const apiHandler = async ( if (enforceAuth) { res.writeHead(401, { "Content-Type": "application/json" }); res.end(JSON.stringify({ error: "Unauthorized" })); - httpLogger(req, res); + httpLogger(req, res, start); return; } } } else if (enforceAuth) { res.writeHead(401, { "Content-Type": "application/json" }); res.end(JSON.stringify({ error: "Unauthorized" })); - httpLogger(req, res); + httpLogger(req, res, start); return; } } else if (enforceAuth) { res.writeHead(401, { "Content-Type": "application/json" }); res.end(JSON.stringify({ error: "Unauthorized" })); - httpLogger(req, res); + httpLogger(req, res, start); return; } @@ -224,10 +232,10 @@ const apiHandler = async ( if (status) { res.writeHead(status, { "Content-Type": "application/json" }); - httpLogger(req, res); + httpLogger(req, res, start); } else { res.writeHead(200, { "Content-Type": "application/json" }); - httpLogger(req, res); + httpLogger(req, res, start); } res.end(body); @@ -237,16 +245,16 @@ const apiHandler = async ( if (Object.getPrototypeOf(error).constructor.name === "TypeGuardError") { res.writeHead(400, { "Content-Type": "application/json" }); res.end(JSON.stringify({ error: error.message })); - httpLogger(req, res); + httpLogger(req, res, start); } if (error instanceof Error) { res.writeHead(500, { "Content-Type": "application/json" }); res.end(JSON.stringify({ error: error.message })); - httpLogger(req, res); + httpLogger(req, res, start); } else { res.writeHead(500, { "Content-Type": "application/json" }); res.end(); - httpLogger(req, res); + httpLogger(req, res, start); } } }; @@ -280,6 +288,8 @@ const createMainRouter = async ( }); return async (req: http.IncomingMessage, res: http.ServerResponse) => { + const start = Date.now(); + const url = new URL(req.url || "", "http://localhost"); const pathname = url.pathname; @@ -391,7 +401,7 @@ const createMainRouter = async ( res.writeHead(404, { "Content-Type": 
"application/json" }); res.end(JSON.stringify({ error: "Not Found" })); - httpLogger(req, res); + httpLogger(req, res, start); }; }; From 703ea309c3f1db21af8fd1efac6a14c703ac4041 Mon Sep 17 00:00:00 2001 From: George Leung Date: Thu, 27 Nov 2025 14:34:32 -0800 Subject: [PATCH 54/59] primary key expression (#3031) > [!NOTE] > Adds first-class primary key expression support across OLAP pipelines, generating/reading PRIMARY KEY in DDL, normalizing for diffs, and exposing config in TS/Python SDKs with tests and docs. > > - **OLAP/ClickHouse core**: > - Add `primary_key_expression` to `Table`/ClickHouse models and protobuf; include in (de)serialization and introspection. > - Generate PRIMARY KEY in `create_table_query` (uses expression or column flags) and expose in list_tables via SQL parsing (`extract_primary_key_from_create_table`). > - Normalize PKs (strip spaces/backticks/outer parens) and compare in diffs; PK changes trigger drop+create. > - **Code generation**: > - TS/Python generators output `primaryKeyExpression`/`primary_key_expression` in table configs and switch key-wrapping logic to use it. > - **SDK/Internal config**: > - TS/Python SDKs add `primaryKeyExpression`/`primary_key_expression` to `OlapTable` config and internal representations. > - **Tests**: > - Extensive unit tests for PK parsing/normalization/diff behavior and DDL generation; e2e template tests validating PRIMARY KEY and ORDER BY in DDL. > - **Docs**: > - New/updated docs detailing primary key expressions, constraints, and examples in TS/Python, including ORDER BY prefix requirement. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 3bdd55ee080eec2f925ee3789061f292403d41ee. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). 
--- apps/framework-cli-e2e/test/templates.test.ts | 48 ++ .../test/utils/schema-definitions.ts | 38 ++ apps/framework-cli/src/cli/local_webserver.rs | 1 + .../framework-cli/src/cli/routines/migrate.rs | 1 + .../src/cli/routines/seed_data.rs | 1 + .../framework/core/infra_reality_checker.rs | 1 + .../framework/core/infrastructure/table.rs | 78 ++++ .../src/framework/core/infrastructure_map.rs | 3 + .../core/partial_infrastructure_map.rs | 4 + apps/framework-cli/src/framework/core/plan.rs | 1 + .../src/framework/core/plan_validator.rs | 2 + .../src/framework/data_model/model.rs | 1 + .../src/framework/python/generate.rs | 31 +- .../src/framework/typescript/generate.rs | 31 +- .../olap/clickhouse/diff_strategy.rs | 419 +++++++++++++++++- .../infrastructure/olap/clickhouse/mapper.rs | 1 + .../src/infrastructure/olap/clickhouse/mod.rs | 167 ++++++- .../infrastructure/olap/clickhouse/model.rs | 2 + .../infrastructure/olap/clickhouse/queries.rs | 156 ++++++- .../olap/clickhouse/sql_parser.rs | 155 +++++++ .../src/infrastructure/olap/ddl_ordering.rs | 24 + .../content/moosestack/olap/model-table.mdx | 125 ++++++ .../llm-docs/python/constraints.md | 13 +- .../llm-docs/typescript/constraints.md | 10 +- .../llm-docs/typescript/table-setup.md | 53 ++- packages/protobuf/infrastructure_map.proto | 4 + .../py-moose-lib/moose_lib/dmv2/olap_table.py | 6 + packages/py-moose-lib/moose_lib/internal.py | 3 + packages/ts-moose-lib/src/dmv2/internal.ts | 6 + .../ts-moose-lib/src/dmv2/sdk/olapTable.ts | 14 + templates/python-tests/src/ingest/models.py | 43 ++ .../typescript-tests/src/ingest/models.ts | 44 ++ 32 files changed, 1430 insertions(+), 56 deletions(-) diff --git a/apps/framework-cli-e2e/test/templates.test.ts b/apps/framework-cli-e2e/test/templates.test.ts index 96eaf2f805..241a2765e9 100644 --- a/apps/framework-cli-e2e/test/templates.test.ts +++ b/apps/framework-cli-e2e/test/templates.test.ts @@ -292,6 +292,54 @@ const createTemplateTestSuite = (config: TemplateTestConfig) => { } }); + it("should include PRIMARY KEY expression in DDL when configured", async function () { + if (config.isTestsVariant) { + // Test 1: Primary key with hash function + const ddl1 = await getTableDDL("PrimaryKeyExpressionTest", "local"); + const primaryKeyPattern = + config.language === "typescript" ? + "PRIMARY KEY (userId, cityHash64(eventId))" + : "PRIMARY KEY (user_id, cityHash64(event_id))"; + const orderByPattern = + config.language === "typescript" ? + "ORDER BY (userId, cityHash64(eventId), timestamp)" + : "ORDER BY (user_id, cityHash64(event_id), timestamp)"; + + if (!ddl1.includes(primaryKeyPattern)) { + throw new Error( + `PRIMARY KEY expression not found in PrimaryKeyExpressionTest DDL. Expected: ${primaryKeyPattern}. DDL: ${ddl1}`, + ); + } + if (!ddl1.includes(orderByPattern)) { + throw new Error( + `ORDER BY expression not found in PrimaryKeyExpressionTest DDL. Expected: ${orderByPattern}. DDL: ${ddl1}`, + ); + } + + // Test 2: Primary key with different ordering + const ddl2 = await getTableDDL("PrimaryKeyOrderingTest", "local"); + const primaryKeyPattern2 = + config.language === "typescript" ? + "PRIMARY KEY productId" + : "PRIMARY KEY product_id"; + const orderByPattern2 = + config.language === "typescript" ? + "ORDER BY (productId, category, brand)" + : "ORDER BY (product_id, category, brand)"; + + if (!ddl2.includes(primaryKeyPattern2)) { + throw new Error( + `PRIMARY KEY expression not found in PrimaryKeyOrderingTest DDL. Expected: ${primaryKeyPattern2}. 
DDL: ${ddl2}`, + ); + } + if (!ddl2.includes(orderByPattern2)) { + throw new Error( + `ORDER BY expression not found in PrimaryKeyOrderingTest DDL. Expected: ${orderByPattern2}. DDL: ${ddl2}`, + ); + } + } + }); + it("should generate FixedString types in DDL including type aliases", async function () { if (config.isTestsVariant && config.language === "python") { const ddl = await getTableDDL("FixedStringTest", "local"); diff --git a/apps/framework-cli-e2e/test/utils/schema-definitions.ts b/apps/framework-cli-e2e/test/utils/schema-definitions.ts index ce622995ee..299799ff95 100644 --- a/apps/framework-cli-e2e/test/utils/schema-definitions.ts +++ b/apps/framework-cli-e2e/test/utils/schema-definitions.ts @@ -421,6 +421,25 @@ export const TYPESCRIPT_TEST_SCHEMAS: ExpectedTableSchema[] = [ { name: "payloadBasic", type: "JSON(count Int64, name String)" }, ], }, + // Primary Key Expression Tests + { + tableName: "PrimaryKeyExpressionTest", + columns: [ + { name: "userId", type: "String" }, + { name: "eventId", type: "String" }, + { name: "timestamp", type: /DateTime\('UTC'\)/ }, + { name: "category", type: "String" }, + ], + }, + { + tableName: "PrimaryKeyOrderingTest", + columns: [ + { name: "productId", type: "String" }, + { name: "category", type: "String" }, + { name: "brand", type: "String" }, + { name: "timestamp", type: /DateTime\('UTC'\)/ }, + ], + }, ]; // ============ PYTHON TEMPLATE SCHEMA DEFINITIONS ============ @@ -805,6 +824,25 @@ export const PYTHON_TEST_SCHEMAS: ExpectedTableSchema[] = [ { name: "payload_basic", type: "JSON(count Int64, name String)" }, ], }, + // Primary Key Expression Tests + { + tableName: "PrimaryKeyExpressionTest", + columns: [ + { name: "user_id", type: "String" }, + { name: "event_id", type: "String" }, + { name: "timestamp", type: /DateTime\('UTC'\)/ }, + { name: "category", type: "String" }, + ], + }, + { + tableName: "PrimaryKeyOrderingTest", + columns: [ + { name: "product_id", type: "String" }, + { name: "category", type: "String" }, + { name: "brand", type: "String" }, + { name: "timestamp", type: /DateTime\('UTC'\)/ }, + ], + }, ]; // ============ HELPER FUNCTIONS ============ diff --git a/apps/framework-cli/src/cli/local_webserver.rs b/apps/framework-cli/src/cli/local_webserver.rs index bb037676fb..4ac5e6225e 100644 --- a/apps/framework-cli/src/cli/local_webserver.rs +++ b/apps/framework-cli/src/cli/local_webserver.rs @@ -3564,6 +3564,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, } } diff --git a/apps/framework-cli/src/cli/routines/migrate.rs b/apps/framework-cli/src/cli/routines/migrate.rs index 1256a302e5..afa256c69d 100644 --- a/apps/framework-cli/src/cli/routines/migrate.rs +++ b/apps/framework-cli/src/cli/routines/migrate.rs @@ -781,6 +781,7 @@ mod tests { table_settings: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, } } diff --git a/apps/framework-cli/src/cli/routines/seed_data.rs b/apps/framework-cli/src/cli/routines/seed_data.rs index 979841d028..75a940889f 100644 --- a/apps/framework-cli/src/cli/routines/seed_data.rs +++ b/apps/framework-cli/src/cli/routines/seed_data.rs @@ -614,6 +614,7 @@ mod tests { table_settings: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, } } diff --git a/apps/framework-cli/src/framework/core/infra_reality_checker.rs b/apps/framework-cli/src/framework/core/infra_reality_checker.rs index e78c6de5ec..08adf9cca3 100644 --- 
a/apps/framework-cli/src/framework/core/infra_reality_checker.rs +++ b/apps/framework-cli/src/framework/core/infra_reality_checker.rs @@ -533,6 +533,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, } } diff --git a/apps/framework-cli/src/framework/core/infrastructure/table.rs b/apps/framework-cli/src/framework/core/infrastructure/table.rs index a21336a8e4..52fdde9897 100644 --- a/apps/framework-cli/src/framework/core/infrastructure/table.rs +++ b/apps/framework-cli/src/framework/core/infrastructure/table.rs @@ -302,6 +302,10 @@ pub struct Table { /// Optional cluster name for ON CLUSTER support in ClickHouse #[serde(skip_serializing_if = "Option::is_none", default)] pub cluster_name: Option, + /// Optional PRIMARY KEY expression (overrides column-level primary_key flags when specified) + /// Allows for complex primary keys using functions or different column ordering + #[serde(skip_serializing_if = "Option::is_none", default)] + pub primary_key_expression: Option, } impl Table { @@ -402,6 +406,75 @@ impl Table { .collect() } + /// Returns a normalized representation of the primary key for comparison purposes. + /// + /// This handles both: + /// - `primary_key_expression`: Uses the expression directly + /// - Column-level `primary_key` flags: Builds an expression from column names + /// + /// The result is normalized (trimmed, spaces removed, backticks removed, and outer + /// parentheses stripped for single-element tuples) to enable semantic comparison. + /// For example: + /// - `primary_key_expression: Some("(foo, bar)")` returns "(foo,bar)" + /// - Columns foo, bar with `primary_key: true` returns "(foo,bar)" + /// - `primary_key_expression: Some("foo")` returns "foo" + /// - `primary_key_expression: Some("(foo)")` returns "foo" (outer parens stripped) + /// - Single column foo with `primary_key: true` returns "foo" + pub fn normalized_primary_key_expr(&self) -> String { + let expr = if let Some(ref pk_expr) = self.primary_key_expression { + // Use the explicit primary_key_expression + pk_expr.clone() + } else { + // Build from column-level primary_key flags + let pk_cols = self.primary_key_columns(); + if pk_cols.is_empty() { + String::new() + } else if pk_cols.len() == 1 { + pk_cols[0].to_string() + } else { + format!("({})", pk_cols.join(", ")) + } + }; + + // Normalize: trim, remove backticks, remove spaces + let mut normalized = expr + .trim() + .trim_matches('`') + .replace('`', "") + .replace(" ", ""); + + // Strip outer parentheses if this is a single-element tuple + // E.g., "(col)" -> "col", "(cityHash64(col))" -> "cityHash64(col)" + // But keep "(col1,col2)" as-is + if normalized.starts_with('(') && normalized.ends_with(')') { + // Check if there are any top-level commas (not inside nested parentheses) + let inner = &normalized[1..normalized.len() - 1]; + let has_top_level_comma = { + let mut depth = 0; + let mut found_comma = false; + for ch in inner.chars() { + match ch { + '(' => depth += 1, + ')' => depth -= 1, + ',' if depth == 0 => { + found_comma = true; + break; + } + _ => {} + } + } + found_comma + }; + + // If no top-level comma, it's a single-element tuple - strip outer parens + if !has_top_level_comma { + normalized = inner.to_string(); + } + } + + normalized + } + pub fn order_by_with_fallback(&self) -> OrderBy { // table (in infra map created by older version of moose) may leave order_by unspecified, // but the implicit order_by from primary keys can be the same @@ -473,6 +546,7 @@ impl Table { 
table_settings: self.table_settings.clone().unwrap_or_default(), table_ttl_setting: self.table_ttl_setting.clone(), cluster_name: self.cluster_name.clone(), + primary_key_expression: self.primary_key_expression.clone(), metadata: MessageField::from_option(self.metadata.as_ref().map(|m| { infrastructure_map::Metadata { description: m.description.clone().unwrap_or_default(), @@ -581,6 +655,7 @@ impl Table { database: proto.database, table_ttl_setting: proto.table_ttl_setting, cluster_name: proto.cluster_name, + primary_key_expression: proto.primary_key_expression, } } } @@ -1647,6 +1722,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; assert_eq!(table1.id(DEFAULT_DATABASE_NAME), "local_users"); @@ -1776,6 +1852,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; // Target table from code: explicit order_by that matches primary key @@ -1799,6 +1876,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; // These should be equal because: diff --git a/apps/framework-cli/src/framework/core/infrastructure_map.rs b/apps/framework-cli/src/framework/core/infrastructure_map.rs index e33d7c23eb..9b4bec5831 100644 --- a/apps/framework-cli/src/framework/core/infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/infrastructure_map.rs @@ -3065,6 +3065,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; let after = Table { @@ -3121,6 +3122,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; let diff = compute_table_columns_diff(&before, &after); @@ -3297,6 +3299,7 @@ mod diff_tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, } } diff --git a/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs b/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs index eb0e047d05..1bc781c1c0 100644 --- a/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs +++ b/apps/framework-cli/src/framework/core/partial_infrastructure_map.rs @@ -273,6 +273,9 @@ struct PartialTable { /// Optional cluster name for ON CLUSTER support #[serde(default)] pub cluster: Option, + /// Optional PRIMARY KEY expression (overrides column-level primary_key flags when specified) + #[serde(default, alias = "primary_key_expression")] + pub primary_key_expression: Option, } /// Represents a topic definition from user code before it's converted into a complete [`Topic`]. 
@@ -743,6 +746,7 @@ impl PartialInfrastructureMap { table_ttl_setting, database: partial_table.database.clone(), cluster_name: partial_table.cluster.clone(), + primary_key_expression: partial_table.primary_key_expression.clone(), }; Ok((table.id(default_database), table)) }) diff --git a/apps/framework-cli/src/framework/core/plan.rs b/apps/framework-cli/src/framework/core/plan.rs index 69b062e549..a3f38e3c5b 100644 --- a/apps/framework-cli/src/framework/core/plan.rs +++ b/apps/framework-cli/src/framework/core/plan.rs @@ -527,6 +527,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, } } diff --git a/apps/framework-cli/src/framework/core/plan_validator.rs b/apps/framework-cli/src/framework/core/plan_validator.rs index 99f9dda6a5..2d0260591e 100644 --- a/apps/framework-cli/src/framework/core/plan_validator.rs +++ b/apps/framework-cli/src/framework/core/plan_validator.rs @@ -167,6 +167,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name, + primary_key_expression: None, } } @@ -342,6 +343,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name, + primary_key_expression: None, } } diff --git a/apps/framework-cli/src/framework/data_model/model.rs b/apps/framework-cli/src/framework/data_model/model.rs index 6591277682..da20ff2dd8 100644 --- a/apps/framework-cli/src/framework/data_model/model.rs +++ b/apps/framework-cli/src/framework/data_model/model.rs @@ -71,6 +71,7 @@ impl DataModel { database: None, // Database defaults to global config table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; // Compute hash that includes both engine params and database diff --git a/apps/framework-cli/src/framework/python/generate.rs b/apps/framework-cli/src/framework/python/generate.rs index a8be4e26d3..0416662b27 100644 --- a/apps/framework-cli/src/framework/python/generate.rs +++ b/apps/framework-cli/src/framework/python/generate.rs @@ -633,19 +633,8 @@ pub fn tables_to_python(tables: &[Table], life_cycle: Option) -> Stri // Generate model classes for table in tables { writeln!(output, "class {}(BaseModel):", table.name).unwrap(); - - let primary_key = table - .columns - .iter() - .filter_map(|column| { - if column.primary_key { - Some(column.name.to_string()) - } else { - None - } - }) - .collect::>(); - let can_use_key_wrapping = table.order_by.starts_with_fields(&primary_key); + // list_tables sets primary_key_expression to Some if Key wrapping is insufficient to represent the PK + let can_use_key_wrapping = table.primary_key_expression.is_none(); for column in &table.columns { let type_str = map_column_type_to_python( @@ -725,6 +714,11 @@ pub fn tables_to_python(tables: &[Table], life_cycle: Option) -> Stri ) .unwrap(); writeln!(output, " {order_by_spec},").unwrap(); + + if let Some(ref pk_expr) = table.primary_key_expression { + // Use the explicit primary_key_expression directly + writeln!(output, " primary_key_expression={:?},", pk_expr).unwrap(); + } if let Some(partition_by) = &table.partition_by { writeln!(output, " partition_by={:?},", partition_by).unwrap(); } @@ -1086,6 +1080,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_python(&tables, None); @@ -1180,6 +1175,7 @@ foo_table = OlapTable[Foo]("Foo", OlapConfig( database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_python(&tables, None); @@ -1299,6 +1295,7 @@ nested_array_table = 
OlapTable[NestedArray]("NestedArray", OlapConfig( database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_python(&tables, None); @@ -1378,6 +1375,7 @@ user_table = OlapTable[User]("User", OlapConfig( database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_python(&tables, None); @@ -1436,6 +1434,7 @@ user_table = OlapTable[User]("User", OlapConfig( database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_python(&tables, None); @@ -1505,6 +1504,7 @@ user_table = OlapTable[User]("User", OlapConfig( database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_python(&tables, None); @@ -1577,6 +1577,7 @@ user_table = OlapTable[User]("User", OlapConfig( database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_python(&tables, None); @@ -1660,6 +1661,7 @@ user_table = OlapTable[User]("User", OlapConfig( database: None, table_ttl_setting: Some("timestamp + INTERVAL 90 DAY DELETE".to_string()), cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_python(&tables, None); @@ -1725,6 +1727,7 @@ user_table = OlapTable[User]("User", OlapConfig( database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_python(&tables, None); @@ -1790,6 +1793,7 @@ user_table = OlapTable[User]("User", OlapConfig( indexes: vec![], table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_python(&tables, None); @@ -1844,6 +1848,7 @@ user_table = OlapTable[User]("User", OlapConfig( database: Some("analytics_db".to_string()), table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_python(&tables, None); diff --git a/apps/framework-cli/src/framework/typescript/generate.rs b/apps/framework-cli/src/framework/typescript/generate.rs index cffd6cae21..55b688b44c 100644 --- a/apps/framework-cli/src/framework/typescript/generate.rs +++ b/apps/framework-cli/src/framework/typescript/generate.rs @@ -549,18 +549,8 @@ pub fn tables_to_typescript(tables: &[Table], life_cycle: Option) -> // Generate model interfaces for table in tables { - let primary_key = table - .columns - .iter() - .filter_map(|column| { - if column.primary_key { - Some(column.name.to_string()) - } else { - None - } - }) - .collect::>(); - let can_use_key_wrapping = table.order_by.starts_with_fields(&primary_key); + // list_tables sets primary_key_expression to Some if Key wrapping is insufficient to represent the PK + let can_use_key_wrapping = table.primary_key_expression.is_none(); writeln!(output, "export interface {} {{", table.name).unwrap(); @@ -630,6 +620,7 @@ pub fn tables_to_typescript(tables: &[Table], life_cycle: Option) -> } OrderBy::SingleExpr(expr) => format!("orderByExpression: {:?}", expr), }; + let var_name = sanitize_typescript_identifier(&table.name); let (base_name, version) = extract_version_from_table_name(&table.name); @@ -645,6 +636,11 @@ pub fn tables_to_typescript(tables: &[Table], life_cycle: Option) -> ) .unwrap(); writeln!(output, " {order_by_spec},").unwrap(); + + if let Some(ref pk_expr) = table.primary_key_expression { + // Use the explicit primary_key_expression directly + writeln!(output, " primaryKeyExpression: {:?},", pk_expr).unwrap(); + } if let 
Some(partition_by) = &table.partition_by { writeln!(output, " partitionBy: {:?},", partition_by).unwrap(); } @@ -1022,6 +1018,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_typescript(&tables, None); @@ -1104,6 +1101,7 @@ export const UserTable = new OlapTable("User", { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_typescript(&tables, None); @@ -1155,6 +1153,7 @@ export const UserTable = new OlapTable("User", { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_typescript(&tables, None); @@ -1225,6 +1224,7 @@ export const UserTable = new OlapTable("User", { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_typescript(&tables, None); @@ -1270,6 +1270,7 @@ export const UserTable = new OlapTable("User", { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_typescript(&tables, None); @@ -1347,6 +1348,7 @@ export const UserTable = new OlapTable("User", { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_typescript(&tables, None); @@ -1410,6 +1412,7 @@ export const UserTable = new OlapTable("User", { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_typescript(&tables, None); @@ -1481,6 +1484,7 @@ export const UserTable = new OlapTable("User", { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_typescript(&tables, None); @@ -1559,6 +1563,7 @@ export const TaskTable = new OlapTable("Task", { database: None, table_ttl_setting: Some("timestamp + INTERVAL 90 DAY DELETE".to_string()), cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_typescript(&tables, None); @@ -1626,6 +1631,7 @@ export const TaskTable = new OlapTable("Task", { indexes: vec![], table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_typescript(&tables, None); @@ -1674,6 +1680,7 @@ export const TaskTable = new OlapTable("Task", { database: Some("analytics_db".to_string()), table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }]; let result = tables_to_typescript(&tables, None); diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs index e765fbd72b..affaa599a9 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/diff_strategy.rs @@ -495,15 +495,20 @@ impl TableDiffStrategy for ClickHouseTableDiffStrategy { // SAMPLE BY can be modified via ALTER TABLE; do not force drop+create // Check if primary key structure has changed - let before_primary_keys = before.primary_key_columns(); - let after_primary_keys = after.primary_key_columns(); - if before_primary_keys != after_primary_keys + // Use normalized expressions to handle both primary_key_expression and column-level flags + // This ensures that primary_key_expression: Some("(foo, bar)") is equivalent to + // columns foo, bar marked with primary_key: true + let before_pk_expr = before.normalized_primary_key_expr(); + let 
after_pk_expr = after.normalized_primary_key_expr(); + if before_pk_expr != after_pk_expr // S3 allows specifying PK, but that information is not in system.columns && after.engine.is_merge_tree_family() { tracing::warn!( - "ClickHouse: Primary key structure changed for table '{}', requiring drop+create", - before.name + "ClickHouse: Primary key structure changed for table '{}' (before: '{}', after: '{}'), requiring drop+create", + before.name, + before_pk_expr, + after_pk_expr ); return vec![ OlapChange::Table(TableChange::Removed(before.clone())), @@ -699,6 +704,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, } } @@ -1506,6 +1512,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; assert!(ClickHouseTableDiffStrategy::is_s3queue_table(&s3_table)); @@ -1807,4 +1814,406 @@ mod tests { // Should not trigger a validation error - no changes at all assert_eq!(changes.len(), 0); } + + #[test] + fn test_primary_key_change_requires_drop_create() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Change primary key: before has id, after has timestamp + before.columns[0].primary_key = true; + before.columns[1].primary_key = false; + after.columns[0].primary_key = false; + after.columns[1].primary_key = true; + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Primary key change requires drop+create + assert_eq!(changes.len(), 2); + assert!(matches!( + changes[0], + OlapChange::Table(TableChange::Removed(_)) + )); + assert!(matches!( + changes[1], + OlapChange::Table(TableChange::Added(_)) + )); + } + + #[test] + fn test_primary_key_expression_equivalent_to_column_flags() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Before: use column-level primary_key flags for id and timestamp + before.columns[0].primary_key = true; + before.columns[1].primary_key = true; + + // After: use primary_key_expression with same columns + after.columns[0].primary_key = false; + after.columns[1].primary_key = false; + after.primary_key_expression = Some("(id, timestamp)".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Should NOT trigger drop+create since primary keys are semantically equivalent + assert_eq!(changes.len(), 0); + } + + #[test] + fn test_primary_key_expression_single_column() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Before: use 
column-level primary_key flag for single column + before.columns[0].primary_key = true; + + // After: use primary_key_expression with same single column + after.columns[0].primary_key = false; + after.primary_key_expression = Some("id".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Should NOT trigger drop+create since primary keys are semantically equivalent + assert_eq!(changes.len(), 0); + } + + #[test] + fn test_primary_key_expression_with_extra_spaces() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Before: primary_key_expression with no spaces + before.columns[0].primary_key = false; + before.columns[1].primary_key = false; + before.primary_key_expression = Some("(id,timestamp)".to_string()); + + // After: primary_key_expression with spaces (should be normalized the same) + after.columns[0].primary_key = false; + after.columns[1].primary_key = false; + after.primary_key_expression = Some("( id , timestamp )".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Should NOT trigger drop+create since both normalize to the same expression + assert_eq!(changes.len(), 0); + } + + #[test] + fn test_primary_key_expression_different_order_requires_drop_create() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Before: primary key is (id, timestamp) + before.columns[0].primary_key = true; + before.columns[1].primary_key = true; + + // After: primary key is (timestamp, id) - different order + after.columns[0].primary_key = false; + after.columns[1].primary_key = false; + after.primary_key_expression = Some("(timestamp, id)".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Different order requires drop+create + assert_eq!(changes.len(), 2); + assert!(matches!( + changes[0], + OlapChange::Table(TableChange::Removed(_)) + )); + assert!(matches!( + changes[1], + OlapChange::Table(TableChange::Added(_)) + )); + } + + #[test] + fn test_primary_key_expression_with_function() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Before: simple column-level primary key + 
before.columns[0].primary_key = true; + + // After: primary key with function expression + after.columns[0].primary_key = false; + after.primary_key_expression = Some("(id, cityHash64(timestamp))".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Different primary key (function vs simple column) requires drop+create + assert_eq!(changes.len(), 2); + assert!(matches!( + changes[0], + OlapChange::Table(TableChange::Removed(_)) + )); + assert!(matches!( + changes[1], + OlapChange::Table(TableChange::Added(_)) + )); + } + + #[test] + fn test_primary_key_expression_single_column_with_parens() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Before: use column-level primary_key flag for single column + before.columns[0].primary_key = true; + + // After: use primary_key_expression with parentheses around single column + // In ClickHouse, (col) and col are semantically equivalent for PRIMARY KEY + after.columns[0].primary_key = false; + after.primary_key_expression = Some("(id)".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Should NOT trigger drop+create since (id) and id are semantically equivalent + assert_eq!(changes.len(), 0); + } + + #[test] + fn test_primary_key_expression_function_with_parens() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Both use primary_key_expression with a function wrapped in parens + before.columns[0].primary_key = false; + before.primary_key_expression = Some("(cityHash64(id))".to_string()); + + after.columns[0].primary_key = false; + after.primary_key_expression = Some("cityHash64(id)".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Should NOT trigger drop+create since (expr) and expr are semantically equivalent + assert_eq!(changes.len(), 0); + } + + #[test] + fn test_primary_key_multi_column_keeps_parens() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Both have multi-column primary keys - should keep parentheses + before.columns[0].primary_key = true; + before.columns[1].primary_key = true; + + after.columns[0].primary_key = false; + 
after.columns[1].primary_key = false; + after.primary_key_expression = Some("(id,timestamp)".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Should NOT trigger drop+create - both normalize to (id,timestamp) + assert_eq!(changes.len(), 0); + } + + #[test] + fn test_primary_key_nested_function_parens() { + let strategy = ClickHouseTableDiffStrategy; + + let mut before = create_test_table("test", vec!["id".to_string()], false); + let mut after = create_test_table("test", vec!["id".to_string()], false); + + // Test that nested parentheses in functions are preserved correctly + before.columns[0].primary_key = false; + before.primary_key_expression = Some("(cityHash64(id, timestamp))".to_string()); + + after.columns[0].primary_key = false; + after.primary_key_expression = Some("cityHash64(id, timestamp)".to_string()); + + let order_by_change = OrderByChange { + before: before.order_by.clone(), + after: after.order_by.clone(), + }; + + let partition_by_change = PartitionByChange { + before: before.partition_by.clone(), + after: after.partition_by.clone(), + }; + + let changes = strategy.diff_table_update( + &before, + &after, + vec![], + order_by_change, + partition_by_change, + "local", + ); + + // Should NOT trigger drop+create - both are the same function, just with/without outer parens + assert_eq!(changes.len(), 0); + } } diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs index 2eecff31d0..21f23e6624 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/mapper.rs @@ -351,6 +351,7 @@ pub fn std_table_to_clickhouse_table(table: &Table) -> Result = columns + .iter() + .filter(|c| c.primary_key) + .map(|c| c.name.clone()) + .collect(); + + debug!("Columns marked as primary key: {:?}", primary_key_columns); + + // Build expected expression: single column = "col", multiple = "(col1, col2)" + let expected_pk_expr = if primary_key_columns.is_empty() { + String::new() + } else if primary_key_columns.len() == 1 { + primary_key_columns[0].clone() + } else { + format!("({})", primary_key_columns.join(", ")) + }; + + debug!("Expected PRIMARY KEY expression: '{}'", expected_pk_expr); + debug!("Extracted PRIMARY KEY expression: '{}'", pk_expr); + + // Normalize both expressions for comparison (same logic as Table::normalized_primary_key_expr) + let normalize = |s: &str| -> String { + // Step 1: trim, remove backticks, remove spaces + let mut normalized = + s.trim().trim_matches('`').replace('`', "").replace(" ", ""); + + // Step 2: Strip outer parentheses if this is a single-element tuple + // E.g., "(col)" -> "col", "(cityHash64(col))" -> "cityHash64(col)" + // But keep "(col1,col2)" as-is + if normalized.starts_with('(') && normalized.ends_with(')') { + // Check if there are any top-level commas (not inside nested parentheses) + let inner = &normalized[1..normalized.len() - 1]; + let has_top_level_comma = { + let mut depth = 0; + let mut found_comma = false; + for ch in inner.chars() { + match ch { + '(' => depth += 1, + ')' => depth -= 1, + ',' if depth == 0 => { + found_comma = true; 
+ break; + } + _ => {} + } + } + found_comma + }; + + // If no top-level comma, it's a single-element tuple - strip outer parens + if !has_top_level_comma { + normalized = inner.to_string(); + } + } + + normalized + }; + + let normalized_expected = normalize(&expected_pk_expr); + let normalized_extracted = normalize(pk_expr); + + debug!( + "Normalized expected: '{}', normalized extracted: '{}'", + normalized_expected, normalized_extracted + ); + + if normalized_expected == normalized_extracted { + // PRIMARY KEY matches what columns indicate, use column-level flags + debug!("PRIMARY KEY matches columns, using column-level primary_key flags"); + (columns, None) + } else { + // PRIMARY KEY differs (different order, expressions, etc.), use primary_key_expression + debug!("PRIMARY KEY differs from columns, using primary_key_expression"); + let updated_columns: Vec = columns + .into_iter() + .map(|mut c| { + c.primary_key = false; + c + }) + .collect(); + (updated_columns, Some(pk_expr.clone())) + } + } else { + // No PRIMARY KEY clause, use column-level flags as-is + debug!("No PRIMARY KEY clause, using column-level primary_key flags"); + (columns, None) + }; + // Extract base name and version for source primitive let (base_name, version) = extract_version_from_table_name(&table_name); @@ -1889,7 +1987,7 @@ impl OlapOperations for ConfiguredDBClient { let table = Table { // keep the name with version suffix, following PartialInfrastructureMap.convert_tables name: table_name, - columns, + columns: final_columns, order_by: OrderBy::Fields(order_by_cols), // Use the extracted ORDER BY columns partition_by: { let p = partition_key.trim(); @@ -1911,6 +2009,7 @@ impl OlapOperations for ConfiguredDBClient { // the ON CLUSTER clause - it's only used during DDL execution and isn't persisted // in system tables. Users must manually specify cluster in their table configs. 
cluster_name: None, + primary_key_expression: final_primary_key_expression, }; debug!("Created table object: {:?}", table); @@ -2904,6 +3003,62 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra ); } + #[test] + fn test_primary_key_normalization_single_element_tuple() { + // Test that "(id)" and "id" normalize to the same value + // This is the bug fix: single-element tuples should have outer parens stripped + let normalize = |s: &str| -> String { + let mut normalized = s.trim().trim_matches('`').replace('`', "").replace(" ", ""); + + if normalized.starts_with('(') && normalized.ends_with(')') { + let inner = &normalized[1..normalized.len() - 1]; + let has_top_level_comma = { + let mut depth = 0; + let mut found_comma = false; + for ch in inner.chars() { + match ch { + '(' => depth += 1, + ')' => depth -= 1, + ',' if depth == 0 => { + found_comma = true; + break; + } + _ => {} + } + } + found_comma + }; + + if !has_top_level_comma { + normalized = inner.to_string(); + } + } + + normalized + }; + + // Single element: "(id)" should normalize to "id" + assert_eq!(normalize("(id)"), "id"); + assert_eq!(normalize("id"), "id"); + assert_eq!(normalize("(id)"), normalize("id")); + + // Single element with function: "(cityHash64(id))" should normalize to "cityHash64(id)" + assert_eq!(normalize("(cityHash64(id))"), "cityHash64(id)"); + assert_eq!(normalize("cityHash64(id)"), "cityHash64(id)"); + assert_eq!(normalize("(cityHash64(id))"), normalize("cityHash64(id)")); + + // Multiple elements: "(id, ts)" should stay as "(id,ts)" (with spaces removed) + assert_eq!(normalize("(id, ts)"), "(id,ts)"); + assert_eq!(normalize("(id,ts)"), "(id,ts)"); + + // Multiple elements with functions: should keep parens + assert_eq!(normalize("(id, cityHash64(ts))"), "(id,cityHash64(ts))"); + + // Backticks should be removed + assert_eq!(normalize("(`id`)"), "id"); + assert_eq!(normalize("(` id `)"), "id"); + } + #[test] fn test_normalize_ttl_expression() { // Test DAY conversion @@ -3158,6 +3313,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra database: None, cluster_name: None, table_ttl_setting: Some("created_at + INTERVAL 30 DAY".to_string()), + primary_key_expression: None, }; let ignore_ops = vec![ @@ -3224,6 +3380,7 @@ SETTINGS enable_mixed_granularity_parts = 1, index_granularity = 8192, index_gra database: None, cluster_name: None, table_ttl_setting: Some("created_at + INTERVAL 30 DAY".to_string()), + primary_key_expression: None, }; let ignore_ops = vec![]; diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs index fbc134ea65..d21ca45082 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/model.rs @@ -659,6 +659,8 @@ pub struct ClickHouseTable { pub table_ttl_setting: Option, /// Optional cluster name for ON CLUSTER support pub cluster_name: Option, + /// Optional PRIMARY KEY expression (overrides column-level primary_key flags when specified) + pub primary_key_expression: Option, } impl ClickHouseTable { diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs index 4b3042ed03..5a2c736a2b 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs @@ -2710,12 +2710,30 @@ pub fn create_table_query( 
None }; - let primary_key = table - .columns - .iter() - .filter(|column| column.primary_key) - .map(|column| column.name.clone()) - .collect::>(); + // PRIMARY KEY: use primary_key_expression if specified, otherwise use columns with primary_key flag + let primary_key_str = if let Some(ref expr) = table.primary_key_expression { + // When primary_key_expression is specified, use it directly (ignoring column-level primary_key flags) + // Strip outer parentheses if present, as the template will add them + let trimmed = expr.trim(); + if trimmed.starts_with('(') && trimmed.ends_with(')') { + Some(trimmed[1..trimmed.len() - 1].to_string()) + } else { + Some(trimmed.to_string()) + } + } else { + // Otherwise, use columns with primary_key flag + let primary_key = table + .columns + .iter() + .filter(|column| column.primary_key) + .map(|column| column.name.clone()) + .collect::>(); + if !primary_key.is_empty() { + Some(wrap_and_join_column_names(&primary_key, ",")) + } else { + None + } + }; // Prepare indexes strings like: INDEX name expr TYPE type(args...) GRANULARITY n let (has_indexes, index_strings): (bool, Vec) = if table.indexes.is_empty() { @@ -2765,8 +2783,8 @@ pub fn create_table_query( "has_fields": !table.columns.is_empty(), "has_indexes": has_indexes, "indexes": index_strings, - "primary_key_string": if supports_primary_key && !primary_key.is_empty() { - Some(wrap_and_join_column_names(&primary_key, ",")) + "primary_key_string": if supports_primary_key { + primary_key_str } else { None }, @@ -2775,7 +2793,18 @@ pub fn create_table_query( OrderBy::Fields(v) if v.len() == 1 && v[0] == "tuple()" => Some("tuple()".to_string()), OrderBy::Fields(v) if v.is_empty() => None, OrderBy::Fields(v) => Some(wrap_and_join_column_names(v, ",")), - OrderBy::SingleExpr(expr) => Some(expr.clone()), + OrderBy::SingleExpr(expr) => { + // Strip outer parentheses if present, as the template will add them + // Exception: keep tuple() as-is since it's a function call + let trimmed = expr.trim(); + if trimmed == "tuple()" { + Some(trimmed.to_string()) + } else if trimmed.starts_with('(') && trimmed.ends_with(')') { + Some(trimmed[1..trimmed.len()-1].to_string()) + } else { + Some(trimmed.to_string()) + } + }, } } else { None @@ -3279,6 +3308,7 @@ mod tests { indexes: vec![], table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3317,6 +3347,7 @@ PRIMARY KEY (`id`) indexes: vec![], table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3354,6 +3385,7 @@ ENGINE = MergeTree indexes: vec![], table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3414,6 +3446,7 @@ ENGINE = MergeTree indexes: vec![], table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3455,6 +3488,7 @@ ENGINE = MergeTree indexes: vec![], table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3495,6 +3529,7 @@ ORDER BY (`id`) "#; indexes: vec![], table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; let result = create_table_query("test_db", table, false); @@ -3542,6 +3577,7 @@ ORDER BY (`id`) "#; indexes: vec![], table_ttl_setting: None, cluster_name: 
None, + primary_key_expression: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3605,6 +3641,7 @@ ORDER BY (`id`) "#; indexes: vec![], table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3647,6 +3684,7 @@ ORDER BY (`id`) "#; table_ttl_setting: None, indexes: vec![], cluster_name: None, + primary_key_expression: None, }; let result = create_table_query("test_db", table, false); @@ -3803,6 +3841,7 @@ ORDER BY (`id`) "#; indexes: vec![], table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -3819,6 +3858,101 @@ ORDER BY (`id`) "#; assert_eq!(query.trim(), expected.trim()); } + #[test] + fn test_create_table_query_with_primary_key_expression() { + let table = ClickHouseTable { + version: Some(Version::from_string("1".to_string())), + name: "test_table".to_string(), + columns: vec![ + ClickHouseColumn { + name: "user_id".to_string(), + column_type: ClickHouseColumnType::String, + required: true, + unique: false, + primary_key: false, // primary_key flag ignored when primary_key_expression is set + default: None, + comment: None, + ttl: None, + }, + ClickHouseColumn { + name: "event_id".to_string(), + column_type: ClickHouseColumnType::String, + required: true, + unique: false, + primary_key: false, + default: None, + comment: None, + ttl: None, + }, + ClickHouseColumn { + name: "timestamp".to_string(), + column_type: ClickHouseColumnType::DateTime, + required: true, + unique: false, + primary_key: false, + default: None, + comment: None, + ttl: None, + }, + ], + order_by: OrderBy::SingleExpr("(user_id, cityHash64(event_id), timestamp)".to_string()), + partition_by: None, + sample_by: None, + engine: ClickhouseEngine::MergeTree, + table_settings: None, + indexes: vec![], + table_ttl_setting: None, + cluster_name: None, + primary_key_expression: Some("(user_id, cityHash64(event_id))".to_string()), + }; + + let query = create_table_query("test_db", table, false).unwrap(); + let expected = r#" +CREATE TABLE IF NOT EXISTS `test_db`.`test_table` +( + `user_id` String NOT NULL, + `event_id` String NOT NULL, + `timestamp` DateTime('UTC') NOT NULL +) +ENGINE = MergeTree +PRIMARY KEY (user_id, cityHash64(event_id)) +ORDER BY (user_id, cityHash64(event_id), timestamp)"#; + assert_eq!(query.trim(), expected.trim()); + } + + #[test] + fn test_create_table_query_with_primary_key_expression_no_parens() { + // Test that primary_key_expression works even without outer parentheses + let table = ClickHouseTable { + version: Some(Version::from_string("1".to_string())), + name: "test_table".to_string(), + columns: vec![ClickHouseColumn { + name: "product_id".to_string(), + column_type: ClickHouseColumnType::String, + required: true, + unique: false, + primary_key: false, + default: None, + comment: None, + ttl: None, + }], + order_by: OrderBy::Fields(vec!["product_id".to_string()]), + partition_by: None, + sample_by: None, + engine: ClickhouseEngine::MergeTree, + table_settings: None, + indexes: vec![], + table_ttl_setting: None, + cluster_name: None, + primary_key_expression: Some("product_id".to_string()), + }; + + let query = create_table_query("test_db", table, false).unwrap(); + assert!(query.contains("PRIMARY KEY (product_id)")); + // Should have single parentheses, not double + assert!(!query.contains("PRIMARY KEY ((product_id))")); + } + #[test] fn test_create_table_query_s3queue() { let 
mut settings = std::collections::HashMap::new(); @@ -3869,6 +4003,7 @@ ORDER BY (`id`) "#; indexes: vec![], table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -4342,6 +4477,7 @@ SETTINGS keeper_path = '/clickhouse/s3queue/test_table', mode = 'unordered', s3q indexes: vec![], table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -4885,6 +5021,7 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; indexes: vec![], table_ttl_setting: None, cluster_name: Some("test_cluster".to_string()), + primary_key_expression: None, }; let query = create_table_query("test_db", table, false).unwrap(); @@ -4929,6 +5066,7 @@ ENGINE = S3Queue('s3://my-bucket/data/*.csv', NOSIGN, 'CSV')"#; indexes: vec![], table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; let query = create_table_query("test_db", table, false).unwrap(); diff --git a/apps/framework-cli/src/infrastructure/olap/clickhouse/sql_parser.rs b/apps/framework-cli/src/infrastructure/olap/clickhouse/sql_parser.rs index fd67a4c273..8e2025ef16 100644 --- a/apps/framework-cli/src/infrastructure/olap/clickhouse/sql_parser.rs +++ b/apps/framework-cli/src/infrastructure/olap/clickhouse/sql_parser.rs @@ -264,6 +264,64 @@ pub fn extract_sample_by_from_create_table(sql: &str) -> Option { } } +/// Extract PRIMARY KEY expression from a CREATE TABLE statement +/// Returns the raw expression string that follows PRIMARY KEY, trimmed, +/// and stops before ORDER BY, SETTINGS, or end of statement +/// +/// Note: This extracts the PRIMARY KEY clause, which in ClickHouse is used +/// to specify a different primary key than the ORDER BY clause. 
+pub fn extract_primary_key_from_create_table(sql: &str) -> Option { + let upper = sql.to_uppercase(); + + // Find PRIMARY KEY that is NOT part of "ORDER BY PRIMARY KEY" + // We need to check that it's a standalone PRIMARY KEY clause + let mut primary_key_pos = None; + for (idx, _) in upper.match_indices("PRIMARY KEY") { + // Check if this is part of ORDER BY by looking at preceding text + let preceding_start = idx.saturating_sub(20); + let preceding = &upper[preceding_start..idx].trim(); + + // If preceded by ORDER BY, this is "ORDER BY PRIMARY KEY", not a standalone PRIMARY KEY + if !preceding.ends_with("ORDER BY") { + primary_key_pos = Some(idx); + break; + } + } + + let pos = primary_key_pos?; + + // After the keyword + let after = &sql[pos + "PRIMARY KEY".len()..]; + let after_upper = after.to_uppercase(); + + // Find earliest terminating keyword after PRIMARY KEY + // Clause order: PRIMARY KEY → PARTITION BY → ORDER BY → SAMPLE BY → SETTINGS → TTL + let mut end = after.len(); + if let Some(i) = after_upper.find("PARTITION BY") { + end = end.min(i); + } + if let Some(i) = after_upper.find("ORDER BY") { + end = end.min(i); + } + if let Some(i) = after_upper.find("SAMPLE BY") { + end = end.min(i); + } + if let Some(i) = after_upper.find(" SETTINGS") { + end = end.min(i); + } + // Note: Match " TTL" with leading space to avoid matching substrings + if let Some(i) = after_upper.find(" TTL") { + end = end.min(i); + } + + let expr = after[..end].trim(); + if expr.is_empty() { + None + } else { + Some(expr.to_string()) + } +} + // sql_parser library cannot handle clickhouse indexes last time i tried // `show indexes` does not provide index argument info // so we're stuck with this @@ -1677,6 +1735,103 @@ pub mod tests { ); } + // Tests for extract_primary_key_from_create_table + #[test] + fn test_extract_primary_key_simple() { + let sql = r#"CREATE TABLE t (id UInt64, name String) ENGINE = MergeTree PRIMARY KEY id ORDER BY id"#; + assert_eq!( + extract_primary_key_from_create_table(sql), + Some("id".to_string()) + ); + } + + #[test] + fn test_extract_primary_key_tuple() { + let sql = r#"CREATE TABLE t (id UInt64, ts DateTime) ENGINE = MergeTree PRIMARY KEY (id, ts) ORDER BY (id, ts)"#; + assert_eq!( + extract_primary_key_from_create_table(sql), + Some("(id, ts)".to_string()) + ); + } + + #[test] + fn test_extract_primary_key_with_expression() { + let sql = r#"CREATE TABLE t (id UInt64, ts DateTime) ENGINE = MergeTree PRIMARY KEY (id, toYYYYMM(ts)) ORDER BY (id, ts)"#; + assert_eq!( + extract_primary_key_from_create_table(sql), + Some("(id, toYYYYMM(ts))".to_string()) + ); + } + + #[test] + fn test_extract_primary_key_order_by_primary_key() { + // Test that we DON'T extract "ORDER BY PRIMARY KEY" as a PRIMARY KEY clause + let sql = r#"CREATE TABLE t (id UInt64) ENGINE = MergeTree ORDER BY PRIMARY KEY id"#; + assert_eq!(extract_primary_key_from_create_table(sql), None); + } + + #[test] + fn test_extract_primary_key_with_settings() { + let sql = r#"CREATE TABLE t (id UInt64, name String) ENGINE = MergeTree PRIMARY KEY id ORDER BY id SETTINGS index_granularity = 8192"#; + assert_eq!( + extract_primary_key_from_create_table(sql), + Some("id".to_string()) + ); + } + + #[test] + fn test_extract_primary_key_no_primary_key() { + let sql = r#"CREATE TABLE t (id UInt64) ENGINE = MergeTree ORDER BY id"#; + assert_eq!(extract_primary_key_from_create_table(sql), None); + } + + #[test] + fn test_extract_primary_key_nested_objects() { + // NESTED_OBJECTS_SQL has "PRIMARY KEY id" + assert_eq!( + 
extract_primary_key_from_create_table(NESTED_OBJECTS_SQL), + Some("id".to_string()) + ); + } + + #[test] + fn test_extract_primary_key_with_sample_by() { + let sql = r#"CREATE TABLE t (id UInt64, hash UInt64) ENGINE = MergeTree PRIMARY KEY id SAMPLE BY hash ORDER BY (id, hash)"#; + assert_eq!( + extract_primary_key_from_create_table(sql), + Some("id".to_string()) + ); + } + + #[test] + fn test_extract_primary_key_with_ttl() { + let sql = r#"CREATE TABLE t (id UInt64, ts DateTime) ENGINE = MergeTree PRIMARY KEY id ORDER BY id TTL ts + INTERVAL 30 DAY"#; + assert_eq!( + extract_primary_key_from_create_table(sql), + Some("id".to_string()) + ); + } + + #[test] + fn test_extract_primary_key_with_partition_by() { + // Test that PRIMARY KEY stops at PARTITION BY clause + let sql = r#"CREATE TABLE t (id UInt64, ts DateTime) ENGINE = MergeTree PRIMARY KEY id PARTITION BY toYYYYMM(ts) ORDER BY id"#; + assert_eq!( + extract_primary_key_from_create_table(sql), + Some("id".to_string()) + ); + } + + #[test] + fn test_extract_primary_key_tuple_with_partition_by() { + // Test that PRIMARY KEY with tuple stops at PARTITION BY + let sql = r#"CREATE TABLE t (id UInt64, ts DateTime) ENGINE = MergeTree PRIMARY KEY (id, ts) PARTITION BY toYYYYMM(ts) ORDER BY (id, ts)"#; + assert_eq!( + extract_primary_key_from_create_table(sql), + Some("(id, ts)".to_string()) + ); + } + #[test] fn test_extract_indexes_from_create_table_multiple() { let sql = "CREATE TABLE local.table_name (`u64` UInt64, `i32` Int32, `s` String, \ diff --git a/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs b/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs index 2f7f375397..aeefa8144d 100644 --- a/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs +++ b/apps/framework-cli/src/infrastructure/olap/ddl_ordering.rs @@ -1322,6 +1322,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; // Create some atomic operations @@ -1397,6 +1398,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; // Create table B - depends on table A @@ -1420,6 +1422,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; // Create view C - depends on table B @@ -1515,6 +1518,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; // Create table B - target for materialized view @@ -1538,6 +1542,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; // Create view C - depends on table B @@ -1653,6 +1658,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; let view = View { @@ -1810,6 +1816,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; let table_b = Table { @@ -1832,6 +1839,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; let table_c = Table { @@ -1854,6 +1862,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; // Test operations @@ -1945,6 +1954,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; let table_b = Table { @@ -1967,6 +1977,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; let 
table_c = Table { @@ -1989,6 +2000,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; let table_d = Table { @@ -2011,6 +2023,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; let table_e = Table { @@ -2033,6 +2046,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; let op_create_a = AtomicOlapOperation::CreateTable { @@ -2187,6 +2201,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; // Create table B - target for materialized view @@ -2210,6 +2225,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; // Create SQL resource for a materialized view @@ -2333,6 +2349,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; // Create table B - target for materialized view @@ -2356,6 +2373,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; // Create SQL resource for a materialized view @@ -2484,6 +2502,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; let table_b = Table { @@ -2506,6 +2525,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; // Create SQL resource for materialized view @@ -2714,6 +2734,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; // Create a column @@ -2824,6 +2845,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; // Create operations with signatures that work with the current implementation @@ -2944,6 +2966,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; let after_table = Table { @@ -2989,6 +3012,7 @@ mod tests { database: None, table_ttl_setting: None, cluster_name: None, + primary_key_expression: None, }; // Create column changes (remove old_column, add new_column) diff --git a/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx b/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx index abd8c4df08..584a71f444 100644 --- a/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx +++ b/apps/framework-docs-v2/content/moosestack/olap/model-table.mdx @@ -514,6 +514,131 @@ unsorted = OlapTable[Events]("events_unsorted", OlapConfig( +### Primary Key Expression + +Use a ClickHouse SQL expression to define the primary key explicitly. This is useful when: +- You need functions in the primary key (e.g., `cityHash64(id)`) +- The primary key column ordering should differ from the schema definition +- The primary key should differ from the ORDER BY + +**Important:** When `primaryKeyExpression` is specified, any `Key` annotations on columns are ignored for PRIMARY KEY generation. 
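+
+To make the mapping concrete, here is a minimal sketch of how these settings translate into the generated `CREATE TABLE` clauses (TypeScript shown; the Python `OlapConfig` fields behave the same way, and the DDL is approximate):
+
+```ts
+import { OlapTable } from "@514labs/moose-lib";
+
+interface Event {
+  userId: string;
+  eventId: string;
+  timestamp: Date;
+}
+
+// Illustrative table name; the point is the clause mapping shown below.
+const events = new OlapTable<Event>("events", {
+  primaryKeyExpression: "(userId, cityHash64(eventId))",
+  orderByExpression: "(userId, cityHash64(eventId), timestamp)",
+});
+
+// Approximate DDL generated from this config:
+//   CREATE TABLE events (...)
+//   ENGINE = MergeTree
+//   PRIMARY KEY (userId, cityHash64(eventId))
+//   ORDER BY (userId, cityHash64(eventId), timestamp)
+```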
+ + + +```ts filename="PrimaryKeyExpression.ts" copy +import { OlapTable, Key } from "@514labs/moose-lib"; + +// Example 1: Primary key with function +interface UserEvents { + userId: string; + eventId: string; + timestamp: Date; +} + +const eventsTable = new OlapTable("user_events", { + // Use hash function in primary key for better distribution + primaryKeyExpression: "(userId, cityHash64(eventId))", + orderByExpression: "(userId, timestamp)", +}); + +// Example 2: Different ordering in primary key vs columns +interface Product { + category: string; + brand: string; + productId: string; + name: string; +} + +const productsTable = new OlapTable("products", { + // Primary key order optimized for uniqueness + primaryKeyExpression: "productId", + // Order by optimized for common queries + orderByFields: ["category", "brand", "productId"], +}); + +// Example 3: Override Key annotation +interface Record { + id: Key; // This Key annotation will be IGNORED + otherId: string; +} + +const recordTable = new OlapTable("records", { + // This expression overrides the Key annotation + primaryKeyExpression: "(otherId, id)", + orderByExpression: "(otherId, id)", +}); +``` + + +```py filename="PrimaryKeyExpression.py" copy +from moose_lib import OlapTable, OlapConfig, Key +from pydantic import BaseModel +from datetime import datetime + +# Example 1: Primary key with function +class UserEvents(BaseModel): + user_id: str + event_id: str + timestamp: datetime + +events_table = OlapTable[UserEvents]("user_events", OlapConfig( + # Use hash function in primary key for better distribution + primary_key_expression="(user_id, cityHash64(event_id))", + order_by_expression="(user_id, timestamp)", +)) + +# Example 2: Different ordering in primary key vs columns +class Product(BaseModel): + category: str + brand: str + product_id: str + name: str + +products_table = OlapTable[Product]("products", OlapConfig( + # Primary key order optimized for uniqueness + primary_key_expression="(product_id)", + # Order by optimized for common queries + order_by_fields=["category", "brand", "product_id"], +)) + +# Example 3: Override Key[T] annotation +class Record(BaseModel): + id: Key[str] # This Key[T] annotation will be IGNORED + other_id: str + +record_table = OlapTable[Record]("records", OlapConfig( + # This expression overrides the Key[T] annotation + primary_key_expression="(other_id, id)", + order_by_expression="(other_id, id)", +)) +``` + + + +**Rationale for Primary Key Expression:** + +1. **Function Support**: Primary keys can use ClickHouse functions like `cityHash64()` for better data distribution, which cannot be expressed through column-level annotations. + +2. **Flexible Ordering**: The ordering of columns in the primary key can be different from the ordering in the schema definition, allowing optimization for both data uniqueness and query patterns. + +3. **Separation of Concerns**: PRIMARY KEY and ORDER BY serve different purposes in ClickHouse: + - PRIMARY KEY defines uniqueness and deduplication + - ORDER BY defines physical data layout and query optimization + + Sometimes these need different column orderings for optimal performance. + +**Important Constraint:** + +⚠️ **PRIMARY KEY must be a prefix of ORDER BY in ClickHouse.** This means ORDER BY must start with all PRIMARY KEY columns in the same order. 
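+
+In configuration terms, a short sketch of what satisfying (and violating) this constraint looks like (the names here are illustrative):
+
+```ts
+import { OlapTable } from "@514labs/moose-lib";
+
+interface Event {
+  userId: string;
+  eventId: string;
+  timestamp: Date;
+}
+
+// ❌ Invalid: ORDER BY does not start with every PRIMARY KEY column,
+// so ClickHouse rejects the generated CREATE TABLE.
+const broken = new OlapTable<Event>("events_broken", {
+  primaryKeyExpression: "(userId, eventId)",
+  orderByExpression: "(userId, timestamp)",
+});
+
+// ✅ Valid: ORDER BY starts with all PRIMARY KEY columns, in the same order.
+const fixed = new OlapTable<Event>("events_fixed", {
+  primaryKeyExpression: "(userId, eventId)",
+  orderByExpression: "(userId, eventId, timestamp)",
+});
+```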
+ +Valid: +- PRIMARY KEY `(userId)` with ORDER BY `(userId, timestamp)` ✅ +- PRIMARY KEY `(userId, cityHash64(eventId))` with ORDER BY `(userId, cityHash64(eventId), timestamp)` ✅ + +Invalid: +- PRIMARY KEY `(userId, eventId)` with ORDER BY `(userId, timestamp)` ❌ (missing eventId) +- PRIMARY KEY `(userId, eventId)` with ORDER BY `(eventId, userId)` ❌ (wrong order) + ### Using Both Primary Key and Order By Fields diff --git a/apps/framework-docs/llm-docs/python/constraints.md b/apps/framework-docs/llm-docs/python/constraints.md index 6c4191dcec..fddd6a9be5 100644 --- a/apps/framework-docs/llm-docs/python/constraints.md +++ b/apps/framework-docs/llm-docs/python/constraints.md @@ -14,12 +14,6 @@ Configuration constraints in Moose provide a way to enforce rules and limitation ## Table Configuration Constraints -### Key Requirements - -- Schema must have `Key[type]` on a top level field passed into IngestPipeline or OlapTable -- If using `order_by_fields`, the first field must be the primary key (`Key[type]`) when present -- If using `order_by_expression`, ensure your expression starts with the primary key column when a primary key exists (e.g., `(id, created_at, ...)`) - ### ORDER BY Requirements - Fields used in `order_by_fields` must exist on the top level schema @@ -27,6 +21,13 @@ Configuration constraints in Moose provide a way to enforce rules and limitation - When using `order_by_expression`, the expression should reference only top-level, non-optional columns from the schema - To disable sorting entirely, set `order_by_expression="tuple()"` +### PRIMARY KEY Requirements + +- By default, primary key is inferred from `Key[type]` column annotations +- Use `primary_key_expression` to explicitly define primary key with functions or custom ordering +- When `primary_key_expression` is specified, `Key[type]` annotations are ignored for PRIMARY KEY generation +- **CRITICAL**: PRIMARY KEY must be a prefix of ORDER BY (ORDER BY must start with all PRIMARY KEY columns in the same order) + ## Schema Design Constraints - No Optional objects or custom types in `order_by_fields` diff --git a/apps/framework-docs/llm-docs/typescript/constraints.md b/apps/framework-docs/llm-docs/typescript/constraints.md index 1d417ff468..d81993243d 100644 --- a/apps/framework-docs/llm-docs/typescript/constraints.md +++ b/apps/framework-docs/llm-docs/typescript/constraints.md @@ -9,8 +9,14 @@ language: typescript ## Table Configuration Constraints ### Key Requirements -- Schema ust have `Key` on a top level field passed into IngestPipeline` -- `Key` must be first field in `orderByFields` when specified +- Primary keys are optional in ClickHouse; only `ORDER BY` is required +- You can define primary keys in three ways: + 1. Use `Key` on a top-level field (automatically becomes primary key) + 2. Use `primaryKeyExpression` in table config (most flexible, overrides `Key`) + 3. 
Don't specify a primary key at all (only `ORDER BY` is used) +- If using `Key`, it must be the first field in `orderByFields` when specified +- If using `primaryKeyExpression`, it overrides column-level `Key` definitions +- `primaryKeyExpression` supports functions and custom column ordering ### OrderByFields Requirements - Fields used in `orderByFields` must exist on the top level schema diff --git a/apps/framework-docs/llm-docs/typescript/table-setup.md b/apps/framework-docs/llm-docs/typescript/table-setup.md index 59714c4a45..ba1de2075f 100644 --- a/apps/framework-docs/llm-docs/typescript/table-setup.md +++ b/apps/framework-docs/llm-docs/typescript/table-setup.md @@ -99,13 +99,17 @@ type OlapConfig = ### Key Requirements -When defining your schema, you must either: -1. Use `Key` for one of the top-level fields, or -2. Specify the key field in `orderByFields` +**Primary keys are optional** - ClickHouse only requires `ORDER BY`. You have several options: + +1. **No primary key** - Just specify `orderByFields` (ORDER BY only) +2. **Use `Key` for one of the top-level fields** (automatically becomes primary key) +3. **Use `primaryKeyExpression`** in table config (most flexible, overrides `Key`) Important requirements: +- `orderByFields` is required; fields must not be nullable (no optional fields or union with null) - If you use `Key`, it must be the first field in `orderByFields` when specified -- Fields used in `orderByFields` must not be nullable (no optional fields or union with null) +- If `primaryKeyExpression` is specified, it takes precedence over column-level `Key` definitions +- `primaryKeyExpression` supports function calls and custom column ordering beyond simple field lists ### Basic Configuration Examples @@ -140,6 +144,47 @@ export const Unkeyed = new OlapTable("Unkeyed", { }); ``` +### Primary Key Expression + +The `primaryKeyExpression` field provides maximum flexibility for defining primary keys. When specified, it overrides column-level `Key` definitions. 
+ +**Why use `primaryKeyExpression`?** +- Primary keys can include function calls (e.g., `toStartOfHour(timestamp)`) +- Column ordering in the primary key can differ from schema order +- Supports composite keys with custom expressions + +```typescript +interface EventSchema { + id: string; + name: string; + timestamp: Date; +} + +// ✅ Explicit primary key with multiple columns +export const Events = new OlapTable("Events", { + primaryKeyExpression: "(id, name)", + orderByFields: ["id", "name"], // Must match primaryKeyExpression for MergeTree +}); + +// ✅ Primary key with function (advanced) +export const HourlyEvents = new OlapTable("HourlyEvents", { + primaryKeyExpression: "(toStartOfHour(timestamp), id)", + orderByFields: ["timestamp", "id"], +}); + +// ✅ Single column primary key +export const SimpleEvents = new OlapTable("SimpleEvents", { + primaryKeyExpression: "id", + orderByFields: ["id"], +}); +``` + +**Important Notes:** +- For MergeTree engines, `primaryKeyExpression` columns must match the start of `orderByFields` +- When using `primaryKeyExpression`, you don't need to use `Key` in your schema +- The expression supports any valid ClickHouse SQL expression +- Parentheses are required for multi-column keys: `"(col1, col2)"` not `"col1, col2"` + ## Table Examples ### Basic Table with Key diff --git a/packages/protobuf/infrastructure_map.proto b/packages/protobuf/infrastructure_map.proto index 96887f6167..307ffd6edc 100644 --- a/packages/protobuf/infrastructure_map.proto +++ b/packages/protobuf/infrastructure_map.proto @@ -154,6 +154,10 @@ message Table { // Optional cluster name for ON CLUSTER support in ClickHouse optional string cluster_name = 18; + + // Optional PRIMARY KEY expression + // When specified, overrides primary_key boolean flags on columns + optional string primary_key_expression = 19; } // Structured representation of ORDER BY to support either explicit fields diff --git a/packages/py-moose-lib/moose_lib/dmv2/olap_table.py b/packages/py-moose-lib/moose_lib/dmv2/olap_table.py index c69622f645..7e50b67044 100644 --- a/packages/py-moose-lib/moose_lib/dmv2/olap_table.py +++ b/packages/py-moose-lib/moose_lib/dmv2/olap_table.py @@ -130,6 +130,11 @@ class OlapConfig(BaseModel): partition_by: Optional PARTITION BY expression (single ClickHouse SQL expression). sample_by_expression: Optional SAMPLE BY expression for data sampling (single ClickHouse SQL expression). Used to enable efficient approximate query processing with SAMPLE clause. + primary_key_expression: Optional PRIMARY KEY expression. When specified, this overrides the primary key + inferred from Key[T] column annotations. This allows for complex primary keys using + functions (e.g., "cityHash64(id)") or different column ordering in primary key vs + schema definition. Note: When this is set, any Key[T] annotations on columns are + ignored for PRIMARY KEY generation. engine: The ClickHouse table engine to use. Can be either a ClickHouseEngines enum value (for backward compatibility) or an EngineConfig instance (recommended). version: Optional version string for tracking configuration changes. 
@@ -146,6 +151,7 @@ class OlapConfig(BaseModel): order_by_expression: Optional[str] = None partition_by: Optional[str] = None sample_by_expression: Optional[str] = None + primary_key_expression: Optional[str] = None engine: Optional[Union[ClickHouseEngines, EngineConfig]] = None version: Optional[str] = None metadata: Optional[dict] = None diff --git a/packages/py-moose-lib/moose_lib/internal.py b/packages/py-moose-lib/moose_lib/internal.py index 46fee76921..fd72156169 100644 --- a/packages/py-moose-lib/moose_lib/internal.py +++ b/packages/py-moose-lib/moose_lib/internal.py @@ -204,6 +204,7 @@ class TableConfig(BaseModel): order_by: List of columns used for the ORDER BY clause. partition_by: The column name used for the PARTITION BY clause. sample_by_expression: Optional SAMPLE BY expression for data sampling. + primary_key_expression: Optional PRIMARY KEY expression (overrides column-level primary_key flags when specified). engine_config: Engine configuration with type-safe, engine-specific parameters. version: Optional version string of the table configuration. metadata: Optional metadata for the table. @@ -218,6 +219,7 @@ class TableConfig(BaseModel): order_by: List[str] | str partition_by: Optional[str] sample_by_expression: Optional[str] = None + primary_key_expression: Optional[str] = None engine_config: Optional[EngineConfigDict] = Field(None, discriminator='engine') version: Optional[str] = None metadata: Optional[dict] = None @@ -710,6 +712,7 @@ def to_infra_map() -> dict: order_by=order_by_value, partition_by=table.config.partition_by, sample_by_expression=table.config.sample_by_expression, + primary_key_expression=table.config.primary_key_expression, engine_config=engine_config, version=table.config.version, metadata=getattr(table, "metadata", None), diff --git a/packages/ts-moose-lib/src/dmv2/internal.ts b/packages/ts-moose-lib/src/dmv2/internal.ts index 7f003e79dd..ed70601698 100644 --- a/packages/ts-moose-lib/src/dmv2/internal.ts +++ b/packages/ts-moose-lib/src/dmv2/internal.ts @@ -198,6 +198,8 @@ interface TableJson { partitionBy?: string; /** SAMPLE BY expression for approximate query processing. */ sampleByExpression?: string; + /** PRIMARY KEY expression (overrides column-level primary_key flags when specified). */ + primaryKeyExpression?: string; /** Engine configuration with type-safe, engine-specific parameters */ engineConfig?: EngineConfig; /** Optional version string for the table configuration. */ @@ -746,6 +748,10 @@ export const toInfraMap = (registry: typeof moose_internal) => { "sampleByExpression" in table.config ? table.config.sampleByExpression : undefined, + primaryKeyExpression: + "primaryKeyExpression" in table.config ? + table.config.primaryKeyExpression + : undefined, engineConfig, version: table.config.version, metadata, diff --git a/packages/ts-moose-lib/src/dmv2/sdk/olapTable.ts b/packages/ts-moose-lib/src/dmv2/sdk/olapTable.ts index 8e2b05da25..25d564c304 100644 --- a/packages/ts-moose-lib/src/dmv2/sdk/olapTable.ts +++ b/packages/ts-moose-lib/src/dmv2/sdk/olapTable.ts @@ -209,6 +209,20 @@ export type BaseOlapConfig = ( * - If using hash functions, the same expression must appear in orderByExpression */ sampleByExpression?: string; + /** + * Optional PRIMARY KEY expression. + * When specified, this overrides the primary key inferred from Key column annotations. 
+ * + * This allows for: + * - Complex primary keys using functions (e.g., "cityHash64(id)") + * - Different column ordering in primary key vs schema definition + * - Primary keys that differ from ORDER BY + * + * Example: primaryKeyExpression: "(userId, cityHash64(eventId))" + * + * Note: When this is set, any Key annotations on columns are ignored for PRIMARY KEY generation. + */ + primaryKeyExpression?: string; version?: string; lifeCycle?: LifeCycle; settings?: { [key: string]: string }; diff --git a/templates/python-tests/src/ingest/models.py b/templates/python-tests/src/ingest/models.py index 70124db444..5f32d7e1cd 100644 --- a/templates/python-tests/src/ingest/models.py +++ b/templates/python-tests/src/ingest/models.py @@ -684,3 +684,46 @@ class DateTimePrecisionTestData(BaseModel): "DateTimePrecisionOutput", StreamConfig(destination=datetime_precision_output_table) ) + +# =======Primary Key Expression Tests========= + +# Test: Primary Key Expression with hash function +class PrimaryKeyExpressionTest(BaseModel): + user_id: str + event_id: str + timestamp: datetime + category: str + + +# Table using primary_key_expression with hash function for better distribution +# Note: PRIMARY KEY must be a prefix of ORDER BY in ClickHouse +primary_key_expression_table = OlapTable[PrimaryKeyExpressionTest]( + "PrimaryKeyExpressionTest", + OlapConfig( + # Primary key uses hash function for better distribution + primary_key_expression="(user_id, cityHash64(event_id))", + # Order by must start with the same columns as primary key + order_by_expression="(user_id, cityHash64(event_id), timestamp)", + ) +) + + +# Test: Primary Key Expression with different column ordering +class PrimaryKeyOrderingTest(BaseModel): + product_id: str + category: str + brand: str + timestamp: datetime + + +# Table where primary key order differs from schema order +# Note: ORDER BY must start with PRIMARY KEY columns +primary_key_ordering_table = OlapTable[PrimaryKeyOrderingTest]( + "PrimaryKeyOrderingTest", + OlapConfig( + # Primary key optimized for uniqueness + primary_key_expression="(product_id)", + # Order by starts with primary key, then adds other columns for query optimization + order_by_fields=["product_id", "category", "brand"], + ) +) diff --git a/templates/typescript-tests/src/ingest/models.ts b/templates/typescript-tests/src/ingest/models.ts index 7dc1e25332..cba0cf040e 100644 --- a/templates/typescript-tests/src/ingest/models.ts +++ b/templates/typescript-tests/src/ingest/models.ts @@ -684,3 +684,47 @@ export const dateTimePrecisionOutputStream = new Stream("DateTimePrecisionOutput", { destination: DateTimePrecisionOutputTable, }); + +/** =======Primary Key Expression Tests========= */ + +/** Test: Primary Key Expression with hash function */ +export interface PrimaryKeyExpressionTest { + userId: string; + eventId: string; + timestamp: DateTime; + category: string; +} + +/** + * Table using primary_key_expression with hash function for better distribution + * Note: PRIMARY KEY must be a prefix of ORDER BY in ClickHouse + */ +export const primaryKeyExpressionTable = + new OlapTable("PrimaryKeyExpressionTest", { + // Primary key uses hash function for better distribution + primaryKeyExpression: "(userId, cityHash64(eventId))", + // Order by must start with the same columns as primary key + orderByExpression: "(userId, cityHash64(eventId), timestamp)", + }); + +/** Test: Primary Key Expression with different column ordering */ +export interface PrimaryKeyOrderingTest { + productId: string; + category: string; + 
brand: string; + timestamp: DateTime; +} + +/** + * Table where primary key order differs from schema order + * Note: ORDER BY must start with PRIMARY KEY columns + */ +export const primaryKeyOrderingTable = new OlapTable( + "PrimaryKeyOrderingTest", + { + // Primary key optimized for uniqueness + primaryKeyExpression: "productId", + // Order by starts with primary key, then adds other columns for query optimization + orderByFields: ["productId", "category", "brand"], + }, +); From 8019462f05c0d2bee7201bd21bd5b8d82e8e79ba Mon Sep 17 00:00:00 2001 From: Olivia Kane Date: Thu, 20 Nov 2025 21:19:49 -0500 Subject: [PATCH 55/59] docs: migrate foundations documentation refactor Refactored migration documentation including: - Reorganized migration lifecycle documentation - Added new pages for auto-inferred migrations and lifecycle modes - Updated planned migrations and CLI documentation - Added plan format reference - Fixed broken links throughout migration docs - Updated navigation structure add copy disable to plan reference draft --- .../debezium-dev-to-prod-outline.mdx | 325 ++++++++++++++ .../migrate/apply-planned-migrations-cli.mdx | 70 ++++ .../apply-planned-migrations-service.mdx | 61 +++ .../moosestack/migrate/auto-inferred.mdx | 108 +++++ .../failed-migrations.mdx} | 23 +- .../content/moosestack/migrate/index.mdx | 395 +++--------------- .../migrate/lifecycle-deletion-protected.mdx | 70 ++++ .../migrate/lifecycle-externally-managed.mdx | 91 ++++ .../migrate/lifecycle-fully-managed.mdx | 70 ++++ .../content/moosestack/migrate/lifecycle.mdx | 232 +++------- .../content/moosestack/migrate/modes.mdx | 63 +++ .../moosestack/migrate/plan-format.mdx | 240 +++++++++++ .../moosestack/migrate/planned-migrations.mdx | 122 ++++++ .../src/config/navigation.ts | 53 ++- 14 files changed, 1396 insertions(+), 527 deletions(-) create mode 100644 apps/framework-docs-v2/content/guides/data-management/change-data-capture/debezium-dev-to-prod-outline.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/migrate/apply-planned-migrations-cli.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/migrate/apply-planned-migrations-service.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/migrate/auto-inferred.mdx rename apps/framework-docs-v2/content/moosestack/{olap/schema-change.mdx => migrate/failed-migrations.mdx} (91%) create mode 100644 apps/framework-docs-v2/content/moosestack/migrate/lifecycle-deletion-protected.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/migrate/lifecycle-externally-managed.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/migrate/lifecycle-fully-managed.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/migrate/modes.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/migrate/plan-format.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/migrate/planned-migrations.mdx diff --git a/apps/framework-docs-v2/content/guides/data-management/change-data-capture/debezium-dev-to-prod-outline.mdx b/apps/framework-docs-v2/content/guides/data-management/change-data-capture/debezium-dev-to-prod-outline.mdx new file mode 100644 index 0000000000..f970dd0f30 --- /dev/null +++ b/apps/framework-docs-v2/content/guides/data-management/change-data-capture/debezium-dev-to-prod-outline.mdx @@ -0,0 +1,325 @@ +--- +title: Stream Data from Postgres with Debezium +description: Learn how to adapt the Debezium CDC template to stream data from your PostgreSQL database to ClickHouse. 
+--- + +# Stream Data from Postgres with Debezium + +This guide shows you how to use the **Debezium CDC Template** with your own application. You will learn how to connect the pipeline to your PostgreSQL database and send your tables to ClickHouse for real-time analytics. + +## Architecture Overview + +At a high level, the pipeline works like this: +```txt +PostgreSQL -> Kafka -> ClickHouse +``` + +* **Debezium** acts as the bridge between PostgreSQL and Kafka. It watches for changes in your database and publishes them to Kafka topics. +* **MooseStack** acts as the bridge between Kafka and ClickHouse. It serves as your "pipeline-as-code" layer where you define your ClickHouse tables, your Kafka streams, and the transformation logic that connects them. + +This template uses two Kafka topics for each table: one for the raw data and one for the clean, processed data. The data flow is as follows: + +1. Change happens in PostgreSQL +2. Debezium publishes the change to Kafka (auto-creating a topic for each table) +3. Raw events are consumed from each Debezium-managed topic and transformed into a format that can be stored in ClickHouse +4. The transformed data is published to a second Moose Stream (the sink stream) +5. Data from the sink stream is synced into your ClickHouse table +6. Rows are deduplicated and versioned in the background in ClickHouse + +## Project Structure + +Here are the key files in the template you should know about: + +``` +cdc-pipeline/ +├── 1-sources/ # Defines Kafka topics from Debezium +├── 2-transforms/ # Sanitizes CDC events & maps to destination +├── 3-destinations/ # Defines ClickHouse tables & streams +docker-compose.dev.override.yaml # Infrastructure (Kafka Connect, Redpanda) +setup-cdc.ts # Script that registers the connector +moose.config.toml # Project config (enables streaming) +``` + +## Step 0: Clone the Template + +Make sure you clone the [Debezium CDC Template](https://github.com/514labs/debezium-cdc) and install the dependencies: + +```bash +git clone https://github.com/514labs/debezium-cdc.git +cd debezium-cdc +pnpm install +``` + +## Step 1: Configure Your Environment + +The template uses environment variables for database passwords and connector settings. + +1. Copy the `.env.example` file: + + ```bash + cp .env.example .env.dev + ``` + +2. Open `.env.dev` and customize the values for your environment. + + **Database Connection:** + Set these to point to your source PostgreSQL database. + ```properties + DB_HOST=your_postgres_host + DB_PORT=your_postgres_port + DB_NAME=your_postgres_db + DB_USER=your_postgres_user + DB_PASSWORD=your_postgres_password + ``` + + **CDC Configuration:** + Choose which tables you want to capture. + ```properties + # List of tables to capture (schema.table), separated by commas + CDC_TABLE_INCLUDE_LIST=public.* + + # Prefix for the Kafka topics (default: pg-cdc) + CDC_TOPIC_PREFIX=pg-cdc + ``` + +## Step 2: Prepare Your Database + +Debezium needs PostgreSQL's logical replication to work. + +1. **Check `wal_level`**: + Run this SQL command on your source database: + ```sql + SHOW wal_level; + ``` + It must be `logical`. If not, update your `postgresql.conf` and restart Postgres. + +2. **Create a Replication User**: + It is best to use a separate user for this. 
Run these commands: + ```sql + CREATE USER cdc_user WITH PASSWORD 'secure_password'; + ALTER USER cdc_user WITH REPLICATION; + GRANT USAGE ON SCHEMA public TO cdc_user; + GRANT SELECT ON ALL TABLES IN SCHEMA public TO cdc_user; + ``` + (Update your `.env.dev` file with this user's details). + +## Step 3: Start the Pipeline + +Start the development environment. The Moose CLI will start the infrastructure and run a script to register the Debezium connector. + +```bash +moose dev +``` + +Check the logs for these messages: +- Infrastructure starting (Redpanda, Kafka Connect, ClickHouse). +- `setup-cdc.ts` running. +- `✅ Connector registered!` + +Note: Moose does not manage Debezium or Kafka Connect by default. However, this template uses `docker-compose.dev.override.yaml` to add them. The example file starts Kafka Connect and includes a test database. If you want to use your own database, comment out the test database in that file and update `.env.dev`. See [Local Development](/moose/local-dev) for more details. + +## Step 4: Customize the Pipelines for Your Application + +The template comes set up for the provided test database. Follow these steps to change it for your own tables. + +> **Note:** These examples use the `customer_addresses` table from the template. Replace `CustomerAddress` with the names of your own tables (like `Users` or `Orders`). + +### 1. Import the Topics +When the connector runs and a change happens, Debezium automatically creates a topic in Redpanda if it hasn't seen an event for that table before. Since Debezium manages these topics, you need to import their definitions into your project: + +```bash +# Pulls topic definitions into cdc-pipeline/1-sources/externalTopics.ts +moose-cli kafka pull localhost:19092 --path cdc-pipeline/1-sources +``` + +### 2. Define Source Schemas +Moose imports the raw data streams as generic objects without types. You need to define what your data looks like so you when you transform the raw events you have complete type safety. + +#### Option A: Using your ORM Models (Recommended) +If you already use an ORM like Drizzle, you can reuse your existing models. + +The template uses Drizzle, and the models are in `postgres/src/schema.ts`. You can export the inferred type in `cdc-pipeline/oltp/schema.ts`: + +```typescript +import { customerAddresses } from "../../postgres/src/schema"; + +// Automatically infers: { id: number, first_name: string, ... } +export type CustomerAddress = typeof customerAddresses.$inferSelect; +``` + +Then, in your pipeline code, import the type and apply it to your stream: +```typescript +import { CustomerAddress } from "../../oltp/schema"; + +export const cdcCustomerAddresses = PgCdcPublicCustomerAddressesStream as Stream< + GenericCDCEvent +>; +``` + +#### Option B: Using Generation Tools +If you don't use an ORM, tools like [kanel](https://github.com/kristiandupont/kanel) or `pg-to-ts` can generate TypeScript interfaces from your database for you. + +```bash +# Example with kanel +npx kanel --connectionString $DATABASE_URL --output ./cdc-pipeline/generated-models +``` + +### 3. Model the Incoming Data (Create a Typed Topic) +This step models the raw data coming from Debezium. These events are complex objects that contain metadata, the "before" state of the row, and the "after" state. + +`GenericCDCEvent` (in `cdc-pipeline/models.ts`) matches this structure. By wrapping the raw topic with this type, your code knows exactly what the data looks like. 
+
+```typescript
+export type GenericCDCEvent<T> = {
+  before: T | null;  // The row before the change (null for inserts)
+  after: T | null;   // The row after the change (null for deletes)
+  source: {          // Debezium metadata
+    lsn: number;     // Log Sequence Number (for ordering)
+    ts_ms: number;   // Timestamp of the change
+    table: string;
+  };
+  op: "c" | "u" | "d" | "r"; // Create, Update, Delete, Read
+  ts_ms: number;
+};
+```
+
+Update `cdc-pipeline/1-sources/typed-topics.ts` to export the typed stream.
+
+**Example:**
+
+```typescript
+import { Stream } from "@514labs/moose-lib";
+import { PgCdcPublicCustomerAddressesStream } from "./externalTopics"; // Generated by kafka pull
+import { GenericCDCEvent } from "../models";
+import { CustomerAddress } from "../../oltp/schema";
+
+export const cdcCustomerAddresses = PgCdcPublicCustomerAddressesStream as Stream<
+  GenericCDCEvent<CustomerAddress>
+>;
+```
+
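+
+Once the stream is typed, downstream code gets full type checking on the CDC payload. For a quick sanity check during development, you can attach a consumer that logs each event (a minimal sketch, assuming the standard `addConsumer` helper on moose-lib streams):
+
+```typescript
+import { cdcCustomerAddresses } from "./typed-topics";
+
+// `event` is typed as GenericCDCEvent<CustomerAddress>, so `before`/`after`
+// are CustomerAddress | null and the Debezium metadata fields are known.
+cdcCustomerAddresses.addConsumer((event) => {
+  console.log(`op=${event.op} table=${event.source.table} lsn=${event.source.lsn}`);
+});
+```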
+✨ **Suggested Copilot Prompt**
+
+You can use this prompt to tell your AI assistant to generate the typed topics for all your tables at once. Open `cdc-pipeline/1-sources/typed-topics.ts` and ask:
+
+> "Import all the raw stream classes from `./externalTopics.ts` and all the OLTP types from `../../oltp/schema.ts`. For each table, export a new const named `cdc<TableName>` (for example, `cdcCustomerAddresses`) that casts the raw stream to `Stream<GenericCDCEvent<RowType>>` for that table's row type. Follow the pattern of the existing exports."
+
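+
+If you ask the assistant to do this for every table, the resulting `typed-topics.ts` is expected to end up as one cast per table, along these lines (`PgCdcPublicOrdersStream` and `Order` are hypothetical stand-ins for your own tables):
+
+```typescript
+import { Stream } from "@514labs/moose-lib";
+import {
+  PgCdcPublicCustomerAddressesStream,
+  PgCdcPublicOrdersStream, // hypothetical second table
+} from "./externalTopics";
+import { GenericCDCEvent } from "../models";
+import { CustomerAddress, Order } from "../../oltp/schema";
+
+export const cdcCustomerAddresses = PgCdcPublicCustomerAddressesStream as Stream<
+  GenericCDCEvent<CustomerAddress>
+>;
+
+export const cdcOrders = PgCdcPublicOrdersStream as Stream<GenericCDCEvent<Order>>;
+```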
    + +### 4. Model the Destination Data (Flatten the Payload) +This step models the clean data that goes into ClickHouse. + +While the incoming data is nested (Step 3), the destination table should look just like your Postgres table. You need to "flatten" the structure so that `after.id` becomes just `id` in ClickHouse. + +You also need to add a few fields (`_is_deleted`, `lsn`, `ts_ms`) to handle updates and deletes correctly. + +Update `cdc-pipeline/3-destinations/olap-tables.ts`: + +```typescript +import { OlapTable, ClickHouseEngines, UInt64, UInt8 } from "@514labs/moose-lib"; +import { CustomerAddress } from "../../oltp/schema"; + +// 1. Define the OLAP Schema +// Take the fields from Postgres and add metadata +export type CdcFields = { + _is_deleted: UInt8; + ts_ms: UInt64; + lsn: UInt64; +}; + +export type OlapCustomerAddress = CustomerAddress & CdcFields; + +// 2. Define the ClickHouse Table +export const olapCustomerAddresses = new OlapTable( + "customer_addresses", + { + engine: ClickHouseEngines.ReplacingMergeTree, + ver: "lsn", + isDeleted: "_is_deleted", + orderByFields: ["id"], + } +); +``` + +You also need a sink stream. This acts as a buffer between your transformation and the final table. + +Update `cdc-pipeline/3-destinations/sink-topics.ts`: + +```typescript +import { Stream } from "@514labs/moose-lib"; +import { OlapCustomerAddress } from "../models"; +import { olapCustomerAddresses } from "./olap-tables"; + +// 3. Define the Destination Stream (The "Processed" Topic) +export const processedCustomerAddresses = new Stream( + "ProcessedCustomerAddresses", + { destination: olapCustomerAddresses } +); +``` + +### 5. Create the Transform +Write the function that maps the Source Stream to the Sink Stream. It cleans the data and converts types where needed. + +Create `cdc-pipeline/2-transforms/customer-addresses.ts`: + +```typescript +import { cdcCustomerAddresses } from "../1-sources/typed-topics"; +import { processedCustomerAddresses } from "../3-destinations/sink-topics"; +import { handleCDCPayload } from "./payload-handler"; // Helper from the template +import { GenericCDCEvent, OlapCustomerAddress } from "../models"; +import { CustomerAddress } from "../../oltp/schema"; + +// Connect Source Stream -> Destination Stream +cdcCustomerAddresses.addTransform( + processedCustomerAddresses, + (message: GenericCDCEvent) => { + // Use the helper function to clean the payload + const result = handleCDCPayload(message); + + // Return the clean data + return result as unknown as OlapCustomerAddress; + } +); +``` + +The `handleCDCPayload` function is a helper included in the template. It handles the logic for cleaning the data and managing deletes. You pass it the type of your source row, and it handles the rest. + +## Verification + +The pipeline is running! Any change in your Postgres `customer_addresses` table will instantly appear in ClickHouse. + +Check it by querying ClickHouse with the Moose CLI: + +```bash +moose query "SELECT * FROM customer_addresses" +``` + +## Advanced: Optimizing for ClickHouse + +The setup above uses your Postgres types directly. To make your analytics faster and cheaper, you should optimize your ClickHouse schema. + +This involves using special column types like: +* **LowCardinality**: For columns with a finite number (10,000 or less) of unique values (e.g. countries, states, etc.). +* **UInt64**: For IDs and timestamps. +* **ClickHouseDefault**: To handle empty (null) values efficiently. 
+ +Here is a preview of what an optimized schema looks like: + +```typescript +export type OlapCustomerAddress = Omit< + CustomerAddress, + "id" | "country" | "state" | "work_address" +> & + CdcFields & { + // Optimized types + id: UInt64; + country: string & LowCardinality; + state: string & LowCardinality; + work_address: string & ClickHouseDefault<"''">; + }; +``` + +For a full guide on how to optimize your tables, see [Optimizing ClickHouse Schemas](/guides/clickhouse-optimization). + +## Next Steps: Transitioning to Production diff --git a/apps/framework-docs-v2/content/moosestack/migrate/apply-planned-migrations-cli.mdx b/apps/framework-docs-v2/content/moosestack/migrate/apply-planned-migrations-cli.mdx new file mode 100644 index 0000000000..05bc3fb0d9 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/migrate/apply-planned-migrations-cli.mdx @@ -0,0 +1,70 @@ +--- +title: Serverless (moose migrate) +description: Reference documentation for the manual migration CLI command used in serverless deployments. +order: 7 +category: olap +--- + +import { Callout } from "@/components/mdx"; + +# Serverless (moose migrate) + +The **Serverless** deployment model relies on the `moose migrate` CLI command to execute planned schema changes. This gives you explicit control over when migrations run, which is essential for architectures where Moose is integrated as a library rather than a standalone server. + +## Overview + +In serverless or library-based deployments, Moose does not control the application runtime. Therefore, migrations must be triggered externally, typically as a step in a CI/CD pipeline or a manual administrative action. + +| Feature | Description | +| :--- | :--- | +| **Manual Control** | Migrations run only when you explicitly execute the command. | +| **CI/CD Integration** | Designed to run as a discrete step in deployment pipelines (e.g., GitHub Actions). | +| **Drift Protection** | Validates `remote_state.json` against the target database before execution. | +| **Direct Connection** | Connects directly to ClickHouse using a connection string. | + +## Command Reference + +```bash +moose migrate --clickhouse-url +``` + +### Options + +| Option | Description | Required | +| :--- | :--- | :--- | +| `--clickhouse-url` | The full connection string to the target ClickHouse database (e.g., `clickhouse://user:pass@host:9440/db`). | Yes | + +## Execution Lifecycle + +When `moose migrate` is executed: + +1. **Load Plan:** Reads `migrations/plan.yaml` from the current directory. +2. **Check Database Drift:** Connects to the provided ClickHouse URL and compares the current schema against `remote_state.json`. +3. **Abort on Drift:** If the database state does not match the snapshot, the process exits with an error code. +4. **Execute Migration:** Applies the operations defined in `plan.yaml` sequentially. +5. **Report Success:** Exits with code 0 if all operations succeed. + +## Failure Modes + +| Condition | Outcome | Resolution | +| :--- | :--- | :--- | +| **Drift Detected** | Command fails (exit code 1). | Regenerate the plan against the current production DB and retry. | +| **Connection Error** | Command fails (exit code 1). | Check network connectivity and credentials in the connection string. | +| **SQL Error** | Command fails (exit code 1). | Fix the problematic operation in `plan.yaml` or the database state and retry. | + +## CI/CD Example + +This command is typically used in a deployment pipeline before updating the application code. 
+ +```yaml +# Example GitHub Actions step +- name: Apply Migrations + run: moose migrate --clickhouse-url "$CLICKHOUSE_URL" + env: + CLICKHOUSE_URL: ${{ secrets.CLICKHOUSE_URL }} +``` + +## See Also + +- [Planned Migrations](/moosestack/migrate/planned-migrations) - Generating the plan files. +- [Server Runtime](/moosestack/migrate/apply-planned-migrations-service) - The automatic alternative for full server deployments. diff --git a/apps/framework-docs-v2/content/moosestack/migrate/apply-planned-migrations-service.mdx b/apps/framework-docs-v2/content/moosestack/migrate/apply-planned-migrations-service.mdx new file mode 100644 index 0000000000..37a6c4a7d5 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/migrate/apply-planned-migrations-service.mdx @@ -0,0 +1,61 @@ +--- +title: Server Runtime +description: Reference documentation for automatic migration execution in the Moose server runtime. +order: 8 +category: olap +--- + +import { Callout } from "@/components/mdx"; + +# Server Runtime + +The **Server Runtime** deployment model (invoked via `moose prod`) includes an automatic migration runner that executes planned changes during the application boot sequence. + +## Overview + +When running as a full server, Moose orchestrates the entire lifecycle of your data stack. Migrations are treated as a prerequisite for starting the application: the server will not accept traffic or process data until the database schema matches the code definition. + +| Feature | Description | +| :--- | :--- | +| **Automatic Execution** | Migrations run automatically when the `moose prod` command starts. | +| **Drift Protection** | The server validates the database state against `remote_state.json` before applying changes. | +| **Code Validation** | The server ensures the deployed code matches `local_infra_map.json` to prevent mismatches. | +| **Zero-Touch** | No separate CLI commands or CI/CD steps are required to apply migrations. | + +## Command Reference + +The migration logic is embedded within the production server start command. + +```bash +moose prod +``` + +**Environment Variables:** +The server requires access to the ClickHouse database, typically configured via `moose.config.toml` or environment variables overridden at runtime. + +## Execution Lifecycle + +When `moose prod` starts, it performs the following sequence: + +1. **Load Plan:** Reads `migrations/plan.yaml` from the deployment artifact. +2. **Check Code Consistency:** Verifies that the running application code matches `local_infra_map.json`. If not, it aborts to prevent deploying code that doesn't match the plan. +3. **Check Database Drift:** Connects to ClickHouse and compares the current schema against `remote_state.json`. If drift is detected, it aborts. +4. **Execute Migration:** Applies the operations defined in `plan.yaml`. +5. **Start Services:** Once migrations succeed, the Ingestion API, Consumption API, and Streaming workers are started. + +## Failure Modes + +| Condition | Outcome | Resolution | +| :--- | :--- | :--- | +| **Drift Detected** | Server fails to start. | Regenerate the plan against the current production DB and redeploy. | +| **Plan Mismatch** | Server fails to start. | Ensure the `migrations/` directory matches the code in your deployment artifact. | +| **Migration Error** | Server fails to start. | Fix the schema issue or plan file, then redeploy. | + + +If you are using Boreal Hosting, this process is handled automatically. 
The platform ensures that your application only becomes healthy once `moose prod` has successfully completed the migration phase. + + +## See Also + +- [Planned Migrations](/moosestack/migrate/planned-migrations) - Generating the plan files. +- [Serverless (moose migrate)](/moosestack/migrate/apply-planned-migrations-cli) - The manual alternative for serverless deployments. diff --git a/apps/framework-docs-v2/content/moosestack/migrate/auto-inferred.mdx b/apps/framework-docs-v2/content/moosestack/migrate/auto-inferred.mdx new file mode 100644 index 0000000000..a97ae10c4c --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/migrate/auto-inferred.mdx @@ -0,0 +1,108 @@ +--- +title: Auto-Inferred Migrations +description: Reference documentation for Moose's auto-inferred migration system used during local development. +order: 3 +category: olap +--- + +import { Callout } from "@/components/mdx"; + +# Auto-Inferred Migrations + +**Auto-inferred migrations** are a schema evolution mechanism in Moose that automatically detects changes in your data models and applies them to the underlying database in real-time. This system is designed primarily for local development. + +## Command + +Auto-inferred migrations are enabled implicitly when running the development server: + +```bash +moose dev +``` + +## Production Usage + +While technically possible to use auto-inferred migrations in production environments, it is **strongly discouraged**. + + + Auto-inferred migrations will immediately drop columns containing data if a field is renamed or removed in the code. In production, this leads to irreversible data loss. + + +Production deployments should always use [Planned Migrations](/moosestack/migrate/planned-migrations) to ensure schema changes are reviewed, tested, and safe. + +## Behavior + +When active, the auto-inference engine performs the following cycle: + +1. **Monitor:** Watches the file system for changes in exported table definitions in your data model files. +2. **Diff:** Compares the code-defined schema against the actual schema of the running ClickHouse instance. +3. **Generate:** Creates the necessary SQL DDL statements to reconcile the difference. +4. **Apply:** Executes the SQL statements immediately against the database. + +## Operation Reference + +The following table describes how code changes are translated into database operations by the auto-inference engine. + +| Code Change | Database Operation | SQL Equivalent (Approximate) | Data Impact | +| :--- | :--- | :--- | :--- | +| **New Table** | Create Table | `CREATE TABLE ...` | Safe | +| **Add Field** | Add Column | `ALTER TABLE ... ADD COLUMN ...` | Safe | +| **Remove Field** | Drop Column | `ALTER TABLE ... DROP COLUMN ...` | **Destructive** (Data Loss) | +| **Change Field Type** | Modify Column | `ALTER TABLE ... MODIFY COLUMN ...` | Potentially Destructive (Cast dependent) | +| **Rename Field** | Drop + Add | `DROP COLUMN old`; `ADD COLUMN new` | **Destructive** (Data Loss - see limitations) | +| **Remove Table** | Drop Table | `DROP TABLE ...` | **Destructive** | + +## Limitations and Safety + +### Renaming Fields + +The auto-inference engine is stateless regarding user intent. It cannot distinguish between **renaming** a field and **deleting one field to add another**. + +If you rename `user_id` to `uid`: +1. Moose sees `user_id` is missing from the code -> Generates `DROP COLUMN user_id`. +2. Moose sees `uid` is new in the code -> Generates `ADD COLUMN uid`. + +**Result:** The column is dropped and re-added empty. 
Data in the original column is lost immediately. + + + To rename columns without data loss, you must use [Planned Migrations](/moosestack/migrate/planned-migrations) and manually adjust the migration plan to use a `rename` operation instead of `drop` + `add`. + + +### Destructive Operations + +Auto-inferred migrations do not prompt for confirmation before dropping tables or columns. If you comment out a table export or remove a field definition, the corresponding data structure in the database is removed immediately. + +## Configuration + +Auto-inferred migrations rely on the `olap` feature flag in your project configuration. + +```toml filename="moose.config.toml" +[features] +olap = true # enabled by default +``` + +## CLI Output Reference + +The CLI communicates migration actions via standard output prefixes in the terminal. + +| Symbol | Meaning | Description | +| :--- | :--- | :--- | +| `+` | Add | Creating a new table or adding a column. | +| `-` | Remove | Dropping a table or removing a column. | +| `~` | Modify | Changing a column's data type or properties. | + +### Example Output + +```text +⢹ Processing Infrastructure changes from file watcher + ~ Table page_views: + Column changes: + + user_agent: String + - referrer: String + ~ timestamp: DateTime -> DateTime64(3) +``` + +## See Also + +- [Planned Migrations](/moosestack/migrate/planned-migrations) - The reference for production-grade migration workflows. +- [Schema Change Reference](/moosestack/migrate/reference) - Detailed breakdown of migration plan objects. +- [Serverless (moose migrate)](/moosestack/migrate/apply-planned-migrations-cli) - Commands for managing migrations manually. diff --git a/apps/framework-docs-v2/content/moosestack/olap/schema-change.mdx b/apps/framework-docs-v2/content/moosestack/migrate/failed-migrations.mdx similarity index 91% rename from apps/framework-docs-v2/content/moosestack/olap/schema-change.mdx rename to apps/framework-docs-v2/content/moosestack/migrate/failed-migrations.mdx index 1e7e359634..9645df8304 100644 --- a/apps/framework-docs-v2/content/moosestack/olap/schema-change.mdx +++ b/apps/framework-docs-v2/content/moosestack/migrate/failed-migrations.mdx @@ -1,7 +1,11 @@ --- -title: Handling Failed Migrations +title: Failed Migrations description: Recover from failed migrations and safely achieve desired type changes +<<<<<<<< HEAD:apps/framework-docs-v2/content/moosestack/olap/schema-change.mdx order: 13 +======== +order: 4 +>>>>>>>> 49c5725d4 (docs: migrate foundations documentation refactor):apps/framework-docs-v2/content/moosestack/migrate/failed-migrations.mdx category: olap --- @@ -46,25 +50,20 @@ Copy the mutation ID from the terminal logs and run the following command to kil ### Kill the mutation - If you have the `mutation_id`: -```sql filename="ClickHouse" copy -KILL MUTATION WHERE mutation_id = ''; +```bash filename="Terminal" copy +moose query "KILL MUTATION WHERE mutation_id = '';" ``` - If you didn't capture the ID, find it and kill by table: -```sql filename="ClickHouse" copy -SELECT mutation_id, command, is_done, latest_fail_reason -FROM system.mutations -WHERE database = currentDatabase() AND table = '' -ORDER BY create_time DESC; - -KILL MUTATION WHERE database = currentDatabase() AND table = ''; +```bash filename="Terminal" copy +moose query "SELECT mutation_id, command, is_done, latest_fail_reason FROM system.mutations WHERE database = currentDatabase() AND table = '' ORDER BY create_time DESC;" ``` ClickHouse ALTERs are implemented as asynchronous mutations, not 
transactional. If a mutation fails mid-way, some parts may have been rewritten while others were not, leaving the table partially transformed. The failed mutation also remains queued until you kill it. Clear the mutation first, then proceed. - +{/* Soon, Moose will automatically generate a local DDL plan that kills the mutation and "rolls back" the transformation to the data that was changed before the failure occurred. - + */} ### Revert your code to match the current DB schema diff --git a/apps/framework-docs-v2/content/moosestack/migrate/index.mdx b/apps/framework-docs-v2/content/moosestack/migrate/index.mdx index bb2fbc6034..f8e7190ddc 100644 --- a/apps/framework-docs-v2/content/moosestack/migrate/index.mdx +++ b/apps/framework-docs-v2/content/moosestack/migrate/index.mdx @@ -1,376 +1,117 @@ --- -title: Migrations & Planning -description: How Moose handles infrastructure migrations and planning +title: Migrations +description: Understanding how Moose manages database schema changes through code order: 0 category: migrate --- -import { LanguageTabs, LanguageTabContent, Callout } from "@/components/mdx"; +import { Callout } from "@/components/mdx"; -# Moose Migrate +# Migrations -Moose's migration system works like version control for your infrastructure. It automatically detects changes in your code and applies them to your data infrastructure with confidence. +Migrations synchronize your code-defined database schema with your production infrastructure. As your application evolves, you'll add/remove fields, change data types, and restructure tables. Moose Migrate handles these schema changes safely and reliably. - -Moose tracks changes across: -- OLAP Tables and Materialized Views -- Streaming Topics -- API Endpoints -- Workflows - +## How Migrations Work +Moose Migrate operates by comparing two states: -## How It Works +1. **Your code** - Tables and streams defined in your application +2. **Your database** - The actual schema in ClickHouse, Kafka, or Redpanda -Moose collects all objects defined in your main file (`index.ts` for TypeScript or `main.py` for Python) and automatically generates infrastructure operations to match your code: +When these states differ, Moose Migrate generates operations to bring them into alignment. These operations might include: - - +- Adding or dropping tables +- Adding, removing, or renaming columns +- Changing data types +- Creating or modifying streaming topics -```ts file="app/index.ts" -interface UserSchema { - id: string; - name: string; - email: string; -} +The full list of operations is available in the [Migration Plan Format](/moosestack/migrate/plan-format) documentation. -export const usersTable = new OlapTable("Users"); -export const userEvents = new Stream("Users"); -``` +## Core Concepts - - +Migrations in Moose revolve around three key decisions: -```python file="app/main.py" -from pydantic import BaseModel -from moose_lib import OlapTable, Stream +| Concept | What it is | Where to go | +| :--- | :--- | :--- | +| [Lifecycle Management](#lifecycle-management) | Controlling *what* changes are allowed (e.g., preventing data deletion). | [Overview](/moosestack/migrate/lifecycle) • [Fully Managed](/moosestack/migrate/lifecycle-fully-managed) • [Deletion Protected](/moosestack/migrate/lifecycle-deletion-protected) • [Externally Managed](/moosestack/migrate/lifecycle-externally-managed) | +| [Generating Migrations](#generating-migrations) | Deciding *how* changes are generated. 
| [Overview](/moosestack/migrate/modes) • [Auto-Inferred](/moosestack/migrate/auto-inferred) • [Planned](/moosestack/migrate/planned-migrations) | +| [Applying Changes](#applying-changes-to-production) | The workflow for executing migrations: serverless (`moose migrate`) vs server runtime (`moose prod`). | [Serverless](/moosestack/migrate/apply-planned-migrations-cli) • [Server Runtime](/moosestack/migrate/apply-planned-migrations-service) | -class UserSchema(BaseModel): - id: str - name: str - email: str -users_table = OlapTable[UserSchema]("Users") -user_events = Stream[UserSchema]("Users") -``` +### Lifecycle Management - - - -When you add these objects, Moose automatically creates: -- A ClickHouse table named `Users` with the `UserSchema` -- A Redpanda topic named `Users` with the `UserSchema` - -## Development Workflow - -When running your code in development mode, Moose will automatically hot-reload migrations to your local infrastructure as you save code changes. - -### Quick Start - -Start your development environment: - -```bash filename="Terminal" copy -moose dev -``` - -This automatically: -1. Recursively watches your `/app` directory for code changes -2. Parses objects defined in your main file -3. Compares the new objects with the current infrastructure state Moose stores internally -4. Generates and applies migrations in real-time based on the differences -5. Provides immediate feedback on any errors or warnings -6. Updates the internal state of your infrastructure to reflect the new state - -### Example: Adding a New Table +For each table and stream resource defined in your code, you can control *what* changes are allowed (e.g., preventing data deletion, ignoring schema changes, etc.) with the `LifeCycle` configuration property: +```ts +import { OlapTable, LifeCycle } from "@514labs/moose-lib"; -```ts file="app/index.ts" {6} copy -// Before -export const usersTable = new OlapTable("Users"); - -// After (add analytics table) -export const usersTable = new OlapTable("Users"); -export const analyticsTable = new OlapTable("Analytics"); -``` - - - - -```python file="app/main.py" {6} copy -# Before -users_table = OlapTable[UserSchema]("Users") - -# After (add analytics table) -users_table = OlapTable[UserSchema]("Users") -analytics_table = OlapTable[AnalyticsSchema]("Analytics") -``` - - - - -**What happens:** -- Moose detects the new `analyticsTable` object -- Compares: "No Analytics table exists" -- Generates migration: "Create Analytics table" -- Applies migration automatically -- Updates internal state - -In your terminal, you will see a log that shows the new table being created: -```bash -⠋ Processing Infrastructure changes from file watcher - + Table: Analytics Version None - id: String, number: Int64, status: String - - deduplicate: false -``` -### Example: Schema Changes - - - - -```ts file="app/index.ts" {8} copy -import { Key } from "@514labs/moose-lib"; - -// After (add age field) -interface UserSchema { - id: Key; +interface Schema { + id: string; name: string; - email: string; - age: number; // New field + age: number; } + +const table = new OlapTable("table_name", { + lifeCycle: LifeCycle.FULLY_MANAGED + orderByFields: ["id"] +}); ``` +```py +from moose_lib import OlapTable, LifeCycle, OlapConfig +from pydantic import BaseModel -```python file="app/main.py" {8} copy -from moose_lib import Key +class Schema(BaseModel): + id: str + name: str + age: int -# After (add age field) -class UserSchema(BaseModel): - id: Key[str] - name: str - email: str - age: int # New field 
+table = OlapTable[Schema]("table_name", OlapConfig( + life_cycle=LifeCycle.FULLY_MANAGED + order_by_fields=["id"] +)) ``` - -**What happens:** -- Moose detects the new `age` field -- Generates migration: "Add age column to Users table" -- Applies migration -- Existing rows get NULL/default values - -## Production Workflow +| Option | Behavior | Use When | +| :--- | :--- | :--- | +| [`FULLY_MANAGED`](/moosestack/migrate/lifecycle-fully-managed) (default) | Automatically modifies resources to match your code, including destructive operations. | When you're developing new tables that you want your application to manage and evolve over time. | +| [`DELETION_PROTECTED`](/moosestack/migrate/lifecycle-deletion-protected) | Automatically modifies resources to match your code, but blocks destructive operations (drops, deletions). | When you want to protect critical production tables from accidental data loss. | +| [`EXTERNALLY_MANAGED`](/moosestack/migrate/lifecycle-externally-managed) | Does not modify resources. You manage the schema manually directly in your database. | When you have existing tables that you want to manage outside of your application, or if you're using a Managed CDC service like [ClickPipes](https://clickhouse.com/cloud/clickpipes) or [PeerDB](https://peerdb.io) to manage your schema. | -Moose supports two deployment patterns: **Moose Server** and **Serverless**. - -### Moose Server Deployments - -For deployments with a running Moose server, preview changes before applying: - -```bash filename="Terminal" copy -moose plan --url https://your-production-instance --token -``` - - -Remote planning requires authentication: - -1. Generate a token: `moose generate hash-token` -2. Configure your server: -```toml filename="moose.config.toml" copy -[authentication] -admin_api_key = "your-hashed-token" -``` -3. Use the token with `--token` flag - - -**Deployment Flow:** -1. **Develop locally** with `moose dev` -2. **Test changes** in local environment -3. **Plan against production**: `moose plan --url --token ` -4. **Review changes** carefully -5. **Deploy** - Moose applies migrations automatically on startup - -### Serverless Deployments - -For serverless deployments (no Moose server), use the ClickHouse connection directly: - -```bash filename="Terminal" copy -# Step 1: Generate migration files -moose generate migration --clickhouse-url --save - -# Step 2: Preview changes in PR -moose plan --clickhouse-url clickhouse://user:pass@host:port/database - -# Step 3: Execute migration after merge -moose migrate --clickhouse-url -``` - -**Deployment Flow:** -1. **Develop locally** with `moose dev` -2. **Generate migration plan**: `moose generate migration --clickhouse-url --save` -3. **Create PR** with `plan.yaml`, `remote_state.json`, `local_infra_map.json` -4. **PR validation**: Run `moose plan --clickhouse-url ` in CI to preview changes -5. **Review** migration files and plan output -6. **Merge PR** -7. **Execute migration**: Run `moose migrate --clickhouse-url ` in CI/CD - - -Requires `state_config.storage = "clickhouse"` in `moose.config.toml`: -```toml filename="moose.config.toml" copy -[state_config] -storage = "clickhouse" - -[features] -olap = true -data_models_v2 = true -``` - - - -Your ClickHouse instance needs the KeeperMap engine for state storage and migration locking. 
- -✅ **ClickHouse Cloud**: Works out of the box -✅ **`moose dev` or `moose prod`**: Already configured -⚠️ **Self-hosted ClickHouse**: See [ClickHouse KeeperMap documentation](https://clickhouse.com/docs/en/engines/table-engines/special/keeper-map) for setup requirements - - -### State Storage Options - -Moose migrations require storing infrastructure state and coordinating locks. You can choose between two backends: - -**ClickHouse State Storage (Default)** -Uses the `_MOOSE_STATE` KeeperMap table. Best for: -- ClickHouse Cloud (works out of the box) -- Self-hosted with ClickHouse Keeper already configured - -**Redis State Storage** -Uses Redis for state and locking. Best for: -- Existing Redis infrastructure -- Multi-tenant deployments (isolated by `key_prefix`) -- When ClickHouse Keeper isn't available - -**Configuration:** -```toml filename="moose.config.toml" copy -[state_config] -storage = "redis" # or "clickhouse" (default) -``` - -**Usage with Redis:** -```bash filename="Terminal" copy -# With environment variable (recommended) -export MOOSE_REDIS_CONFIG__URL="redis://host:port" -moose migrate --clickhouse-url clickhouse://... - -# Or with CLI flag -moose migrate \ - --clickhouse-url clickhouse://... \ - --redis-url redis://host:port -``` - - -The ClickHouse URL is always required, even when using Redis for state storage. + +You configure lifecycle modes individually on each `OlapTable` and `Stream` object. This allows you to mix fully managed development tables with deletion-protected production tables and externally managed legacy resources in the same application. -### Understanding Plan Output - -Moose shows exactly what will change: - -```bash - + Table: Analytics Version None - id: String, number: Int64, status: String - - deduplicate: false - + Table: Users Version None - id: String, name: String, email: String - - deduplicate: false -``` - -## Migration Types - -| Change Type | Infrastructure Impact | Data Impact | -|-------------|----------------------|-------------| -| **Add new object** | New table/stream/API created | No impact | -| **Remove object** | Table/stream/API dropped | All data lost | -| **Add field** | New column created | Existing rows get NULL/default | -| **Remove field** | Column dropped | Data permanently lost | -| **Change type** | Column altered | Data converted if compatible | +[Compare Lifecycle Management Modes →](/moosestack/migrate/lifecycle) -For detailed examples of each migration type, see [Migration Types](/moosestack/migrate/migration-types). +### Generating Migrations -## Viewing Infrastructure State +Moose Migrate provides two complementary ways to generate migrations. Each is designed for use in different stages of the application lifecycle, and it's best practice to use both in your workflow: -### Via CLI -```bash -# Check current infrastructure objects -moose ls +| Option | Behavior | Use Case | +| :--- | :--- | :--- | +| [Auto-Inferred](/moosestack/migrate/auto-inferred) | Updates database instantly on file save. Fast iteration, but can be destructive. | Local development, fast prototyping | +| [Planned](/moosestack/migrate/planned-migrations) | Generates reviewable plan files. Safe, deterministic, with drift detection. 
| Production deployment, CI/CD | -# View migration logs -moose logs -``` - -### Via Direct Connection - -Connect to your local infrastructure using details from `moose.config.toml`: - -```toml file="moose.config.toml" -[features] -olap = true # ClickHouse for analytics -streaming_engine = true # Redpanda for streaming -workflows = false # Temporal for workflows - -[clickhouse_config] -host = "localhost" -host_port = 18123 -native_port = 9000 -db_name = "local" -user = "panda" -password = "pandapass" - -[redpanda_config] -broker = "localhost:19092" -message_timeout_ms = 1000 -retention_ms = 30000 -replication_factor = 1 -``` -## Best Practices +[Compare Migration Modes →](/moosestack/migrate/modes) -### Development -- Use `moose dev` for all local development -- Monitor plan outputs for warnings -- Test schema changes with sample data - -### Production -- Always use remote planning before deployments -- Review changes carefully in production plans -- Maintain proper authentication -- Test migrations in staging first - -### Managing TTL Outside Moose - -If you're managing ClickHouse TTL settings through other tools or want to avoid migration failures from TTL drift, you can configure Moose to ignore TTL changes: - -```toml filename="moose.config.toml" copy -[migration_config] -ignore_operations = ["ModifyTableTtl", "ModifyColumnTtl"] -``` +### Applying Changes -This tells Moose to: -- Skip generating TTL change operations in migration plans -- Ignore TTL differences during drift detection +You have two options for executing planned migrations against your production database: -You'll still get migrations for all other schema changes (adding tables, modifying columns, etc.), but TTL changes won't block your deployments. +| Mode | Behavior | Use Case | +| :--- | :--- | :--- | +| [Serverless (`moose migrate`)](/moosestack/migrate/apply-planned-migrations-cli) | You run migrations manually or via CI/CD. | Integrating Moose OLAP into an existing application as a library. | +| [Server Runtime (`moose prod`)](/moosestack/migrate/apply-planned-migrations-service) | Migrations are automatically run within the Moose Runtime on server startup. | Building a dedicated analytics service with the full Moose Runtime. | -## Troubleshooting -### Authentication Errors -- Verify your authentication token -- Generate a new token: `moose generate hash-token` -- Check server configuration in `moose.config.toml` +## Advanced Topics -### Migration Issues -- Check `moose logs` for detailed error messages -- Verify object definitions in your main file -- Ensure all required fields are properly typed -- **Stuck migration lock**: If you see "Migration already in progress" but no migration is running, wait 5 minutes for automatic expiry or manually clear it: - ```sql copy=false - DELETE FROM _MOOSE_STATE WHERE key = 'migration_lock'; - ``` +- [Failed Migrations](/moosestack/migrate/failed-migrations) - Recover from migration errors diff --git a/apps/framework-docs-v2/content/moosestack/migrate/lifecycle-deletion-protected.mdx b/apps/framework-docs-v2/content/moosestack/migrate/lifecycle-deletion-protected.mdx new file mode 100644 index 0000000000..ac66cdd11f --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/migrate/lifecycle-deletion-protected.mdx @@ -0,0 +1,70 @@ +--- +title: Deletion Protected Lifecycle +description: Safe lifecycle mode that allows additive changes but prevents destructive operations like drops. 
+order: 22 +category: migrate +--- + +import { LanguageTabs, LanguageTabContent } from "@/components/mdx"; + +# Deletion Protected Lifecycle + +`LifeCycle.DELETION_PROTECTED` allows Moose to automatically add new database structures but prevents it from removing existing ones. This mode is perfect for production environments where you want to evolve your schema safely without risking data loss. + +## Behavior + +**What Moose will do:** +- Add new columns, tables +- Modify column types (if compatible) +- Update non-destructive configurations + +**What Moose won't do:** +- Drop columns or tables +- Perform destructive schema changes + +## Examples + + + + +```ts filename="DeletionProtectedExample.ts" copy +import { OlapTable, LifeCycle, ClickHouseEngines } from "@514labs/moose-lib"; + +interface ProductEvent { + id: string; + productId: string; + timestamp: Date; + action: string; +} + +const productAnalytics = new OlapTable("product_analytics", { + orderByFields: ["timestamp", "productId"], + engine: ClickHouseEngines.ReplacingMergeTree, + lifeCycle: LifeCycle.DELETION_PROTECTED +}); +``` + + + + +```py filename="DeletionProtectedExample.py" copy +from moose_lib import OlapTable, OlapConfig, LifeCycle, ClickHouseEngines +from pydantic import BaseModel +from datetime import datetime + +class ProductEvent(BaseModel): + id: str + product_id: str + timestamp: datetime + action: str + +product_analytics = OlapTable[ProductEvent]("product_analytics", OlapConfig( + order_by_fields=["timestamp", "product_id"], + engine=ClickHouseEngines.ReplacingMergeTree, + life_cycle=LifeCycle.DELETION_PROTECTED +)) +``` + + + + diff --git a/apps/framework-docs-v2/content/moosestack/migrate/lifecycle-externally-managed.mdx b/apps/framework-docs-v2/content/moosestack/migrate/lifecycle-externally-managed.mdx new file mode 100644 index 0000000000..881c68c24c --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/migrate/lifecycle-externally-managed.mdx @@ -0,0 +1,91 @@ +--- +title: Externally Managed Lifecycle +description: Configure Moose to interact with existing, externally managed database schemas. +order: 23 +category: migrate +--- + +import { Callout, LanguageTabs, LanguageTabContent } from "@/components/mdx"; + +# Externally Managed Lifecycle + +`LifeCycle.EXTERNALLY_MANAGED` tells Moose to interact with existing resources without managing their schema or lifecycle. In this mode, you are fully responsible for creating and maintaining the database schema outside the context of your code. + +## Use Cases + +- **Existing Infrastructure**: You're connecting to tables managed by another team or process. +- **Integrations**: You're integrating with external systems like PeerDB or CDC tools that manage the schema. +- **Strict Governance**: Your organization requires strict, manual database change management. + +## Syncing Schema + +Because Moose doesn't manage the schema, your code definitions must match the database exactly. Mismatches can cause runtime errors. 
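+
+For illustration, here is a minimal sketch of what "matching exactly" means in practice. The `orders` table and its columns below are hypothetical; substitute whatever schema actually exists in your database:
+
+```ts filename="SyncingSchemaSketch.ts" copy
+import { OlapTable, LifeCycle } from "@514labs/moose-lib";
+
+// Hypothetical table that already exists in ClickHouse:
+//   orders(order_id String, amount Float64, placed_at DateTime)
+interface Order {
+  order_id: string; // String column
+  amount: number;   // Float64 column
+  placed_at: Date;  // DateTime column
+}
+
+// Moose reads from and writes to this table, but never alters it.
+// If a field here drifts from the real column type (for example,
+// declaring placed_at as a string), queries and inserts can fail at runtime.
+const ordersTable = new OlapTable<Order>("orders", {
+  lifeCycle: LifeCycle.EXTERNALLY_MANAGED
+});
+```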
+ +Use `moose db pull` to generate Moose models from your remote database: + +```bash +moose db pull --clickhouse-url +``` + +## Configuration + + + + +```ts title="ExternallyManagedExample.ts" +import { Stream, OlapTable, LifeCycle, Key } from "@514labs/moose-lib"; + +interface ExternalUserData { + userId: Key; + fullName: string; + emailAddress: string; + createdAt: Date; +} + +// Connect to existing database table +const legacyUserTable = new OlapTable("legacy_users", { + lifeCycle: LifeCycle.EXTERNALLY_MANAGED +}); + +// Connect to existing Kafka topic +const legacyStream = new Stream("legacy_user_stream", { + lifeCycle: LifeCycle.EXTERNALLY_MANAGED, + destination: legacyUserTable +}); +``` + + + + +```py filename="ExternallyManagedExample.py" copy +from moose_lib import Stream, OlapTable, OlapConfig, StreamConfig, LifeCycle, Key +from pydantic import BaseModel +from datetime import datetime + +class ExternalUserData(BaseModel): + user_id: Key[str] + full_name: str + email_address: str + created_at: datetime + +# Connect to existing database table +legacy_user_table = OlapTable[ExternalUserData]("legacy_users", OlapConfig( + life_cycle=LifeCycle.EXTERNALLY_MANAGED +)) + +# Connect to existing Kafka topic +legacy_stream = Stream[ExternalUserData]("legacy_user_stream", StreamConfig( + life_cycle=LifeCycle.EXTERNALLY_MANAGED, + destination=legacy_user_table +)) +``` + + + + + +`moose dev` **WILL CREATE** `EXTERNALLY_MANAGED` tables in your local ClickHouse instance to enable development of queries and views against your schema. + +- **Local Updates**: Schema changes in code **WILL** update your local database. +- **No Remote Impact**: These changes are **NEVER** applied to the remote database. + diff --git a/apps/framework-docs-v2/content/moosestack/migrate/lifecycle-fully-managed.mdx b/apps/framework-docs-v2/content/moosestack/migrate/lifecycle-fully-managed.mdx new file mode 100644 index 0000000000..48114fa432 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/migrate/lifecycle-fully-managed.mdx @@ -0,0 +1,70 @@ +--- +title: Fully Managed Lifecycle +description: Default lifecycle mode where Moose automatically manages all schema changes, including destructive ones. +order: 21 +category: migrate +--- + +import { Callout, LanguageTabs, LanguageTabContent } from "@/components/mdx"; + +# Fully Managed Lifecycle + +`LifeCycle.FULLY_MANAGED` is the default behavior where Moose has complete control over your database resources. When you change your data models, Moose will automatically: + +- Add new columns or tables +- Remove columns or tables that no longer exist in your code +- Modify existing column types and constraints + + +This mode can perform destructive operations. Data may be lost if you remove fields from your data models or if you perform operations that require a destroy and recreate to be effective, like changing the `order_by_fields` (Python) or `orderByFields` (TypeScript) field. 
+ + +## Examples + + + + +```ts filename="FullyManagedExample.ts" copy +import { OlapTable, LifeCycle } from "@514labs/moose-lib"; + +interface UserData { + id: string; + name: string; + email: string; +} + +// Default behavior - fully managed +const userTable = new OlapTable("users"); + +// Explicit fully managed configuration +const explicitTable = new OlapTable("users", { + orderByFields: ["id"], + lifeCycle: LifeCycle.FULLY_MANAGED +}); +``` + + + + +```py filename="FullyManagedExample.py" copy +from moose_lib import OlapTable, OlapConfig, LifeCycle +from pydantic import BaseModel + +class UserData(BaseModel): + id: str + name: str + email: str + +# Default behavior - fully managed +user_table = OlapTable[UserData]("users") + +# Explicit fully managed configuration +explicit_table = OlapTable[UserData]("users", OlapConfig( + order_by_fields=["id"], + life_cycle=LifeCycle.FULLY_MANAGED +)) +``` + + + + diff --git a/apps/framework-docs-v2/content/moosestack/migrate/lifecycle.mdx b/apps/framework-docs-v2/content/moosestack/migrate/lifecycle.mdx index fbc537281a..fd15d50807 100644 --- a/apps/framework-docs-v2/content/moosestack/migrate/lifecycle.mdx +++ b/apps/framework-docs-v2/content/moosestack/migrate/lifecycle.mdx @@ -1,75 +1,37 @@ --- title: Lifecycle Management -description: Control how Moose manages database and streaming resources when your code changes +description: Reference documentation for the LifeCycle enum configuration property on OlapTable and Stream objects. order: 2 category: migrate --- -import { Callout, BulletPointsCard, LanguageTabs, LanguageTabContent } from "@/components/mdx"; +import { BulletPointsCard, LanguageTabs, LanguageTabContent } from "@/components/mdx"; # Lifecycle Management - - - - - -## Overview +The `LifeCycle` enum is a configuration property that controls how Moose Migrate manages individual `OlapTable` and `Stream` resources during schema evolution. Each resource can have its own lifecycle mode, enabling hybrid management models within a single application. -The `LifeCycle` enum controls how Moose manages the lifecycle of database/streaming resources when your code changes. -This feature gives you fine-grained control over whether Moose automatically updates your database schema or -leaves it under external/manual control. +## Lifecycle Modes - +| Mode | Behavior | Default | +| :--- | :--- | :--- | +| `FULLY_MANAGED` | Moose automatically modifies resources to match your code, including destructive operations (drops, deletions). | Yes (for new resources) | +| `DELETION_PROTECTED` | Moose modifies resources to match your code but blocks destructive operations (drops, deletions). | No | +| `EXTERNALLY_MANAGED` | Moose does not modify resources. You are responsible for managing the schema manually. | No | -## LifeCycle Modes +## Configuration Syntax -### `FULLY_MANAGED` (Default) +The `lifeCycle`/`life_cycle` property is set in the configuration object when creating `OlapTable` or `Stream` instances. -This is the default behavior where Moose has complete control over your database resources. When you change your data models, Moose will automatically: - -- Add new columns or tables -- Remove columns or tables that no longer exist in your code -- Modify existing column types and constraints - - -This mode can perform destructive operations. 
Data may be lost if you remove fields from your data models or if you perform operations that require a destroy and recreate to be effective, like changing the `order_by_fields` (Python) or `orderByFields` (TypeScript) field. - +### OlapTable Configuration -```ts filename="FullyManagedExample.ts" copy -import { OlapTable, LifeCycle } from "@514labs/moose-lib"; - -interface UserData { - id: string; - name: string; - email: string; -} +```ts +import { OlapTable, OlapConfig, LifeCycle } from "@514labs/moose-lib"; -// Default behavior - fully managed -const userTable = new OlapTable("users"); - -// Explicit fully managed configuration -const explicitTable = new OlapTable("users", { - orderByFields: ["id"], +const table = new OlapTable("table_name", { lifeCycle: LifeCycle.FULLY_MANAGED }); ``` @@ -77,21 +39,10 @@ const explicitTable = new OlapTable("users", { -```py filename="FullyManagedExample.py" copy +```py from moose_lib import OlapTable, OlapConfig, LifeCycle -from pydantic import BaseModel - -class UserData(BaseModel): - id: str - name: str - email: str - -# Default behavior - fully managed -user_table = OlapTable[UserData]("users") -# Explicit fully managed configuration -explicit_table = OlapTable[UserData]("users", OlapConfig( - order_by_fields=["id"], +table = OlapTable[DataType]("table_name", OlapConfig( life_cycle=LifeCycle.FULLY_MANAGED )) ``` @@ -99,141 +50,86 @@ explicit_table = OlapTable[UserData]("users", OlapConfig( -### `DELETION_PROTECTED` - -This mode allows Moose to automatically add new database structures but prevents it from removing existing ones. -Perfect for production environments where you want to evolve your schema safely without risking data loss. - -**What Moose will do:** -- Add new columns, tables -- Modify column types (if compatible) -- Update non-destructive configurations - -**What Moose won't do:** -- Drop columns or tables -- Perform destructive schema changes +### Stream Configuration -```ts filename="DeletionProtectedExample.ts" copy -import { IngestPipeline, LifeCycle } from "@514labs/moose-lib"; - -interface ProductEvent { - id: string; - productId: string; - timestamp: Date; - action: string; -} +```ts +import { Stream, StreamConfig, LifeCycle } from "@514labs/moose-lib"; -const productAnalytics = new IngestPipeline("product_analytics", { - table: { - orderByFields: ["timestamp", "productId"], - engine: ClickHouseEngines.ReplacingMergeTree, - }, - stream: { - parallelism: 4, - }, - ingestApi: true, - // automatically applied to the table and stream - lifeCycle: LifeCycle.DELETION_PROTECTED +const stream = new Stream("stream_name", { + destination: table, + lifeCycle: LifeCycle.FULLY_MANAGED }); ``` -```py filename="DeletionProtectedExample.py" copy -from moose_lib import IngestPipeline, IngestPipelineConfig, OlapConfig, StreamConfig, LifeCycle -from pydantic import BaseModel -from datetime import datetime - -class ProductEvent(BaseModel): - id: str - product_id: str - timestamp: datetime - action: str +```py +from moose_lib import Stream, StreamConfig, LifeCycle -product_analytics = IngestPipeline[ProductEvent]("product_analytics", IngestPipelineConfig( - table=OlapConfig( - order_by_fields=["timestamp", "product_id"], - engine=ClickHouseEngines.ReplacingMergeTree, - ), - stream=StreamConfig( - parallelism=4, - ), - ingest_api=True, - # automatically applied to the table and stream - life_cycle=LifeCycle.DELETION_PROTECTED +stream = Stream[DataType]("stream_name", StreamConfig( + destination=table, + life_cycle=LifeCycle.FULLY_MANAGED )) ``` -### 
`EXTERNALLY_MANAGED` +### IngestPipeline Configuration -This mode tells Moose to completely hands-off your resources. -You become responsible for creating and managing the database schema. This is useful when: - -- You have existing database tables managed by another team -- You're integrating with another system (e.g. PeerDB) -- You have strict database change management processes - - -With externally managed resources, you must ensure your database schema matches your data models exactly, or you may encounter runtime errors. - +For `IngestPipeline`, you can set lifecycle modes independently for the table and stream components. -```ts filename="ExternallyManagedExample.ts" copy -import { Stream, OlapTable, LifeCycle, Key } from "@514labs/moose-lib"; - -interface ExternalUserData { - userId: Key; - fullName: string; - emailAddress: string; - createdAt: Date; -} +```ts +import { IngestPipeline, IngestPipelineConfig, LifeCycle } from "@514labs/moose-lib"; -// Connect to existing database table -const legacyUserTable = new OlapTable("legacy_users", { - lifeCycle: LifeCycle.EXTERNALLY_MANAGED -}); - -// Connect to existing Kafka topic -const legacyStream = new Stream("legacy_user_stream", { - lifeCycle: LifeCycle.EXTERNALLY_MANAGED, - destination: legacyUserTable +const pipeline = new IngestPipeline("pipeline_name", { + table: { + lifeCycle: LifeCycle.DELETION_PROTECTED + }, + stream: { + lifeCycle: LifeCycle.FULLY_MANAGED + } }); ``` -```py filename="ExternallyManagedExample.py" copy -from moose_lib import Stream, OlapTable, OlapConfig, StreamConfig, LifeCycle, Key -from pydantic import BaseModel -from datetime import datetime - -class ExternalUserData(BaseModel): - user_id: Key[str] - full_name: str - email_address: str - created_at: datetime - -# Connect to existing database table -legacy_user_table = OlapTable[ExternalUserData]("legacy_users", OlapConfig( - life_cycle=LifeCycle.EXTERNALLY_MANAGED -)) +```py +from moose_lib import IngestPipeline, IngestPipelineConfig, OlapConfig, StreamConfig, LifeCycle -# Connect to existing Kafka topic -legacy_stream = Stream[ExternalUserData]("legacy_user_stream", StreamConfig( - life_cycle=LifeCycle.EXTERNALLY_MANAGED, - destination=legacy_user_table +pipeline = IngestPipeline[DataType]("pipeline_name", IngestPipelineConfig( + table=OlapConfig( + life_cycle=LifeCycle.DELETION_PROTECTED + ), + stream=StreamConfig( + life_cycle=LifeCycle.FULLY_MANAGED + ) )) ``` + +## Use Cases + +| Scenario | Recommended Mode | Rationale | +| :--- | :--- | :--- | +| Development/iteration | `FULLY_MANAGED` | Allows rapid schema changes including destructive operations. | +| Production tables | `DELETION_PROTECTED` | Prevents accidental data loss while allowing schema evolution. | +| Legacy/shared tables | `EXTERNALLY_MANAGED` | Tables managed by another team or system. | +| CDC-managed streams | `EXTERNALLY_MANAGED` | Topics created by ClickPipes, PeerDB, or other CDC services. | +| Moose-managed streams | `FULLY_MANAGED` | Topics created and managed by Moose. 
| + +## See Also + +- [Fully Managed](/moosestack/migrate/lifecycle-fully-managed) - Default lifecycle mode +- [Deletion Protected](/moosestack/migrate/lifecycle-deletion-protected) - Safe production mode +- [Externally Managed](/moosestack/migrate/lifecycle-externally-managed) - Read-only mode diff --git a/apps/framework-docs-v2/content/moosestack/migrate/modes.mdx b/apps/framework-docs-v2/content/moosestack/migrate/modes.mdx new file mode 100644 index 0000000000..3a266a5604 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/migrate/modes.mdx @@ -0,0 +1,63 @@ +--- +title: Generating Migrations +description: Overview of the two schema evolution modes in Moose used for development and production. +order: 1 +category: migrate +--- + +import { Callout } from "@/components/mdx"; + +# Generating Migrations + +Moose Migrate offers two complementary modes for generating migrations. Each is designed for use in different stages of the application lifecycle, and it's best practice to use both in your workflow: + +- [Auto-Inferred Migrations](#auto-inferred-migrations) - Changes are instantly applied to your database without any manual review. Designed for local development. +- [Planned Migrations](#planned-migrations) - Changes are staged in a reviewable and editable plan file. Designed for production deployments. + +## Overview + +| | Auto-Inferred Migrations | Planned Migrations | +| :--- | :--- | :--- | +| **Primary Use Case** | Local Development | Production Deployment | +| **Trigger** | File Save / Watcher | `moose generate migration` | +| **Artifact** | None (Immediate SQL Execution) | Migration Plan (`plan.yaml`) | +| **Safety** | Low (Optimized for speed) | High (Optimized for safety) | +| **Reviewable** | No | Yes | +| **Drift Detection** | No | Yes | + +## Auto-Inferred Migrations + +Designed for the "inner loop" of development, **Auto-Inferred Migrations** prioritize velocity. As you iterate on your data models in code, Moose automatically applies the necessary changes to your local database in real-time. + +* **Optimized for:** Prototyping, local development, rapid iteration. +* **Key Characteristic:** Invisible, instant schema updates. +* **Risk:** Can be destructive (e.g., renaming a field drops the column). + +[Read the Auto-Inferred Migrations Reference →](/moosestack/migrate/auto-inferred) + +## Planned Migrations + +Designed for the "outer loop" of deployment, **Planned Migrations** prioritize safety. This mode separates the *generation* of changes from their *execution*, creating a static artifact that can be reviewed, tested, and versioned. + +* **Optimized for:** Production deployments, team collaboration, CI/CD. +* **Key Characteristic:** Reviewable, editable migration plans. +* **Risk:** Minimal (requires explicit approval and handles drift detection). + +[Read the Planned Migrations Reference →](/moosestack/migrate/planned-migrations) + +## Use Cases + +| Scenario | Recommended Mode | Rationale | +| :--- | :--- | :--- | +| **Local Prototyping** | `Auto-Inferred` | You want to iterate quickly on your schema without running commands for every change. | +| **Production Deployment** | `Planned` | You need to ensure that schema changes are safe and do not accidentally delete user data. | +| **CI/CD Pipeline** | `Planned` | You need to apply migrations in a deterministic way as part of your deployment process. | +| **Renaming Columns** | `Planned` | You need to explicitly tell the database to rename a column instead of dropping and re-adding it. 
| +| **Team Review** | `Planned` | You want to review schema changes in a Pull Request before they are applied. | + +## See Also + +- [Auto-Inferred Migrations](/moosestack/migrate/auto-inferred) +- [Planned Migrations](/moosestack/migrate/planned-migrations) +- [Lifecycle Management](/moosestack/migrate/lifecycle) + diff --git a/apps/framework-docs-v2/content/moosestack/migrate/plan-format.mdx b/apps/framework-docs-v2/content/moosestack/migrate/plan-format.mdx new file mode 100644 index 0000000000..a031e69ea3 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/migrate/plan-format.mdx @@ -0,0 +1,240 @@ +--- +title: Migration Plan Format +description: Detailed specification of the migration plan.yaml structure, operations, and types. +order: 5 +category: migrate +--- + +import { Callout } from "@/components/mdx"; + +# Migration Plan Format + +The `migrations/plan.yaml` file is the declarative source of truth for pending database changes in Moose. It defines the exact sequence of operations Moose will execute to transition your production database to the desired state. + +This reference documents the file structure, supported operations, and data type definitions found in generated plans. + +## Plan Structure + +A migration plan is a YAML document with the following root properties: + +| Field | Type | Description | +| :--- | :--- | :--- | +| `created_at` | String (ISO 8601) | Timestamp when the plan was generated. Used for versioning and audit trails. | +| `operations` | Array | Ordered list of migration operations to execute. | + +```yaml title="migrations/plan.yaml" copy=false +created_at: 2025-01-14T10:30:00Z +operations: + - DropTableColumn: ... + - AddTableColumn: ... +``` + +## Operation Reference + +Operations are the atomic units of change in a migration plan. Moose executes them sequentially. + +### Table Operations + +#### `CreateTable` + +Creates a new ClickHouse table. + +```yaml title="migrations/plan.yaml" copy=false +- CreateTable: + table: + name: + columns: [] + order_by: [] + engine: +``` + +| Field | Description | +| :--- | :--- | +| `name` | Name of the table to create. | +| `columns` | List of column definitions. | +| `order_by` | Array of column names used for the sorting key. | +| `engine` | ClickHouse table engine (e.g., `MergeTree`, `ReplacingMergeTree`). | + +#### `DropTable` + +Permanently removes a table and all its data. + +```yaml title="migrations/plan.yaml" copy=false +- DropTable: + table: +``` + +| Field | Description | +| :--- | :--- | +| `table` | Name of the table to drop. | + +### Column Operations + +#### `AddTableColumn` + +Adds a new column to an existing table. + +```yaml title="migrations/plan.yaml" copy=false +- AddTableColumn: + table: + column: +``` + +| Field | Description | +| :--- | :--- | +| `table` | Target table name. | +| `column` | Full definition of the new column. | + +#### `DropTableColumn` + +Removes a column from a table. **Destructive operation.** + +```yaml title="migrations/plan.yaml" copy=false +- DropTableColumn: + table: + column_name: +``` + +| Field | Description | +| :--- | :--- | +| `table` | Target table name. | +| `column_name` | Name of the column to remove. | +#### `RenameTableColumn` + +Renames a column while preserving its data. + +```yaml title="migrations/plan.yaml" copy=false +- RenameTableColumn: + table: + before_column_name: + after_column_name: +``` + +| Field | Description | +| :--- | :--- | +| `table` | Target table name. | +| `before_column_name` | Current name of the column. 
| +| `after_column_name` | New name for the column. | + +#### `ModifyTableColumn` + +Changes a column's data type or properties. + +```yaml title="migrations/plan.yaml" copy=false +- ModifyTableColumn: + table: + before_column: + after_column: +``` + +| Field | Description | +| :--- | :--- | +| `table` | Target table name. | +| `before_column` | Snapshot of the column state before modification. | +| `after_column` | Desired state of the column. | + +### SQL Operations + +#### `RawSql` + +Executes arbitrary SQL statements. Used for custom migrations, backfills, or unsupported operations. + +```yaml title="migrations/plan.yaml" copy=false +- RawSql: + sql: [] + description: +``` + +| Field | Description | +| :--- | :--- | +| `sql` | List of SQL statements to execute in order. | +| `description` | Human-readable explanation of the operation's purpose. | + +## Column Definition + +Every column in a `CreateTable`, `AddTableColumn`, or `ModifyTableColumn` operation follows this structure: + +```yaml title="migrations/plan.yaml" copy=false +name: +data_type: +required: +unique: +primary_key: +default: | null +comment: | null +annotations: [[, ]] +``` + +| Property | Type | Description | +| :--- | :--- | :--- | +| `name` | String | Column identifier. | +| `data_type` | Type | ClickHouse data type (see below). | +| `required` | Boolean | If `true`, the column is `NOT NULL`. | +| `unique` | Boolean | (Metadata) Whether the field is marked unique in the model. | +| `primary_key` | Boolean | Whether the field is part of the primary key. | +| `default` | String | Default value expression (e.g., `'active'`, `0`, `now()`). | + +## Data Types + +Moose maps data model types to ClickHouse types in the plan. + +### Scalar Types + +| Type | YAML Representation | +| :--- | :--- | +| **String** | `String` | +| **Boolean** | `Boolean` | +| **Integer** | `Int8`, `Int16`, `Int32`, `Int64`, `UInt8`... | +| **Float** | `Float32`, `Float64` | +| **Date** | `Date`, `Date32`, `DateTime` | +| **UUID** | `UUID` | + +### Complex Types + +#### Nullable +Wraps another type to allow `NULL` values. +```yaml title="migrations/plan.yaml" copy=false +data_type: + Nullable: + nullable: String +``` + +#### Arrays +List of values of a single type. +```yaml title="migrations/plan.yaml" copy=false +data_type: + Array: + elementType: String + elementNullable: false +``` + +#### Enums +Fixed set of string or integer values. +```yaml title="migrations/plan.yaml" copy=false +data_type: + Enum: + name: "Status" + values: + - name: "ACTIVE" + value: { String: "active" } + - name: "INACTIVE" + value: { String: "inactive" } +``` + +#### Nested (Structs) +Hierarchical data structures. +```yaml title="migrations/plan.yaml" copy=false +data_type: + Nested: + name: "Address" + columns: [] + jwt: false +``` + +#### LowCardinality +Storage optimization for columns with few unique values. +```yaml title="migrations/plan.yaml" copy=false +data_type: + LowCardinality: + nullable: String +``` diff --git a/apps/framework-docs-v2/content/moosestack/migrate/planned-migrations.mdx b/apps/framework-docs-v2/content/moosestack/migrate/planned-migrations.mdx new file mode 100644 index 0000000000..fc2e2adfd3 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/migrate/planned-migrations.mdx @@ -0,0 +1,122 @@ +--- +title: Planned Migrations +description: Reference documentation for the Planned Migrations system in Moose. 
+order: 6 +category: olap +--- + +import { Callout } from "@/components/mdx"; + +# Planned Migrations + +**Planned migrations** are the production-grade schema evolution mechanism in Moose. Unlike auto-inferred migrations, this system separates the *generation* of schema changes from their *execution*, introducing a reviewable artifact (the plan) into your deployment lifecycle. + +## Command + +Generate a migration plan by comparing your local code against a production environment: + +```bash +# For Server Runtime (connect to Moose Admin API) +moose generate migration --url --token --save + +# For Serverless (connect to ClickHouse directly) +moose generate migration --clickhouse-url --save +``` + +**Key Benefits:** +* **Deterministic:** The plan is a static file (`plan.yaml`) that won't change at runtime. +* **Drift Detection:** Snapshots (`remote_state.json`) ensure the DB hasn't changed since the plan was created. +* **Reviewable:** You can audit every operation (e.g., `DropColumn`, `AddTable`) before it runs, and you can edit the plan to override Moose's assumptions. +* **Versioned:** Commit plans to Git to create a permanent audit trail. + +## Workflow + +The lifecycle consists of four distinct stages: + +1. **Code Change** — Modify your data models (tables, views) in your Moose project. +2. **Generation** — Run the CLI to compare your code against production. + ```bash + moose generate migration --save ... + ``` +3. **Review** — Inspect the generated `migrations/plan.yaml` file and commit it to Git. +4. **Application** — Execute the plan during deployment (if using Moose Runtime), manually via the CLI or your own CI/CD pipeline (if using Serverless). + ```bash + moose migrate ... + + ## or if using the Moose Runtime + moose prod + ``` + +## Generated Artifacts + +Running the generation command produces three files in the `migrations/` directory. + +| File | Purpose | +| :--- | :--- | +| `plan.yaml` | The imperative list of operations (e.g., `AddTableColumn`) to execute. See [Plan Reference](/moosestack/migrate/reference). | +| `remote_state.json` | A snapshot of the production database schema at the time of generation. Used to detect drift. | +| `local_infra_map.json` | A snapshot of your local code's schema definitions. Used to validate the plan against the code. | + +## Configuration + +Planned migrations are enabled via the `ddl_plan` feature flag in your project configuration. + +```toml filename="moose.config.toml" +[features] +olap = true +ddl_plan = true +``` + +## Command Options + +The `moose generate migration` command accepts different arguments depending on your [deployment model for applying changes](/moosestack/migrate#applying-changes). + +### via Moose Runtime (`moose prod`) + +Connect to the **Admin API** of the running service. + +```bash +moose generate migration --url --token --save +``` + +| Option | Description | +| :--- | :--- | +| `--url` | The endpoint of your production Moose Admin API. | +| `--token` | The authentication token for the Admin API. | +| `--save` | Writes the generated plan to the `migrations/` directory. Without this, it performs a dry run. | + +### via Serverless (`moose migrate`) + +Connect directly to the **ClickHouse database**. + +```bash +moose generate migration --clickhouse-url --save +``` + +| Option | Description | +| :--- | :--- | +| `--clickhouse-url` | Direct connection string (e.g., `clickhouse://user:pass@host:9440/db`). | +| `--save` | Writes the generated plan to the `migrations/` directory. Without this, it performs a dry run. 
| + +## Drift Detection + +Drift occurs when the target database's schema changes between the time you generate a plan and the time you apply it. + +**How it works:** +1. `moose generate` writes the current DB state to `remote_state.json`. +2. `moose migrate` (serverless) or `moose prod` (server runtime) compares `remote_state.json` with the *current* DB state. +3. If they differ (hash mismatch), the migration **aborts**. + +**Resolution:** +To fix drift, you must regenerate the plan against the new production state. + +```bash +# Regenerate to accept the new state +moose generate migration ... --save +``` + +## See Also + +- [Migration Plan Reference](/moosestack/migrate/reference) - Detailed syntax of `plan.yaml`. +- [Serverless (moose migrate)](/moosestack/migrate/apply-planned-migrations-cli) - Execution guide. +- [Server Runtime](/moosestack/migrate/apply-planned-migrations-service) - Execution guide. diff --git a/apps/framework-docs-v2/src/config/navigation.ts b/apps/framework-docs-v2/src/config/navigation.ts index 5d5fecbdb8..f674818b2b 100644 --- a/apps/framework-docs-v2/src/config/navigation.ts +++ b/apps/framework-docs-v2/src/config/navigation.ts @@ -497,57 +497,70 @@ const moosestackNavigationConfig: NavigationConfig = [ icon: IconGitMerge, languages: ["typescript", "python"], children: [ - { type: "label", title: "Migration Modes" }, + { type: "separator" }, + { type: "label", title: "Lifecycle Management" }, { type: "page", - slug: "moosestack/migrate/auto-inferred", - title: "Auto-Inferred", + slug: "moosestack/migrate/lifecycle", + title: "Overview", languages: ["typescript", "python"], }, { type: "page", - slug: "moosestack/migrate/planned-migrations", - title: "Planned", + slug: "moosestack/migrate/lifecycle-fully-managed", + title: "Fully Managed", languages: ["typescript", "python"], }, { type: "page", - slug: "moosestack/migrate/plan-format", - title: "Plan Format", + slug: "moosestack/migrate/lifecycle-deletion-protected", + title: "Deletion Protected", + languages: ["typescript", "python"], + }, + { + type: "page", + slug: "moosestack/migrate/lifecycle-externally-managed", + title: "Externally Managed", languages: ["typescript", "python"], }, { type: "separator" }, - { type: "label", title: "Applying Migrations" }, + { type: "label", title: "Generating Migrations" }, { type: "page", - slug: "moosestack/migrate/apply-planned-migrations-cli", - title: "Serverless (moose migrate)", + slug: "moosestack/migrate/modes", + title: "Overview", languages: ["typescript", "python"], }, { type: "page", - slug: "moosestack/migrate/apply-planned-migrations-service", - title: "Server Runtime", + slug: "moosestack/migrate/auto-inferred", + title: "Auto-Inferred", languages: ["typescript", "python"], }, - { type: "separator" }, - { type: "label", title: "Lifecycle Management" }, { type: "page", - slug: "moosestack/migrate/lifecycle-fully-managed", - title: "Fully Managed", + slug: "moosestack/migrate/planned-migrations", + title: "Planned", languages: ["typescript", "python"], }, { type: "page", - slug: "moosestack/migrate/lifecycle-deletion-protected", - title: "Deletion Protected", + slug: "moosestack/migrate/plan-format", + title: "Plan Format", languages: ["typescript", "python"], }, + { type: "separator" }, + { type: "label", title: "Applying Migrations" }, { type: "page", - slug: "moosestack/migrate/lifecycle-externally-managed", - title: "Externally Managed", + slug: "moosestack/migrate/apply-planned-migrations-cli", + title: "Serverless (moose migrate)", + languages: 
["typescript", "python"], + }, + { + type: "page", + slug: "moosestack/migrate/apply-planned-migrations-service", + title: "Server Runtime", languages: ["typescript", "python"], }, { type: "separator" }, From 83c39f97088f366a71322f1e74e0f06672ec8a59 Mon Sep 17 00:00:00 2001 From: Olivia Kane Date: Wed, 26 Nov 2025 19:19:28 -0500 Subject: [PATCH 56/59] fix build error --- .../content/moosestack/migrate/failed-migrations.mdx | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/apps/framework-docs-v2/content/moosestack/migrate/failed-migrations.mdx b/apps/framework-docs-v2/content/moosestack/migrate/failed-migrations.mdx index 9645df8304..7b10689f44 100644 --- a/apps/framework-docs-v2/content/moosestack/migrate/failed-migrations.mdx +++ b/apps/framework-docs-v2/content/moosestack/migrate/failed-migrations.mdx @@ -1,12 +1,8 @@ --- title: Failed Migrations description: Recover from failed migrations and safely achieve desired type changes -<<<<<<<< HEAD:apps/framework-docs-v2/content/moosestack/olap/schema-change.mdx order: 13 -======== -order: 4 ->>>>>>>> 49c5725d4 (docs: migrate foundations documentation refactor):apps/framework-docs-v2/content/moosestack/migrate/failed-migrations.mdx -category: olap +category: migrate --- import { Callout, LanguageTabs, LanguageTabContent } from "@/components/mdx"; From 6605b387c9ed1d3f2c32caece4ca542f631b38d0 Mon Sep 17 00:00:00 2001 From: Olivia Kane Date: Mon, 1 Dec 2025 13:11:02 -0500 Subject: [PATCH 57/59] fix build error and add cdc debezium to nav --- .../debezium-dev-to-prod-outline.mdx | 28 +- apps/framework-docs-v2/public/sitemap-0.xml | 311 +++++++++--------- .../src/components/mdx/server-figure.tsx | 10 +- .../src/config/navigation.ts | 9 + pnpm-lock.yaml | 10 +- 5 files changed, 201 insertions(+), 167 deletions(-) diff --git a/apps/framework-docs-v2/content/guides/data-management/change-data-capture/debezium-dev-to-prod-outline.mdx b/apps/framework-docs-v2/content/guides/data-management/change-data-capture/debezium-dev-to-prod-outline.mdx index f970dd0f30..7aaa4e47b7 100644 --- a/apps/framework-docs-v2/content/guides/data-management/change-data-capture/debezium-dev-to-prod-outline.mdx +++ b/apps/framework-docs-v2/content/guides/data-management/change-data-capture/debezium-dev-to-prod-outline.mdx @@ -3,6 +3,8 @@ title: Stream Data from Postgres with Debezium description: Learn how to adapt the Debezium CDC template to stream data from your PostgreSQL database to ClickHouse. --- +import { FileTree } from "@/components/mdx"; + # Stream Data from Postgres with Debezium This guide shows you how to use the **Debezium CDC Template** with your own application. You will learn how to connect the pipeline to your PostgreSQL database and send your tables to ClickHouse for real-time analytics. 
@@ -30,15 +32,23 @@ This template uses two Kafka topics for each table: one for the raw data and one Here are the key files in the template you should know about: -``` -cdc-pipeline/ -├── 1-sources/ # Defines Kafka topics from Debezium -├── 2-transforms/ # Sanitizes CDC events & maps to destination -├── 3-destinations/ # Defines ClickHouse tables & streams -docker-compose.dev.override.yaml # Infrastructure (Kafka Connect, Redpanda) -setup-cdc.ts # Script that registers the connector -moose.config.toml # Project config (enables streaming) -``` + + + + + + + + + + + +* **`cdc-pipeline/1-sources/`**: Defines Kafka topics from Debezium +* **`cdc-pipeline/2-transforms/`**: Sanitizes CDC events and maps them to the destination +* **`cdc-pipeline/3-destinations/`**: Defines ClickHouse tables and streams +* **`docker-compose.dev.override.yaml`**: Infrastructure configuration (Kafka Connect, Redpanda) +* **`setup-cdc.ts`**: Script that registers the Debezium connector +* **`moose.config.toml`**: Project configuration (enables streaming) ## Step 0: Clone the Template diff --git a/apps/framework-docs-v2/public/sitemap-0.xml b/apps/framework-docs-v2/public/sitemap-0.xml index 88aaa0ea28..a8d33c86f3 100644 --- a/apps/framework-docs-v2/public/sitemap-0.xml +++ b/apps/framework-docs-v2/public/sitemap-0.xml @@ -1,154 +1,163 @@ -https://docs.fiveonefour.com/ai2025-11-21T02:43:16.692Zdaily0.7 -https://docs.fiveonefour.com/ai/data-collection-policy2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/ai/demos/context2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/ai/demos/dlqs2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/ai/demos/egress2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/ai/demos/index2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/ai/demos/ingest2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/ai/demos/model-data2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/ai/demos/mvs2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/ai/getting-started/claude2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/ai/getting-started/cursor2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/ai/getting-started/index2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/ai/getting-started/other-clients2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/ai/getting-started/vs-code2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/ai/getting-started/windsurf2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/ai/guides/clickhouse-chat2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/ai/guides/clickhouse-proj2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/ai/guides/from-template2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/ai/guides/index2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/ai/index2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/ai/overview2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/ai/reference/cli-reference2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/ai/reference/index2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/ai/reference/mcp-json-reference2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/ai/reference/tool-reference2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/hosting2025-11-21T02:43:16.693Zdaily0.7 
-https://docs.fiveonefour.com/hosting/deployment2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/hosting/getting-started2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/hosting/index2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/hosting/overview2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/apis/admin-api2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/apis/analytics-api2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/apis/auth2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/apis/index2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/apis/ingest-api2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/apis/openapi-sdk2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/apis/trigger-api2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/app-api-frameworks/express2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/app-api-frameworks/fastapi2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/app-api-frameworks/fastify2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/app-api-frameworks/index2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/app-api-frameworks/koa2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/app-api-frameworks/nextjs2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/app-api-frameworks/raw-nodejs2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/changelog2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/configuration2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/contribution/documentation2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/contribution/framework2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/data-modeling2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/data-sources2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/deploying/configuring-moose-for-cloud2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/deploying/deploying-on-an-offline-server2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/deploying/deploying-on-ecs2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/deploying/deploying-on-kubernetes2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/deploying/deploying-with-docker-compose2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/deploying/index2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/deploying/monitoring2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/deploying/packaging-moose-for-deployment2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/deploying/preparing-clickhouse-redpanda2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/getting-started/from-clickhouse2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/getting-started/index2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/getting-started/quickstart2025-11-21T02:43:16.693Zdaily0.7 
-https://docs.fiveonefour.com/moosestack/help/index2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/help/minimum-requirements2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/help/troubleshooting2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/in-your-stack2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/index2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/local-dev-environment2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/metrics2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/migrate/index2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/migrate/lifecycle2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/migrate/migration-types2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/moose-cli2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/moosedev-mcp2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/olap/apply-migrations2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/olap/db-pull2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/olap/external-tables2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/olap/index2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/olap/indexes2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/olap/insert-data2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/olap/model-materialized-view2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/olap/model-table2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/olap/model-view2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/olap/planned-migrations2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/olap/read-data2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/olap/schema-change2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/olap/schema-optimization2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/olap/schema-versioning2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/olap/supported-types2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/olap/ttl2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/overview2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/quickstart2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/reference/index2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/streaming/connect-cdc2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/streaming/consumer-functions2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/streaming/create-stream2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/streaming/dead-letter-queues2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/streaming/from-your-code2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/streaming/index2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/streaming/schema-registry2025-11-21T02:43:16.693Zdaily0.7 
-https://docs.fiveonefour.com/moosestack/streaming/sync-to-table2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/streaming/transform-functions2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/workflows/cancel-workflow2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/workflows/define-workflow2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/workflows/index2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/workflows/retries-and-timeouts2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/workflows/schedule-workflow2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/moosestack/workflows/trigger-workflow2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/applications/automated-reports2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/applications/automated-reports/guide-overview2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/applications/going-to-production2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/applications/going-to-production/guide-overview2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/applications/in-app-chat-analytics2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/applications/in-app-chat-analytics/guide-overview2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/applications/performant-dashboards2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/applications/performant-dashboards/existing-oltp-db2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/applications/performant-dashboards/existing-oltp-db/1-setup-connection2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/applications/performant-dashboards/guide-overview2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/applications/performant-dashboards/scratch/1-init2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/data-management/change-data-capture2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/data-management/change-data-capture/guide-overview2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/data-management/impact-analysis2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/data-management/impact-analysis/guide-overview2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/data-management/migrations2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/data-management/migrations/guide-overview2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/data-warehousing/connectors2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/data-warehousing/connectors/guide-overview2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/data-warehousing/customer-data-platform2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/data-warehousing/customer-data-platform/guide-overview2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/data-warehousing/operational-analytics2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/data-warehousing/operational-analytics/guide-overview2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/data-warehousing/pipelines2025-11-21T02:43:16.693Zdaily0.7 
-https://docs.fiveonefour.com/guides/data-warehousing/pipelines/guide-overview2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/data-warehousing/startup-metrics2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/data-warehousing/startup-metrics/guide-overview2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/methodology/data-as-code2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/methodology/data-as-code/guide-overview2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/methodology/dora-for-data2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/methodology/dora-for-data/guide-overview2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/strategy/ai-enablement2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/strategy/ai-enablement/guide-overview2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/strategy/data-foundation2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/strategy/data-foundation/guide-overview2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/strategy/olap-evaluation2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/strategy/olap-evaluation/guide-overview2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/strategy/platform-engineering2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides/strategy/platform-engineering/guide-overview2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/guides2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com2025-11-21T02:43:16.693Zdaily0.7 -https://docs.fiveonefour.com/templates2025-11-21T02:43:16.693Zdaily0.7 +https://docs.fiveonefour.com/ai2025-12-01T18:10:45.728Zdaily0.7 +https://docs.fiveonefour.com/ai/data-collection-policy2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/demos/context2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/demos/dlqs2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/demos/egress2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/demos/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/demos/ingest2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/demos/model-data2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/demos/mvs2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/getting-started/claude2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/getting-started/cursor2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/getting-started/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/getting-started/other-clients2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/getting-started/vs-code2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/getting-started/windsurf2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/guides/clickhouse-chat2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/guides/clickhouse-proj2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/guides/from-template2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/guides/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/overview2025-12-01T18:10:45.729Zdaily0.7 
+https://docs.fiveonefour.com/ai/reference/cli-reference2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/reference/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/reference/mcp-json-reference2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/ai/reference/tool-reference2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/hosting2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/hosting/deployment2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/hosting/getting-started2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/hosting/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/hosting/overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/apis/admin-api2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/apis/analytics-api2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/apis/auth2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/apis/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/apis/ingest-api2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/apis/openapi-sdk2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/apis/trigger-api2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/app-api-frameworks/express2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/app-api-frameworks/fastapi2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/app-api-frameworks/fastify2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/app-api-frameworks/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/app-api-frameworks/koa2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/app-api-frameworks/nextjs2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/app-api-frameworks/raw-nodejs2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/changelog2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/configuration2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/contribution/documentation2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/contribution/framework2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/data-modeling2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/data-sources2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/deploying/configuring-moose-for-cloud2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/deploying/deploying-on-an-offline-server2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/deploying/deploying-on-ecs2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/deploying/deploying-on-kubernetes2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/deploying/deploying-with-docker-compose2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/deploying/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/deploying/monitoring2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/deploying/packaging-moose-for-deployment2025-12-01T18:10:45.729Zdaily0.7 
+https://docs.fiveonefour.com/moosestack/deploying/preparing-clickhouse-redpanda2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/getting-started/from-clickhouse2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/getting-started/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/getting-started/quickstart2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/help/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/help/minimum-requirements2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/help/troubleshooting2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/in-your-stack2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/local-dev-environment2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/metrics2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/apply-planned-migrations-cli2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/apply-planned-migrations-service2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/auto-inferred2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/failed-migrations2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/lifecycle2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/lifecycle-deletion-protected2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/lifecycle-externally-managed2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/lifecycle-fully-managed2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/migration-types2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/modes2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/plan-format2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/migrate/planned-migrations2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/moose-cli2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/moosedev-mcp2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/apply-migrations2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/db-pull2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/external-tables2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/indexes2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/insert-data2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/model-materialized-view2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/model-table2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/model-view2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/planned-migrations2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/read-data2025-12-01T18:10:45.729Zdaily0.7 
+https://docs.fiveonefour.com/moosestack/olap/schema-optimization2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/schema-versioning2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/supported-types2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/olap/ttl2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/quickstart2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/reference/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/streaming/connect-cdc2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/streaming/consumer-functions2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/streaming/create-stream2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/streaming/dead-letter-queues2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/streaming/from-your-code2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/streaming/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/streaming/schema-registry2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/streaming/sync-to-table2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/streaming/transform-functions2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/workflows/cancel-workflow2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/workflows/define-workflow2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/workflows/index2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/workflows/retries-and-timeouts2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/workflows/schedule-workflow2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/moosestack/workflows/trigger-workflow2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/applications/automated-reports2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/applications/automated-reports/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/applications/going-to-production2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/applications/going-to-production/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/applications/in-app-chat-analytics2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/applications/in-app-chat-analytics/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/applications/performant-dashboards2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/applications/performant-dashboards/existing-oltp-db2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/applications/performant-dashboards/existing-oltp-db/1-setup-connection2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/applications/performant-dashboards/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-management/change-data-capture2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-management/change-data-capture/debezium-dev-to-prod-outline2025-12-01T18:10:45.729Zdaily0.7 
+https://docs.fiveonefour.com/guides/data-management/change-data-capture/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-management/impact-analysis2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-management/impact-analysis/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-management/migrations2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-management/migrations/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-warehousing/connectors2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-warehousing/connectors/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-warehousing/customer-data-platform2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-warehousing/customer-data-platform/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-warehousing/operational-analytics2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-warehousing/operational-analytics/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-warehousing/pipelines2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-warehousing/pipelines/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-warehousing/startup-metrics2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/data-warehousing/startup-metrics/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/methodology/data-as-code2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/methodology/data-as-code/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/methodology/dora-for-data2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/methodology/dora-for-data/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/strategy/ai-enablement2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/strategy/ai-enablement/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/strategy/data-foundation2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/strategy/data-foundation/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/strategy/olap-evaluation2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/strategy/olap-evaluation/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/strategy/platform-engineering2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides/strategy/platform-engineering/guide-overview2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/guides2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com2025-12-01T18:10:45.729Zdaily0.7 +https://docs.fiveonefour.com/templates2025-12-01T18:10:45.729Zdaily0.7 \ No newline at end of file diff --git a/apps/framework-docs-v2/src/components/mdx/server-figure.tsx b/apps/framework-docs-v2/src/components/mdx/server-figure.tsx index 75b1a7621a..d8b2bf90c8 100644 --- a/apps/framework-docs-v2/src/components/mdx/server-figure.tsx +++ b/apps/framework-docs-v2/src/components/mdx/server-figure.tsx @@ -77,14 +77,20 @@ export function ServerFigure({ // Extract filename from figcaption (title from markdown) let figcaptionTitle: string | undefined; 
if (figcaption !== null) { - const figcaptionProps = figcaption.props as Record; + const figcaptionProps = (figcaption as React.ReactElement).props as Record< + string, + unknown + >; figcaptionTitle = extractTextFromNode( figcaptionProps.children as React.ReactNode, ).trim(); } const preProps = - preElement ? (preElement.props as Record) || {} : {}; + preElement ? + ((preElement as React.ReactElement).props as Record) || + {} + : {}; // Prioritize figcaption title (from markdown title="...") over any existing attributes const filename = diff --git a/apps/framework-docs-v2/src/config/navigation.ts b/apps/framework-docs-v2/src/config/navigation.ts index f674818b2b..bb7c2bffa9 100644 --- a/apps/framework-docs-v2/src/config/navigation.ts +++ b/apps/framework-docs-v2/src/config/navigation.ts @@ -972,6 +972,15 @@ const guidesNavigationConfig: NavigationConfig = [ title: "Change Data Capture", icon: IconBolt, languages: ["typescript", "python"], + children: [ + { + type: "page", + slug: "guides/data-management/change-data-capture/debezium-dev-to-prod-outline", + title: "Debezium", + icon: IconDatabase, + languages: ["typescript", "python"], + }, + ], }, ], }, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index e7726b6930..432c2855a2 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -15593,7 +15593,7 @@ snapshots: eslint: 8.57.1 eslint-import-resolver-node: 0.3.9 eslint-import-resolver-typescript: 3.10.1(eslint-plugin-import@2.32.0(eslint@8.57.1))(eslint@8.57.1) - eslint-plugin-import: 2.32.0(@typescript-eslint/parser@5.62.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1) + eslint-plugin-import: 2.32.0(@typescript-eslint/parser@5.62.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1) eslint-plugin-jsx-a11y: 6.10.2(eslint@8.57.1) eslint-plugin-react: 7.37.4(eslint@8.57.1) eslint-plugin-react-hooks: 5.0.0-canary-7118f5dd7-20230705(eslint@8.57.1) @@ -15653,7 +15653,7 @@ snapshots: tinyglobby: 0.2.15 unrs-resolver: 1.11.1 optionalDependencies: - eslint-plugin-import: 2.32.0(@typescript-eslint/parser@5.62.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1) + eslint-plugin-import: 2.32.0(@typescript-eslint/parser@5.62.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1) transitivePeerDependencies: - supports-color @@ -15683,7 +15683,7 @@ snapshots: transitivePeerDependencies: - supports-color - eslint-module-utils@2.12.1(@typescript-eslint/parser@8.46.3(eslint@9.39.1(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.1(jiti@2.6.1)): + eslint-module-utils@2.12.1(@typescript-eslint/parser@8.46.3(eslint@9.39.1(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.1(jiti@2.6.1)))(eslint@9.39.1(jiti@2.6.1)): dependencies: debug: 3.2.7 optionalDependencies: @@ -15694,7 +15694,7 @@ snapshots: transitivePeerDependencies: - supports-color - eslint-plugin-import@2.32.0(@typescript-eslint/parser@5.62.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1)(eslint@8.57.1): + 
eslint-plugin-import@2.32.0(@typescript-eslint/parser@5.62.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1): dependencies: '@rtsao/scc': 1.1.0 array-includes: 3.1.9 @@ -15734,7 +15734,7 @@ snapshots: doctrine: 2.1.0 eslint: 9.39.1(jiti@2.6.1) eslint-import-resolver-node: 0.3.9 - eslint-module-utils: 2.12.1(@typescript-eslint/parser@8.46.3(eslint@9.39.1(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1)(eslint@9.39.1(jiti@2.6.1)) + eslint-module-utils: 2.12.1(@typescript-eslint/parser@8.46.3(eslint@9.39.1(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.10.1(eslint-plugin-import@2.32.0)(eslint@9.39.1(jiti@2.6.1)))(eslint@9.39.1(jiti@2.6.1)) hasown: 2.0.2 is-core-module: 2.16.1 is-glob: 4.0.3 From 7c5fd095b9ac2345a5294d4a921e6132d12014e4 Mon Sep 17 00:00:00 2001 From: Olivia Kane Date: Mon, 1 Dec 2025 13:17:59 -0500 Subject: [PATCH 58/59] intro message ifx --- .../change-data-capture/debezium-dev-to-prod-outline.mdx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/framework-docs-v2/content/guides/data-management/change-data-capture/debezium-dev-to-prod-outline.mdx b/apps/framework-docs-v2/content/guides/data-management/change-data-capture/debezium-dev-to-prod-outline.mdx index 7aaa4e47b7..27583ea855 100644 --- a/apps/framework-docs-v2/content/guides/data-management/change-data-capture/debezium-dev-to-prod-outline.mdx +++ b/apps/framework-docs-v2/content/guides/data-management/change-data-capture/debezium-dev-to-prod-outline.mdx @@ -7,7 +7,7 @@ import { FileTree } from "@/components/mdx"; # Stream Data from Postgres with Debezium -This guide shows you how to use the **Debezium CDC Template** with your own application. You will learn how to connect the pipeline to your PostgreSQL database and send your tables to ClickHouse for real-time analytics. +This guide shows you how to use the [**Debezium CDC Template**](https://github.com/514-labs/debezium-cdc). You will learn how to set up the Debezium connector with your PostgreSQL database and mirror your data into ClickHouse in real-time. 
## Architecture Overview @@ -52,10 +52,10 @@ Here are the key files in the template you should know about: ## Step 0: Clone the Template -Make sure you clone the [Debezium CDC Template](https://github.com/514labs/debezium-cdc) and install the dependencies: +Make sure you clone the [Debezium CDC Template](https://github.com/514-labs/debezium-cdc) and install the dependencies: ```bash -git clone https://github.com/514labs/debezium-cdc.git +git clone https://github.com/514-labs/debezium-cdc.git cd debezium-cdc pnpm install ``` From ccf58270b6f765969493a3581aa7787b2c95cbd2 Mon Sep 17 00:00:00 2001 From: Olivia Kane Date: Tue, 2 Dec 2025 12:17:19 -0500 Subject: [PATCH 59/59] nav reorg and content model --- .../moosestack/data-types/aggregates.mdx | 114 +++ .../content/moosestack/data-types/arrays.mdx | 82 ++ .../moosestack/data-types/booleans.mdx | 41 + .../moosestack/data-types/datetime.mdx | 138 +++ .../moosestack/data-types/decimals.mdx | 82 ++ .../content/moosestack/data-types/enums.mdx | 98 ++ .../content/moosestack/data-types/floats.mdx | 80 ++ .../moosestack/data-types/geometry.mdx | 172 ++++ .../content/moosestack/data-types/index.mdx | 58 ++ .../moosestack/data-types/integers.mdx | 97 ++ .../content/moosestack/data-types/json.mdx | 197 ++++ .../content/moosestack/data-types/maps.mdx | 54 ++ .../content/moosestack/data-types/nested.mdx | 96 ++ .../content/moosestack/data-types/network.mdx | 74 ++ .../moosestack/data-types/nullable.mdx | 98 ++ .../content/moosestack/data-types/strings.mdx | 97 ++ .../content/moosestack/data-types/tuples.mdx | 81 ++ .../engines/aggregating-merge-tree.mdx | 211 +++++ .../content/moosestack/engines/index.mdx | 96 ++ .../content/moosestack/engines/merge-tree.mdx | 125 +++ .../engines/replacing-merge-tree.mdx | 183 ++++ .../content/moosestack/engines/replicated.mdx | 208 +++++ .../moosestack/engines/summing-merge-tree.mdx | 118 +++ .../moosestack/olap/supported-types.mdx | 862 ------------------ .../guides-specs/GUIDE-CONTENT-MODEL.md | 614 +++++++++++++ .../guides-specs/content-model.ts | 235 +++++ .../guides-specs/debezium-guide-example.mdx | 384 ++++++++ apps/framework-docs-v2/guides-specs/index.ts | 7 + .../src/config/navigation.ts | 161 +++- 29 files changed, 3999 insertions(+), 864 deletions(-) create mode 100644 apps/framework-docs-v2/content/moosestack/data-types/aggregates.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/data-types/arrays.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/data-types/booleans.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/data-types/datetime.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/data-types/decimals.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/data-types/enums.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/data-types/floats.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/data-types/geometry.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/data-types/index.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/data-types/integers.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/data-types/json.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/data-types/maps.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/data-types/nested.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/data-types/network.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/data-types/nullable.mdx create mode 100644 
apps/framework-docs-v2/content/moosestack/data-types/strings.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/data-types/tuples.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/engines/aggregating-merge-tree.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/engines/index.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/engines/merge-tree.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/engines/replacing-merge-tree.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/engines/replicated.mdx create mode 100644 apps/framework-docs-v2/content/moosestack/engines/summing-merge-tree.mdx delete mode 100644 apps/framework-docs-v2/content/moosestack/olap/supported-types.mdx create mode 100644 apps/framework-docs-v2/guides-specs/GUIDE-CONTENT-MODEL.md create mode 100644 apps/framework-docs-v2/guides-specs/content-model.ts create mode 100644 apps/framework-docs-v2/guides-specs/debezium-guide-example.mdx create mode 100644 apps/framework-docs-v2/guides-specs/index.ts diff --git a/apps/framework-docs-v2/content/moosestack/data-types/aggregates.mdx b/apps/framework-docs-v2/content/moosestack/data-types/aggregates.mdx new file mode 100644 index 0000000000..0426bbae46 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/data-types/aggregates.mdx @@ -0,0 +1,114 @@ +--- +title: Aggregate Types +description: SimpleAggregateFunction for pre-aggregated data in Moose +order: 17 +category: reference +--- + +import { LanguageTabs, LanguageTabContent, Callout } from "@/components/mdx"; + +# Aggregate Types + +`SimpleAggregateFunction` stores pre-aggregated values that ClickHouse automatically merges when combining rows with the same primary key. Use with `AggregatingMergeTree` tables. 
+ +## SimpleAggregateFunction + +Define columns that automatically aggregate during merges: + + + +```typescript +import { + SimpleAggregated, + OlapTable, + ClickHouseEngines, + Key, + DateTime +} from "@514labs/moose-lib"; + +interface DailyStats { + date: DateTime; + userId: string; + totalViews: number & SimpleAggregated<"sum", number>; + maxScore: number & SimpleAggregated<"max", number>; + minLatency: number & SimpleAggregated<"min", number>; + lastSeen: DateTime & SimpleAggregated<"anyLast", DateTime>; +} + +const statsTable = new OlapTable("daily_stats", { + engine: ClickHouseEngines.AggregatingMergeTree, + orderByFields: ["date", "userId"], +}); +``` + + +```python +from moose_lib import ( + simple_aggregated, + Key, + OlapTable, + OlapConfig, + AggregatingMergeTreeEngine +) +from pydantic import BaseModel +from datetime import datetime + +class DailyStats(BaseModel): + date: datetime + user_id: str + total_views: simple_aggregated('sum', int) + max_score: simple_aggregated('max', float) + min_latency: simple_aggregated('min', float) + last_seen: simple_aggregated('anyLast', datetime) + +stats_table = OlapTable[DailyStats]( + "daily_stats", + OlapConfig( + engine=AggregatingMergeTreeEngine(), + order_by_fields=["date", "user_id"] + ) +) +``` + + + +## Supported Aggregate Functions + +| Function | Description | Typical Use | +|----------|-------------|-------------| +| `sum` | Sum of values | Totals, counts | +| `max` | Maximum value | Peak metrics | +| `min` | Minimum value | Minimum thresholds | +| `any` | Any single value | Non-deterministic pick | +| `anyLast` | Last inserted value | Latest timestamp, status | +| `argMax` | Value at max of another column | Value when metric was highest | +| `argMin` | Value at min of another column | Value when metric was lowest | + +## How It Works + +1. **Insert**: Data is written with initial aggregate values +2. **Background Merge**: ClickHouse periodically merges rows with the same ORDER BY key +3. **Automatic Aggregation**: `SimpleAggregateFunction` columns are combined using their specified function + +``` +// Two rows with same (date, userId): +{ date: "2025-01-01", userId: "u1", totalViews: 10 } +{ date: "2025-01-01", userId: "u1", totalViews: 15 } + +// After merge: +{ date: "2025-01-01", userId: "u1", totalViews: 25 } // sum +``` + + +Use when you need real-time aggregations on high-volume data. Instead of storing every event and aggregating at query time, pre-aggregate during ingestion for faster queries. + + + +`SimpleAggregateFunction` only works with `AggregatingMergeTree` or `ReplicatedAggregatingMergeTree` table engines. The table's ORDER BY clause defines which rows get merged. + + +## See Also + +- [ClickHouse SimpleAggregateFunction docs](https://clickhouse.com/docs/en/sql-reference/data-types/simpleaggregatefunction) +- [Table Engines](/moosestack/engines) for configuring `AggregatingMergeTree` + diff --git a/apps/framework-docs-v2/content/moosestack/data-types/arrays.mdx b/apps/framework-docs-v2/content/moosestack/data-types/arrays.mdx new file mode 100644 index 0000000000..58b61911ef --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/data-types/arrays.mdx @@ -0,0 +1,82 @@ +--- +title: Array Types +description: Array column types in Moose +order: 10 +category: reference +--- + +import { LanguageTabs, LanguageTabContent, Callout } from "@/components/mdx"; + +# Array Types + +Arrays store ordered collections of elements of the same type. 
+ +## Basic Arrays + + + +```typescript +interface User { + tags: string[]; // Array(String) + scores: number[]; // Array(Float64) + ids: Int32[]; // Array(Int32) + flags: boolean[]; // Array(Boolean) +} +``` + + +```python +from typing import List + +class User(BaseModel): + tags: List[str] # Array(String) + scores: List[float] # Array(Float64) + ids: List[int] # Array(Int64) + flags: List[bool] # Array(Boolean) +``` + + + +## Arrays of Complex Types + +Arrays can contain complex types like JSON or tuples. + + + +```typescript +import { ClickHouseNamedTuple } from "@514labs/moose-lib"; + +interface Event { + metadata: Record[]; // Array(Json) + points: { + x: number; + y: number; + }[]; // Array of nested objects +} +``` + + +```python +from typing import List, Dict, Any, Tuple + +class Event(BaseModel): + metadata: List[Dict[str, Any]] # Array(Json) + points: List[Tuple[float, float]] # Array(Tuple) +``` + + + +## Type Mapping Reference + +| ClickHouse Type | TypeScript | Python | +|-----------------|------------|--------| +| `Array(String)` | `string[]` | `List[str]` | +| `Array(Float64)` | `number[]` | `List[float]` | +| `Array(Int32)` | `Int32[]` | `List[int]` | +| `Array(Boolean)` | `boolean[]` | `List[bool]` | +| `Array(Json)` | `Record[]` | `List[Dict[str, Any]]` | + + +You can nest arrays to create multi-dimensional structures: `number[][]` maps to `Array(Array(Float64))`. + + diff --git a/apps/framework-docs-v2/content/moosestack/data-types/booleans.mdx b/apps/framework-docs-v2/content/moosestack/data-types/booleans.mdx new file mode 100644 index 0000000000..1c665849a4 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/data-types/booleans.mdx @@ -0,0 +1,41 @@ +--- +title: Boolean Type +description: Boolean column type in Moose +order: 6 +category: reference +--- + +import { LanguageTabs, LanguageTabContent } from "@/components/mdx"; + +# Boolean Type + +Booleans represent true/false values. In ClickHouse, they're stored as `UInt8` (0 or 1) but typed as `Boolean` for clarity. + +## Usage + + + +```typescript +interface User { + is_active: boolean; + verified: boolean; + has_subscription: boolean; +} +``` + + +```python +class User(BaseModel): + is_active: bool + verified: bool + has_subscription: bool +``` + + + +## Type Mapping Reference + +| ClickHouse Type | TypeScript | Python | +|-----------------|------------|--------| +| `Boolean` | `boolean` | `bool` | + diff --git a/apps/framework-docs-v2/content/moosestack/data-types/datetime.mdx b/apps/framework-docs-v2/content/moosestack/data-types/datetime.mdx new file mode 100644 index 0000000000..97d1d50d35 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/data-types/datetime.mdx @@ -0,0 +1,138 @@ +--- +title: Date & Time Types +description: Date, DateTime, and DateTime64 column types in Moose +order: 7 +category: reference +--- + +import { LanguageTabs, LanguageTabContent, Callout } from "@/components/mdx"; + +# Date & Time Types + +ClickHouse provides several date and time types with varying precision levels. + +## Date + +Stores a calendar date (year, month, day) without time. + + + +```typescript +interface User { + birth_date: Date; // Date +} +``` + + +```python +from datetime import date + +class User(BaseModel): + birth_date: date # Date +``` + + + +## DateTime + +Stores date and time with second precision. 
+ + + +```typescript +import { DateTime } from "@514labs/moose-lib"; + +interface Event { + created_at: DateTime; // DateTime (second precision) +} +``` + + +```python +from datetime import datetime + +class Event(BaseModel): + created_at: datetime # DateTime +``` + + + +## DateTime64 + +Stores date and time with sub-second precision. The precision parameter `P` specifies decimal places (0-9). + + + +```typescript +import { DateTime64, ClickHousePrecision } from "@514labs/moose-lib"; + +interface Event { + logged_at: DateTime64<3>; // DateTime64(3) — milliseconds + measured_at: DateTime64<6>; // DateTime64(6) — microseconds + precise_at: DateTime64<9>; // DateTime64(9) — nanoseconds +} + +// Verbose syntax alternative: +interface EventVerbose { + logged_at: Date & ClickHousePrecision<3>; +} +``` + + +```python +from moose_lib import clickhouse_datetime64 + +class Event(BaseModel): + logged_at: clickhouse_datetime64(3) # DateTime64(3) — milliseconds + measured_at: clickhouse_datetime64(6) # DateTime64(6) — microseconds + precise_at: clickhouse_datetime64(9) # DateTime64(9) — nanoseconds +``` + + + +## Date16 + +Compact date format for storage optimization. + + + +```typescript +interface CompactRecord { + record_date: Date; // Date16 +} +``` + + +```python +from typing import Annotated +from datetime import date +from moose_lib import ClickhouseSize + +class CompactRecord(BaseModel): + record_date: Annotated[date, ClickhouseSize(2)] # Date16 +``` + + + +## Type Mapping Reference + +| ClickHouse Type | TypeScript Helper | TypeScript Verbose | Python | +|-----------------|-------------------|-------------------|--------| +| `Date` | `Date` | `Date` | `date` | +| `Date16` | `Date` | `Date` | `Annotated[date, ClickhouseSize(2)]` | +| `DateTime` | `DateTime` | `Date` | `datetime` | +| `DateTime64(P)` | `DateTime64
<P>` | `Date & ClickHousePrecision<P>
    ` | `clickhouse_datetime64(P)` | + +## Precision Guide + +| Precision | Resolution | Use Case | +|-----------|------------|----------| +| 0 | Seconds | General timestamps | +| 3 | Milliseconds | Web events, logs | +| 6 | Microseconds | High-frequency trading, metrics | +| 9 | Nanoseconds | Scientific measurements | + + +Use `DateTime` (second precision) for most use cases. Only use `DateTime64` with higher precision when sub-second accuracy is required—it uses more storage. + + diff --git a/apps/framework-docs-v2/content/moosestack/data-types/decimals.mdx b/apps/framework-docs-v2/content/moosestack/data-types/decimals.mdx new file mode 100644 index 0000000000..adfc45a8a2 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/data-types/decimals.mdx @@ -0,0 +1,82 @@ +--- +title: Decimal Types +description: Fixed-point decimal column types for precise numeric values in Moose +order: 5 +category: reference +--- + +import { LanguageTabs, LanguageTabContent, Callout } from "@/components/mdx"; + +# Decimal Types + +Decimals provide fixed-point precision, ideal for financial calculations where floating-point errors are unacceptable. + +## Decimal(P, S) + +A decimal number with `P` total digits (precision) and `S` digits after the decimal point (scale). + + + +```typescript +import { Decimal } from "@514labs/moose-lib"; + +interface FinancialData { + amount: Decimal<10, 2>; // Decimal(10,2) — up to 99999999.99 + rate: Decimal<5, 4>; // Decimal(5,4) — up to 9.9999 + fee: Decimal<8, 3>; // Decimal(8,3) — up to 99999.999 +} +``` + +You can also use the verbose syntax: + +```typescript +import { ClickHouseDecimal } from "@514labs/moose-lib"; + +interface FinancialDataVerbose { + amount: string & ClickHouseDecimal<10, 2>; + rate: string & ClickHouseDecimal<5, 4>; +} +``` + + +```python +from moose_lib import clickhouse_decimal + +class FinancialData(BaseModel): + amount: clickhouse_decimal(10, 2) # Decimal(10,2) + rate: clickhouse_decimal(5, 4) # Decimal(5,4) + fee: clickhouse_decimal(8, 3) # Decimal(8,3) +``` + + + +## Common Precision Patterns + +| Use Case | Precision | Scale | Example Type | +|----------|-----------|-------|--------------| +| Currency (cents) | 10 | 2 | `Decimal<10, 2>` | +| Currency (4 decimals) | 12 | 4 | `Decimal<12, 4>` | +| Interest rates | 5 | 4 | `Decimal<5, 4>` | +| Percentages | 5 | 2 | `Decimal<5, 2>` | +| Scientific | 18 | 9 | `Decimal<18, 9>` | + +## Type Mapping Reference + +| ClickHouse Type | TypeScript Helper | TypeScript Verbose | Python | +|-----------------|-------------------|-------------------|--------| +| `Decimal(P,S)` | `Decimal` | `string & ClickHouseDecimal` | `clickhouse_decimal(P, S)` | + + +Use `Decimal` for monetary values, rates, and any calculation where exact decimal representation matters. Floating-point types (`Float32`/`Float64`) can introduce rounding errors in these scenarios. + + + +ClickHouse stores Decimals based on precision: +- P ≤ 9: 4 bytes (Decimal32) +- P ≤ 18: 8 bytes (Decimal64) +- P ≤ 38: 16 bytes (Decimal128) +- P ≤ 76: 32 bytes (Decimal256) + +Choose the smallest precision that fits your needs. 
+ + diff --git a/apps/framework-docs-v2/content/moosestack/data-types/enums.mdx b/apps/framework-docs-v2/content/moosestack/data-types/enums.mdx new file mode 100644 index 0000000000..1afca00762 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/data-types/enums.mdx @@ -0,0 +1,98 @@ +--- +title: Enum Types +description: Enum column types for categorical values in Moose +order: 14 +category: reference +--- + +import { LanguageTabs, LanguageTabContent, Callout } from "@/components/mdx"; + +# Enum Types + +Enums map to ClickHouse enums, storing categorical values efficiently. + +## String Enums + + + +```typescript +enum UserRole { + ADMIN = "admin", + USER = "user", + GUEST = "guest" +} + +enum Status { + PENDING = "pending", + ACTIVE = "active", + INACTIVE = "inactive" +} + +interface User { + role: UserRole; + status: Status; +} +``` + + +```python +from enum import Enum + +class UserRole(str, Enum): + ADMIN = "admin" + USER = "user" + GUEST = "guest" + +class Status(str, Enum): + PENDING = "pending" + ACTIVE = "active" + INACTIVE = "inactive" + +class User(BaseModel): + role: UserRole + status: Status +``` + + + +## Numeric Enums + + + +```typescript +enum Priority { + LOW = 1, + MEDIUM = 2, + HIGH = 3, + CRITICAL = 4 +} + +interface Task { + priority: Priority; +} +``` + + +```python +from enum import IntEnum + +class Priority(IntEnum): + LOW = 1 + MEDIUM = 2 + HIGH = 3 + CRITICAL = 4 + +class Task(BaseModel): + priority: Priority +``` + + + + +Enums are stored as integers internally, making them more storage-efficient than strings while maintaining readability in queries. + + + +Adding new enum values is safe, but removing or renaming values requires careful migration planning as it affects existing data. + + diff --git a/apps/framework-docs-v2/content/moosestack/data-types/floats.mdx b/apps/framework-docs-v2/content/moosestack/data-types/floats.mdx new file mode 100644 index 0000000000..cc95eaf870 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/data-types/floats.mdx @@ -0,0 +1,80 @@ +--- +title: Floating Point Types +description: Float32 and Float64 column types in Moose +order: 4 +category: reference +--- + +import { LanguageTabs, LanguageTabContent, Callout } from "@/components/mdx"; + +# Floating Point Types + +ClickHouse supports IEEE 754 floating-point numbers in 32-bit and 64-bit precision. + +## Float64 (Default) + +64-bit double-precision floating point. This is the default for `number` in TypeScript and `float` in Python. + + + +```typescript +import { Float64 } from "@514labs/moose-lib"; + +interface SensorData { + temperature: Float64; // Float64 (explicit) + humidity: number; // Float64 (default for number) +} +``` + + +```python +class SensorData(BaseModel): + temperature: float # Float64 + humidity: float # Float64 +``` + + + +## Float32 + +32-bit single-precision floating point. Use when storage is critical and reduced precision is acceptable. 
+ + + +```typescript +import { Float32 } from "@514labs/moose-lib"; +import { tags } from "typia"; + +interface Coordinates { + latitude: Float32; // Float32 (helper) + longitude: number & tags.Type<"float">; // Float32 (typia tag) +} +``` + + +```python +from typing import Annotated +from moose_lib import ClickhouseSize + +class Coordinates(BaseModel): + latitude: Annotated[float, ClickhouseSize(4)] # Float32 + longitude: Annotated[float, ClickhouseSize(4)] # Float32 +``` + + + +## Type Mapping Reference + +| ClickHouse Type | TypeScript Helper | TypeScript Verbose | Python | +|-----------------|-------------------|-------------------|--------| +| `Float32` | `Float32` | `number & tags.Type<"float">` | `Annotated[float, ClickhouseSize(4)]` | +| `Float64` | `Float64` or `number` | `number` | `float` | + + +Prefer `Float64` for most calculations—it's the default and provides better precision. Only use `Float32` when storage savings are critical and you can tolerate ~7 digits of precision instead of ~15. + + + +Floating-point numbers have inherent precision limitations. For financial calculations or when exact decimal representation is required, use [Decimal types](/moosestack/data-types/decimals) instead. + + diff --git a/apps/framework-docs-v2/content/moosestack/data-types/geometry.mdx b/apps/framework-docs-v2/content/moosestack/data-types/geometry.mdx new file mode 100644 index 0000000000..71971a2e6d --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/data-types/geometry.mdx @@ -0,0 +1,172 @@ +--- +title: Geometry Types +description: Point, Ring, Polygon, and other geometry column types in Moose +order: 9 +category: reference +--- + +import { LanguageTabs, LanguageTabContent, Callout } from "@/components/mdx"; + +# Geometry Types + +ClickHouse provides geometry types for spatial data. Moose exposes type-safe helpers that map to the correct ClickHouse representations. + +## Point + +A single coordinate pair `[x, y]`. + + + +```typescript +import { ClickHousePoint } from "@514labs/moose-lib"; + +interface Location { + coordinates: ClickHousePoint; // Point → [number, number] +} +``` + + +```python +from moose_lib import Point + +class Location(BaseModel): + coordinates: Point # tuple[float, float] +``` + + + +## Ring + +A closed sequence of points forming a boundary (first and last points are the same). + + + +```typescript +import { ClickHouseRing } from "@514labs/moose-lib"; + +interface Boundary { + outline: ClickHouseRing; // Ring → Array<[number, number]> +} +``` + + +```python +from moose_lib import Ring + +class Boundary(BaseModel): + outline: Ring # list[tuple[float, float]] +``` + + + +## LineString + +A sequence of connected points forming a line. + + + +```typescript +import { ClickHouseLineString } from "@514labs/moose-lib"; + +interface Route { + path: ClickHouseLineString; // LineString → Array<[number, number]> +} +``` + + +```python +from moose_lib import LineString + +class Route(BaseModel): + path: LineString # list[tuple[float, float]] +``` + + + +## MultiLineString + +Multiple line strings. + + + +```typescript +import { ClickHouseMultiLineString } from "@514labs/moose-lib"; + +interface Network { + roads: ClickHouseMultiLineString; // Array> +} +``` + + +```python +from moose_lib import MultiLineString + +class Network(BaseModel): + roads: MultiLineString # list[list[tuple[float, float]]] +``` + + + +## Polygon + +A shape defined by an outer ring and optional inner rings (holes). 
+ + + +```typescript +import { ClickHousePolygon } from "@514labs/moose-lib"; + +interface Zone { + area: ClickHousePolygon; // Polygon → Array> +} +``` + + +```python +from moose_lib import Polygon + +class Zone(BaseModel): + area: Polygon # list[list[tuple[float, float]]] +``` + + + +## MultiPolygon + +Multiple polygons. + + + +```typescript +import { ClickHouseMultiPolygon } from "@514labs/moose-lib"; + +interface Region { + territories: ClickHouseMultiPolygon; // Array>> +} +``` + + +```python +from moose_lib import MultiPolygon + +class Region(BaseModel): + territories: MultiPolygon # list[list[list[tuple[float, float]]]] +``` + + + +## Type Mapping Reference + +| ClickHouse Type | TypeScript | Python | +|-----------------|------------|--------| +| `Point` | `ClickHousePoint` | `Point` (tuple[float, float]) | +| `Ring` | `ClickHouseRing` | `Ring` (list[tuple[float, float]]) | +| `LineString` | `ClickHouseLineString` | `LineString` (list[tuple[float, float]]) | +| `MultiLineString` | `ClickHouseMultiLineString` | `MultiLineString` (list[list[...]]) | +| `Polygon` | `ClickHousePolygon` | `Polygon` (list[list[...]]) | +| `MultiPolygon` | `ClickHouseMultiPolygon` | `MultiPolygon` (list[list[list[...]]]) | + + +Coordinates are represented as `[x, y]` pairs. For geographic data, this typically means `[longitude, latitude]`. + + diff --git a/apps/framework-docs-v2/content/moosestack/data-types/index.mdx b/apps/framework-docs-v2/content/moosestack/data-types/index.mdx new file mode 100644 index 0000000000..1a9167512f --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/data-types/index.mdx @@ -0,0 +1,58 @@ +--- +title: Data Types +description: Reference for ClickHouse column types supported in Moose +order: 1 +category: reference +--- + +import { Callout } from "@/components/mdx"; + +# Data Types + +Moose supports a comprehensive set of ClickHouse column types across both TypeScript and Python. This section covers all supported types, their syntax, and how they map to ClickHouse. + +## Type Categories + +### Basic Types +- [**Strings**](/moosestack/data-types/strings) — `String`, `LowCardinality(String)`, `UUID` +- [**Integers**](/moosestack/data-types/integers) — `Int8` to `Int64`, `UInt8` to `UInt64` +- [**Floats**](/moosestack/data-types/floats) — `Float32`, `Float64` +- [**Decimals**](/moosestack/data-types/decimals) — Fixed-point `Decimal(P,S)` +- [**Booleans**](/moosestack/data-types/booleans) — `Boolean` +- [**Date & Time**](/moosestack/data-types/datetime) — `Date`, `DateTime`, `DateTime64` +- [**Network**](/moosestack/data-types/network) — `IPv4`, `IPv6` + +### Complex Types +- [**Arrays**](/moosestack/data-types/arrays) — `Array(T)` +- [**Maps**](/moosestack/data-types/maps) — `Map(K, V)` +- [**Nested**](/moosestack/data-types/nested) — Embedded objects +- [**Tuples**](/moosestack/data-types/tuples) — Named tuples +- [**Enums**](/moosestack/data-types/enums) — Enumerated values +- [**Geometry**](/moosestack/data-types/geometry) — `Point`, `Ring`, `Polygon`, etc. + +### Special Types +- [**JSON**](/moosestack/data-types/json) — Dynamic and typed JSON columns +- [**Nullable**](/moosestack/data-types/nullable) — Optional fields +- [**Aggregates**](/moosestack/data-types/aggregates) — `SimpleAggregateFunction` for pre-aggregation + +## Quick Reference + +| Category | TypeScript | Python | +|----------|------------|--------| +| String | `string` | `str` | +| Integer | `Int32`, `UInt64`, etc. 
| `Annotated[int, "int32"]` | +| Float | `Float32`, `Float64`, `number` | `float` | +| Boolean | `boolean` | `bool` | +| Date | `Date`, `DateTime` | `date`, `datetime` | +| Array | `T[]` | `List[T]` | +| Map | `Record` | `Dict[K, V]` | +| Optional | `field?: T` | `Optional[T]` | + +## Best Practices + +- **Use specific integer types** when you know the value ranges to save storage +- **Prefer `Float64`** for most floating-point calculations unless storage is critical +- **Use `LowCardinality`** for string columns with repeated values (< 10,000 unique) +- **Choose appropriate DateTime precision** based on your accuracy needs +- **Order columns by cardinality** (low to high) for better compression + diff --git a/apps/framework-docs-v2/content/moosestack/data-types/integers.mdx b/apps/framework-docs-v2/content/moosestack/data-types/integers.mdx new file mode 100644 index 0000000000..19b7cafe31 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/data-types/integers.mdx @@ -0,0 +1,97 @@ +--- +title: Integer Types +description: Signed and unsigned integer column types in Moose +order: 3 +category: reference +--- + +import { LanguageTabs, LanguageTabContent, Callout } from "@/components/mdx"; + +# Integer Types + +ClickHouse provides signed (`Int`) and unsigned (`UInt`) integers in various sizes. Choose the smallest type that fits your data range for optimal storage efficiency. + +## Signed Integers + + + +```typescript +import { Int8, Int16, Int32, Int64 } from "@514labs/moose-lib"; + +interface Metrics { + small_value: Int8; // Int8: -128 to 127 + medium_value: Int16; // Int16: -32,768 to 32,767 + user_id: Int32; // Int32: -2.1B to 2.1B + big_value: Int64; // Int64: -9.2E18 to 9.2E18 +} +``` + +You can also use the verbose syntax: + +```typescript +import { ClickHouseInt } from "@514labs/moose-lib"; + +interface MetricsVerbose { + user_id: number & ClickHouseInt<"int32">; + big_value: number & ClickHouseInt<"int64">; +} +``` + + +```python +from typing import Annotated + +class Metrics(BaseModel): + small_value: Annotated[int, "int8"] # Int8 + medium_value: Annotated[int, "int16"] # Int16 + user_id: Annotated[int, "int32"] # Int32 + big_value: Annotated[int, "int64"] # Int64 +``` + + + +## Unsigned Integers + + + +```typescript +import { UInt8, UInt16, UInt32, UInt64 } from "@514labs/moose-lib"; + +interface Counters { + flags: UInt8; // UInt8: 0 to 255 + port: UInt16; // UInt16: 0 to 65,535 + count: UInt32; // UInt32: 0 to 4.2B + total: UInt64; // UInt64: 0 to 18.4E18 +} +``` + + +```python +from typing import Annotated + +class Counters(BaseModel): + flags: Annotated[int, "uint8"] # UInt8 + port: Annotated[int, "uint16"] # UInt16 + count: Annotated[int, "uint32"] # UInt32 + total: Annotated[int, "uint64"] # UInt64 +``` + + + +## Type Mapping Reference + +| ClickHouse Type | TypeScript Helper | TypeScript Verbose | Python | Range | +|-----------------|-------------------|-------------------|--------|-------| +| `Int8` | `Int8` | `number & ClickHouseInt<"int8">` | `Annotated[int, "int8"]` | -128 to 127 | +| `Int16` | `Int16` | `number & ClickHouseInt<"int16">` | `Annotated[int, "int16"]` | -32,768 to 32,767 | +| `Int32` | `Int32` | `number & ClickHouseInt<"int32">` | `Annotated[int, "int32"]` | -2.1B to 2.1B | +| `Int64` | `Int64` | `number & ClickHouseInt<"int64">` | `Annotated[int, "int64"]` | ±9.2×10¹⁸ | +| `UInt8` | `UInt8` | `number & ClickHouseInt<"uint8">` | `Annotated[int, "uint8"]` | 0 to 255 | +| `UInt16` | `UInt16` | `number & ClickHouseInt<"uint16">` | `Annotated[int, 
"uint16"]` | 0 to 65,535 | +| `UInt32` | `UInt32` | `number & ClickHouseInt<"uint32">` | `Annotated[int, "uint32"]` | 0 to 4.2B | +| `UInt64` | `UInt64` | `number & ClickHouseInt<"uint64">` | `Annotated[int, "uint64"]` | 0 to 18.4×10¹⁸ | + + +Use the smallest integer type that fits your data. For example, use `UInt8` for values 0-255 (like status codes), `UInt16` for ports, and `Int32` for most IDs. This reduces storage and improves cache efficiency. + + diff --git a/apps/framework-docs-v2/content/moosestack/data-types/json.mdx b/apps/framework-docs-v2/content/moosestack/data-types/json.mdx new file mode 100644 index 0000000000..a5c5470c54 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/data-types/json.mdx @@ -0,0 +1,197 @@ +--- +title: JSON Type +description: Dynamic and typed JSON column types in Moose +order: 15 +category: reference +--- + +import { LanguageTabs, LanguageTabContent, Callout } from "@/components/mdx"; + +# JSON Type + +The JSON type stores arbitrary JSON data, supporting both unstructured and typed configurations for performance and type safety. + +## Basic JSON (Unstructured) + +For completely dynamic JSON data without a fixed schema: + + + +```typescript +interface Event { + metadata: Record; // Basic JSON - accepts any structure + config: any; // Basic JSON - fully dynamic +} +``` + + +```python +from typing import Any, Dict + +class Event(BaseModel): + metadata: Dict[str, Any] # Basic JSON - accepts any structure + config: Any # Basic JSON - fully dynamic +``` + + + +## Typed JSON with ClickHouseJson + +For better performance and validation, define typed fields within your JSON using `ClickHouseJson`. This creates a ClickHouse `JSON` column with explicit type hints for specific paths while still allowing additional dynamic fields. 
+ + + +```typescript +import { ClickHouseJson } from "@514labs/moose-lib"; + +// Define the structure for your JSON payload +interface PayloadStructure { + name: string; + count: number; + timestamp?: Date; +} + +interface Event { + id: string; + // JSON with typed paths - better performance, allows extra fields + payload: PayloadStructure & ClickHouseJson; +} +``` + + +```python +from typing import Annotated, Optional +from pydantic import BaseModel, ConfigDict +from moose_lib.data_models import ClickHouseJson +from datetime import datetime + +# Define the structure for your JSON payload +class PayloadStructure(BaseModel): + model_config = ConfigDict(extra='allow') # Required for JSON types + name: str + count: int + timestamp: Optional[datetime] = None + +class Event(BaseModel): + id: str + # JSON with typed paths - better performance, allows extra fields + payload: Annotated[PayloadStructure, ClickHouseJson()] +``` + + + +## Performance Tuning Options + +Configure `ClickHouseJson` with options to control resource usage: + + + +```typescript +import { ClickHouseJson, Key, DateTime } from "@514labs/moose-lib"; + +interface ProductProperties { + category: string; + price: number; + inStock: boolean; +} + +interface ProductEvent { + eventId: Key; + timestamp: DateTime; + properties: ProductProperties & ClickHouseJson< + 128, // max_dynamic_paths: limit tracked paths + 8, // max_dynamic_types: limit type variations + ["_internal"], // skip_paths: exclude specific paths + ["^debug_"] // skip_regexps: exclude paths matching regex + >; +} +``` + + +```python +from typing import Annotated, Optional +from pydantic import BaseModel, ConfigDict +from moose_lib import Key, ClickHouseJson +from datetime import datetime + +class ProductProperties(BaseModel): + model_config = ConfigDict(extra='allow') + category: str + price: float + in_stock: bool + +class ProductEvent(BaseModel): + event_id: Key[str] + timestamp: datetime + properties: Annotated[ProductProperties, ClickHouseJson( + max_dynamic_paths=128, # Limit tracked paths + max_dynamic_types=8, # Limit type variations + skip_paths=("_internal",), # Exclude specific paths + skip_regexes=(r"^debug_",) # Exclude paths matching regex + )] +``` + + + +## Configuration Options + +| Option | Type | Description | +|--------|------|-------------| +| `max_dynamic_paths` | `number` | Maximum unique JSON paths to track. Controls memory for variable structures. | +| `max_dynamic_types` | `number` | Maximum type variations per path. Useful when paths may contain different types. | +| `skip_paths` | `string[]` | Exact JSON paths to ignore (e.g., `["temp", "debug.info"]`). | +| `skip_regexps` | `string[]` | Regex patterns for paths to exclude (e.g., `["^tmp\\.", ".*_internal$"]`). 
| + +## Example: Flexible Event Data + +With typed JSON, you get the best of both worlds—type safety for known fields and flexibility for dynamic data: + + + +```typescript +// This data is valid: +{ + "eventId": "evt_123", + "timestamp": "2025-10-22T12:00:00Z", + "properties": { + "category": "electronics", // Typed field ✓ + "price": 99.99, // Typed field ✓ + "inStock": true, // Typed field ✓ + "customTag": "holiday-sale", // Extra field - accepted ✓ + "brandId": 42, // Extra field - accepted ✓ + "_internal": "ignored" // Skipped by skip_paths ✓ + } +} +``` + + +```python +# This data is valid: +{ + "event_id": "evt_123", + "timestamp": "2025-10-22T12:00:00Z", + "properties": { + "category": "electronics", # Typed field ✓ + "price": 99.99, # Typed field ✓ + "in_stock": True, # Typed field ✓ + "custom_tag": "holiday-sale", # Extra field - accepted ✓ + "brand_id": 42, # Extra field - accepted ✓ + "_internal": "ignored" # Skipped by skip_paths ✓ + } +} +``` + + + +## Benefits of Typed JSON + +- **Better Performance**: ClickHouse optimizes storage and queries for known paths +- **Type Safety**: Validates that specified paths match expected types +- **Flexible Schema**: Allows additional fields beyond typed paths +- **Memory Control**: Configure limits to prevent unbounded resource usage + + +- **Basic JSON** (`any`, `Dict[str, Any]`): Use when JSON structure is completely unknown or rarely queried +- **Typed JSON** (`ClickHouseJson`): Use when you have known fields that need indexing/querying, but want to allow additional dynamic fields + + diff --git a/apps/framework-docs-v2/content/moosestack/data-types/maps.mdx b/apps/framework-docs-v2/content/moosestack/data-types/maps.mdx new file mode 100644 index 0000000000..12ea746c10 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/data-types/maps.mdx @@ -0,0 +1,54 @@ +--- +title: Map Types +description: Map column types for key-value pairs in Moose +order: 11 +category: reference +--- + +import { LanguageTabs, LanguageTabContent, Callout } from "@/components/mdx"; + +# Map Types + +Maps store key-value pairs with specified key and value types. + +## Usage + + + +```typescript +interface User { + preferences: Record; // Map(String, String) + metrics: Record; // Map(String, Float64) + settings: Record; // Map(String, Boolean) +} +``` + + +```python +from typing import Dict + +class User(BaseModel): + preferences: Dict[str, str] # Map(String, String) + metrics: Dict[str, float] # Map(String, Float64) + settings: Dict[str, bool] # Map(String, Boolean) +``` + + + +## Type Mapping Reference + +| ClickHouse Type | TypeScript | Python | +|-----------------|------------|--------| +| `Map(String, String)` | `Record` | `Dict[str, str]` | +| `Map(String, Float64)` | `Record` | `Dict[str, float]` | +| `Map(String, Int64)` | `Record` | `Dict[str, int]` | +| `Map(String, Boolean)` | `Record` | `Dict[str, bool]` | + + +Map keys must be `String`, `Int*`, or `UInt*` types. String keys are most common. + + + +Use `Map` when all values have the same type and you need efficient key lookups. Use [JSON](/moosestack/data-types/json) when values have mixed types or the structure is highly dynamic. 
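+
+## Example: Key Lookups
+
+Map columns support direct key access in SQL, which is what makes them a good fit for lookups by key. A minimal sketch, assuming a hypothetical `users` table backed by the `User` model above and a generic ClickHouse client named `client` (both names are placeholders, not part of the Moose API):
+
+```typescript
+// Hypothetical table and client names; adjust to your project.
+// `preferences['theme']` reads a single key, mapContains() checks key presence.
+const themes = await client.query(`
+  SELECT
+    preferences['theme'] AS theme,
+    count() AS user_count
+  FROM users
+  WHERE mapContains(preferences, 'theme')
+  GROUP BY theme
+`);
+```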
+ + diff --git a/apps/framework-docs-v2/content/moosestack/data-types/nested.mdx b/apps/framework-docs-v2/content/moosestack/data-types/nested.mdx new file mode 100644 index 0000000000..bef9c2f982 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/data-types/nested.mdx @@ -0,0 +1,96 @@ +--- +title: Nested Types +description: Nested object types for embedded structures in Moose +order: 12 +category: reference +--- + +import { LanguageTabs, LanguageTabContent, Callout } from "@/components/mdx"; + +# Nested Types + +Nested types allow embedding complex objects within table rows. + +## Usage + +Define a separate model and use it as a field type. + + + +```typescript +interface Address { + street: string; + city: string; + zip: string; + country: string; +} + +interface User { + name: string; + email: string; + address: Address; // Nested type +} +``` + + +```python +class Address(BaseModel): + street: str + city: str + zip: str + country: str + +class User(BaseModel): + name: str + email: str + address: Address # Nested type +``` + + + +## Multiple Levels + +Nested types can contain other nested types. + + + +```typescript +interface Coordinates { + lat: number; + lng: number; +} + +interface Address { + street: string; + city: string; + location: Coordinates; // Nested within nested +} + +interface User { + name: string; + address: Address; +} +``` + + +```python +class Coordinates(BaseModel): + lat: float + lng: float + +class Address(BaseModel): + street: str + city: str + location: Coordinates # Nested within nested + +class User(BaseModel): + name: str + address: Address +``` + + + + +Use nested types when the structure is known and fixed. Use [JSON](/moosestack/data-types/json) for dynamic or variable structures. + + diff --git a/apps/framework-docs-v2/content/moosestack/data-types/network.mdx b/apps/framework-docs-v2/content/moosestack/data-types/network.mdx new file mode 100644 index 0000000000..ada1ca3d5d --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/data-types/network.mdx @@ -0,0 +1,74 @@ +--- +title: Network Types +description: IPv4 and IPv6 column types in Moose +order: 8 +category: reference +--- + +import { LanguageTabs, LanguageTabContent, Callout } from "@/components/mdx"; + +# Network Types + +ClickHouse provides native IP address types for efficient storage and querying. + +## IPv4 + +Stores IPv4 addresses in 4 bytes. + + + +```typescript +import { tags } from "typia"; + +interface NetworkEvent { + source_ip: string & tags.Format<"ipv4">; // IPv4 + client_ip: string & tags.Format<"ipv4">; // IPv4 +} +``` + + +```python +from ipaddress import IPv4Address + +class NetworkEvent(BaseModel): + source_ip: IPv4Address # IPv4 + client_ip: IPv4Address # IPv4 +``` + + + +## IPv6 + +Stores IPv6 addresses in 16 bytes. Also handles IPv4-mapped IPv6 addresses. + + + +```typescript +import { tags } from "typia"; + +interface NetworkEvent { + dest_ip: string & tags.Format<"ipv6">; // IPv6 +} +``` + + +```python +from ipaddress import IPv6Address + +class NetworkEvent(BaseModel): + dest_ip: IPv6Address # IPv6 +``` + + + +## Type Mapping Reference + +| ClickHouse Type | TypeScript | Python | +|-----------------|------------|--------| +| `IPv4` | `string & tags.Format<"ipv4">` | `IPv4Address` | +| `IPv6` | `string & tags.Format<"ipv6">` | `IPv6Address` | + + +Native IP types are more storage-efficient than strings and enable IP-specific functions like range queries, subnet matching, and CIDR operations directly in ClickHouse. 
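+
+## Example: Subnet Filtering
+
+Because IPv4 values are stored and compared numerically, a CIDR block can be expressed as a simple range filter. A rough sketch, assuming a hypothetical `network_events` table backed by the `NetworkEvent` model above and a generic ClickHouse client named `client` (both are placeholder names):
+
+```typescript
+// Hypothetical table and client names; adjust to your project.
+// toIPv4() parses a literal address, and IPv4 columns compare as numbers,
+// so BETWEEN covers the 10.0.0.0/16 block.
+const inSubnet = await client.query(`
+  SELECT count() AS hits
+  FROM network_events
+  WHERE source_ip BETWEEN toIPv4('10.0.0.0') AND toIPv4('10.0.255.255')
+`);
+```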
+ + diff --git a/apps/framework-docs-v2/content/moosestack/data-types/nullable.mdx b/apps/framework-docs-v2/content/moosestack/data-types/nullable.mdx new file mode 100644 index 0000000000..312740dd3c --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/data-types/nullable.mdx @@ -0,0 +1,98 @@ +--- +title: Nullable Types +description: Optional and nullable column types in Moose +order: 16 +category: reference +--- + +import { LanguageTabs, LanguageTabContent, Callout } from "@/components/mdx"; + +# Nullable Types + +All types support nullable variants using optional syntax. Nullable columns can store `NULL` values. + +## Basic Optional Fields + + + +```typescript +interface User { + name: string; // Required - NOT NULL + email?: string; // Nullable(String) + age?: number; // Nullable(Float64) + verified?: boolean; // Nullable(Boolean) +} +``` + + +```python +from typing import Optional + +class User(BaseModel): + name: str # Required - NOT NULL + email: Optional[str] = None # Nullable(String) + age: Optional[int] = None # Nullable(Int64) + verified: Optional[bool] = None # Nullable(Boolean) +``` + + + +## Optional with ClickHouse Defaults + +If a field is optional but you provide a ClickHouse default, Moose creates a non-nullable column with a `DEFAULT` clause instead of a `Nullable` column. + + + +```typescript +import { ClickHouseDefault, WithDefault } from "@514labs/moose-lib"; + +interface User { + name: string; + + // Optional without default → Nullable(Int64) + age?: number; + + // Optional with default → Int64 DEFAULT 18 (non-nullable) + minAge?: number & ClickHouseDefault<"18">; + + // Alternative helper syntax + status?: WithDefault; +} +``` + + +```python +from typing import Optional, Annotated +from moose_lib import clickhouse_default + +class User(BaseModel): + name: str + + # Optional without default → Nullable(Int64) + age: Optional[int] = None + + # Optional with default → Int64 DEFAULT 18 (non-nullable) + min_age: Annotated[Optional[int], clickhouse_default("18")] = None + + # String with default + status: Annotated[Optional[str], clickhouse_default("'active'")] = None +``` + + + +## Type Mapping Reference + +| Pattern | TypeScript | Python | ClickHouse Result | +|---------|------------|--------|-------------------| +| Required | `field: T` | `field: T` | `T NOT NULL` | +| Optional | `field?: T` | `Optional[T] = None` | `Nullable(T)` | +| Optional + Default | `field?: T & ClickHouseDefault<"val">` | `Annotated[Optional[T], clickhouse_default("val")]` | `T DEFAULT val` | + + +Nullable columns have a small storage overhead (1 extra byte per row for the null bitmap). When possible, use defaults instead of nulls for frequently-queried columns. + + + +ClickHouse defaults are SQL expressions. String defaults must include quotes: `"'active'"` not `"active"`. Numeric defaults are bare: `"18"` not `"'18'"`. + + diff --git a/apps/framework-docs-v2/content/moosestack/data-types/strings.mdx b/apps/framework-docs-v2/content/moosestack/data-types/strings.mdx new file mode 100644 index 0000000000..61b4093fc6 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/data-types/strings.mdx @@ -0,0 +1,97 @@ +--- +title: String Types +description: String, LowCardinality, and UUID column types in Moose +order: 2 +category: reference +--- + +import { LanguageTabs, LanguageTabContent, Callout } from "@/components/mdx"; + +# String Types + +ClickHouse provides several string-based types optimized for different use cases. + +## String + +Variable-length strings for general text data. 
+ + + +```typescript +interface User { + name: string; // String + email: string; // String +} +``` + + +```python +class User(BaseModel): + name: str # String + email: str # String +``` + + + +## LowCardinality(String) + +Optimized storage for strings with many repeated values. Use when you have fewer than ~10,000 unique values. + + + +```typescript +import { LowCardinality } from "@514labs/moose-lib"; + +interface Event { + status: string & LowCardinality; // LowCardinality(String) + country: string & LowCardinality; // LowCardinality(String) +} +``` + + +```python +from typing import Annotated + +class Event(BaseModel): + status: Annotated[str, "LowCardinality"] # LowCardinality(String) + country: Annotated[str, "LowCardinality"] # LowCardinality(String) +``` + + + + +Use `LowCardinality` for columns like status codes, country codes, categories, or any string field with a limited set of repeated values. It significantly reduces storage and improves query performance. + + +## UUID + +Universally unique identifiers stored in native UUID format. + + + +```typescript +import { tags } from "typia"; + +interface User { + id: string & tags.Format<"uuid">; // UUID +} +``` + + +```python +from uuid import UUID + +class User(BaseModel): + id: UUID # UUID +``` + + + +## Type Mapping Reference + +| ClickHouse Type | TypeScript | Python | +|-----------------|------------|--------| +| `String` | `string` | `str` | +| `LowCardinality(String)` | `string & LowCardinality` | `Annotated[str, "LowCardinality"]` | +| `UUID` | `string & tags.Format<"uuid">` | `UUID` | + diff --git a/apps/framework-docs-v2/content/moosestack/data-types/tuples.mdx b/apps/framework-docs-v2/content/moosestack/data-types/tuples.mdx new file mode 100644 index 0000000000..62af333b4e --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/data-types/tuples.mdx @@ -0,0 +1,81 @@ +--- +title: Named Tuple Types +description: Named tuple column types for structured data in Moose +order: 13 +category: reference +--- + +import { LanguageTabs, LanguageTabContent, Callout } from "@/components/mdx"; + +# Named Tuple Types + +Named tuples provide structured data with named fields, similar to lightweight nested objects. + +## Usage + + + +```typescript +import { ClickHouseNamedTuple } from "@514labs/moose-lib"; + +interface Point { + x: number; + y: number; +} + +interface Shape { + center: Point & ClickHouseNamedTuple; // Named tuple + radius: number; +} +``` + + +```python +from typing import Annotated + +class Point(BaseModel): + x: float + y: float + +class Shape(BaseModel): + center: Annotated[Point, "ClickHouseNamedTuple"] # Named tuple + radius: float +``` + + + +## Arrays of Tuples + + + +```typescript +import { ClickHouseNamedTuple } from "@514labs/moose-lib"; + +interface Coordinate { + x: number; + y: number; +} + +interface Path { + points: (Coordinate & ClickHouseNamedTuple)[]; // Array(Tuple(x Float64, y Float64)) +} +``` + + +```python +from typing import List, Annotated + +class Coordinate(BaseModel): + x: float + y: float + +class Path(BaseModel): + points: List[Annotated[Coordinate, "ClickHouseNamedTuple"]] +``` + + + + +Named tuples are more lightweight than nested types and are stored inline. Use tuples for simple structures with a few fields; use [nested types](/moosestack/data-types/nested) for more complex hierarchies. 
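+
+## Example: Reading Tuple Fields
+
+Named tuple fields can be read individually in SQL with `tupleElement`. A minimal sketch, assuming a hypothetical `shapes` table backed by the `Shape` model above and a generic ClickHouse client named `client` (both are placeholder names):
+
+```typescript
+// Hypothetical table and client names; adjust to your project.
+// tupleElement(column, 'field') extracts one field of a named tuple.
+const centers = await client.query(`
+  SELECT
+    tupleElement(center, 'x') AS center_x,
+    tupleElement(center, 'y') AS center_y,
+    radius
+  FROM shapes
+`);
+```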
+ + diff --git a/apps/framework-docs-v2/content/moosestack/engines/aggregating-merge-tree.mdx b/apps/framework-docs-v2/content/moosestack/engines/aggregating-merge-tree.mdx new file mode 100644 index 0000000000..af273a364c --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/engines/aggregating-merge-tree.mdx @@ -0,0 +1,211 @@ +--- +title: AggregatingMergeTree +description: ClickHouse table engine for pre-aggregated rollups +order: 4 +category: reference +--- + +import { LanguageTabs, LanguageTabContent, Callout } from "@/components/mdx"; + +# AggregatingMergeTree + +`AggregatingMergeTree` stores pre-aggregated states that are automatically merged during background compaction. Use with `SimpleAggregateFunction` columns for real-time rollups. + +## When to Use + +- Real-time dashboards with high-cardinality dimensions +- Pre-computed metrics (daily totals, running counts) +- Reducing query-time computation on large datasets +- Time-series aggregations at multiple granularities + +## Usage + + + +```typescript +import { + OlapTable, + ClickHouseEngines, + SimpleAggregated, + DateTime +} from "@514labs/moose-lib"; + +interface DailyStats { + date: DateTime; + user_id: string; + total_views: number & SimpleAggregated<"sum", number>; + max_score: number & SimpleAggregated<"max", number>; + last_activity: DateTime & SimpleAggregated<"anyLast", DateTime>; +} + +const dailyStats = new OlapTable("daily_stats", { + engine: ClickHouseEngines.AggregatingMergeTree, + orderByFields: ["date", "user_id"] +}); +``` + + +```python +from moose_lib import OlapTable, OlapConfig, simple_aggregated +from moose_lib.blocks import AggregatingMergeTreeEngine +from pydantic import BaseModel +from datetime import datetime + +class DailyStats(BaseModel): + date: datetime + user_id: str + total_views: simple_aggregated('sum', int) + max_score: simple_aggregated('max', float) + last_activity: simple_aggregated('anyLast', datetime) + +daily_stats = OlapTable[DailyStats]("daily_stats", OlapConfig( + engine=AggregatingMergeTreeEngine(), + order_by_fields=["date", "user_id"] +)) +``` + + + +## How It Works + +1. **Insert**: Rows are written with initial aggregate values +2. **Background Merge**: Rows with the same ORDER BY key are combined using their aggregate functions +3. 
**Query**: Read pre-computed aggregates directly—no GROUP BY needed for simple queries + +``` +// Two inserts with same (date, user_id): +{ date: "2025-01-01", user_id: "u1", total_views: 10, max_score: 85 } +{ date: "2025-01-01", user_id: "u1", total_views: 15, max_score: 92 } + +// After merge: +{ date: "2025-01-01", user_id: "u1", total_views: 25, max_score: 92 } +``` + +## Supported Aggregate Functions + +| Function | Description | Use Case | +|----------|-------------|----------| +| `sum` | Sum of values | Totals, counts | +| `max` | Maximum value | Peak metrics, high scores | +| `min` | Minimum value | Minimum thresholds | +| `any` | Any single value | Non-deterministic pick | +| `anyLast` | Last inserted value | Latest timestamp, status | + +## Example: Materialized View Rollup + +A common pattern is populating an `AggregatingMergeTree` table from a materialized view: + + + +```typescript +import { + OlapTable, + MaterializedView, + ClickHouseEngines, + SimpleAggregated, + DateTime +} from "@514labs/moose-lib"; + +// Source: raw events +interface PageView { + timestamp: DateTime; + user_id: string; + page: string; + duration_ms: number; +} + +// Target: daily aggregates +interface DailyPageStats { + date: DateTime; + user_id: string; + view_count: number & SimpleAggregated<"sum", number>; + total_duration: number & SimpleAggregated<"sum", number>; + max_duration: number & SimpleAggregated<"max", number>; +} + +const pageViews = new OlapTable("page_views", { + orderByFields: ["timestamp", "user_id"] +}); + +const dailyStats = new OlapTable("daily_page_stats", { + engine: ClickHouseEngines.AggregatingMergeTree, + orderByFields: ["date", "user_id"] +}); + +const rollupView = new MaterializedView({ + selectExpression: ` + toDate(timestamp) as date, + user_id, + count() as view_count, + sum(duration_ms) as total_duration, + max(duration_ms) as max_duration + `, + groupByExpression: "date, user_id", + sourceTable: pageViews, + destinationTable: dailyStats +}); +``` + + +```python +from moose_lib import ( + OlapTable, + OlapConfig, + MaterializedView, + MaterializedViewConfig, + simple_aggregated +) +from moose_lib.blocks import AggregatingMergeTreeEngine + +# Source: raw events +class PageView(BaseModel): + timestamp: datetime + user_id: str + page: str + duration_ms: int + +# Target: daily aggregates +class DailyPageStats(BaseModel): + date: datetime + user_id: str + view_count: simple_aggregated('sum', int) + total_duration: simple_aggregated('sum', int) + max_duration: simple_aggregated('max', int) + +page_views = OlapTable[PageView]("page_views", OlapConfig( + order_by_fields=["timestamp", "user_id"] +)) + +daily_stats = OlapTable[DailyPageStats]("daily_page_stats", OlapConfig( + engine=AggregatingMergeTreeEngine(), + order_by_fields=["date", "user_id"] +)) + +rollup_view = MaterializedView[PageView, DailyPageStats]( + MaterializedViewConfig( + select_expression=""" + toDate(timestamp) as date, + user_id, + count() as view_count, + sum(duration_ms) as total_duration, + max(duration_ms) as max_duration + """, + group_by_expression="date, user_id", + source_table=page_views, + destination_table=daily_stats + ) +) +``` + + + + +Pre-aggregating with AggregatingMergeTree can reduce query latency by orders of magnitude for dashboard queries, since aggregation happens at insert time rather than query time. 
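+
+## Querying the Rollup
+
+Background merges are asynchronous, so a query can briefly see several partially merged rows for the same ORDER BY key. Re-applying each column's aggregate function at read time always returns the fully combined result. A minimal sketch against the `daily_stats` table defined above, assuming a generic ClickHouse client named `client` (a placeholder, not part of the Moose API):
+
+```typescript
+// Hypothetical client name; use whatever ClickHouse client your app already has.
+// Re-apply sum()/max() so any not-yet-merged partial rows combine correctly.
+const result = await client.query(`
+  SELECT
+    date,
+    user_id,
+    sum(total_views) AS total_views,
+    max(max_score)   AS max_score
+  FROM daily_stats
+  GROUP BY date, user_id
+`);
+```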
+ + +## See Also + +- [Aggregate Types](/moosestack/data-types/aggregates) — `SimpleAggregateFunction` column types +- [Materialized Views](/moosestack/olap/model-materialized-view) — Populating aggregate tables +- [SummingMergeTree](/moosestack/engines/summing-merge-tree) — Simpler option for sum-only aggregations + diff --git a/apps/framework-docs-v2/content/moosestack/engines/index.mdx b/apps/framework-docs-v2/content/moosestack/engines/index.mdx new file mode 100644 index 0000000000..bca3be21de --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/engines/index.mdx @@ -0,0 +1,96 @@ +--- +title: Table Engines +description: Reference for ClickHouse table engines supported in Moose +order: 1 +category: reference +--- + +import { Callout } from "@/components/mdx"; + +# Table Engines + +ClickHouse table engines determine how data is stored, indexed, queried, and replicated. Moose supports the MergeTree family of engines, which are optimized for high-volume analytical workloads. + +## Engine Overview + +| Engine | Use Case | Key Feature | +|--------|----------|-------------| +| [**MergeTree**](/moosestack/engines/merge-tree) | General analytics | Default, fast inserts and queries | +| [**ReplacingMergeTree**](/moosestack/engines/replacing-merge-tree) | Mutable data | Deduplication by primary key | +| [**AggregatingMergeTree**](/moosestack/engines/aggregating-merge-tree) | Pre-aggregation | Real-time rollups with `SimpleAggregateFunction` | +| [**SummingMergeTree**](/moosestack/engines/summing-merge-tree) | Counters/metrics | Auto-sums numeric columns | +| [**Replicated Engines**](/moosestack/engines/replicated) | High availability | Multi-node replication | + +## Choosing an Engine + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Need deduplication? │ +│ │ +│ No ──────────┬────────── Yes │ +│ │ │ +│ ▼ │ +│ MergeTree ReplacingMergeTree │ +│ │ +├─────────────────────────────────────────────────────────────┤ +│ Need pre-aggregation? │ +│ │ +│ Simple sums ────┬──── Complex aggregates │ +│ │ │ +│ ▼ │ +│ SummingMergeTree AggregatingMergeTree │ +│ │ +├─────────────────────────────────────────────────────────────┤ +│ Need replication? │ +│ │ +│ Add "Replicated" prefix to any engine above │ +│ e.g., ReplicatedReplacingMergeTree │ +└─────────────────────────────────────────────────────────────┘ +``` + +## Quick Start + +Most tables should use `MergeTree` (the default) or `ReplacingMergeTree` (for upserts): + +```typescript +import { OlapTable, ClickHouseEngines } from "@514labs/moose-lib"; + +// Default: MergeTree (omit engine parameter) +const events = new OlapTable("events", { + orderByFields: ["timestamp", "id"] +}); + +// Deduplication: ReplacingMergeTree +const users = new OlapTable("users", { + engine: ClickHouseEngines.ReplacingMergeTree, + orderByFields: ["id"], + ver: "updated_at" +}); +``` + + +When you don't specify an engine, Moose uses `MergeTree`—the fastest option for append-only analytical data. 
+ + +## Available Engines + +### Standard Engines +- `MergeTree` — Default, optimized for fast inserts and analytical queries +- `ReplacingMergeTree` — Deduplicates rows by ORDER BY key during background merges +- `AggregatingMergeTree` — Stores pre-aggregated states for real-time rollups +- `SummingMergeTree` — Automatically sums numeric columns during merges + +### Replicated Engines +- `ReplicatedMergeTree` — MergeTree with multi-node replication +- `ReplicatedReplacingMergeTree` — ReplacingMergeTree with replication +- `ReplicatedAggregatingMergeTree` — AggregatingMergeTree with replication +- `ReplicatedSummingMergeTree` — SummingMergeTree with replication + +### Special Engines +- `S3Queue` — Stream data from S3 buckets + +## See Also + +- [Tables](/moosestack/olap/model-table) — Full table configuration reference +- [Aggregate Types](/moosestack/data-types/aggregates) — Using `SimpleAggregateFunction` with AggregatingMergeTree + diff --git a/apps/framework-docs-v2/content/moosestack/engines/merge-tree.mdx b/apps/framework-docs-v2/content/moosestack/engines/merge-tree.mdx new file mode 100644 index 0000000000..a24b5d750d --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/engines/merge-tree.mdx @@ -0,0 +1,125 @@ +--- +title: MergeTree +description: The default ClickHouse table engine for analytical workloads +order: 2 +category: reference +--- + +import { LanguageTabs, LanguageTabContent, Callout } from "@/components/mdx"; + +# MergeTree + +`MergeTree` is the default and most commonly used ClickHouse table engine. It's optimized for high-volume inserts and fast analytical queries on append-only data. + +## When to Use + +- Append-only event data (logs, analytics, time series) +- Data that doesn't need updates or deduplication +- Maximum insert and query performance + +## Usage + + + +```typescript +import { OlapTable, ClickHouseEngines } from "@514labs/moose-lib"; + +interface Event { + id: string; + timestamp: Date; + user_id: string; + action: string; + properties: Record; +} + +// MergeTree is the default—no engine parameter needed +const events = new OlapTable("events", { + orderByFields: ["timestamp", "user_id", "id"] +}); + +// Or explicitly specify +const explicitEvents = new OlapTable("events", { + engine: ClickHouseEngines.MergeTree, + orderByFields: ["timestamp", "user_id", "id"] +}); +``` + + +```python +from moose_lib import OlapTable, OlapConfig +from moose_lib.blocks import MergeTreeEngine +from pydantic import BaseModel +from datetime import datetime +from typing import Dict, Any + +class Event(BaseModel): + id: str + timestamp: datetime + user_id: str + action: str + properties: Dict[str, Any] + +# MergeTree is the default—no engine parameter needed +events = OlapTable[Event]("events", OlapConfig( + order_by_fields=["timestamp", "user_id", "id"] +)) + +# Or explicitly specify +explicit_events = OlapTable[Event]("events", OlapConfig( + engine=MergeTreeEngine(), + order_by_fields=["timestamp", "user_id", "id"] +)) +``` + + + +## How It Works + +1. **Insert**: Data is written to in-memory buffers, then flushed to disk as "parts" +2. **Background Merge**: ClickHouse periodically merges smaller parts into larger ones +3. 
**Query**: Sparse primary index enables efficient filtering by ORDER BY columns + +## Configuration Options + +| Option | Description | +|--------|-------------| +| `orderByFields` | Columns for sorting and primary index (critical for query performance) | +| `partitionBy` | Partition expression (e.g., `toYYYYMM(timestamp)`) | +| `settings` | Engine-specific settings as key-value pairs | + + +The `orderByFields` determines both the sort order and the primary index. Place your most common filter columns first for best query performance. + + +## Example: Time-Series Events + + + +```typescript +const timeSeriesEvents = new OlapTable("events", { + orderByFields: ["timestamp", "user_id"], + partitionBy: "toYYYYMM(timestamp)", + settings: { + "index_granularity": "8192" + } +}); +``` + + +```python +time_series_events = OlapTable[Event]("events", OlapConfig( + order_by_fields=["timestamp", "user_id"], + partition_by="toYYYYMM(timestamp)", + settings={ + "index_granularity": "8192" + } +)) +``` + + + +## See Also + +- [ReplacingMergeTree](/moosestack/engines/replacing-merge-tree) — When you need deduplication +- [Tables](/moosestack/olap/model-table) — Full table configuration reference + diff --git a/apps/framework-docs-v2/content/moosestack/engines/replacing-merge-tree.mdx b/apps/framework-docs-v2/content/moosestack/engines/replacing-merge-tree.mdx new file mode 100644 index 0000000000..a5308038b5 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/engines/replacing-merge-tree.mdx @@ -0,0 +1,183 @@ +--- +title: ReplacingMergeTree +description: ClickHouse table engine with automatic deduplication +order: 3 +category: reference +--- + +import { LanguageTabs, LanguageTabContent, Callout } from "@/components/mdx"; + +# ReplacingMergeTree + +`ReplacingMergeTree` extends MergeTree with automatic deduplication. During background merges, it keeps only one row per unique ORDER BY key—useful for mutable data that receives updates. + +## When to Use + +- Data with updates (user profiles, product catalogs, entity state) +- Upsert patterns where you want the latest version +- CDC (Change Data Capture) pipelines +- Any data that needs deduplication by a unique key + +## Basic Usage + + + +```typescript +import { OlapTable, ClickHouseEngines } from "@514labs/moose-lib"; + +interface User { + id: string; + name: string; + email: string; + updated_at: Date; +} + +const users = new OlapTable("users", { + engine: ClickHouseEngines.ReplacingMergeTree, + orderByFields: ["id"] +}); +``` + + +```python +from moose_lib import OlapTable, OlapConfig +from moose_lib.blocks import ReplacingMergeTreeEngine +from pydantic import BaseModel +from datetime import datetime + +class User(BaseModel): + id: str + name: str + email: str + updated_at: datetime + +users = OlapTable[User]("users", OlapConfig( + engine=ReplacingMergeTreeEngine(), + order_by_fields=["id"] +)) +``` + + + +## Version Column + +Use the `ver` parameter to control which row survives during deduplication. The row with the highest version value is kept: + + + +```typescript +const users = new OlapTable("users", { + engine: ClickHouseEngines.ReplacingMergeTree, + orderByFields: ["id"], + ver: "updated_at" // Keep row with latest updated_at +}); +``` + + +```python +users = OlapTable[User]("users", OlapConfig( + engine=ReplacingMergeTreeEngine(ver="updated_at"), + order_by_fields=["id"] +)) +``` + + + +## Soft Deletes + +Use `isDeleted` (requires `ver`) to mark rows for deletion. 
Rows with `isDeleted=1` are removed during merges: + + + +```typescript +interface User { + id: string; + name: string; + updated_at: Date; + deleted: number; // UInt8: 0 = active, 1 = deleted +} + +const users = new OlapTable("users", { + engine: ClickHouseEngines.ReplacingMergeTree, + orderByFields: ["id"], + ver: "updated_at", + isDeleted: "deleted" +}); +``` + + +```python +class User(BaseModel): + id: str + name: str + updated_at: datetime + deleted: int = 0 # 0 = active, 1 = deleted + +users = OlapTable[User]("users", OlapConfig( + engine=ReplacingMergeTreeEngine( + ver="updated_at", + is_deleted="deleted" + ), + order_by_fields=["id"] +)) +``` + + + +## How It Works + +1. **Insert**: All rows (including duplicates) are written immediately +2. **Background Merge**: ClickHouse periodically merges parts, keeping only the latest row per ORDER BY key +3. **Query**: Before merges complete, queries may see duplicates + + +Deduplication happens during background merges, not at insert time. To get deduplicated results immediately, use `FINAL` in queries or the `final=true` option in Moose reads. + + +## Querying with FINAL + +To force deduplication at query time: + + + +```typescript +// In raw SQL +const result = await client.query("SELECT * FROM users FINAL WHERE id = '123'"); + +// Or use aggregation to get latest +const latest = await client.query(` + SELECT argMax(name, updated_at) as name + FROM users + WHERE id = '123' +`); +``` + + +```python +# In raw SQL +result = client.query("SELECT * FROM users FINAL WHERE id = '123'") + +# Or use aggregation to get latest +latest = client.query(""" + SELECT argMax(name, updated_at) as name + FROM users + WHERE id = '123' +""") +``` + + + +## Configuration Options + +| Option | Type | Description | +|--------|------|-------------| +| `orderByFields` | `string[]` | Columns that define uniqueness (required) | +| `ver` | `string` | Version column—keeps row with highest value | +| `isDeleted` | `string` | Soft delete column (UInt8)—removes rows where value is 1 | + +## See Also + +- [MergeTree](/moosestack/engines/merge-tree) — For append-only data without deduplication +- [AggregatingMergeTree](/moosestack/engines/aggregating-merge-tree) — For pre-aggregated rollups +- [Replicated Engines](/moosestack/engines/replicated) — For high availability + diff --git a/apps/framework-docs-v2/content/moosestack/engines/replicated.mdx b/apps/framework-docs-v2/content/moosestack/engines/replicated.mdx new file mode 100644 index 0000000000..c577ab46b2 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/engines/replicated.mdx @@ -0,0 +1,208 @@ +--- +title: Replicated Engines +description: High-availability table engines with multi-node replication +order: 6 +category: reference +--- + +import { LanguageTabs, LanguageTabContent, Callout } from "@/components/mdx"; + +# Replicated Engines + +Replicated engines provide high availability and data replication across multiple ClickHouse nodes. Moose supports replicated variants of all MergeTree family engines. 
+ +## Available Replicated Engines + +| Engine | Base Engine | Use Case | +|--------|-------------|----------| +| `ReplicatedMergeTree` | MergeTree | General analytics with HA | +| `ReplicatedReplacingMergeTree` | ReplacingMergeTree | Mutable data with HA | +| `ReplicatedAggregatingMergeTree` | AggregatingMergeTree | Pre-aggregation with HA | +| `ReplicatedSummingMergeTree` | SummingMergeTree | Counters with HA | + +## Basic Usage + + + +```typescript +import { OlapTable, ClickHouseEngines } from "@514labs/moose-lib"; + +interface Event { + id: string; + timestamp: Date; + data: string; +} + +// For ClickHouse Cloud—no extra config needed +const events = new OlapTable("events", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["timestamp", "id"] +}); +``` + + +```python +from moose_lib import OlapTable, OlapConfig +from moose_lib.blocks import ReplicatedMergeTreeEngine +from pydantic import BaseModel +from datetime import datetime + +class Event(BaseModel): + id: str + timestamp: datetime + data: str + +# For ClickHouse Cloud—no extra config needed +events = OlapTable[Event]("events", OlapConfig( + engine=ReplicatedMergeTreeEngine(), + order_by_fields=["timestamp", "id"] +)) +``` + + + +## Configuration Approaches + +### Default (Recommended) + +Omit replication parameters. Moose uses smart defaults that work in both ClickHouse Cloud and self-managed environments: + + + +```typescript +const table = new OlapTable("events", { + engine: ClickHouseEngines.ReplicatedMergeTree, + orderByFields: ["id"] + // No keeperPath or replicaName needed +}); +``` + + +```python +table = OlapTable[Event]("events", OlapConfig( + engine=ReplicatedMergeTreeEngine(), # No parameters + order_by_fields=["id"] +)) +``` + + + +Moose auto-injects: `/clickhouse/tables/{database}/{shard}/{table_name}` and `{replica}` in local development. ClickHouse Cloud uses its own patterns automatically. 
+ +### Explicit Paths (Self-Managed) + +For self-managed ClickHouse with custom ZooKeeper/Keeper paths: + + + +```typescript +const table = new OlapTable("events", { + engine: ClickHouseEngines.ReplicatedMergeTree, + keeperPath: "/clickhouse/tables/{database}/{shard}/events", + replicaName: "{replica}", + orderByFields: ["id"] +}); +``` + + +```python +table = OlapTable[Event]("events", OlapConfig( + engine=ReplicatedMergeTreeEngine( + keeper_path="/clickhouse/tables/{database}/{shard}/events", + replica_name="{replica}" + ), + order_by_fields=["id"] +)) +``` + + + +### Cluster Mode + +For multi-node deployments with `ON CLUSTER` DDL: + + + +```typescript +const table = new OlapTable("events", { + engine: ClickHouseEngines.ReplicatedMergeTree, + cluster: "my_cluster", + orderByFields: ["id"] +}); +``` + + +```python +table = OlapTable[Event]("events", OlapConfig( + engine=ReplicatedMergeTreeEngine(), + cluster="my_cluster", + order_by_fields=["id"] +)) +``` + + + +## Replicated ReplacingMergeTree + +Combines deduplication with replication: + + + +```typescript +interface User { + id: string; + name: string; + updated_at: Date; + deleted: number; +} + +const users = new OlapTable("users", { + engine: ClickHouseEngines.ReplicatedReplacingMergeTree, + orderByFields: ["id"], + ver: "updated_at", + isDeleted: "deleted" +}); +``` + + +```python +from moose_lib.blocks import ReplicatedReplacingMergeTreeEngine + +class User(BaseModel): + id: str + name: str + updated_at: datetime + deleted: int = 0 + +users = OlapTable[User]("users", OlapConfig( + engine=ReplicatedReplacingMergeTreeEngine( + ver="updated_at", + is_deleted="deleted" + ), + order_by_fields=["id"] +)) +``` + + + +## Configuration Options + +| Option | Type | Description | +|--------|------|-------------| +| `keeperPath` | `string` | ZooKeeper/Keeper path for coordination (optional) | +| `replicaName` | `string` | Unique replica identifier (optional) | +| `cluster` | `string` | Cluster name for ON CLUSTER DDL | +| `ver` | `string` | Version column (ReplicatedReplacingMergeTree only) | +| `isDeleted` | `string` | Soft delete column (ReplicatedReplacingMergeTree only) | +| `columns` | `string[]` | Columns to sum (ReplicatedSummingMergeTree only) | + + +When using ClickHouse Cloud, replication is handled automatically. Just use the replicated engine variant without any extra configuration. + + +## See Also + +- [MergeTree](/moosestack/engines/merge-tree) — Non-replicated base engine +- [ReplacingMergeTree](/moosestack/engines/replacing-merge-tree) — Deduplication details +- [AggregatingMergeTree](/moosestack/engines/aggregating-merge-tree) — Pre-aggregation details + diff --git a/apps/framework-docs-v2/content/moosestack/engines/summing-merge-tree.mdx b/apps/framework-docs-v2/content/moosestack/engines/summing-merge-tree.mdx new file mode 100644 index 0000000000..3d41af1906 --- /dev/null +++ b/apps/framework-docs-v2/content/moosestack/engines/summing-merge-tree.mdx @@ -0,0 +1,118 @@ +--- +title: SummingMergeTree +description: ClickHouse table engine that automatically sums numeric columns +order: 5 +category: reference +--- + +import { LanguageTabs, LanguageTabContent, Callout } from "@/components/mdx"; + +# SummingMergeTree + +`SummingMergeTree` automatically sums numeric columns when merging rows with the same ORDER BY key. It's a simpler alternative to `AggregatingMergeTree` when you only need sum aggregations. 
+ +## When to Use + +- Simple counters and metrics (page views, clicks, totals) +- Incrementing values by key +- When you only need SUM operations (not MAX, MIN, etc.) + +## Usage + + + +```typescript +import { OlapTable, ClickHouseEngines } from "@514labs/moose-lib"; + +interface DailyMetrics { + date: Date; + page: string; + views: number; + clicks: number; + revenue: number; +} + +const metrics = new OlapTable("daily_metrics", { + engine: ClickHouseEngines.SummingMergeTree, + orderByFields: ["date", "page"] +}); +``` + + +```python +from moose_lib import OlapTable, OlapConfig +from moose_lib.blocks import SummingMergeTreeEngine +from pydantic import BaseModel +from datetime import date + +class DailyMetrics(BaseModel): + date: date + page: str + views: int + clicks: int + revenue: float + +metrics = OlapTable[DailyMetrics]("daily_metrics", OlapConfig( + engine=SummingMergeTreeEngine(), + order_by_fields=["date", "page"] +)) +``` + + + +## How It Works + +1. **Insert**: Rows are written with their numeric values +2. **Background Merge**: Rows with the same ORDER BY key are combined—numeric columns are summed, non-numeric columns take the first value +3. **Query**: Read summed results directly + +``` +// Two inserts with same (date, page): +{ date: "2025-01-01", page: "/home", views: 100, clicks: 5 } +{ date: "2025-01-01", page: "/home", views: 150, clicks: 8 } + +// After merge: +{ date: "2025-01-01", page: "/home", views: 250, clicks: 13 } +``` + +## Specifying Columns to Sum + +By default, all numeric columns are summed. You can specify which columns to sum: + + + +```typescript +const metrics = new OlapTable("daily_metrics", { + engine: ClickHouseEngines.SummingMergeTree, + orderByFields: ["date", "page"], + columns: ["views", "clicks"] // Only sum these columns +}); +``` + + +```python +metrics = OlapTable[DailyMetrics]("daily_metrics", OlapConfig( + engine=SummingMergeTreeEngine(columns=["views", "clicks"]), + order_by_fields=["date", "page"] +)) +``` + + + + +SummingMergeTree only supports summation. For other aggregations (max, min, count distinct), use [AggregatingMergeTree](/moosestack/engines/aggregating-merge-tree) instead. + + +## SummingMergeTree vs AggregatingMergeTree + +| Feature | SummingMergeTree | AggregatingMergeTree | +|---------|------------------|----------------------| +| Aggregations | SUM only | Any (sum, max, min, etc.) | +| Setup | Simpler—just use numeric columns | Requires `SimpleAggregateFunction` types | +| Use case | Simple counters | Complex rollups | + +## See Also + +- [AggregatingMergeTree](/moosestack/engines/aggregating-merge-tree) — For multiple aggregate functions +- [Replicated Engines](/moosestack/engines/replicated) — For high availability + diff --git a/apps/framework-docs-v2/content/moosestack/olap/supported-types.mdx b/apps/framework-docs-v2/content/moosestack/olap/supported-types.mdx deleted file mode 100644 index f35b5d1b15..0000000000 --- a/apps/framework-docs-v2/content/moosestack/olap/supported-types.mdx +++ /dev/null @@ -1,862 +0,0 @@ ---- -title: Supported Column Types -description: Complete guide to defining columns for ClickHouse tables in Moose -order: 4 -category: olap ---- - -import { Callout, LanguageTabs, LanguageTabContent } from "@/components/mdx"; - -# Supported Column Types - -Moose supports a comprehensive set of ClickHouse column types across both TypeScript and Python libraries. This guide covers all supported types, their syntax, and best practices for defining table schemas. 
- -## Basic Types - -### String Types - - - -```typescript -interface User { - string: string; // String - lowCardinality: string & LowCardinality; // LowCardinality(String) - uuid: string & tags.Format<"uuid">; // UUID (with typia tags) -} -``` - -| ClickHouse Type | TypeScript | Description | -|------|------------|--------| -| `String` | `string` | Variable-length string | -| `LowCardinality(String)` | `string & LowCardinality` | Optimized for repeated values | -| `UUID` | `string & tags.Format<"uuid">` | UUID format strings | - - -```python -from typing import Literal -from uuid import UUID - -class User(BaseModel): - string: str # String - low_cardinality: Annotated[str, "LowCardinality"] # LowCardinality(String) - uuid: UUID # UUID -``` - -| ClickHouse Type | Python | Description | -|------|------------|--------| -| `String` | `str` | Variable-length string | -| `LowCardinality(String)` | `str` with `Literal[str]` | Optimized for repeated values | -| `UUID` | `UUID` | UUID format strings | - - - -### Numeric Types - -#### Integer Types - - - -```typescript -import { Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64 } from "@514labs/moose-lib"; - -interface Metrics { - user_id: Int32; // Int32 - count: UInt64; // UInt64 - small_value: Int8; // Int8 -} - -// Alternative: You can still use the verbose syntax if preferred -interface MetricsVerbose { - user_id: number & ClickHouseInt<"int32">; - count: number & ClickHouseInt<"uint64">; - small_value: number & ClickHouseInt<"int8">; -} -``` - -| ClickHouse Type | TypeScript (New Helper) | TypeScript (Verbose) | Description | -|------|------------|------------|--------| -| `Int8` | `Int8` | `number & ClickHouseInt<"int8">` | -128 to 127 | -| `Int16` | `Int16` | `number & ClickHouseInt<"int16">` | -32,768 to 32,767 | -| `Int32` | `Int32` | `number & ClickHouseInt<"int32">` | -2,147,483,648 to 2,147,483,647 | -| `Int64` | `Int64` | `number & ClickHouseInt<"int64">` | -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 | -| `UInt8` | `UInt8` | `number & ClickHouseInt<"uint8">` | 0 to 255 | -| `UInt16` | `UInt16` | `number & ClickHouseInt<"uint16">` | 0 to 65,535 | -| `UInt32` | `UInt32` | `number & ClickHouseInt<"uint32">` | 0 to 4,294,967,295 | -| `UInt64` | `UInt64` | `number & ClickHouseInt<"uint64">` | 0 to 18,446,744,073,709,551,615 | - - -```python -from typing import Annotated - -class Metrics(BaseModel): - user_id: Annotated[int, "int32"] # Int32 - count: Annotated[int, "int64"] # Int64 - small_value: Annotated[int, "uint8"] # UInt8 -``` - -| ClickHouse Type | Python | Description | -|------|------------|--------| -| `Int8` | `Annotated[int, "int8"]` | -128 to 127 | -| `Int16` | `Annotated[int, "int16"]` | -32,768 to 32,767 | -| `Int32` | `Annotated[int, "int32"]` | -2,147,483,648 to 2,147,483,647 | -| `Int64` | `Annotated[int, "int64"]` | -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 | -| `UInt8` | `Annotated[int, "uint8"]` | 0 to 255 | -| `UInt16` | `Annotated[int, "uint16"]` | 0 to 65,535 | -| `UInt32` | `Annotated[int, "uint32"]` | 0 to 4,294,967,295 | -| `UInt64` | `Annotated[int, "uint64"]` | 0 to 18,446,744,073,709,551,615 | - - - -### Floating Point Types - - - -```typescript -import { Float32, Float64 } from "@514labs/moose-lib"; -import { tags } from "typia"; - -interface SensorData { - temperature: Float32; // Float32 - humidity: Float64; // Float64 - pressure: number; // Default Float64 -} - -// Alternative: You can still use the verbose syntax if preferred -interface SensorDataVerbose { - temperature: 
number & tags.Type<"float">; // Float32 - humidity: number; // Float64 -} -``` - -| ClickHouse Type | TypeScript (New Helper) | TypeScript (Verbose) | Description | -|-----------------|------------|------------|---------------------| -| `Float32` | `Float32` | `number & tags.Type<"float">` | 32-bit floating point | -| `Float64` | `Float64` or `number` | `number` | 64-bit floating point (default) | - - -```python -from moose_lib import ClickhouseSize - -class SensorData(BaseModel): - temperature: float # Float64 - humidity: Annotated[float, ClickhouseSize(4)] # Float32 -``` - -| ClickHouse Type | Python | Description | -|------|------------|--------| -| `Float64` | `float` | floating point number | - - - -### Decimal Types - - - -```typescript -import { Decimal, ClickHouseDecimal } from "@514labs/moose-lib"; - -interface FinancialData { - amount: Decimal<10, 2>; // Decimal(10,2) - rate: Decimal<5, 4>; // Decimal(5,4) - fee: Decimal<8, 3>; // Decimal(8,3) -} - -// Alternative: You can still use the verbose syntax if preferred -interface FinancialDataVerbose { - amount: string & ClickHouseDecimal<10, 2>; // Decimal(10,2) - rate: string & ClickHouseDecimal<5, 4>; // Decimal(5,4) -} -``` - -| ClickHouse Type | TypeScript (New Helper) | TypeScript (Verbose) | Description | -|------|------------|------------|---------------------| -| `Decimal(P,S)` | `Decimal` | `string & ClickHouseDecimal` | Fixed-point decimal with P total digits, S decimal places | - - -```python -from moose_lib import clickhouse_decimal - -class FinancialData(BaseModel): - amount: clickhouse_decimal(10, 2) # Decimal(10,2) - rate: clickhouse_decimal(5, 4) # Decimal(5,4) -``` - -| ClickHouse Type | Python | Description | -|------|------------|--------| -| `Decimal(P,S)` | `clickhouse_decimal(P,S)` | Fixed-point decimal | - - - -### Boolean Type - - - -```typescript -interface User { - is_active: boolean; - verified: boolean; -} -``` - -| ClickHouse Type | TypeScript | Description | -|------|------------|--------| -| `Boolean` | `boolean` | `boolean` | - - -```python -class User(BaseModel): - is_active: bool - verified: bool -``` - -| ClickHouse Type | Python | Description | -|------|------------|--------| -| `Boolean` | `bool` | `bool` | - - - -### Date and Time Types - - - -```typescript -import { DateTime, DateTime64, ClickHousePrecision } from "@514labs/moose-lib"; - -interface Event { - created_at: DateTime; // DateTime - updated_at: DateTime64<3>; // DateTime(3) - logged_at: DateTime64<6>; // DateTime(6) - microsecond precision - birth_date: Date; // Date -} - -// Alternative: You can still use the verbose syntax if preferred -interface EventVerbose { - created_at: Date; // DateTime - updated_at: Date & ClickHousePrecision<3>; // DateTime(3) - birth_date: Date; // Date -} -``` - -| ClickHouse Type | TypeScript (New Helper) | TypeScript (Verbose) | Description | -|-----------------|------------|------------|-------------| -| `Date` | `Date` | `Date` | Date only | -| `Date16` | `Date` | `Date` | Compact date format | -| `DateTime` | `DateTime` | `Date` | Date and time | -| `DateTime(P)` | `DateTime64
<P>` | `Date & ClickHousePrecision<P>
    ` | DateTime with precision (P=0-9) | - - -```python -from datetime import date, datetime -from moose_lib import ClickhouseSize, clickhouse_datetime64 - -class Event(BaseModel): - created_at: datetime # DateTime - updated_at: clickhouse_datetime64(3) # DateTime(3) - birth_date: date # Date - compact_date: Annotated[date, ClickhouseSize(2)] # Date16 -``` - -| ClickHouse Type | Python | Description | -|------|------------|--------| -| `Date` | `date` | Date only | -| `Date16` | `date` | `Annotated[date, ClickhouseSize(2)]` | Compact date format | -| `DateTime` | `datetime` | Date and time | - - - -### Network Types - - - -```typescript -import { tags } from "typia"; - -interface NetworkEvent { - source_ip: string & tags.Format<"ipv4">; - dest_ip: string & tags.Format<"ipv6">; -} -``` - -| ClickHouse Type | TypeScript | Description | -|------|------------|--------| -| `IPv4` | `string & tags.Format<"ipv4">` | IPv4 addresses | -| `IPv6` | `string & tags.Format<"ipv6">` | IPv6 addresses | - - -```python -from ipaddress import IPv4Address, IPv6Address - -class NetworkEvent(BaseModel): - source_ip: IPv4Address - dest_ip: IPv6Address -``` - -| ClickHouse Type | Python | Description | -|------|------------|--------| -| `IPv4` | `ipaddress.IPv4Address` | IPv4 addresses | -| `IPv6` | `ipaddress.IPv6Address` | IPv6 addresses | - - - -## Complex Types - -### Geometry Types - -Moose supports ClickHouse geometry types. Use the helpers in each language to get type-safe models and correct ClickHouse mappings. - - - -```typescript -import { - ClickHousePoint, - ClickHouseRing, - ClickHouseLineString, - ClickHouseMultiLineString, - ClickHousePolygon, - ClickHouseMultiPolygon, -} from "@514labs/moose-lib"; - -interface GeoTypes { - point: ClickHousePoint; // Point → [number, number] - ring: ClickHouseRing; // Ring → Array<[number, number]> - lineString: ClickHouseLineString; // LineString → Array<[number, number]> - multiLineString: ClickHouseMultiLineString; // MultiLineString → Array> - polygon: ClickHousePolygon; // Polygon → Array> - multiPolygon: ClickHouseMultiPolygon; // MultiPolygon → Array>> -} -``` - -| ClickHouse Type | TypeScript | -|------|------------| -| `Point` | `ClickHousePoint` | -| `Ring` | `ClickHouseRing` | -| `LineString` | `ClickHouseLineString` | -| `MultiLineString` | `ClickHouseMultiLineString` | -| `Polygon` | `ClickHousePolygon` | -| `MultiPolygon` | `ClickHouseMultiPolygon` | - - -```python -from moose_lib import Point, Ring, LineString, MultiLineString, Polygon, MultiPolygon - -class GeoTypes(BaseModel): - point: Point # tuple[float, float] - ring: Ring # list[tuple[float, float]] - line_string: LineString # list[tuple[float, float]] - multi_line_string: MultiLineString # list[list[tuple[float, float]]] - polygon: Polygon # list[list[tuple[float, float]]] - multi_polygon: MultiPolygon # list[list[list[tuple[float, float]]]] -``` - -| ClickHouse Type | Python | -|------|------------| -| `Point` | `Point` (tuple[float, float]) | -| `Ring` | `Ring` (list[tuple[float, float]]) | -| `LineString` | `LineString` (list[tuple[float, float]]) | -| `MultiLineString` | `MultiLineString` (list[list[tuple[float, float]]]) | -| `Polygon` | `Polygon` (list[list[tuple[float, float]]]) | -| `MultiPolygon` | `MultiPolygon` (list[list[list[tuple[float, float]]]]) | - - - - -Geometry coordinates are represented as numeric pairs `[x, y]` (TypeScript) or `tuple[float, float]` (Python). - - -### Array Types - -Arrays are supported for all basic types and some complex types. 
- - - -```typescript -interface User { - tags: string[]; // Array(String) - scores: number[]; // Array(Float64) - metadata: Record[]; // Array(Json) - tuple: { - name: string; - age: number; - } & ClickHouseNamedTuple[]; // Array(Tuple(String, Int32)) -} -``` - - -```python -from typing import List, Dict, Any - -class User(BaseModel): - tags: List[str] # Array(String) - scores: List[float] # Array(Float64) - metadata: List[Dict[str, Any]] # Array(Json) - tuple: List[Tuple[str, int]] # Array(Tuple(String, Int32)) -``` - - - -### Map Types - -Maps store key-value pairs with specified key and value types. - - - -```typescript -interface User { - preferences: Record; // Map(String, String) - metrics: Record; // Map(String, Float64) -} -``` - - -```python -from typing import Dict - -class User(BaseModel): - preferences: Dict[str, str] # Map(String, String) - metrics: Dict[str, float] # Map(String, Float64) -``` - - - -### Nested Types - -Nested types allow embedding complex objects within tables. - - - -```typescript -interface Address { - street: string; - city: string; - zip: string; -} - -interface User { - name: string; - address: Address; // Nested type -} -``` - - -```python -class Address(BaseModel): - street: str - city: str - zip: str - -class User(BaseModel): - name: str - address: Address # Nested type -``` - - - -### Named Tuple Types - -Named tuples provide structured data with named fields. - - - -```typescript -import { ClickHouseNamedTuple } from "@514labs/moose-lib"; - -interface Point { - x: number; - y: number; -} - -interface Shape { - center: Point & ClickHouseNamedTuple; // Named tuple - radius: number; -} -``` - - -```python -from typing import Annotated - -class Point(BaseModel): - x: float - y: float - -class Shape(BaseModel): - center: Annotated[Point, "ClickHouseNamedTuple"] # Named tuple - radius: float -``` - - - -### Enum Types - -Enums map to ClickHouse enums with string or integer values. - - - -```typescript -enum UserRole { - ADMIN = "admin", - USER = "user", - GUEST = "guest" -} - -interface User { - role: UserRole; // Enum with string values -} -``` - - -```python -from enum import Enum - -class UserRole(str, Enum): - ADMIN = "admin" - USER = "user" - GUEST = "guest" - -class User(BaseModel): - role: UserRole # Enum with string values -``` - - - -## Special Types - -### JSON Type - -The `Json` type stores arbitrary JSON data with optional schema configuration for performance and type safety. - -#### Basic JSON (Unstructured) - -For completely dynamic JSON data without any schema: - - - -```typescript -interface Event { - metadata: Record; // Basic JSON - accepts any structure - config: any; // Basic JSON - fully dynamic -} -``` - - -```python -from typing import Any, Dict - -class Event(BaseModel): - metadata: Dict[str, Any] # Basic JSON - accepts any structure - config: Any # Basic JSON - fully dynamic -``` - - - -#### Rich JSON with Type Configuration - -For better performance and validation, you can define typed fields within your JSON using `ClickHouseJson`. This creates a ClickHouse `JSON` column with explicit type hints for specific paths. 
- - - -```typescript -import { ClickHouseJson } from "@514labs/moose-lib"; - -// Define the structure for your JSON payload -interface PayloadStructure { - name: string; - count: number; - timestamp?: Date; -} - -interface Event { - id: string; - // JSON with typed paths - better performance, allows extra fields - payload: PayloadStructure & ClickHouseJson; - - // JSON with performance tuning options - metadata: PayloadStructure & ClickHouseJson< - 256, // max_dynamic_paths: limit tracked paths (default: no limit) - 16, // max_dynamic_types: limit type variations (default: no limit) - ["skip.me"], // skip_paths: exclude specific paths - ["^tmp\\."] // skip_regexps: exclude paths matching regex - >; -} -``` - - -```python -from typing import Annotated -from pydantic import BaseModel, ConfigDict -from moose_lib.data_models import ClickHouseJson - -# Define the structure for your JSON payload -class PayloadStructure(BaseModel): - model_config = ConfigDict(extra='allow') # Required for JSON types - name: str - count: int - timestamp: Optional[datetime] = None - -class Event(BaseModel): - id: str - # JSON with typed paths - better performance, allows extra fields - payload: Annotated[PayloadStructure, ClickHouseJson()] - - # JSON with performance tuning options - metadata: Annotated[PayloadStructure, ClickHouseJson( - max_dynamic_paths=256, # Limit tracked paths - max_dynamic_types=16, # Limit type variations - skip_paths=("skip.me",), # Exclude specific paths - skip_regexes=(r"^tmp\\.",) # Exclude paths matching regex - )] -``` - - - -#### Configuration Options - -| Option | Type | Description | -|--------|------|-------------| -| `max_dynamic_paths` | `number` | Maximum number of unique JSON paths to track. Helps control memory usage for highly variable JSON structures. | -| `max_dynamic_types` | `number` | Maximum number of type variations allowed per path. Useful when paths may contain different types. | -| `skip_paths` | `string[]` | Array of exact JSON paths to ignore during ingestion (e.g., `["temp", "debug.info"]`). | -| `skip_regexps` | `string[]` | Array of regex patterns for paths to exclude (e.g., `["^tmp\\.", ".*_internal$"]`). | - -#### Benefits of Typed JSON - -1. **Better Performance**: ClickHouse can optimize storage and queries for known paths -2. **Type Safety**: Validates that specified paths match expected types -3. **Flexible Schema**: Allows additional fields beyond typed paths -4. 
**Memory Control**: Configure limits to prevent unbounded resource usage - - -- **Basic JSON** (`any`, `Dict[str, Any]`): Use when JSON structure is completely unknown or rarely queried -- **Rich JSON** (`ClickHouseJson`): Use when you have known fields that need indexing/querying, but want to allow additional dynamic fields - - -#### Example: Product Event Tracking - - - -```typescript -import { ClickHouseJson, Key } from "@514labs/moose-lib"; - -interface ProductProperties { - category: string; - price: number; - inStock: boolean; -} - -interface ProductEvent { - eventId: Key; - timestamp: DateTime; - // Typed paths for common fields, but allows custom properties - properties: ProductProperties & ClickHouseJson< - 128, // Track up to 128 unique paths - 8, // Allow up to 8 type variations per path - ["_internal"], // Ignore internal fields - ["^debug_"] // Ignore debug fields - >; -} -``` - -With this schema, you can send events like: -```json -{ - "eventId": "evt_123", - "timestamp": "2025-10-22T12:00:00Z", - "properties": { - "category": "electronics", // Typed field ✓ - "price": 99.99, // Typed field ✓ - "inStock": true, // Typed field ✓ - "customTag": "holiday-sale", // Extra field - accepted ✓ - "brandId": 42, // Extra field - accepted ✓ - "_internal": "ignored" // Skipped by skip_paths ✓ - } -} -``` - - -```python -from typing import Annotated, Optional -from pydantic import BaseModel, ConfigDict -from moose_lib import Key, ClickHouseJson -from datetime import datetime - -class ProductProperties(BaseModel): - model_config = ConfigDict(extra='allow') - category: str - price: float - in_stock: bool - -class ProductEvent(BaseModel): - event_id: Key[str] - timestamp: datetime - # Typed paths for common fields, but allows custom properties - properties: Annotated[ProductProperties, ClickHouseJson( - max_dynamic_paths=128, # Track up to 128 unique paths - max_dynamic_types=8, # Allow up to 8 type variations per path - skip_paths=("_internal",), # Ignore internal fields - skip_regexes=(r"^debug_",) # Ignore debug fields - )] -``` - -With this schema, you can send events like: -```python -{ - "event_id": "evt_123", - "timestamp": "2025-10-22T12:00:00Z", - "properties": { - "category": "electronics", # Typed field ✓ - "price": 99.99, # Typed field ✓ - "in_stock": True, # Typed field ✓ - "custom_tag": "holiday-sale", # Extra field - accepted ✓ - "brand_id": 42, # Extra field - accepted ✓ - "_internal": "ignored" # Skipped by skip_paths ✓ - } -} -``` - - - -### Nullable Types - -All types support nullable variants using optional types. - - - -```typescript -interface User { - name: string; // Required - email?: string; // Nullable - age?: number; // Nullable -} -``` - - -```python -from typing import Optional - -class User(BaseModel): - name: str # Required - email: Optional[str] = None # Nullable - age: Optional[int] = None # Nullable -``` - - - - -If a field is optional in your app model but you provide a ClickHouse default, Moose infers a non-nullable ClickHouse column with a DEFAULT clause. - - - - - Optional without default (e.g., `field?: number`) → ClickHouse Nullable type. - - Optional with default (e.g., `field?: number & ClickHouseDefault<"18">` or `WithDefault`) → non-nullable column with default `18`. - - - - Optional without default → ClickHouse Nullable type. - - Optional with default (using `clickhouse_default("18")` in annotations) → non-nullable column with default `18`. 
- - - -This lets you keep optional fields at the application layer while avoiding Nullable columns in ClickHouse when a server-side default exists. - - -### SimpleAggregateFunction - -`SimpleAggregateFunction` is designed for use with `AggregatingMergeTree` tables. It stores pre-aggregated values that are automatically merged when ClickHouse combines rows with the same primary key. - - - -```typescript -import { SimpleAggregated, OlapTable, ClickHouseEngines, Key, DateTime } from "@514labs/moose-lib"; - -interface DailyStats { - date: DateTime; - userId: string; - totalViews: number & SimpleAggregated<"sum", number>; - maxScore: number & SimpleAggregated<"max", number>; - lastSeen: DateTime & SimpleAggregated<"anyLast", DateTime>; -} - -const statsTable = new OlapTable("daily_stats", { - engine: ClickHouseEngines.AggregatingMergeTree, - orderByFields: ["date", "userId"], -}); -``` - - -```python -from moose_lib import simple_aggregated, Key, OlapTable, OlapConfig, AggregatingMergeTreeEngine -from pydantic import BaseModel -from datetime import datetime - -class DailyStats(BaseModel): - date: datetime - user_id: str - total_views: simple_aggregated('sum', int) - max_score: simple_aggregated('max', float) - last_seen: simple_aggregated('anyLast', datetime) - -stats_table = OlapTable[DailyStats]( - "daily_stats", - OlapConfig( - engine=AggregatingMergeTreeEngine(), - order_by_fields=["date", "user_id"] - ) -) -``` - - - -See [ClickHouse docs](https://clickhouse.com/docs/en/sql-reference/data-types/simpleaggregatefunction) for the complete list of functions. - -## Table Engines - -Moose supports all common ClickHouse table engines: - -| Engine | Python | Description | -|--------|------------|-------------| -| `MergeTree` | `ClickHouseEngines.MergeTree` | Default engine | -| `ReplacingMergeTree` | `ClickHouseEngines.ReplacingMergeTree` | Deduplication | -| `SummingMergeTree` | `ClickHouseEngines.SummingMergeTree` | Aggregates numeric columns | -| `AggregatingMergeTree` | `ClickHouseEngines.AggregatingMergeTree` | Advanced aggregation | -| `ReplicatedMergeTree` | `ClickHouseEngines.ReplicatedMergeTree` | Replicated version of MergeTree | -| `ReplicatedReplacingMergeTree` | `ClickHouseEngines.ReplicatedReplacingMergeTree` | Replicated with deduplication | -| `ReplicatedSummingMergeTree` | `ClickHouseEngines.ReplicatedSummingMergeTree` | Replicated with aggregation | -| `ReplicatedAggregatingMergeTree` | `ClickHouseEngines.ReplicatedAggregatingMergeTree` | Replicated with advanced aggregation | - - - -```typescript -import { ClickHouseEngines } from "@514labs/moose-lib"; - -const userTable = new OlapTable("users", { - engine: ClickHouseEngines.ReplacingMergeTree, - orderByFields: ["id", "updated_at"] -}); -``` - - -```python -from moose_lib import ClickHouseEngines - -user_table = OlapTable("users", { - "engine": ClickHouseEngines.ReplacingMergeTree, - "orderByFields": ["id", "updated_at"] -}) -``` - - - -## Best Practices - -### Type Selection - -- **Use specific integer types** when you know the value ranges to save storage -- **Prefer `Float64`** for most floating-point calculations unless storage is critical -- **Use `LowCardinality`** for string columns with repeated values -- **Choose appropriate DateTime precision** based on your accuracy needs - -### Performance Considerations - -- **Order columns by cardinality** (low to high) for better compression -- **Use `ReplacingMergeTree`** for tables with frequent updates -- **Specify `orderByFields` or `orderByExpression`** for optimal query 
performance -- **Consider `LowCardinality`** for string columns with < 10,000 unique values diff --git a/apps/framework-docs-v2/guides-specs/GUIDE-CONTENT-MODEL.md b/apps/framework-docs-v2/guides-specs/GUIDE-CONTENT-MODEL.md new file mode 100644 index 0000000000..47e5fa9932 --- /dev/null +++ b/apps/framework-docs-v2/guides-specs/GUIDE-CONTENT-MODEL.md @@ -0,0 +1,614 @@ +# Guide Content Model - API Specification + +**Status**: API defined, implementation pending + +This document defines the developer-facing API for technology-variant guides. + +**Capabilities:** +- Render guides with technology-variant content +- Export entire guide to Linear as a project with issues +- Export individual steps as agent prompts for coding assistants + +--- + +## Developer Experience + +Authors write MDX naturally, defining dimensions in frontmatter and using conditional components inline: + +## Quick Start + +```mdx +--- +title: Set Up Your Database +techSelector: + - dimension: oltp + label: Database + options: + - { value: postgresql, label: PostgreSQL, default: true } + - { value: mysql, label: MySQL } +--- + +import { TechContextProvider, TechSelector, When, Steps, Step } from "@514labs/design-system-components/guides"; + + + + + + + + + + + +Run this SQL command: + +```sql +SHOW wal_level; +``` + + + + + +Check your MySQL config: + +```sql +SHOW VARIABLES LIKE 'log_bin'; +``` + + + + + + + +Run the dev server: + +```bash +moose dev +``` + + + + + + +``` + +Step numbers are assigned automatically based on render order. Conditional steps are numbered correctly—if a step is hidden, subsequent steps renumber. + +--- + +## Frontmatter Config + +Define your dimensions in YAML frontmatter: + +```yaml +--- +title: My Guide +description: Guide description +techSelector: + - dimension: oltp + label: Source Database + options: + - { value: postgresql, label: PostgreSQL, default: true } + - { value: mysql, label: MySQL } + - dimension: language + label: Language + options: + - { value: typescript, label: TypeScript, default: true } + - { value: python, label: Python } +--- +``` + +Dimension names are **open-ended strings**. Use any name relevant to your guide: + +| Common Dimensions | Custom Examples | +|-------------------|-----------------| +| `language`, `oltp`, `olap`, `streaming`, `orm`, `deployment`, `cloud`, `packageManager` | `authProvider`, `paymentGateway`, `ciPlatform`, `containerRuntime` | + +--- + +## Conditional Components + +### `` - Show content for a specific value + +```mdx + + +PostgreSQL uses WAL (Write-Ahead Logging) for replication. + +```sql +ALTER SYSTEM SET wal_level = logical; +``` + + +``` + +### `` with multiple values + +```mdx + + +Install the npm package: + +```bash +npm install @514labs/moose-lib +``` + + +``` + +### `` - Show content when condition is NOT met + +```mdx + + +Since you're using an ORM, you can reuse your existing models. + + +``` + +### `` / `` - Mutually exclusive content + +When every option has distinct content: + +```mdx + + + +## PostgreSQL Setup + +Enable logical replication in `postgresql.conf`: + +```properties +wal_level = logical +``` + + + + +## MySQL Setup + +Enable binary logging in `my.cnf`: + +```properties +log_bin = mysql-bin +binlog_format = ROW +``` + + + +``` + +### `` - Inline dynamic text + +Insert the user's current selection: + +```mdx +Now that you've configured your database, +you can start streaming changes to ClickHouse. +``` + +Renders as: "Now that you've configured your **PostgreSQL** database..." 
+ +Custom labels: + +```mdx + +``` + +### `` - Complex predicates + +For AND/OR/NOT logic: + +```mdx + + +Drizzle with TypeScript setup... + + + + + +ORM-specific instructions... + + + +This section only applies to cloud deployments.

    } +> + +Cloud deployment instructions... + + +``` + +--- + +## Complete Example + +```mdx +--- +title: Stream Data from Your Database with Debezium +description: Mirror your database to ClickHouse in real-time. +techSelector: + - dimension: oltp + label: Source Database + options: + - { value: postgresql, label: PostgreSQL, default: true } + - { value: mysql, label: MySQL } + - dimension: orm + label: Schema Source + options: + - { value: none, label: Generate from DB, default: true } + - { value: drizzle, label: Drizzle ORM } + - { value: prisma, label: Prisma } +--- + +import { + TechContextProvider, + TechSelector, + Steps, + Step, + When, + NotWhen, + TechSwitch, + TechCase, + TechRef +} from "@514labs/design-system-components/guides"; + + + + + +# Stream Data from Your Database with Debezium + +This guide shows you how to stream changes from your +database to ClickHouse in real-time. + + + + + +Copy the environment file and set your database credentials: + +```bash +cp .env.example .env.dev +``` + + + + +```properties +DB_HOST=your_postgres_host +DB_PORT=5432 +CDC_TOPIC_PREFIX=pg-cdc +``` + + + + +```properties +DB_HOST=your_mysql_host +DB_PORT=3306 +CDC_TOPIC_PREFIX=mysql-cdc +``` + + + + + + + + + + +Debezium needs PostgreSQL's logical replication. Check it's enabled: + +```sql +SHOW wal_level; +``` + +It must be `logical`. If not, update `postgresql.conf` and restart. + +Create a replication user: + +```sql +CREATE USER cdc_user WITH PASSWORD 'secure_password'; +ALTER USER cdc_user WITH REPLICATION; +GRANT SELECT ON ALL TABLES IN SCHEMA public TO cdc_user; +``` + + + + + +Debezium needs MySQL's binary logging. Check it's enabled: + +```sql +SHOW VARIABLES LIKE 'log_bin'; +``` + +It must be `ON`. If not, update `my.cnf`: + +```properties +[mysqld] +server-id=1 +log_bin=mysql-bin +binlog_format=ROW +``` + +Create a CDC user: + +```sql +CREATE USER 'cdc_user'@'%' IDENTIFIED BY 'secure_password'; +GRANT SELECT, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'cdc_user'@'%'; +``` + + + + + + + + + +Since you're using , reuse your existing models: + + + + +```typescript +import { customerAddresses } from "./schema"; + +export type CustomerAddress = typeof customerAddresses.$inferSelect; +``` + + + + +```typescript +import type { CustomerAddress } from "@prisma/client"; + +export type { CustomerAddress }; +``` + + + + + + + + +Generate TypeScript types from your database: + + + + +```bash +npx kanel --connectionString $DATABASE_URL --output ./generated +``` + + + + +```bash +npx mysql-schema-ts mysql://user:pass@localhost/db --output ./generated +``` + + + + + + + + + + +## Verification + +Any change in your table will now appear in ClickHouse: + +```bash +moose query "SELECT * FROM customer_addresses" +``` + + +``` + +--- + +## Export Capabilities + +### Step Metadata for Export + +Each step can include metadata for Linear and agent exports: + +```tsx + + +...step content... 
+ + +``` + +### Export to Linear + +Export the entire guide as a Linear project: + +```tsx +// In guide frontmatter +--- +project: + name: "CDC Pipeline Setup" + team: "Platform" + priority: 2 + labels: ["infrastructure", "q1-2024"] +--- +``` + +UI provides: +- "Export to Linear" button on guide page +- Creates project with issues for each step +- Acceptance criteria become issue checklists +- Dependencies map to issue links + +### Export as Agent Prompt + +Each step has a "Copy as Prompt" button that generates: + +```markdown +## Goal + +Set up PostgreSQL connection with Drizzle ORM + +## Files + +- `src/db/index.ts` +- `src/db/schema.ts` +- `.env` + +## Instructions + +[Step content rendered as markdown] + +## Commands + +```bash +pnpm db:generate +pnpm db:migrate +``` + +## Expected Outcome + +Database tables are created and queryable + +## Avoid + +- Don't commit .env file +- Don't use raw SQL +``` + +### Programmatic Export + +```tsx +import { + stepToLinearIssue, + stepToAgentPrompt, + stepsToLinearProject, + copyStepAsAgentPrompt +} from "@514labs/design-system-components/guides"; + +// Single step → Linear issue +const issue = stepToLinearIssue(stepData); + +// Single step → Agent prompt +const prompt = stepToAgentPrompt(stepData); + +// All steps → Linear project +const project = stepsToLinearProject(projectMeta, allSteps); + +// Copy to clipboard +await copyStepAsAgentPrompt(stepData); +``` + +--- + +## Best Practices + +**Keep variations minimal.** Most content should be unconditional. Only wrap the parts that genuinely differ. + +**Use `` for inline names.** Instead of `PostgreSQLMySQL`, just use ``. + +**Test all combinations.** Before publishing, cycle through each option and verify the content makes sense. + +**Nest markdown naturally.** The components work with standard markdown—code blocks, headers, lists all work inside conditionals. 
+ +--- + +## Implementation Checklist + +### TechContextProvider +- [ ] Parse config from `frontmatter.techSelector` or `config` prop +- [ ] Initialize state with defaults from config +- [ ] Persist to localStorage when `storageKey` provided +- [ ] Hydrate from localStorage on mount (avoid SSR mismatch) +- [ ] Expose context via React Context + +### TechSelector +- [ ] Render dropdown for each dimension +- [ ] Update context on selection change +- [ ] Style: filter bar aesthetic, responsive + +### Conditional / When / NotWhen +- [ ] Evaluate predicates (`equals`, `oneOf`, `and`, `or`, `not`) +- [ ] Show/hide children based on evaluation +- [ ] Support `fallback` prop + +### TechSwitch / TechCase +- [ ] Match current dimension value to case +- [ ] Render matching case's children +- [ ] Support fallback when no match + +### TechRef +- [ ] Get current value for dimension +- [ ] Apply custom labels if provided +- [ ] Render inline (no wrapper element) + +### Steps / Step +- [ ] Track rendered steps in order +- [ ] Assign sequential numbers (skip hidden conditional steps) +- [ ] Style: number badge, title, content layout +- [ ] Handle dynamic re-numbering when conditionals change +- [ ] Render export buttons (Linear, Agent Prompt) +- [ ] Extract step content as markdown for export + +### Export - Linear +- [ ] "Export to Linear" button on guide page +- [ ] Convert steps to Linear project JSON +- [ ] Map estimates to story points +- [ ] Map acceptance criteria to checklist markdown +- [ ] Map dependencies to issue links +- [ ] Copy single issue markdown to clipboard + +### Export - Agent Prompt +- [ ] "Copy as Prompt" button on each step +- [ ] Generate structured prompt from step metadata +- [ ] Include files, commands, expected outcome +- [ ] Resolve conditional content based on current tech context +- [ ] Copy to clipboard with success feedback diff --git a/apps/framework-docs-v2/guides-specs/content-model.ts b/apps/framework-docs-v2/guides-specs/content-model.ts new file mode 100644 index 0000000000..297c98a91b --- /dev/null +++ b/apps/framework-docs-v2/guides-specs/content-model.ts @@ -0,0 +1,235 @@ +/** + * Guide Content Model - Type Definitions + * + * These types define the data structures for technology-variant guides. 
+ * Supports:
+ * - MDX rendering with conditional content
+ * - Export to Linear as projects/issues
+ * - Export as coding agent prompts
+ */
+
+// =============================================================================
+// TECHNOLOGY CONTEXT
+// =============================================================================
+
+export type TechDimension = string;
+
+export const CommonDimensions = {
+  language: "language",
+  framework: "framework",
+  scope: "scope",
+  oltp: "oltp",
+  olap: "olap",
+  streaming: "streaming",
+  deployment: "deployment",
+  cloud: "cloud",
+  orm: "orm",
+  packageManager: "packageManager",
+} as const;
+
+export type TechContext = Record<TechDimension, string>;
+
+export type TechPredicate =
+  | { dimension: TechDimension; equals: string }
+  | { dimension: TechDimension; oneOf: string[] }
+  | { and: TechPredicate[] }
+  | { or: TechPredicate[] }
+  | { not: TechPredicate };
+
+// =============================================================================
+// FRONTMATTER CONFIG
+// =============================================================================
+
+export type TechSelectorConfig = {
+  dimensions: TechSelectorDimension[];
+};
+
+export type TechSelectorDimension = {
+  dimension: TechDimension;
+  label: string;
+  options: TechSelectorOption[];
+};
+
+export type TechSelectorOption = {
+  value: string;
+  label: string;
+  default?: boolean;
+};
+
+export type GuideFrontmatter = {
+  title: string;
+  description?: string;
+  techSelector?: TechSelectorDimension[];
+  /** Linear project metadata for export */
+  project?: ProjectMeta;
+  [key: string]: unknown;
+};
+
+// =============================================================================
+// PROJECT / TASK METADATA (for Linear export)
+// =============================================================================
+
+export type ProjectMeta = {
+  /** Linear project name */
+  name: string;
+  /** Project description */
+  description?: string;
+  /** Team or area label */
+  team?: string;
+  /** Priority: 0 (urgent) - 4 (low) */
+  priority?: 0 | 1 | 2 | 3 | 4;
+  /** Labels to apply to all issues */
+  labels?: string[];
+};
+
+export type TaskMeta = {
+  /** Issue title (defaults to step title) */
+  title?: string;
+  /** Detailed description for the issue */
+  description?: string;
+  /** Acceptance criteria as checklist items */
+  acceptanceCriteria?: string[];
+  /** Story points or t-shirt size */
+  estimate?: "xs" | "s" | "m" | "l" | "xl" | number;
+  /** Labels for this specific task */
+  labels?: string[];
+  /** IDs of steps this depends on */
+  dependsOn?: string[];
+  /** Assignee hint (role or person) */
+  assignee?: string;
+};
+
+// =============================================================================
+// AGENT PROMPT METADATA
+// =============================================================================
+
+/**
+ * Agent prompt metadata.
+ *
+ * Most fields are DERIVED from step content:
+ * - goal: defaults to step title
+ * - files: extracted from code blocks with filenames
+ * - commands: extracted from ```bash code blocks
+ * - context: extracted from prose paragraphs
+ *
+ * Only specify fields here to OVERRIDE or ADD to derived values.
+ */ +export type AgentPromptMeta = { + /** Override goal (defaults to step title) */ + goal?: string; + /** Additional files beyond those in code blocks */ + files?: string[]; + /** Additional commands beyond those in code blocks */ + commands?: string[]; + /** Expected outcome description */ + expectedOutcome?: string; + /** Additional context beyond prose content */ + context?: string; + /** Don't do these things */ + avoid?: string[]; +}; + +// ============================================================================= +// STEP PROPS (extended) +// ============================================================================= + +export type StepMeta = { + /** Unique identifier for dependencies */ + id?: string; + /** Step title */ + title: string; + /** Task metadata for Linear export */ + task?: TaskMeta; + /** Agent prompt metadata for coding assistant export */ + agent?: AgentPromptMeta; + /** Condition for showing this step */ + when?: TechPredicate; +}; + +// ============================================================================= +// BLOCK METADATA (for exportable content blocks) +// ============================================================================= + +export type CodeBlockMeta = { + /** Filename to create/modify */ + filename?: string; + /** Language hint */ + language?: string; + /** Description of what this code does */ + description?: string; + /** Is this the complete file or a snippet? */ + complete?: boolean; +}; + +export type CommandBlockMeta = { + /** Shell command(s) */ + command: string | string[]; + /** What this command does */ + description?: string; + /** Working directory hint */ + cwd?: string; + /** Expected output pattern */ + expectedOutput?: string; +}; + +// ============================================================================= +// DERIVED CONTENT (extracted from markdown) +// ============================================================================= + +/** + * Content derived from parsing step markdown. + * Used to auto-populate agent prompts and Linear issues. + */ +export type DerivedStepContent = { + /** Prose paragraphs (non-code content) */ + prose: string[]; + /** Code blocks with metadata */ + codeBlocks: { + language: string; + content: string; + filename?: string; + }[]; + /** Shell commands (from ```bash blocks) */ + commands: string[]; + /** File paths mentioned (from code block filenames or inline `path` refs) */ + files: string[]; + /** Headings within the step */ + headings: string[]; +}; + +// ============================================================================= +// EXPORT HELPERS (types for generated output) +// ============================================================================= + +export type LinearProject = { + name: string; + description?: string; + issues: LinearIssue[]; +}; + +export type LinearIssue = { + title: string; + description: string; + priority?: number; + estimate?: number; + labels?: string[]; + /** Markdown body */ + body: string; +}; + +/** + * Generated agent prompt. + * Combines explicit AgentPromptMeta with DerivedStepContent. 
+ */ +export type AgentPrompt = { + /** One-line goal (from title or override) */ + goal: string; + /** Full prompt text */ + prompt: string; + /** Files mentioned (derived + explicit) */ + files: string[]; + /** Commands mentioned (derived + explicit) */ + commands: string[]; + /** Prose context (derived + explicit) */ + context: string; +}; diff --git a/apps/framework-docs-v2/guides-specs/debezium-guide-example.mdx b/apps/framework-docs-v2/guides-specs/debezium-guide-example.mdx new file mode 100644 index 0000000000..154ae871b2 --- /dev/null +++ b/apps/framework-docs-v2/guides-specs/debezium-guide-example.mdx @@ -0,0 +1,384 @@ +--- +title: Stream Data from Your Database with Debezium +description: Learn how to use the Debezium CDC template to stream data from your database to ClickHouse in real-time. + +techSelector: + - dimension: oltp + label: Source Database + options: + - { value: postgresql, label: PostgreSQL, default: true } + - { value: mysql, label: MySQL } + - dimension: orm + label: Schema Source + options: + - { value: none, label: Generate from DB, default: true } + - { value: drizzle, label: Drizzle ORM } + - { value: prisma, label: Prisma } + +project: + name: CDC Pipeline Setup + description: Set up real-time data streaming from OLTP database to ClickHouse + team: "{{ linear.issueAssignedTeam }}" ## THE USER MUST SET THIS WHEN THEY EXPORT TO LINEAR, THE SELECTION IS INJECTED HERE + priority: "{{ linear.issuePriority }}" ## THE USER MUST SET THIS WHEN THEY EXPORT TO LINEAR, THE SELECTION IS INJECTED HERE + labels: "{{ linear.issueLabels }}" ## THE USER MUST SET THIS WHEN THEY EXPORT TO LINEAR, THE SELECTION IS INJECTED HERE +--- + +import { FileTree } from "@/components/mdx"; +import { + TechContextProvider, + TechSelector, + Steps, + Step, + When, + NotWhen, + TechSwitch, + TechCase, + TechRef, +} from "@514labs/design-system-components/guides"; + + + +# Stream Data from Your Database with Debezium + + + +This guide shows you how to use the [**Debezium CDC Template**](https://github.com/514-labs/debezium-cdc). You will learn how to set up the Debezium connector with your database and mirror your data into ClickHouse in real-time. + +## Architecture Overview + +At a high level, the pipeline works like this: + +```txt +[Your Database] -> Kafka -> ClickHouse +``` + +- **Debezium** acts as the bridge between your database and Kafka. It watches for changes and publishes them to Kafka topics. +- **MooseStack** acts as the bridge between Kafka and ClickHouse. It serves as your "pipeline-as-code" layer where you define your ClickHouse tables, Kafka streams, and transformation logic. + + + + + +Clone the [Debezium CDC Template](https://github.com/514-labs/debezium-cdc) and install dependencies: + +```bash +git clone https://github.com/514-labs/debezium-cdc.git +cd debezium-cdc +pnpm install +``` + + + + + +The template uses environment variables for database passwords and connector settings. 
+ +Copy the environment file: + +```bash +cp .env.example .env.dev +``` + +Open `.env.dev` and configure your database connection: + +```properties filename=".env.dev" +DB_HOST=your_database_host +DB_PORT=your_database_port +DB_NAME=your_database_name +DB_USER=your_database_user +DB_PASSWORD=your_database_password +``` + +Configure CDC settings: + + + + +```properties filename=".env.dev" +CDC_TABLE_INCLUDE_LIST=public.* +CDC_TOPIC_PREFIX=pg-cdc +``` + + + + +```properties filename=".env.dev" +CDC_TABLE_INCLUDE_LIST=mydb.* +CDC_TOPIC_PREFIX=mysql-cdc +``` + + + + + + + + + + +Debezium needs PostgreSQL's logical replication. + +Check `wal_level`: + +```sql +SHOW wal_level; +``` + +It must be `logical`. If not, update `postgresql.conf` and restart Postgres. + +Create a replication user: + +```sql +CREATE USER cdc_user WITH PASSWORD 'secure_password'; +ALTER USER cdc_user WITH REPLICATION; +GRANT USAGE ON SCHEMA public TO cdc_user; +GRANT SELECT ON ALL TABLES IN SCHEMA public TO cdc_user; +``` + + + + + +Debezium needs MySQL's binary logging. + +Check binary logging: + +```sql +SHOW VARIABLES LIKE 'log_bin'; +``` + +It must be `ON`. If not, update `my.cnf`: + +```properties filename="my.cnf" +[mysqld] +server-id=1 +log_bin=mysql-bin +binlog_format=ROW +binlog_row_image=FULL +``` + +Create a CDC user: + +```sql +CREATE USER 'cdc_user'@'%' IDENTIFIED BY 'secure_password'; +GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'cdc_user'@'%'; +``` + + + + + + + +Start the development environment: + +```bash +moose dev +``` + +Check the logs for: +- Infrastructure starting (Redpanda, Kafka Connect, ClickHouse) +- `setup-cdc.ts` running +- `✅ Connector registered!` + + + + + +Import Kafka topic definitions: + +```bash +moose-cli kafka pull localhost:19092 --path cdc-pipeline/1-sources +``` + + + +Export types from your schema: + + + + +```typescript filename="cdc-pipeline/oltp/schema.ts" +import { customerAddresses } from "../../postgres/src/schema"; + +export type CustomerAddress = typeof customerAddresses.$inferSelect; +``` + + + + +```typescript filename="cdc-pipeline/oltp/schema.ts" +import type { CustomerAddress as PrismaCustomerAddress } from "@prisma/client"; + +export type CustomerAddress = PrismaCustomerAddress; +``` + + + + + + + + +Generate TypeScript types from your database: + + + + +```bash +npx kanel --connectionString $DATABASE_URL --output ./cdc-pipeline/generated-models +``` + + + + +```bash +npx mysql-schema-ts mysql://user:pass@localhost/db --output ./cdc-pipeline/generated-models +``` + + + + + + +Create typed topics in `cdc-pipeline/1-sources/typed-topics.ts`: + +```typescript filename="cdc-pipeline/1-sources/typed-topics.ts" +import { Stream } from "@514labs/moose-lib"; +import { PgCdcPublicCustomerAddressesStream } from "./externalTopics"; +import { GenericCDCEvent } from "../models"; +import { CustomerAddress } from "../../oltp/schema"; + +export const cdcCustomerAddresses = PgCdcPublicCustomerAddressesStream as Stream< + GenericCDCEvent +>; +``` + +Define the OLAP table: + +```typescript filename="cdc-pipeline/3-destinations/olap-tables.ts" +import { OlapTable, ClickHouseEngines, UInt64, UInt8 } from "@514labs/moose-lib"; +import { CustomerAddress } from "../../oltp/schema"; + +export type CdcFields = { + _is_deleted: UInt8; + ts_ms: UInt64; + lsn: UInt64; +}; + +export type OlapCustomerAddress = CustomerAddress & CdcFields; + +export const olapCustomerAddresses = new OlapTable( + "customer_addresses", + { + engine: 
ClickHouseEngines.ReplacingMergeTree, + ver: "lsn", + isDeleted: "_is_deleted", + orderByFields: ["id"], + } +); +``` + +Create the transform: + +```typescript filename="cdc-pipeline/2-transforms/customer-addresses.ts" +import { cdcCustomerAddresses } from "../1-sources/typed-topics"; +import { processedCustomerAddresses } from "../3-destinations/sink-topics"; +import { handleCDCPayload } from "./payload-handler"; +import { GenericCDCEvent, OlapCustomerAddress } from "../models"; +import { CustomerAddress } from "../../oltp/schema"; + +cdcCustomerAddresses.addTransform( + processedCustomerAddresses, + (message: GenericCDCEvent) => { + const result = handleCDCPayload(message); + return result as unknown as OlapCustomerAddress; + } +); +``` + + + + + +## Verification + +Any change in your table will now appear in ClickHouse: + +```bash +moose query "SELECT * FROM customer_addresses" +``` + + diff --git a/apps/framework-docs-v2/guides-specs/index.ts b/apps/framework-docs-v2/guides-specs/index.ts new file mode 100644 index 0000000000..edd26cbe4a --- /dev/null +++ b/apps/framework-docs-v2/guides-specs/index.ts @@ -0,0 +1,7 @@ +/** + * Guide Content Model - Type Definitions + * + * See GUIDE-CONTENT-MODEL.md for full specification. + */ + +export * from "./content-model"; diff --git a/apps/framework-docs-v2/src/config/navigation.ts b/apps/framework-docs-v2/src/config/navigation.ts index bb7c2bffa9..8ded671615 100644 --- a/apps/framework-docs-v2/src/config/navigation.ts +++ b/apps/framework-docs-v2/src/config/navigation.ts @@ -276,9 +276,17 @@ const moosestackNavigationConfig: NavigationConfig = [ }, { type: "page", - slug: "moosestack/olap/supported-types", - title: "Supported Types", + slug: "moosestack/data-types", + title: "Data Types", languages: ["typescript", "python"], + external: true, + }, + { + type: "page", + slug: "moosestack/engines", + title: "Table Engines", + languages: ["typescript", "python"], + external: true, }, { type: "separator" }, { type: "label", title: "External Data & Introspection" }, @@ -643,6 +651,155 @@ const moosestackNavigationConfig: NavigationConfig = [ // Reference section { type: "label", title: "Reference" }, + { + type: "page", + slug: "moosestack/data-types", + title: "Data Types", + icon: IconList, + languages: ["typescript", "python"], + children: [ + { type: "label", title: "Basic Types" }, + { + type: "page", + slug: "moosestack/data-types/strings", + title: "Strings", + languages: ["typescript", "python"], + }, + { + type: "page", + slug: "moosestack/data-types/integers", + title: "Integers", + languages: ["typescript", "python"], + }, + { + type: "page", + slug: "moosestack/data-types/floats", + title: "Floats", + languages: ["typescript", "python"], + }, + { + type: "page", + slug: "moosestack/data-types/decimals", + title: "Decimals", + languages: ["typescript", "python"], + }, + { + type: "page", + slug: "moosestack/data-types/booleans", + title: "Booleans", + languages: ["typescript", "python"], + }, + { + type: "page", + slug: "moosestack/data-types/datetime", + title: "Date & Time", + languages: ["typescript", "python"], + }, + { + type: "page", + slug: "moosestack/data-types/network", + title: "Network", + languages: ["typescript", "python"], + }, + { type: "separator" }, + { type: "label", title: "Complex Types" }, + { + type: "page", + slug: "moosestack/data-types/arrays", + title: "Arrays", + languages: ["typescript", "python"], + }, + { + type: "page", + slug: "moosestack/data-types/maps", + title: "Maps", + languages: ["typescript", 
"python"], + }, + { + type: "page", + slug: "moosestack/data-types/nested", + title: "Nested", + languages: ["typescript", "python"], + }, + { + type: "page", + slug: "moosestack/data-types/tuples", + title: "Tuples", + languages: ["typescript", "python"], + }, + { + type: "page", + slug: "moosestack/data-types/enums", + title: "Enums", + languages: ["typescript", "python"], + }, + { + type: "page", + slug: "moosestack/data-types/geometry", + title: "Geometry", + languages: ["typescript", "python"], + }, + { type: "separator" }, + { type: "label", title: "Special Types" }, + { + type: "page", + slug: "moosestack/data-types/json", + title: "JSON", + languages: ["typescript", "python"], + }, + { + type: "page", + slug: "moosestack/data-types/nullable", + title: "Nullable", + languages: ["typescript", "python"], + }, + { + type: "page", + slug: "moosestack/data-types/aggregates", + title: "Aggregates", + languages: ["typescript", "python"], + }, + ], + }, + { + type: "page", + slug: "moosestack/engines", + title: "Table Engines", + icon: IconDatabase, + languages: ["typescript", "python"], + children: [ + { + type: "page", + slug: "moosestack/engines/merge-tree", + title: "MergeTree", + languages: ["typescript", "python"], + }, + { + type: "page", + slug: "moosestack/engines/replacing-merge-tree", + title: "ReplacingMergeTree", + languages: ["typescript", "python"], + }, + { + type: "page", + slug: "moosestack/engines/aggregating-merge-tree", + title: "AggregatingMergeTree", + languages: ["typescript", "python"], + }, + { + type: "page", + slug: "moosestack/engines/summing-merge-tree", + title: "SummingMergeTree", + languages: ["typescript", "python"], + }, + { + type: "page", + slug: "moosestack/engines/replicated", + title: "Replicated Engines", + languages: ["typescript", "python"], + }, + ], + }, { type: "page", slug: "moosestack/reference",